Added a feature to run the parser without any of the built-in sanity checks (such as loop checks). This improves performance, at the risk of missing some runtime issues in the parser implementation.

This commit is contained in:
Maurice Makaay 2019-06-30 01:05:54 +00:00
parent 7ce12d1632
commit 4b0309453f
4 changed files with 74 additions and 19 deletions

5
Makefile Normal file
View File

@ -0,0 +1,5 @@
# Run the test suite of every sub-package, hiding the bare "PASS" lines.
#
# Each recipe uses "cd <dir> && go test" so that a failing cd aborts the
# recipe instead of running "go test" in the wrong directory.
#
# NOTE(review): the exit status of each pipeline is that of grep, not of
# "go test", so a failing test run does not fail this target; failures are
# only visible in the printed output.
.PHONY: test
test:
	@(cd read && go test | grep -v ^PASS)
	@(cd tokenize && go test | grep -v ^PASS)
	@(cd parse && go test | grep -v ^PASS)
	@(cd examples && go test | grep -v ^PASS)

View File

@ -16,11 +16,24 @@ import (
// //
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle) // • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
type API struct { type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek() result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
loopCheck map[string]bool // used for parser loop detection sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
err error // parse error, retrieved by Error(), using API methods is denied when set loopCheck map[filepos]bool // used for parser loop detection
stopped bool // a boolean set to true by Stop(), using API methods is denied when true err error // parse error, retrieved by Error(), using API methods is denied when set
stopped bool // a boolean set to true by Stop()
}
// DisableSanityChecks disables the built-in parser implementation sanity checks,
// which detects parser implementation errors like loops and continuing parsing
// after an error or invoking Stop().
//
// These tests do cause a performance hit. When your parser has to handle a lot
// of input data and is fairly complex, you might want to disable the sanity
// checks. When you're not sure, You probably don't want to use this method,
// and enjoy the added safety of the built-in checks.
func (p *API) DisableSanityChecks() {
p.sanityChecksEnabled = true
} }
// Peek checks if the upcoming input data matches the provided tokenize.Handler. // Peek checks if the upcoming input data matches the provided tokenize.Handler.
@ -54,7 +67,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
forkedAPI.Merge() forkedAPI.Merge()
p.result = p.tokenAPI.Result() p.result = p.tokenAPI.Result()
forkedAPI.Dispose() forkedAPI.Dispose()
if p.tokenAPI.FlushInput() { if p.sanityChecksEnabled && p.tokenAPI.FlushInput() {
p.initLoopCheck() p.initLoopCheck()
} }
} }
@ -62,10 +75,12 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
} }
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) { func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) {
p.panicWhenStoppedOrInError(name) if p.sanityChecksEnabled {
p.checkForLoops(name) p.panicWhenStoppedOrInError(name)
if tokenHandler == nil { p.checkForLoops(name)
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") if tokenHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
}
} }
p.result = nil p.result = nil
@ -108,7 +123,7 @@ func (p *API) IsStoppedOrInError() bool {
// When Accept() is called, and the parser moved forward in the input data, // When Accept() is called, and the parser moved forward in the input data,
// this method is called to reset the map for the new read cursor position. // this method is called to reset the map for the new read cursor position.
func (p *API) initLoopCheck() { func (p *API) initLoopCheck() {
p.loopCheck = map[string]bool{} p.loopCheck = make(map[filepos]bool)
} }
// checkForLoops checks if the line of code from which Accept() or Peek() // checkForLoops checks if the line of code from which Accept() or Peek()
@ -152,9 +167,13 @@ func (p *API) Result() *tokenize.Result {
// of this method, because it performs some sanity checks and it will return // of this method, because it performs some sanity checks and it will return
// an easy to use boolean indicating whether the parser can continue or not. // an easy to use boolean indicating whether the parser can continue or not.
func (p *API) Handle(parseHandler ...Handler) bool { func (p *API) Handle(parseHandler ...Handler) bool {
p.panicWhenStoppedOrInError("Handle") if p.sanityChecksEnabled {
p.panicWhenStoppedOrInError("Handle")
}
for _, handler := range parseHandler { for _, handler := range parseHandler {
p.panicWhenHandlerNil("Handle", handler) if p.sanityChecksEnabled {
p.panicWhenHandlerNil("Handle", handler)
}
handler(p) handler(p)
if p.IsStoppedOrInError() { if p.IsStoppedOrInError() {
return false return false
@ -204,7 +223,9 @@ func (p *API) Error(format string, data ...interface{}) {
// will be stopped through Stop(). Otherwise, the unexpected input is reported // will be stopped through Stop(). Otherwise, the unexpected input is reported
// using Expected("end of file"). // using Expected("end of file").
func (p *API) ExpectEndOfFile() { func (p *API) ExpectEndOfFile() {
p.panicWhenStoppedOrInError("ExpectEndofFile") if p.sanityChecksEnabled {
p.panicWhenStoppedOrInError("ExpectEndofFile")
}
if p.Peek(tokenize.A.EndOfFile) { if p.Peek(tokenize.A.EndOfFile) {
p.Stop() p.Stop()
} else { } else {
@ -226,7 +247,9 @@ func (p *API) ExpectEndOfFile() {
// //
// • there was an error while reading the input. // • there was an error while reading the input.
func (p *API) Expected(expected string) { func (p *API) Expected(expected string) {
p.panicWhenStoppedOrInError("Expected") if p.sanityChecksEnabled {
p.panicWhenStoppedOrInError("Expected")
}
_, err := p.tokenAPI.NextRune() _, err := p.tokenAPI.NextRune()
switch { switch {
case err == nil: case err == nil:

View File

@ -24,10 +24,19 @@ func callerBefore(name string) string {
} }
} }
func callerFilepos(depth int) string { type filepos struct {
file string
line int
}
// String formats the position as "file:line".
//
// A value receiver is used because filepos is handled by value (it is used
// as a map key and returned by value): this way both filepos and *filepos
// satisfy fmt.Stringer, and String can be called on non-addressable values.
func (pos filepos) String() string {
	return fmt.Sprintf("%s:%d", pos.file, pos.line)
}
func callerFilepos(depth int) filepos {
// No error handling, because we call this method ourselves with safe depth values. // No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1) _, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line) return filepos{file, line}
} }
func callerPanic(name, f string, data ...interface{}) { func callerPanic(name, f string, data ...interface{}) {

View File

@ -26,13 +26,31 @@ type Func func(interface{}) error
// against the provided input data. For an overview of allowed inputs, take a // against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New(). // look at the documentation for parsekit.read.New().
func New(startHandler Handler) Func { func New(startHandler Handler) Func {
return new(startHandler, true)
}
// NewWithoutSanityChecks instantiates a new parser, which does not have
// parsekit's built-in sanity checks enabled (e.g. checks for loops, or for
// calls to parse.API methods after an error or Stop()).
//
// Disabling sanity checks does improve parsing performance, but for most
// use cases the overhead of the checks is not an issue. Only disable sanity
// checks when you really need the extra performance.
// You can of course create a normal sanity-checked parser that is used
// during development / unit testing, and an unchecked one for production.
func NewWithoutSanityChecks(startHandler Handler) Func {
// false is passed as the sanityChecksEnabled argument.
return new(startHandler, false)
}
func new(startHandler Handler, sanityChecksEnabled bool) Func {
if startHandler == nil { if startHandler == nil {
callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}") callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}")
} }
return func(input interface{}) error { return func(input interface{}) error {
api := &API{ api := &API{
tokenAPI: tokenize.NewAPI(input), tokenAPI: tokenize.NewAPI(input),
loopCheck: map[string]bool{}, loopCheck: make(map[filepos]bool),
sanityChecksEnabled: sanityChecksEnabled,
} }
if api.Handle(startHandler) { if api.Handle(startHandler) {
// Handle returned true, indicating that parsing could still continue. // Handle returned true, indicating that parsing could still continue.