From 3c9a678d7a5350f48de163cfd3d4f56b13b49ada Mon Sep 17 00:00:00 2001
From: Maurice Makaay
Date: Thu, 11 Jul 2019 14:52:12 +0000
Subject: [PATCH] Fixed the ModifyDrop() behavior. It worked, but it caused memory build-up in the old implementation.

---
 parse/api.go                 | 63 ++++++++----------------------
 parse/parse.go               | 22 ++-----------
 tokenize/api.go              |  2 --
 tokenize/handler.go          |  2 +-
 tokenize/handlers_builtin.go |  7 ++--
 tokenize/tokenize.go         | 20 ++++++++++--
 6 files changed, 39 insertions(+), 77 deletions(-)

diff --git a/parse/api.go b/parse/api.go
index b0216be..8d44ec2 100644
--- a/parse/api.go
+++ b/parse/api.go
@@ -16,34 +16,11 @@ import (
 //
 // • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
 type API struct {
-	tokenAPI            *tokenize.API    // the tokenize.API, used for communicating with tokenize.Handler functions
-	Result              TokenizeResult   // a struct, holding the results of the last Peek() or Accept() call
-	sanityChecksEnabled bool             // whether or not runtime sanity checks are enabled
-	loopCheck           map[uintptr]bool // used for parser loop detection
-	err                 error            // parse error, retrieved by Error(), using API methods is denied when set
-	stopped             bool             // a boolean set to true by Stop()
-}
-
-// TokenizeResult holds the results of the last Peek() or Accept() call.
-type TokenizeResult struct {
-	Tokens []tokenize.Token // the resulting tokens from the last call to Peek() or Accept()
-	Runes  []rune           // the resulting runes from the last call to Peek() or Accept()
-}
-
-func (result *TokenizeResult) String() string {
-	return string(result.Runes)
-}
-
-// DisableSanityChecks disables the built-in parser implementation sanity checks,
-// which detects parser implementation errors like loops and continuing parsing
-// after an error or invoking Stop().
-//
-// These tests do cause a performance hit. When your parser has to handle a lot
-// of input data and is fairly complex, you might want to disable the sanity
-// checks. When you're not sure, You probably don't want to use this method,
-// and enjoy the added safety of the built-in checks.
-func (p *API) DisableSanityChecks() {
-	p.sanityChecksEnabled = true
+	tokenAPI  *tokenize.API    // the tokenize.API, used for communicating with tokenize.Handler functions
+	Result    tokenize.Result  // a struct, holding the results of the last Peek() or Accept() call
+	loopCheck map[uintptr]bool // used for parser loop detection
+	err       error            // parse error, retrieved by Error(), using API methods is denied when set
+	stopped   bool             // a boolean set to true by Stop()
 }
 
 // Peek checks if the upcoming input data matches the provided tokenize.Handler.
@@ -79,9 +56,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
 		// And flush the input reader buffer.
 		if p.tokenAPI.FlushInput() {
-			if p.sanityChecksEnabled {
-				p.initLoopCheck()
-			}
+			p.initLoopCheck()
 		}
 	} else {
 		p.tokenAPI.Dispose(forkedAPI)
 	}
@@ -90,12 +65,10 @@
 }
 
 func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
-	if p.sanityChecksEnabled {
-		p.panicWhenStoppedOrInError(name)
-		p.checkForLoops(name)
-		if tokenHandler == nil {
-			callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
-		}
+	p.panicWhenStoppedOrInError(name)
+	p.checkForLoops(name)
+	if tokenHandler == nil {
+		callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
 	}
 
 	p.tokenAPI.Reset()
@@ -167,13 +140,9 @@ func (p *API) checkForLoops(name string) {
 // of this method, because it performs some sanity checks and it will return
 // an easy to use boolean indicating whether the parser can continue or not.
 func (p *API) Handle(parseHandler ...Handler) bool {
-	if p.sanityChecksEnabled {
-		p.panicWhenStoppedOrInError("Handle")
-	}
+	p.panicWhenStoppedOrInError("Handle")
 	for _, handler := range parseHandler {
-		if p.sanityChecksEnabled {
-			p.panicWhenHandlerNil("Handle", handler)
-		}
+		p.panicWhenHandlerNil("Handle", handler)
 		handler(p)
 		if p.IsStoppedOrInError() {
 			return false
@@ -223,9 +192,7 @@ func (p *API) Error(format string, data ...interface{}) {
 // will be stopped through Stop(). Otherwise, the unexpected input is reported
 // using Expected("end of file").
 func (p *API) ExpectEndOfFile() {
-	if p.sanityChecksEnabled {
-		p.panicWhenStoppedOrInError("ExpectEndofFile")
-	}
+	p.panicWhenStoppedOrInError("ExpectEndofFile")
 	if p.Peek(tokenize.A.EndOfFile) {
 		p.Stop()
 	} else {
@@ -247,9 +214,7 @@ func (p *API) ExpectEndOfFile() {
 //
 // • there was an error while reading the input.
 func (p *API) Expected(expected string) {
-	if p.sanityChecksEnabled {
-		p.panicWhenStoppedOrInError("Expected")
-	}
+	p.panicWhenStoppedOrInError("Expected")
 	_, err := p.tokenAPI.NextRune()
 	switch {
 	case err == nil:
diff --git a/parse/parse.go b/parse/parse.go
index 420dba9..227d487 100644
--- a/parse/parse.go
+++ b/parse/parse.go
@@ -26,31 +26,13 @@ type Func func(interface{}) error
 // against the provided input data. For an overview of allowed inputs, take a
 // look at the documentation for parsekit.read.New().
 func New(startHandler Handler) Func {
-	return new(startHandler, true)
-}
-
-// NewWithoutSanityChecks instantiates a new parser, which does not have
-// parsekit's built-in sanith checks enabled (e.g. checks for loops or
-// or calls to parse.API methods after an error or Stop()).
-//
-// Disabling sanity checks does improve parsing performance, but for
-// most use cases this is not an issue. Only disable sanity checks when
-// you really need the extra performance.
-// You can of course create a normal sanity-checked parser that is used
-// during development / unit testing, and an unchecked one for production.
-func NewWithoutSanityChecks(startHandler Handler) Func {
-	return new(startHandler, false)
-}
-
-func new(startHandler Handler, sanityChecksEnabled bool) Func {
 	if startHandler == nil {
 		callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}")
 	}
 	return func(input interface{}) error {
 		api := &API{
-			tokenAPI:            tokenize.NewAPI(input),
-			loopCheck:           make(map[uintptr]bool),
-			sanityChecksEnabled: sanityChecksEnabled,
+			tokenAPI:  tokenize.NewAPI(input),
+			loopCheck: make(map[uintptr]bool),
 		}
 		if api.Handle(startHandler) {
 			// Handle returned true, indicating that parsing could still continue.
diff --git a/tokenize/api.go b/tokenize/api.go
index e00162c..6a9a01a 100644
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -140,8 +140,6 @@ func (i *API) NextRune() (rune, error) {
 // It is not allowed to call Accept() when the previous call to NextRune()
 // returned an error. Calling Accept() in such case will result in a panic.
 func (i *API) Accept() {
-	// TODO can go after completing the code for performance.
-	//fmt.Println("STACK [", i.stackLevel, "] runes", len(i.runes), "/", cap(i.runes), "tokens", len(i.tokens), "/", cap(i.tokens))
 	if !i.runeRead {
 		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"without first calling NextRune()")
diff --git a/tokenize/handler.go b/tokenize/handler.go
index cd9241a..ff3a4e5 100644
--- a/tokenize/handler.go
+++ b/tokenize/handler.go
@@ -11,7 +11,7 @@ type Handler func(t *API) bool
 
 // Match is syntactic sugar that allows you to write a construction like
 // NewTokenizer(handler).Execute(input) as handler.Match(input).
-func (handler Handler) Match(input interface{}) (*API, error) {
+func (handler Handler) Match(input interface{}) (*Result, error) {
 	tokenizer := New(handler)
 	return tokenizer(input)
 }
diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go
index 5fc3f13..6bb1aff 100644
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -1076,8 +1076,11 @@ func ModifyDrop(handler Handler) Handler {
 	return func(t *API) bool {
 		child := t.Fork()
 		if handler(t) {
-			t.Reset()
-			t.Merge(child)
+			// Do a partial merge: only move the cursor and read offset forward.
+			// Otherwise we'd have to do a Reset() + Merge() call to get the same result.
+			parent := &t.stackFrames[t.stackLevel-1]
+			parent.offset = t.stackFrame.offset
+			parent.cursor = t.stackFrame.cursor
 			t.Dispose(child)
 			return true
 		}
diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go
index 8a725ba..a06ab8b 100644
--- a/tokenize/tokenize.go
+++ b/tokenize/tokenize.go
@@ -9,7 +9,17 @@ import (
 // Func is the function signature as returned by New: a function that takes
 // any supported type of input, executes a tokenizer run and returns a
 // Result struct (possibly nil) and an error (possibly nil).
-type Func func(input interface{}) (*API, error)
+type Func func(input interface{}) (*Result, error)
+
+// Result holds the runes and tokens as produced by the tokenizer.
+type Result struct {
+	Tokens []Token
+	Runes  []rune
+}
+
+func (result *Result) String() string {
+	return string(result.Runes)
+}
 
 // New instantiates a new tokenizer.
 //
@@ -28,7 +38,7 @@ type Func func(input interface{}) (*API, error)
 // against the provided input data. For an overview of allowed inputs, take a
 // look at the documentation for parsekit.read.New().
 func New(tokenHandler Handler) Func {
-	return func(input interface{}) (*API, error) {
+	return func(input interface{}) (*Result, error) {
 		api := NewAPI(input)
 		ok := tokenHandler(api)
 
@@ -36,6 +46,10 @@ func New(tokenHandler Handler) Func {
 			err := fmt.Errorf("mismatch at %s", Cursor{})
 			return nil, err
 		}
-		return api, nil
+		result := &Result{
+			Runes:  api.Runes(),
+			Tokens: api.Tokens(),
+		}
+		return result, nil
 	}
 }
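
For reference, a minimal usage sketch of the changed API. The import path and the matchLetterA handler are assumptions for illustration only (they are not part of this patch); NextRune(), Accept(), ModifyDrop(), Handler.Match() and Result are taken from the diff above.

package main

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit/tokenize" // assumed module path, not confirmed by this patch
)

// matchLetterA is a hypothetical tokenize.Handler that accepts a single 'a',
// built only on the NextRune()/Accept() calls shown in the diff above.
func matchLetterA(t *tokenize.API) bool {
	r, err := t.NextRune()
	if err != nil || r != 'a' {
		return false
	}
	t.Accept()
	return true
}

func main() {
	// ModifyDrop matches the input like the wrapped handler does, but drops the
	// matched runes from the output. With this patch the drop is a partial merge
	// (cursor and read offset only), so the parent frame no longer accumulates
	// the forked runes and tokens.
	dropA := tokenize.ModifyDrop(matchLetterA)

	// Handler.Match() now returns a *tokenize.Result instead of the *tokenize.API.
	result, err := dropA.Match("abc")
	if err != nil {
		fmt.Println("no match:", err)
		return
	}
	fmt.Printf("matched; runes kept in the result: %q\n", result.String())
}

Because only the parent's cursor and read offset advance on a successful drop, the dropped runes are never copied into the parent frame, which is what removes the memory build-up described in the commit message.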