Lowering the number of forks required.

This commit is contained in:
Maurice Makaay 2019-07-24 22:42:40 +00:00
parent 99b0abc490
commit bc9e718e47
13 changed files with 95 additions and 77 deletions

View File

@ -144,7 +144,7 @@ func (h *helloparser1) end(p *parse.API) {
h.greetee = strings.TrimSpace(h.greetee)
if h.greetee == "" {
p.Error("The name cannot be empty")
p.SetError("The name cannot be empty")
} else {
p.Stop()
}

View File

@ -82,27 +82,27 @@ func (h *helloparser2) Parse(input string) (string, error) {
func (h *helloparser2) start(p *parse.API) {
c, a, m := tokenize.C, tokenize.A, tokenize.M
if !p.Accept(a.StrNoCase("hello")) {
p.Error("the greeting is not being friendly")
p.SetError("the greeting is not being friendly")
return
}
if !p.Accept(c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks))) {
p.Error("the greeting is not properly separated")
p.SetError("the greeting is not properly separated")
return
}
if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
h.greetee = p.Result.String()
if h.greetee == "" {
p.Error("the name cannot be empty")
p.SetError("the name cannot be empty")
return
}
} else {
p.Error("the greeting is targeted at thin air")
p.SetError("the greeting is targeted at thin air")
return
}
if !p.Accept(a.Excl) {
p.Error("the greeting is not loud enough")
p.SetError("the greeting is not loud enough")
} else if !p.Accept(a.EndOfFile) {
p.Error("too much stuff going on after the closing '!'")
p.SetError("too much stuff going on after the closing '!'")
} else {
p.Stop()
}

View File

@ -30,13 +30,14 @@ type API struct {
// On a successful peek, the results (data + tokens) are returned by the peek.
// They are available (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
if ok {
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
}
tokenAPI.Dispose(child)
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
return ok
}
@ -48,11 +49,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
// No results (data + tokens) are returned by Peek(). If want access to the data
// through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
t.Dispose(child)
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
return ok
}
@ -61,37 +63,42 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
forward to beyond the match that was found. Otherwise false will be
returned and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the results using the Result() method.
// After calling this method, you can retrieve the results through the API.Result field.
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
t := parseAPI.tokenAPI
child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
tokenAPI := parseAPI.tokenAPI
_, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler)
if ok {
// Keep track of the results as produced by this child.
parseAPI.Result.Tokens = t.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes()
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
// Merge to the parent level.
t.Merge(child)
t.Dispose(child)
// Now the results are stored, we can reset the results for the next handler.
tokenAPI.Output.Reset()
// And flush the input reader buffer.
t.Input.Flush()
tokenAPI.Input.Flush()
} else {
t.Dispose(child)
// No match, so reset the tokenize.API for the next handler.
// This moves back the read cursor to the start and clears all results.
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
}
return ok
}
func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
// invokeTokenizeHandler invokes the tokenize.Handler directly against the
// tokenize.API (no fork is created anymore). The int return value is a
// placeholder stack level (always 0), kept during the transition away from forks.
func (parseAPI *API) invokeTokenizeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
parseAPI.panicWhenStoppedOrInError(name)
if tokenHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
}
child := parseAPI.tokenAPI.Fork()
//child := parseAPI.tokenAPI.Fork()
ok := tokenHandler(parseAPI.tokenAPI)
return child, ok
return 0, ok
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
@ -165,19 +172,18 @@ func (parseAPI *API) Stop() {
parseAPI.stopped = true
}
// Error sets the error message in the API.
// SetError sets the error message in the API.
//
// After setting an error, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
func (parseAPI *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
// You can still call SetError() though, to set a different error message
// if you feel the need to do so.
func (parseAPI *API) SetError(format string, data ...interface{}) {
message := fmt.Sprintf(format, data...)
parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.
// ExpectEndOfFile checks if the end of the input file has been reached.
//
// When it finds that the end of the file was indeed reached, then the parser
// will be stopped through Stop(). Otherwise, the unexpected input is reported
@ -209,11 +215,11 @@ func (parseAPI *API) Expected(expected string) {
_, err := parseAPI.tokenAPI.Byte.Peek(0)
switch {
case err == nil:
parseAPI.Error("unexpected input%s", fmtExpects(expected))
parseAPI.SetError("unexpected input%s", fmtExpects(expected))
case err == io.EOF:
parseAPI.Error("unexpected end of file%s", fmtExpects(expected))
parseAPI.SetError("unexpected end of file%s", fmtExpects(expected))
default:
parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected))
parseAPI.SetError("unexpected error '%s'%s", err, fmtExpects(expected))
}
}

View File

@ -41,6 +41,7 @@ func New(startHandler Handler) Func {
// and try to make the best of it.
api.ExpectEndOfFile()
}
return api.err
}
}

View File

@ -245,7 +245,7 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
panic("This is not the handler you're looking for")
}
p := parse.New(func(p *parse.API) {
p.Error("It ends here")
p.SetError("It ends here")
p.Handle(otherHandler)
})
parse.AssertPanic(t, parse.PanicT{

View File

@ -213,6 +213,7 @@ func (buf *Buffer) fill(minBytes int) {
}
const defaultBufferSize = 1024
const runeCacheSize = 128
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")

View File

@ -86,13 +86,14 @@ type API struct {
}
type stackFrame struct {
offset int // the read offset (relative to the start of the reader buffer) for this stack frame
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
offsetLocal int // the read offset, relative to the start of this stack frame
offset int // the read offset, relative to the start of the reader buffer
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
// TODO
err error // can be used by a Handler to report a specific issue with the input
@ -177,16 +178,7 @@ func (tokenAPI *API) Fork() int {
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (tokenAPI *API) Merge(stackLevel int) {
if stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != tokenAPI.stackLevel {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
tokenAPI.checkStackLevelForMethod("Merge", stackLevel)
parent := &tokenAPI.stackFrames[stackLevel-1]
f := tokenAPI.stackFrame
@ -206,6 +198,7 @@ func (tokenAPI *API) Merge(stackLevel int) {
f.tokenStart = f.tokenEnd
// Update the parent read offset.
parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
parent.offset = f.offset
// Update the parent cursor position.
@ -221,24 +214,20 @@ func (tokenAPI *API) Merge(stackLevel int) {
f.err = nil
}
// Reset moves the read cursor back to the beginning for the currently active API child.
// Additionally, all output (bytes and tokens) that was emitted from the API child is
// cleared as well.
func (tokenAPI *API) Reset() {
f := tokenAPI.stackFrame
f.bytesEnd = f.bytesStart
f.tokenEnd = f.tokenStart
f.column = 0
f.line = 0
if tokenAPI.stackLevel == 0 {
f.offset = 0
} else {
f.offset = tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset
}
f.err = nil
}
func (tokenAPI *API) Dispose(stackLevel int) {
tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
tokenAPI.stackLevel = stackLevel - 1
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
}
func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
if stackLevel == 0 {
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != tokenAPI.stackLevel {
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
}

View File

@ -30,7 +30,7 @@ func (byteMode InputByteMode) Accept(b byte) {
byteMode.MoveCursor(b)
}
// AcceptMulti is used to accept one or more bytes that were read from the input.
// AcceptMulti accepts one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these bytes."
@ -62,6 +62,7 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
}
f.offset++
f.offsetLocal++
}
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.

View File

@ -30,6 +30,16 @@ func (i Input) Cursor() string {
return fmt.Sprintf("line %d, column %d", line+1, column+1)
}
func (i Input) Reset() {
f := i.api.stackFrame
if f.offsetLocal > 0 {
f.column = 0
f.line = 0
f.offset -= f.offsetLocal
f.offsetLocal = 0
}
}
// Flush flushes input data from the read buffer up to the current
// read cursor position of the tokenizer.
//
@ -41,6 +51,7 @@ func (i Input) Flush() bool {
if f.offset > 0 {
i.reader.Flush(f.offset)
f.offset = 0
f.offsetLocal = 0
return true
}
return false

View File

@ -26,6 +26,13 @@ func (o Output) Rune(offset int) rune {
return r
}
func (o Output) Reset() {
f := o.api.stackFrame
f.bytesEnd = f.bytesStart
f.tokenEnd = f.tokenStart
f.err = nil
}
func (o Output) ClearData() {
f := o.api.stackFrame
f.bytesEnd = f.bytesStart

View File

@ -97,6 +97,7 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
width := utf8.RuneLen(r)
f.offset += width
f.offsetLocal += width
return width
}

View File

@ -663,23 +663,23 @@ func MatchStrNoCase(expected string) Handler {
return func(tokenAPI *API) bool {
matches := make([]rune, l)
width := 0
offset := 0
i := 0
for _, e := range expected {
if e <= '\x7F' {
b, err := tokenAPI.Byte.Peek(width)
b, err := tokenAPI.Byte.Peek(offset)
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
return false
}
matches[i] = rune(b)
width++
offset++
} else {
r, w, err := tokenAPI.Rune.Peek(width)
r, w, err := tokenAPI.Rune.Peek(offset)
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
return false
}
matches[i] = r
width += w
offset += w
}
i++
}
@ -737,7 +737,8 @@ func MatchAny(handlers ...Handler) Handler {
tokenAPI.Dispose(child)
return true
}
tokenAPI.Reset()
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
}
tokenAPI.Dispose(child)

View File

@ -34,7 +34,7 @@ func (result *Result) String() string {
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// THis function returns a function that can be invoked to run the tokenizer
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {