From 17935b7534d7f7236b2e626c0429fca61d36e868 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 12 Jul 2019 21:32:40 +0000 Subject: [PATCH] Further performance optimization and code cleanup. --- parse/api.go | 1 - tokenize/api.go | 21 +++++-- tokenize/api_test.go | 21 +++---- tokenize/handlers_builtin.go | 109 +++++++++-------------------------- 4 files changed, 51 insertions(+), 101 deletions(-) diff --git a/parse/api.go b/parse/api.go index d404057..01a0785 100644 --- a/parse/api.go +++ b/parse/api.go @@ -67,7 +67,6 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bo callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") } - p.tokenAPI.Reset() // TODO uh, why did I do this again? Just for i.runeRead = false ? child := p.tokenAPI.Fork() ok := tokenHandler(p.tokenAPI) diff --git a/tokenize/api.go b/tokenize/api.go index ce62a61..b9b23d1 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -94,9 +94,9 @@ type stackFrame struct { err error // can be used by a Handler to report a specific issue with the input } -const initialStackDepth = 10 -const initialTokenDepth = 10 -const initialRuneDepth = 10 +const initialStackDepth = 64 +const initialTokenStoreLength = 32 +const initialRuneStoreLength = 128 // NewAPI initializes a new API struct, wrapped around the provided input. // For an overview of allowed inputs, take a look at the documentation @@ -104,8 +104,8 @@ const initialRuneDepth = 10 func NewAPI(input interface{}) *API { api := &API{ reader: read.New(input), - runes: make([]rune, 0, initialRuneDepth), - tokens: make([]Token, 0, initialTokenDepth), + runes: make([]rune, 0, initialRuneStoreLength), + tokens: make([]Token, 0, initialTokenStoreLength), stackFrames: make([]stackFrame, 1, initialStackDepth), } api.stackFrame = &api.stackFrames[0] @@ -305,6 +305,17 @@ func (i *API) Dispose(stackLevel int) { func (i *API) Reset() { i.runeRead = false + if i.stackLevel == 0 { + i.stackFrame.column = 0 + i.stackFrame.line = 0 + i.stackFrame.offset = 0 + } else { + // TODO simplify! Store line/column/offset using a 0-based index in a fork. On merge add them to the parent's offsets? + parent := i.stackFrames[i.stackLevel-1] + i.stackFrame.column = parent.column + i.stackFrame.line = parent.line + i.stackFrame.offset = parent.offset + } i.stackFrame.runeEnd = i.stackFrame.runeStart i.stackFrame.tokenEnd = i.stackFrame.tokenStart i.stackFrame.err = nil diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 941b8bc..8c01d94 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -94,26 +94,27 @@ func ExampleAPI_modifyingResults() { func ExampleAPI_Reset() { api := tokenize.NewAPI("Very important input!") - api.NextRune() + api.NextRune() // read 'V' api.Accept() - api.NextRune() + api.NextRune() // read 'e' api.Accept() fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) - // Reset clears the results, but keeps the cursor position. + // Reset clears the results. api.Reset() fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) - api.NextRune() + // So then doing the same read operations, the same data are read. + api.NextRune() // read 'V' api.Accept() - api.NextRune() + api.NextRune() // read 'e' api.Accept() fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) // Output: // API results: "Ve" at line 1, column 3 - // API results: "" at line 1, column 3 - // API results: "ry" at line 1, column 5 + // API results: "" at start of file + // API results: "Ve" at line 1, column 3 } func ExampleAPI_Fork() { @@ -316,9 +317,6 @@ func TestMergeScenariosForTokens(t *testing.T) { api.AddTokens(token2) - // Here we can merge by expanding the token slice on the parent, - // because the end of the parent slice and the start of the child - // slice align. api.Merge(child) api.Dispose(child) @@ -330,9 +328,6 @@ func TestMergeScenariosForTokens(t *testing.T) { api.Reset() api.AddTokens(token4) - // Here the merge means that token4 will be copied to the end of - // the token slice of the parent, since there's a gap at the place - // where token3 used to be. api.Merge(child) api.Dispose(child) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index e4a0d35..3a1eefd 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -609,7 +609,7 @@ func MatchAny(handlers ...Handler) Handler { t.Dispose(child) return true } - t.Dispose(child) // TODO switch to Reset() and move forking outside the loop? + t.Dispose(child) } return false @@ -957,91 +957,35 @@ func MatchBoolean() Handler { t.accept(r1) return true } - if r1 == 't' { - r2, err := t.PeekRune(1) - if err == nil && r2 == 'r' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'u' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 'e' { - t.accept(r1, r2, r3, r4) - return true - } - } + if r1 == 't' || r1 == 'T' { + r2, _ := t.PeekRune(1) + r3, _ := t.PeekRune(2) + r4, err := t.PeekRune(3) + if err == nil && r2 == 'r' && r3 == 'u' && r4 == 'e' { + t.accept(r1, r2, r3, r4) + return true + } + if err == nil && r1 == 'T' && r2 == 'R' && r3 == 'U' && r4 == 'E' { + t.accept(r1, r2, r3, r4) + return true } t.accept(r1) return true } - if r1 == 'T' { - r2, err := t.PeekRune(1) - if err == nil && r2 == 'r' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'u' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 'e' { - t.accept(r1, r2, r3, r4) - return true - } - } + + if r1 == 'f' || r1 == 'F' { + r2, _ := t.PeekRune(1) + r3, _ := t.PeekRune(2) + r4, _ := t.PeekRune(3) + r5, err := t.PeekRune(4) + + if err == nil && r2 == 'a' && r3 == 'l' && r4 == 's' && r5 == 'e' { + t.accept(r1, r2, r3, r4, r5) + return true } - if err == nil && r2 == 'R' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'U' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 'E' { - t.accept(r1, r2, r3, r4) - return true - } - } - } - t.accept(r1) - return true - } - if r1 == 'f' { - r2, err := t.PeekRune(1) - if err == nil && r2 == 'a' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'l' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 's' { - r5, err := t.PeekRune(4) - if err == nil && r5 == 'e' { - t.accept(r1, r2, r3, r4, r5) - return true - } - } - } - } - t.accept(r1) - return true - } - if r1 == 'F' { - r2, err := t.PeekRune(1) - if err == nil && r2 == 'a' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'l' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 's' { - r5, err := t.PeekRune(4) - if err == nil && r5 == 'e' { - t.accept(r1, r2, r3, r4, r5) - return true - } - } - } - } - if err == nil && r2 == 'A' { - r3, err := t.PeekRune(2) - if err == nil && r3 == 'L' { - r4, err := t.PeekRune(3) - if err == nil && r4 == 'S' { - r5, err := t.PeekRune(4) - if err == nil && r5 == 'E' { - t.accept(r1, r2, r3, r4, r5) - return true - } - } - } + if err == nil && r1 == 'F' && r2 == 'A' && r3 == 'L' && r4 == 'S' && r5 == 'E' { + t.accept(r1, r2, r3, r4, r5) + return true } t.accept(r1) return true @@ -1300,7 +1244,8 @@ func ModifyDrop(handler Handler) Handler { child := t.Fork() if handler(t) { // Do a partial merge: only move the cursor and read offset forward. - // Otherwise we'd have to do a Reset() + Merge() call to get the same result. + // Any produced runes and tokens are ignored and not merged to the parent + // (since we're dropping those here). parent := &t.stackFrames[t.stackLevel-1] parent.offset = t.stackFrame.offset parent.line = t.stackFrame.line