From 5fa0b5eacee4756d86ae7fe7626a7ca2ab7057a5 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 8 Jul 2019 14:31:01 +0000 Subject: [PATCH] Backup work on performance improvements. --- parse/api.go | 1 - tokenize/api.go | 127 +++++++++++++++++++++---------------- tokenize/api_test.go | 20 ------ tokenize/result.go | 24 +++---- tokenize/tokenizer_test.go | 29 +++++---- 5 files changed, 96 insertions(+), 105 deletions(-) diff --git a/parse/api.go b/parse/api.go index 9bb2cb9..5cfcd9f 100644 --- a/parse/api.go +++ b/parse/api.go @@ -66,7 +66,6 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool { if ok { forkedAPI.Merge() p.result = p.tokenAPI.Result() - forkedAPI.Dispose() if p.tokenAPI.FlushInput() { if p.sanityChecksEnabled { p.initLoopCheck() diff --git a/tokenize/api.go b/tokenize/api.go index 431a9d4..537b7a8 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -1,8 +1,6 @@ package tokenize import ( - "fmt" - "git.makaay.nl/mauricem/go-parsekit/read" ) @@ -77,12 +75,13 @@ type API struct { type apiState struct { reader *read.Buffer stack []Result // the stack, used for forking / merging the API. + top int // the index of the current top item in the stack } -// initialAPIstackDepth determines the initial stack depth for th API. -// This value should work in most cases. When a parser requires a higher -// stack depth, then this is no problem. The API will automatically scale -// the stack when forking beyond this default number of stack levels. +// initialAPIstackDepth determines the initial stack depth for the API. +// When a parser requires a higher stack depth, then this is no problem. +// The API will automatically scale the stack when forking beyond this +// default number of stack levels. const initialAPIstackDepth = 10 // NewAPI initializes a new API struct, wrapped around the provided input. @@ -90,16 +89,11 @@ const initialAPIstackDepth = 10 // for parsekit.read.New(). func NewAPI(input interface{}) API { stack := make([]Result, 1, initialAPIstackDepth) - stack[0] = newResult() state := apiState{ reader: read.New(input), stack: stack, } - api := API{ - state: &state, - stackLevel: 0, - } - return api + return API{state: &state} } // NextRune returns the rune at the current read offset. @@ -113,19 +107,25 @@ func NewAPI(input interface{}) API { // without explicitly accepting, this method will panic. You can see this as a // built-in unit test, enforcing correct serialization of API method calls. func (i *API) NextRune() (rune, error) { - if i.stackLevel > len(i.state.stack)-1 { + if i.stackLevel > i.state.top { callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ - "using a non-active API fork (a parent was read or merged, causing this "+ - "fork to be invalidated)") + "using a non-active API fork (a parent was read, forked or merged, "+ + "causing this fork to be invalidated)") } + result := &(i.state.stack[i.stackLevel]) - if result.lastRune != nil { + if result.runeRead { callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ "without a prior call to Accept()") } readRune, err := i.state.reader.RuneAt(result.offset) - result.lastRune = &runeInfo{r: readRune, err: err} + result.lastRune.r = readRune + result.lastRune.err = err + result.runeRead = true + + i.DisposeChilds() + return readRune, err } @@ -135,21 +135,23 @@ func (i *API) NextRune() (rune, error) { // It is not allowed to call Accept() when the previous call to NextRune() // returned an error. Calling Accept() in such case will result in a panic. func (i *API) Accept() { - if i.stackLevel > len(i.state.stack)-1 { - callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ - "using a non-active API fork (a parent was read or merged, causing this "+ - "fork to be invalidated)") + if i.stackLevel > i.state.top { + callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+ + "using a non-active API fork (a parent was read, forked or merged, "+ + "causing this fork to be invalidated)") } + result := &(i.state.stack[i.stackLevel]) - if result.lastRune == nil { + if !result.runeRead { callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()") } else if result.lastRune.err != nil { callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed") } + result.runes = append(result.runes, result.lastRune.r) result.cursor.moveByRune(result.lastRune.r) result.offset++ - result.lastRune = nil + result.runeRead = false } // Fork forks off a child of the API struct. It will reuse the same @@ -170,48 +172,44 @@ func (i *API) Accept() { // The parent API was never modified, so it can safely be used after disposal // as if the lookahead never happened. func (i *API) Fork() API { - if i.stackLevel > len(i.state.stack)-1 { - callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ - "using a non-active API fork (a parent was read or merged, causing this "+ - "fork to be invalidated)") + if i.stackLevel > i.state.top { + callerPanic("Fork", "tokenize.API.{name}(): {name}() called at {caller} "+ + "using a non-active API fork (a parent was read, forked or merged, "+ + "causing this fork to be invalidated)") } + + i.DisposeChilds() result := &(i.state.stack[i.stackLevel]) // Grow the stack storage when needed. newStackSize := i.stackLevel + 2 if cap(i.state.stack) < newStackSize { - newStack := make([]Result, newStackSize, 2*newStackSize) + newStack := make([]Result, newStackSize, newStackSize+initialAPIstackDepth) copy(newStack, i.state.stack) i.state.stack = newStack - } + i.state.stack = i.state.stack[0 : i.stackLevel+1] // Create the new fork. child := API{ state: i.state, stackLevel: i.stackLevel + 1, } - childResult := newResult() - childResult.cursor = result.cursor - childResult.offset = result.offset - i.state.stack = i.state.stack[:newStackSize] // todo use append() directly? - i.state.stack[child.stackLevel] = childResult + childResult := Result{ + cursor: result.cursor, + offset: result.offset, + } + i.state.stack = append(i.state.stack, childResult) + //i.state.stack[i.stackLevel+1] = childResult - // Update the parent. - result.lastRune = nil + // Invalidate parent's last read rune. + result.runeRead = false + + i.state.top = child.stackLevel return child } -// stackDump provides a dump of the currently active stack levels in the API. -// This is used for debugging purposes and is normally not part of the standard -// code flow. -func (i *API) stackDump() { - for i, r := range i.state.stack { - fmt.Printf("[%d] %s: %q\n", i, r.cursor, r.String()) - } -} - // Merge appends the results of a forked child API (runes, tokens) to the // results of its parent. The read cursor of the parent is also updated // to that of the forked child. @@ -222,34 +220,51 @@ func (i *API) stackDump() { // This allows a child to feed results in chunks to its parent. func (i *API) Merge() { if i.stackLevel == 0 { - callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on a non-forked API") + callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on the top-level API") } - if i.stackLevel > len(i.state.stack)-1 { - callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ - "using a non-active API fork (a parent was read or merged, causing this "+ - "fork to be invalidated)") + if i.stackLevel > i.state.top { + callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+ + "using a non-active API fork (a parent was read, forked or merged, "+ + "causing this fork to be invalidated)") } + result := &(i.state.stack[i.stackLevel]) parentResult := &(i.state.stack[i.stackLevel-1]) + + // // Grow parent rune storage when needed. + // newRuneSize := len(parentResult.runes) + len(result.runes) + // if cap(parentResult.runes) < newRuneSize { + // newRunes := make([]rune, len(parentResult.runes), 2*newRuneSize) + // copy(newRunes, parentResult.runes) + // parentResult.runes = newRunes + // //fmt.Println("Beefed up runes", i.stackLevel-1, newRuneSize*2) + // } + + // // Grow parent token storage when needed. + // newTokenSize := len(parentResult.tokens) + len(result.tokens) + // if cap(parentResult.tokens) < newTokenSize { + // newTokens := make([]Token, len(parentResult.tokens), 2*newTokenSize) + // copy(newTokens, parentResult.tokens) + // parentResult.tokens = newTokens + // //fmt.Println("Beefed up tokens", i.stackLevel-1, newTokenSize*2) + // } + parentResult.runes = append(parentResult.runes, result.runes...) parentResult.tokens = append(parentResult.tokens, result.tokens...) parentResult.offset = result.offset parentResult.cursor = result.cursor - i.Reset() i.DisposeChilds() -} - -func (i *API) Dispose() { - i.state.stack = i.state.stack[:i.stackLevel] + i.Reset() } func (i *API) DisposeChilds() { i.state.stack = i.state.stack[:i.stackLevel+1] + i.state.top = i.stackLevel } func (i *API) Reset() { result := &(i.state.stack[i.stackLevel]) - result.lastRune = nil + result.runeRead = false result.runes = result.runes[:0] result.tokens = result.tokens[:0] result.err = nil diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 82374f1..a570b92 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -140,26 +140,6 @@ func ExampleAPI_Fork() { // mismatch at start of file } -func ExampleAPI_Dispose() { - api := tokenize.NewAPI("My uninspired piece of input") - - child := api.Fork() - // ... do stuff with child ... - child.NextRune() - child.Accept() - child.NextRune() - child.Accept() - // ... dispose of the child results ... - child.Dispose() - - // The parent still reads from the start of the input. - r, _ := api.NextRune() - fmt.Printf("Rune read from parent: %c\n", r) - - // Output: - // Rune read from parent: M -} - func ExampleAPI_Merge() { tokenHandler := func(t tokenize.API) bool { child1 := t.Fork() diff --git a/tokenize/result.go b/tokenize/result.go index 1ee77f5..a148b7e 100644 --- a/tokenize/result.go +++ b/tokenize/result.go @@ -8,18 +8,18 @@ import ( // by a tokenize.Handler. It also provides the API that Handlers and Parsers // can use to store and retrieve the results. type Result struct { - lastRune *runeInfo // Information about the last rune read using NextRune() - runes []rune // runes as added to the result by tokenize.Handler functions - tokens []Token // Tokens as added to the result by tokenize.Handler functions - cursor Cursor // current read cursor position, relative to the start of the file - offset int // current rune offset relative to the Reader's sliding window - err error // can be used by a Handler to report a specific issue with the input + lastRune runeInfo // information about the last rune read using NextRune() + runeRead bool // whether or not a rune was read using NextRune() + runes []rune // runes as added to the result by tokenize.Handler functions + tokens []Token // Tokens as added to the result by tokenize.Handler functions + cursor Cursor // current read cursor position, relative to the start of the file + offset int // current rune offset relative to the Reader's sliding window + err error // can be used by a Handler to report a specific issue with the input } type runeInfo struct { - r rune - width int8 - err error + r rune + err error } // Token defines a lexical token as produced by tokenize.Handlers. @@ -67,11 +67,7 @@ func (t Token) String() string { // newResult initializes an empty Result struct. func newResult() Result { - return Result{ - runes: []rune{}, - tokens: []Token{}, - cursor: Cursor{}, - } + return Result{} } // ClearRunes clears the runes in the Result. diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index cd3ec82..ebc508d 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -54,13 +54,13 @@ func ExampleNew() { } func TestCallingNextRune_ReturnsNextRune(t *testing.T) { - input := mkInput() - r, _ := (&input).NextRune() + api := makeTokenizeAPI() + r, _ := (&api).NextRune() AssertEqual(t, 'T', r, "first rune") } func TestInputCanAcceptRunesFromReader(t *testing.T) { - i := mkInput() + i := makeTokenizeAPI() i.NextRune() i.Accept() i.NextRune() @@ -73,7 +73,7 @@ func TestInputCanAcceptRunesFromReader(t *testing.T) { func TestCallingNextRuneTwice_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := makeTokenizeAPI() i.NextRune() i.NextRune() }, @@ -83,7 +83,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) { } func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { - input := mkInput() + input := makeTokenizeAPI() AssertPanic(t, PanicT{ Function: (&input).Accept, Regexp: true, @@ -94,41 +94,42 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := makeTokenizeAPI() i.Merge() }, Regexp: true, - Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`}) } func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := makeTokenizeAPI() f := i.Fork() i.NextRune() f.Merge() }, Regexp: true, - Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`}) } func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := makeTokenizeAPI() f := i.Fork() + g := f.Fork() i.Fork() - f.Merge() + g.Merge() }, Regexp: true, - Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`}) } func TestForkingInput_ClearsLastRune(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := makeTokenizeAPI() i.NextRune() i.Fork() i.Accept() @@ -176,6 +177,6 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") } -func mkInput() tokenize.API { +func makeTokenizeAPI() tokenize.API { return tokenize.NewAPI("Testing") }