Backup work on performance improvements.

This commit is contained in:
Maurice Makaay 2019-07-08 14:31:01 +00:00
parent 23ca3501e1
commit 5fa0b5eace
5 changed files with 96 additions and 105 deletions

View File

@ -66,7 +66,6 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
if ok {
forkedAPI.Merge()
p.result = p.tokenAPI.Result()
forkedAPI.Dispose()
if p.tokenAPI.FlushInput() {
if p.sanityChecksEnabled {
p.initLoopCheck()

View File

@ -1,8 +1,6 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
@ -77,12 +75,13 @@ type API struct {
type apiState struct {
reader *read.Buffer
stack []Result // the stack, used for forking / merging the API.
top int // the index of the current top item in the stack
}
// initialAPIstackDepth determines the initial stack depth for the API.
// This value should work in most cases. When a parser requires a higher
// stack depth, then this is no problem. The API will automatically scale
// the stack when forking beyond this default number of stack levels.
// initialAPIstackDepth determines the initial stack depth for the API.
// When a parser requires a higher stack depth, then this is no problem.
// The API will automatically scale the stack when forking beyond this
// default number of stack levels.
const initialAPIstackDepth = 10
// NewAPI initializes a new API struct, wrapped around the provided input.
@ -90,16 +89,11 @@ const initialAPIstackDepth = 10
// for parsekit.read.New().
func NewAPI(input interface{}) API {
stack := make([]Result, 1, initialAPIstackDepth)
stack[0] = newResult()
state := apiState{
reader: read.New(input),
stack: stack,
}
api := API{
state: &state,
stackLevel: 0,
}
return api
return API{state: &state}
}
// NextRune returns the rune at the current read offset.
@ -113,19 +107,25 @@ func NewAPI(input interface{}) API {
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.stackLevel > len(i.state.stack)-1 {
if i.stackLevel > i.state.top {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
if result.lastRune != nil {
if result.runeRead {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"without a prior call to Accept()")
}
readRune, err := i.state.reader.RuneAt(result.offset)
result.lastRune = &runeInfo{r: readRune, err: err}
result.lastRune.r = readRune
result.lastRune.err = err
result.runeRead = true
i.DisposeChilds()
return readRune, err
}
@ -135,21 +135,23 @@ func (i *API) NextRune() (rune, error) {
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
if i.stackLevel > i.state.top {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
if result.lastRune == nil {
if !result.runeRead {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
} else if result.lastRune.err != nil {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
}
result.runes = append(result.runes, result.lastRune.r)
result.cursor.moveByRune(result.lastRune.r)
result.offset++
result.lastRune = nil
result.runeRead = false
}
// Fork forks off a child of the API struct. It will reuse the same
@ -170,48 +172,44 @@ func (i *API) Accept() {
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() API {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
if i.stackLevel > i.state.top {
callerPanic("Fork", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
i.DisposeChilds()
result := &(i.state.stack[i.stackLevel])
// Grow the stack storage when needed.
newStackSize := i.stackLevel + 2
if cap(i.state.stack) < newStackSize {
newStack := make([]Result, newStackSize, 2*newStackSize)
newStack := make([]Result, newStackSize, newStackSize+initialAPIstackDepth)
copy(newStack, i.state.stack)
i.state.stack = newStack
}
i.state.stack = i.state.stack[0 : i.stackLevel+1]
// Create the new fork.
child := API{
state: i.state,
stackLevel: i.stackLevel + 1,
}
childResult := newResult()
childResult.cursor = result.cursor
childResult.offset = result.offset
i.state.stack = i.state.stack[:newStackSize] // todo use append() directly?
i.state.stack[child.stackLevel] = childResult
childResult := Result{
cursor: result.cursor,
offset: result.offset,
}
i.state.stack = append(i.state.stack, childResult)
//i.state.stack[i.stackLevel+1] = childResult
// Update the parent.
result.lastRune = nil
// Invalidate parent's last read rune.
result.runeRead = false
i.state.top = child.stackLevel
return child
}
// stackDump prints every Result currently on the API stack, one line per
// stack level, showing its index, cursor position and collected runes.
// Debug-only helper; not part of the normal code flow.
func (i *API) stackDump() {
	for level, result := range i.state.stack {
		fmt.Printf("[%d] %s: %q\n", level, result.cursor, result.String())
	}
}
// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
@ -222,34 +220,51 @@ func (i *API) stackDump() {
// This allows a child to feed results in chunks to its parent.
func (i *API) Merge() {
if i.stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on a non-forked API")
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on the top-level API")
}
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
if i.stackLevel > i.state.top {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
parentResult := &(i.state.stack[i.stackLevel-1])
// // Grow parent rune storage when needed.
// newRuneSize := len(parentResult.runes) + len(result.runes)
// if cap(parentResult.runes) < newRuneSize {
// newRunes := make([]rune, len(parentResult.runes), 2*newRuneSize)
// copy(newRunes, parentResult.runes)
// parentResult.runes = newRunes
// //fmt.Println("Beefed up runes", i.stackLevel-1, newRuneSize*2)
// }
// // Grow parent token storage when needed.
// newTokenSize := len(parentResult.tokens) + len(result.tokens)
// if cap(parentResult.tokens) < newTokenSize {
// newTokens := make([]Token, len(parentResult.tokens), 2*newTokenSize)
// copy(newTokens, parentResult.tokens)
// parentResult.tokens = newTokens
// //fmt.Println("Beefed up tokens", i.stackLevel-1, newTokenSize*2)
// }
parentResult.runes = append(parentResult.runes, result.runes...)
parentResult.tokens = append(parentResult.tokens, result.tokens...)
parentResult.offset = result.offset
parentResult.cursor = result.cursor
i.Reset()
i.DisposeChilds()
}
// Dispose discards this fork's results by truncating the shared stack so
// that this fork's stack level is removed, then resets the current level.
//
// NOTE(review): after the truncation below, len(i.state.stack) == i.stackLevel,
// yet Reset() indexes i.state.stack[i.stackLevel] — that looks like an
// index-out-of-range panic; confirm whether Reset() should run before the
// truncation (or act on the parent level) instead.
// NOTE(review): unlike DisposeChilds(), i.state.top is not updated here —
// verify whether that is intentional.
func (i *API) Dispose() {
	i.state.stack = i.state.stack[:i.stackLevel]
	i.Reset()
}
// DisposeChilds invalidates all forked children of this API level.
func (i *API) DisposeChilds() {
	// Keep stack entries up to and including this level; drop all children.
	i.state.stack = i.state.stack[:i.stackLevel+1]
	// Mark this level as the active top, so any stale child fork that is
	// used afterwards trips the "non-active API fork" panic checks.
	i.state.top = i.stackLevel
}
func (i *API) Reset() {
result := &(i.state.stack[i.stackLevel])
result.lastRune = nil
result.runeRead = false
result.runes = result.runes[:0]
result.tokens = result.tokens[:0]
result.err = nil

View File

@ -140,26 +140,6 @@ func ExampleAPI_Fork() {
// <nil> mismatch at start of file
}
// ExampleAPI_Dispose demonstrates discarding a forked child's results,
// leaving the parent API to continue reading from its original position.
func ExampleAPI_Dispose() {
	api := tokenize.NewAPI("My uninspired piece of input")
	child := api.Fork()
	// ... do stuff with child ...
	child.NextRune()
	child.Accept()
	child.NextRune()
	child.Accept()
	// ... dispose of the child results ...
	child.Dispose()
	// The parent still reads from the start of the input.
	r, _ := api.NextRune()
	fmt.Printf("Rune read from parent: %c\n", r)
	// Output:
	// Rune read from parent: M
}
func ExampleAPI_Merge() {
tokenHandler := func(t tokenize.API) bool {
child1 := t.Fork()

View File

@ -8,18 +8,18 @@ import (
// by a tokenize.Handler. It also provides the API that Handlers and Parsers
// can use to store and retrieve the results.
type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input
lastRune runeInfo // information about the last rune read using NextRune()
runeRead bool // whether or not a rune was read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input
}
type runeInfo struct {
r rune
width int8
err error
r rune
err error
}
// Token defines a lexical token as produced by tokenize.Handlers.
@ -67,11 +67,7 @@ func (t Token) String() string {
// newResult initializes an empty Result struct.
func newResult() Result {
return Result{
runes: []rune{},
tokens: []Token{},
cursor: Cursor{},
}
return Result{}
}
// ClearRunes clears the runes in the Result.

View File

@ -54,13 +54,13 @@ func ExampleNew() {
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
input := mkInput()
r, _ := (&input).NextRune()
api := makeTokenizeAPI()
r, _ := (&api).NextRune()
AssertEqual(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := mkInput()
i := makeTokenizeAPI()
i.NextRune()
i.Accept()
i.NextRune()
@ -73,7 +73,7 @@ func TestInputCanAcceptRunesFromReader(t *testing.T) {
func TestCallingNextRuneTwice_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := makeTokenizeAPI()
i.NextRune()
i.NextRune()
},
@ -83,7 +83,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
input := mkInput()
input := makeTokenizeAPI()
AssertPanic(t, PanicT{
Function: (&input).Accept,
Regexp: true,
@ -94,41 +94,42 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := makeTokenizeAPI()
i.Merge()
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := makeTokenizeAPI()
f := i.Fork()
i.NextRune()
f.Merge()
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := makeTokenizeAPI()
f := i.Fork()
g := f.Fork()
i.Fork()
f.Merge()
g.Merge()
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := makeTokenizeAPI()
i.NextRune()
i.Fork()
i.Accept()
@ -176,6 +177,6 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func mkInput() tokenize.API {
// makeTokenizeAPI constructs a fresh tokenize.API over a small fixed
// input string, for use as a shared fixture by the tests in this file.
func makeTokenizeAPI() tokenize.API {
	return tokenize.NewAPI("Testing")
}