From 7795588fe693706aa34b3ad7906018ce6a6a98fd Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 8 Jul 2019 21:57:32 +0000 Subject: [PATCH] Speed improvement work. --- tokenize/api.go | 56 +++++++++++++++++++++++++++++----------------- tokenize/result.go | 17 +++++--------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/tokenize/api.go b/tokenize/api.go index 537b7a8..f1d301b 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -68,14 +68,23 @@ import ( // can lead to hard to track bugs. I much prefer this forking method, since // no bookkeeping has to be implemented when implementing a parser. type API struct { - state *apiState // shared API state data - stackLevel int // the stack level for this API object + reader *read.Buffer // the input data reader + lastRune rune // the rune as retrieved by the last NextRune() call + lastRuneErr error // the error for the last NextRune() call + runeRead bool // whether or not a rune was read using NextRune() + runes []rune // the rune stack + tokens []Token // the token stack + runeStart int + runeEnd int + tokenStart int + tokenEnd int + stackLevel int // the stack level for this API object + state *apiState // shared API state data } type apiState struct { - reader *read.Buffer - stack []Result // the stack, used for forking / merging the API. - top int // the index of the current top item in the stack + stack []Result // the stack, used for forking / merging the API. + top int // the index of the current top item in the stack } // initialAPIstackDepth determines the initial stack depth for the API. 
@@ -90,10 +99,14 @@ const initialAPIstackDepth = 10 func NewAPI(input interface{}) API { stack := make([]Result, 1, initialAPIstackDepth) state := apiState{ - reader: read.New(input), - stack: stack, + stack: stack, + } + return API{ + runes: make([]rune, initialAPIstackDepth), + tokens: make([]Token, initialAPIstackDepth), + reader: read.New(input), + state: &state, } - return API{state: &state} } // NextRune returns the rune at the current read offset. @@ -114,15 +127,15 @@ func (i *API) NextRune() (rune, error) { } result := &(i.state.stack[i.stackLevel]) - if result.runeRead { + if i.runeRead { callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ "without a prior call to Accept()") } - readRune, err := i.state.reader.RuneAt(result.offset) - result.lastRune.r = readRune - result.lastRune.err = err - result.runeRead = true + readRune, err := i.reader.RuneAt(result.offset) + i.lastRune = readRune + i.lastRuneErr = err + i.runeRead = true i.DisposeChilds() @@ -142,16 +155,16 @@ func (i *API) Accept() { } result := &(i.state.stack[i.stackLevel]) - if !result.runeRead { + if !i.runeRead { callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()") - } else if result.lastRune.err != nil { + } else if i.lastRuneErr != nil { callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed") } - result.runes = append(result.runes, result.lastRune.r) - result.cursor.moveByRune(result.lastRune.r) + result.runes = append(result.runes, i.lastRune) + result.cursor.moveByRune(i.lastRune) result.offset++ - result.runeRead = false + i.runeRead = false } // Fork forks off a child of the API struct. 
It will reuse the same @@ -194,6 +207,7 @@ func (i *API) Fork() API { child := API{ state: i.state, stackLevel: i.stackLevel + 1, + reader: i.reader, } childResult := Result{ cursor: result.cursor, @@ -203,7 +217,7 @@ func (i *API) Fork() API { //i.state.stack[i.stackLevel+1] = childResult // Invalidate parent's last read rune. - result.runeRead = false + i.runeRead = false i.state.top = child.stackLevel @@ -264,7 +278,7 @@ func (i *API) DisposeChilds() { func (i *API) Reset() { result := &(i.state.stack[i.stackLevel]) - result.runeRead = false + i.runeRead = false result.runes = result.runes[:0] result.tokens = result.tokens[:0] result.err = nil @@ -280,7 +294,7 @@ func (i *API) Reset() { func (i API) FlushInput() bool { result := &(i.state.stack[i.stackLevel]) if result.offset > 0 { - i.state.reader.Flush(result.offset) + i.reader.Flush(result.offset) result.offset = 0 return true } diff --git a/tokenize/result.go b/tokenize/result.go index a148b7e..467e6f1 100644 --- a/tokenize/result.go +++ b/tokenize/result.go @@ -8,18 +8,11 @@ import ( // by a tokenize.Handler. It also provides the API that Handlers and Parsers // can use to store and retrieve the results. 
type Result struct { - lastRune runeInfo // information about the last rune read using NextRune() - runeRead bool // whether or not a rune was read using NextRune() - runes []rune // runes as added to the result by tokenize.Handler functions - tokens []Token // Tokens as added to the result by tokenize.Handler functions - cursor Cursor // current read cursor position, relative to the start of the file - offset int // current rune offset relative to the Reader's sliding window - err error // can be used by a Handler to report a specific issue with the input -} - -type runeInfo struct { - r rune - err error + runes []rune // runes as added to the result by tokenize.Handler functions + tokens []Token // Tokens as added to the result by tokenize.Handler functions + cursor Cursor // current read cursor position, relative to the start of the file + offset int // current rune offset relative to the Reader's sliding window + err error // can be used by a Handler to report a specific issue with the input } // Token defines a lexical token as produced by tokenize.Handlers.