diff --git a/tokenize/api.go b/tokenize/api.go index 52020b3..e556e62 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -72,9 +72,6 @@ import ( // can lead to hard to track bugs. I much prefer this forking method, since // no bookkeeping has to be implemented when implementing a parser. type API struct { - reader *read.Buffer // the input data reader - bytes []byte // accepted bytes - tokens []Token // accepted tokens stackFrames []stackFrame // the stack frames, containing stack level-specific data stackLevel int // the current stack level stackFrame *stackFrame // the current stack frame @@ -97,12 +94,15 @@ type stackFrame struct { // Input provides input-related functionality for the tokenize API. type Input struct { - api *API + api *API + reader *read.Buffer // the input data reader } // Output provides output-related functionality for the tokenize API. type Output struct { - api *API + api *API + tokens []Token // accepted tokens + data []byte // accepted data } const initialStackDepth = 64 @@ -114,13 +114,17 @@ const initialByteStoreLength = 1024 // for parsekit.read.New(). func NewAPI(input interface{}) *API { api := &API{ - reader: read.New(input), - bytes: make([]byte, initialByteStoreLength), - tokens: make([]Token, initialTokenStoreLength), stackFrames: make([]stackFrame, initialStackDepth), } - api.Input = Input{api: api} - api.Output = Output{api: api} + api.Input = Input{ + api: api, + reader: read.New(input), + } + api.Output = Output{ + api: api, + data: make([]byte, initialByteStoreLength), + tokens: make([]Token, initialTokenStoreLength), + } api.stackFrame = &api.stackFrames[0] return api @@ -144,23 +148,23 @@ func NewAPI(input interface{}) *API { // Garbage collection will take care of this automatically. // The parent API was never modified, so it can safely be used after disposal // as if the lookahead never happened. 
-func (i *API) Fork() int { - newStackLevel := i.stackLevel + 1 +func (tokenAPI *API) Fork() int { + newStackLevel := tokenAPI.stackLevel + 1 newStackSize := newStackLevel + 1 // Grow the stack frames capacity when needed. - if cap(i.stackFrames) < newStackSize { + if cap(tokenAPI.stackFrames) < newStackSize { newFrames := make([]stackFrame, newStackSize*2) - copy(newFrames, i.stackFrames) - i.stackFrames = newFrames + copy(newFrames, tokenAPI.stackFrames) + tokenAPI.stackFrames = newFrames } - i.stackLevel++ + tokenAPI.stackLevel++ // This can be written in a shorter way, but this turned out to // be the best way performance-wise. - parent := i.stackFrame - child := &i.stackFrames[i.stackLevel] + parent := tokenAPI.stackFrame + child := &tokenAPI.stackFrames[tokenAPI.stackLevel] child.offset = parent.offset child.column = parent.column child.line = parent.line @@ -168,9 +172,9 @@ func (i *API) Fork() int { child.bytesEnd = parent.bytesEnd child.tokenStart = parent.tokenEnd child.tokenEnd = parent.tokenEnd - i.stackFrame = child + tokenAPI.stackFrame = child - return i.stackLevel + return tokenAPI.stackLevel } // Merge appends the results of a forked child API (runes, tokens) to the @@ -184,18 +188,18 @@ func (i *API) Fork() int { // // Once the child is no longer needed, it can be disposed of by using the // method Dispose(), which will return the tokenizer to the parent. 
-func (i *API) Merge(stackLevel int) { +func (tokenAPI *API) Merge(stackLevel int) { if stackLevel == 0 { callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+ "on the top-level API stack level 0") } - if stackLevel != i.stackLevel { + if stackLevel != tokenAPI.stackLevel { callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+ "on API stack level %d, but the current stack level is %d "+ - "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel) + "(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel) } - parent := &i.stackFrames[stackLevel-1] + parent := &tokenAPI.stackFrames[stackLevel-1] // The end of the parent slice aligns with the start of the child slice. // Because of this, to merge the parent slice can simply be expanded @@ -205,33 +209,33 @@ func (i *API) Merge(stackLevel int) { // After merge operation: // parent: |-----------------| // child: |---> continue reading from here - parent.bytesEnd = i.stackFrame.bytesEnd - i.stackFrame.bytesStart = i.stackFrame.bytesEnd + parent.bytesEnd = tokenAPI.stackFrame.bytesEnd + tokenAPI.stackFrame.bytesStart = tokenAPI.stackFrame.bytesEnd // The same logic applies to tokens. 
- parent.tokenEnd = i.stackFrame.tokenEnd - i.stackFrame.tokenStart = i.stackFrame.tokenEnd + parent.tokenEnd = tokenAPI.stackFrame.tokenEnd + tokenAPI.stackFrame.tokenStart = tokenAPI.stackFrame.tokenEnd - parent.offset = i.stackFrame.offset - parent.line = i.stackFrame.line - parent.column = i.stackFrame.column + parent.offset = tokenAPI.stackFrame.offset + parent.line = tokenAPI.stackFrame.line + parent.column = tokenAPI.stackFrame.column - i.stackFrame.err = nil + tokenAPI.stackFrame.err = nil } -func (i *API) Dispose(stackLevel int) { +func (tokenAPI *API) Dispose(stackLevel int) { if stackLevel == 0 { callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+ "on the top-level API stack level 0") } - if stackLevel != i.stackLevel { + if stackLevel != tokenAPI.stackLevel { callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+ "on API stack level %d, but the current stack level is %d "+ - "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel) + "(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel) } - i.stackLevel = stackLevel - 1 - i.stackFrame = &i.stackFrames[stackLevel-1] + tokenAPI.stackLevel = stackLevel - 1 + tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1] } // Reset moves the input cursor back to the beginning for the currently active API child. @@ -266,7 +270,7 @@ func (i Input) Cursor() string { // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. func (i Input) PeekByte(offset int) (byte, error) { - return i.api.reader.ByteAt(i.api.stackFrame.offset + offset) + return i.reader.ByteAt(i.api.stackFrame.offset + offset) } // SkipByte is used to skip over a single bytes that was read from the input. @@ -317,13 +321,13 @@ func (i Input) AcceptByte(b byte) { maxRequiredBytes := curBytesEnd + 1 // Grow the bytes capacity when needed. 
- if cap(i.api.bytes) < maxRequiredBytes { + if cap(i.api.Output.data) < maxRequiredBytes { newBytes := make([]byte, maxRequiredBytes*2) - copy(newBytes, i.api.bytes) - i.api.bytes = newBytes + copy(newBytes, i.api.Output.data) + i.api.Output.data = newBytes } - i.api.bytes[curBytesEnd] = b + i.api.Output.data[curBytesEnd] = b i.api.stackFrame.moveCursorByByte(b) i.api.stackFrame.bytesEnd++ i.api.stackFrame.offset++ @@ -345,13 +349,13 @@ func (i Input) AcceptBytes(bytes ...byte) { newBytesEnd := curBytesEnd + len(bytes) // Grow the bytes capacity when needed. - if cap(i.api.bytes) < newBytesEnd { + if cap(i.api.Output.data) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, i.api.bytes) - i.api.bytes = newBytes + copy(newBytes, i.api.Output.data) + i.api.Output.data = newBytes } - copy(i.api.bytes[curBytesEnd:], bytes) + copy(i.api.Output.data[curBytesEnd:], bytes) for _, b := range bytes { i.api.stackFrame.moveCursorByByte(b) i.api.stackFrame.offset++ @@ -373,7 +377,7 @@ func (i Input) AcceptBytes(bytes ...byte) { // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. func (i Input) PeekRune(offset int) (rune, int, error) { - return i.api.reader.RuneAt(i.api.stackFrame.offset + offset) + return i.reader.RuneAt(i.api.stackFrame.offset + offset) } // SkipRune is used to skip over a single rune that was read from the input. @@ -424,14 +428,14 @@ func (i Input) AcceptRune(r rune) { maxRequiredBytes := curBytesEnd + utf8.UTFMax // Grow the runes capacity when needed. 
- if cap(i.api.bytes) < maxRequiredBytes { + if cap(i.api.Output.data) < maxRequiredBytes { newBytes := make([]byte, maxRequiredBytes*2) - copy(newBytes, i.api.bytes) - i.api.bytes = newBytes + copy(newBytes, i.api.Output.data) + i.api.Output.data = newBytes } i.api.stackFrame.moveCursorByRune(r) - w := utf8.EncodeRune(i.api.bytes[curBytesEnd:], r) + w := utf8.EncodeRune(i.api.Output.data[curBytesEnd:], r) i.api.stackFrame.bytesEnd += w i.api.stackFrame.offset += w } @@ -454,16 +458,16 @@ func (i Input) AcceptRunes(runes ...rune) { newBytesEnd := curBytesEnd + byteLen // Grow the runes capacity when needed. - if cap(i.api.bytes) < newBytesEnd { + if cap(i.api.Output.data) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, i.api.bytes) - i.api.bytes = newBytes + copy(newBytes, i.api.Output.data) + i.api.Output.data = newBytes } for _, r := range runes { i.api.stackFrame.moveCursorByRune(r) } - copy(i.api.bytes[curBytesEnd:], runesAsString) + copy(i.api.Output.data[curBytesEnd:], runesAsString) i.api.stackFrame.bytesEnd = newBytesEnd i.api.stackFrame.offset += byteLen @@ -477,7 +481,7 @@ func (i Input) AcceptRunes(runes ...rune) { // method yourself. It is automatically called by parsekit when possible. 
func (i Input) Flush() bool { if i.api.stackFrame.offset > 0 { - i.api.reader.Flush(i.api.stackFrame.offset) + i.reader.Flush(i.api.stackFrame.offset) i.api.stackFrame.offset = 0 return true } @@ -485,17 +489,17 @@ } func (o Output) String() string { - bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd] + bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd] return string(bytes) } func (o Output) Runes() []rune { - bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd] + bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd] return []rune(string(bytes)) } func (o Output) Rune(offset int) rune { - r, _ := utf8.DecodeRune(o.api.bytes[o.api.stackFrame.bytesStart+offset:]) + r, _ := utf8.DecodeRune(o.data[o.api.stackFrame.bytesStart+offset:]) return r } @@ -511,13 +515,13 @@ func (o Output) SetBytes(bytes ...byte) { func (o Output) AddBytes(bytes ...byte) { // Grow the runes capacity when needed. newBytesEnd := o.api.stackFrame.bytesEnd + len(bytes) - if cap(o.api.bytes) < newBytesEnd { + if cap(o.api.Output.data) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, o.api.bytes) - o.api.bytes = newBytes + copy(newBytes, o.api.Output.data) + o.api.Output.data = newBytes } - copy(o.api.bytes[o.api.stackFrame.bytesEnd:], bytes) + copy(o.api.Output.data[o.api.stackFrame.bytesEnd:], bytes) o.api.stackFrame.bytesEnd = newBytesEnd } @@ -530,13 +534,13 @@ func (o Output) AddRunes(runes ...rune) { // Grow the runes capacity when needed. 
runesAsString := string(runes) newBytesEnd := o.api.stackFrame.bytesEnd + len(runesAsString) - if cap(o.api.bytes) < newBytesEnd { + if cap(o.api.Output.data) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, o.api.bytes) - o.api.bytes = newBytes + copy(newBytes, o.api.Output.data) + o.api.Output.data = newBytes } - copy(o.api.bytes[o.api.stackFrame.bytesEnd:], runesAsString) + copy(o.api.Output.data[o.api.stackFrame.bytesEnd:], runesAsString) o.api.stackFrame.bytesEnd = newBytesEnd } @@ -550,15 +554,15 @@ func (o Output) SetString(s string) { } func (o Output) Tokens() []Token { - return o.api.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd] + return o.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd] } func (o Output) Token(offset int) Token { - return o.api.tokens[o.api.stackFrame.tokenStart+offset] + return o.tokens[o.api.stackFrame.tokenStart+offset] } func (o Output) TokenValue(offset int) interface{} { - return o.api.tokens[o.api.stackFrame.tokenStart+offset].Value + return o.tokens[o.api.stackFrame.tokenStart+offset].Value } func (o Output) ClearTokens() { @@ -573,14 +577,14 @@ func (o Output) SetTokens(tokens ...Token) { func (o Output) AddTokens(tokens ...Token) { // Grow the tokens capacity when needed. newTokenEnd := o.api.stackFrame.tokenEnd + len(tokens) - if cap(o.api.tokens) < newTokenEnd { + if cap(o.api.Output.tokens) < newTokenEnd { newTokens := make([]Token, newTokenEnd*2) - copy(newTokens, o.api.tokens) - o.api.tokens = newTokens + copy(newTokens, o.api.Output.tokens) + o.api.Output.tokens = newTokens } for offset, t := range tokens { - o.api.tokens[o.api.stackFrame.tokenEnd+offset] = t + o.api.Output.tokens[o.api.stackFrame.tokenEnd+offset] = t } o.api.stackFrame.tokenEnd = newTokenEnd }