Moved Input- and Output-related fields from the API struct to their respective sub-structs.
commit 93c75af87f
parent 7d2d8dbed3
tokenize/api.go | 152 changed lines
@@ -72,9 +72,6 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	reader      *read.Buffer // the input data reader
-	bytes       []byte       // accepted bytes
-	tokens      []Token      // accepted tokens
 	stackFrames []stackFrame // the stack frames, containing stack level-specific data
 	stackLevel  int          // the current stack level
 	stackFrame  *stackFrame  // the current stack frame
@@ -97,12 +94,15 @@ type stackFrame struct {
 
 // Input provides input-related functionality for the tokenize API.
 type Input struct {
-	api *API
+	api    *API
+	reader *read.Buffer // the input data reader
 }
 
 // Output provides output-related functionality for the tokenize API.
 type Output struct {
-	api *API
+	api    *API
+	tokens []Token // accepted tokens
+	data   []byte  // accepted data
 }
 
 const initialStackDepth = 64
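Taken together with the previous hunk, the split is: API keeps only the stack bookkeeping, Input owns the read buffer, and Output owns the accepted data and tokens. A rough sketch of the resulting layout; the public Input and Output fields on API are not visible in this diff's context lines, so their declarations here are an assumption inferred from the api.Input and api.Output assignments in NewAPI below:

	type API struct {
		Input       Input        // assumed public field, set up in NewAPI
		Output      Output       // assumed public field, set up in NewAPI
		stackFrames []stackFrame // the stack frames, containing stack level-specific data
		stackLevel  int          // the current stack level
		stackFrame  *stackFrame  // the current stack frame
	}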
@@ -114,13 +114,17 @@ const initialByteStoreLength = 1024
 // for parsekit.read.New().
 func NewAPI(input interface{}) *API {
 	api := &API{
-		reader:      read.New(input),
-		bytes:       make([]byte, initialByteStoreLength),
-		tokens:      make([]Token, initialTokenStoreLength),
 		stackFrames: make([]stackFrame, initialStackDepth),
 	}
-	api.Input = Input{api: api}
-	api.Output = Output{api: api}
+	api.Input = Input{
+		api:    api,
+		reader: read.New(input),
+	}
+	api.Output = Output{
+		api:    api,
+		data:   make([]byte, initialByteStoreLength),
+		tokens: make([]Token, initialTokenStoreLength),
+	}
 	api.stackFrame = &api.stackFrames[0]
 
 	return api
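For orientation, a small usage sketch under the new layout (the input string and variable names are hypothetical; NewAPI accepts anything that parsekit.read.New() accepts):

	api := tokenize.NewAPI("some input")
	b, err := api.Input.PeekByte(0) // reader access now goes through Input
	if err == nil {
		api.Input.AcceptByte(b) // accepted bytes now land in Output.data
	}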
@@ -144,23 +148,23 @@ func NewAPI(input interface{}) *API {
 // Garbage collection will take care of this automatically.
 // The parent API was never modified, so it can safely be used after disposal
 // as if the lookahead never happened.
-func (i *API) Fork() int {
-	newStackLevel := i.stackLevel + 1
+func (tokenAPI *API) Fork() int {
+	newStackLevel := tokenAPI.stackLevel + 1
 	newStackSize := newStackLevel + 1
 
 	// Grow the stack frames capacity when needed.
-	if cap(i.stackFrames) < newStackSize {
+	if cap(tokenAPI.stackFrames) < newStackSize {
 		newFrames := make([]stackFrame, newStackSize*2)
-		copy(newFrames, i.stackFrames)
-		i.stackFrames = newFrames
+		copy(newFrames, tokenAPI.stackFrames)
+		tokenAPI.stackFrames = newFrames
 	}
 
-	i.stackLevel++
+	tokenAPI.stackLevel++
 
 	// This can be written in a shorter way, but this turned out to
 	// be the best way performance-wise.
-	parent := i.stackFrame
-	child := &i.stackFrames[i.stackLevel]
+	parent := tokenAPI.stackFrame
+	child := &tokenAPI.stackFrames[tokenAPI.stackLevel]
 	child.offset = parent.offset
 	child.column = parent.column
 	child.line = parent.line
@@ -168,9 +172,9 @@ func (i *API) Fork() int {
 	child.bytesEnd = parent.bytesEnd
 	child.tokenStart = parent.tokenEnd
 	child.tokenEnd = parent.tokenEnd
-	i.stackFrame = child
+	tokenAPI.stackFrame = child
 
-	return i.stackLevel
+	return tokenAPI.stackLevel
 }
 
 // Merge appends the results of a forked child API (runes, tokens) to the
@@ -184,18 +188,18 @@ func (i *API) Fork() int {
 //
 // Once the child is no longer needed, it can be disposed of by using the
 // method Dispose(), which will return the tokenizer to the parent.
-func (i *API) Merge(stackLevel int) {
+func (tokenAPI *API) Merge(stackLevel int) {
 	if stackLevel == 0 {
 		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"on the top-level API stack level 0")
 	}
-	if stackLevel != i.stackLevel {
+	if stackLevel != tokenAPI.stackLevel {
 		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"on API stack level %d, but the current stack level is %d "+
-			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
+			"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
 	}
 
-	parent := &i.stackFrames[stackLevel-1]
+	parent := &tokenAPI.stackFrames[stackLevel-1]
 
 	// The end of the parent slice aligns with the start of the child slice.
 	// Because of this, to merge the parent slice can simply be expanded
@@ -205,33 +209,33 @@ func (i *API) Merge(stackLevel int) {
 	// After merge operation:
 	// parent: |-----------------|
 	// child:                    |---> continue reading from here
-	parent.bytesEnd = i.stackFrame.bytesEnd
-	i.stackFrame.bytesStart = i.stackFrame.bytesEnd
+	parent.bytesEnd = tokenAPI.stackFrame.bytesEnd
+	tokenAPI.stackFrame.bytesStart = tokenAPI.stackFrame.bytesEnd
 
 	// The same logic applies to tokens.
-	parent.tokenEnd = i.stackFrame.tokenEnd
-	i.stackFrame.tokenStart = i.stackFrame.tokenEnd
+	parent.tokenEnd = tokenAPI.stackFrame.tokenEnd
+	tokenAPI.stackFrame.tokenStart = tokenAPI.stackFrame.tokenEnd
 
-	parent.offset = i.stackFrame.offset
-	parent.line = i.stackFrame.line
-	parent.column = i.stackFrame.column
+	parent.offset = tokenAPI.stackFrame.offset
+	parent.line = tokenAPI.stackFrame.line
+	parent.column = tokenAPI.stackFrame.column
 
-	i.stackFrame.err = nil
+	tokenAPI.stackFrame.err = nil
 }
 
-func (i *API) Dispose(stackLevel int) {
+func (tokenAPI *API) Dispose(stackLevel int) {
 	if stackLevel == 0 {
 		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"on the top-level API stack level 0")
 	}
-	if stackLevel != i.stackLevel {
+	if stackLevel != tokenAPI.stackLevel {
 		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"on API stack level %d, but the current stack level is %d "+
-			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
+			"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
 	}
 
-	i.stackLevel = stackLevel - 1
-	i.stackFrame = &i.stackFrames[stackLevel-1]
+	tokenAPI.stackLevel = stackLevel - 1
+	tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
 }
 
 // Reset moves the input cursor back to the beginning for the currently active API child.
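Fork, Merge and Dispose together implement the lookahead pattern that the doc comments describe. A hedged sketch of a typical call site (tryParse is a hypothetical handler):

	level := tokenAPI.Fork() // run a lookahead on a fresh stack level
	if tryParse(tokenAPI) {
		tokenAPI.Merge(level) // keep the child's bytes, tokens and cursor
	}
	tokenAPI.Dispose(level) // always drop back to the parent level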
@@ -266,7 +270,7 @@ func (i Input) Cursor() string {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (i Input) PeekByte(offset int) (byte, error) {
-	return i.api.reader.ByteAt(i.api.stackFrame.offset + offset)
+	return i.reader.ByteAt(i.api.stackFrame.offset + offset)
 }
 
 // SkipByte is used to skip over a single byte that was read from the input.
@@ -317,13 +321,13 @@ func (i Input) AcceptByte(b byte) {
 	maxRequiredBytes := curBytesEnd + 1
 
 	// Grow the bytes capacity when needed.
-	if cap(i.api.bytes) < maxRequiredBytes {
+	if cap(i.api.Output.data) < maxRequiredBytes {
 		newBytes := make([]byte, maxRequiredBytes*2)
-		copy(newBytes, i.api.bytes)
-		i.api.bytes = newBytes
+		copy(newBytes, i.api.Output.data)
+		i.api.Output.data = newBytes
 	}
 
-	i.api.bytes[curBytesEnd] = b
+	i.api.Output.data[curBytesEnd] = b
 	i.api.stackFrame.moveCursorByByte(b)
 	i.api.stackFrame.bytesEnd++
 	i.api.stackFrame.offset++
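The growth code in AcceptByte (and in the Accept*/Add* methods below) allocates double the required size rather than growing exactly, which amortizes reallocations over many small accepts. The same strategy as a standalone sketch (grow is a hypothetical helper, not part of this package):

	func grow(buf []byte, need int) []byte {
		if cap(buf) >= need {
			return buf
		}
		newBuf := make([]byte, need*2) // double past the requirement
		copy(newBuf, buf)
		return newBuf
	}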
@@ -345,13 +349,13 @@ func (i Input) AcceptBytes(bytes ...byte) {
 	newBytesEnd := curBytesEnd + len(bytes)
 
 	// Grow the bytes capacity when needed.
-	if cap(i.api.bytes) < newBytesEnd {
+	if cap(i.api.Output.data) < newBytesEnd {
 		newBytes := make([]byte, newBytesEnd*2)
-		copy(newBytes, i.api.bytes)
-		i.api.bytes = newBytes
+		copy(newBytes, i.api.Output.data)
+		i.api.Output.data = newBytes
 	}
 
-	copy(i.api.bytes[curBytesEnd:], bytes)
+	copy(i.api.Output.data[curBytesEnd:], bytes)
 	for _, b := range bytes {
 		i.api.stackFrame.moveCursorByByte(b)
 		i.api.stackFrame.offset++
@@ -373,7 +377,7 @@ func (i Input) AcceptBytes(bytes ...byte) {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (i Input) PeekRune(offset int) (rune, int, error) {
-	return i.api.reader.RuneAt(i.api.stackFrame.offset + offset)
+	return i.reader.RuneAt(i.api.stackFrame.offset + offset)
 }
 
 // SkipRune is used to skip over a single rune that was read from the input.
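A hedged sketch of the peek-then-accept flow for runes, based only on the signatures visible in this diff (the call site is hypothetical):

	r, width, err := api.Input.PeekRune(0)
	if err == nil {
		api.Input.AcceptRune(r) // advances the offset by the rune's UTF-8 width
	}
	_ = width // the rune's byte width, 1 to utf8.UTFMax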
@@ -424,14 +428,14 @@ func (i Input) AcceptRune(r rune) {
 	maxRequiredBytes := curBytesEnd + utf8.UTFMax
 
 	// Grow the runes capacity when needed.
-	if cap(i.api.bytes) < maxRequiredBytes {
+	if cap(i.api.Output.data) < maxRequiredBytes {
 		newBytes := make([]byte, maxRequiredBytes*2)
-		copy(newBytes, i.api.bytes)
-		i.api.bytes = newBytes
+		copy(newBytes, i.api.Output.data)
+		i.api.Output.data = newBytes
 	}
 
 	i.api.stackFrame.moveCursorByRune(r)
-	w := utf8.EncodeRune(i.api.bytes[curBytesEnd:], r)
+	w := utf8.EncodeRune(i.api.Output.data[curBytesEnd:], r)
 	i.api.stackFrame.bytesEnd += w
 	i.api.stackFrame.offset += w
 }
@@ -454,16 +458,16 @@ func (i Input) AcceptRunes(runes ...rune) {
 	newBytesEnd := curBytesEnd + byteLen
 
 	// Grow the runes capacity when needed.
-	if cap(i.api.bytes) < newBytesEnd {
+	if cap(i.api.Output.data) < newBytesEnd {
 		newBytes := make([]byte, newBytesEnd*2)
-		copy(newBytes, i.api.bytes)
-		i.api.bytes = newBytes
+		copy(newBytes, i.api.Output.data)
+		i.api.Output.data = newBytes
 	}
 
 	for _, r := range runes {
 		i.api.stackFrame.moveCursorByRune(r)
 	}
-	copy(i.api.bytes[curBytesEnd:], runesAsString)
+	copy(i.api.Output.data[curBytesEnd:], runesAsString)
 
 	i.api.stackFrame.bytesEnd = newBytesEnd
 	i.api.stackFrame.offset += byteLen
@@ -477,7 +481,7 @@ func (i Input) AcceptRunes(runes ...rune) {
 // method yourself. It is automatically called by parsekit when possible.
 func (i Input) Flush() bool {
 	if i.api.stackFrame.offset > 0 {
-		i.api.reader.Flush(i.api.stackFrame.offset)
+		i.reader.Flush(i.api.stackFrame.offset)
 		i.api.stackFrame.offset = 0
 		return true
 	}
@@ -485,17 +489,17 @@ func (i Input) Flush() bool {
 }
 
 func (o Output) String() string {
-	bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
+	bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
 	return string(bytes)
 }
 
 func (o Output) Runes() []rune {
-	bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
+	bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
 	return []rune(string(bytes))
 }
 
 func (o Output) Rune(offset int) rune {
-	r, _ := utf8.DecodeRune(o.api.bytes[o.api.stackFrame.bytesStart+offset:])
+	r, _ := utf8.DecodeRune(o.data[o.api.stackFrame.bytesStart+offset:])
 	return r
 }
 
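The read-back accessors now slice Output's own data field instead of a field on API. A hypothetical round trip:

	api.Input.AcceptRunes('h', 'i') // assumes these runes were just peeked
	s := api.Output.String()        // "hi"
	rs := api.Output.Runes()        // []rune{'h', 'i'}
	_, _ = s, rs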
@@ -511,13 +515,13 @@ func (o Output) SetBytes(bytes ...byte) {
 func (o Output) AddBytes(bytes ...byte) {
 	// Grow the bytes capacity when needed.
 	newBytesEnd := o.api.stackFrame.bytesEnd + len(bytes)
-	if cap(o.api.bytes) < newBytesEnd {
+	if cap(o.data) < newBytesEnd {
 		newBytes := make([]byte, newBytesEnd*2)
-		copy(newBytes, o.api.bytes)
-		o.api.bytes = newBytes
+		copy(newBytes, o.data)
+		o.data = newBytes
 	}
 
-	copy(o.api.bytes[o.api.stackFrame.bytesEnd:], bytes)
+	copy(o.data[o.api.stackFrame.bytesEnd:], bytes)
 	o.api.stackFrame.bytesEnd = newBytesEnd
 }
 
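One caveat worth flagging: AddBytes (like AddRunes and AddTokens below) keeps its value receiver, so the o.data = newBytes assignment on growth appears to update only the method-local copy of Output, and bytes copied after a reallocation would not be visible through api.Output. A hedged sketch of one way to make the growth stick while keeping the value receiver; this is a suggestion, not what the commit does:

	if cap(o.data) < newBytesEnd {
		newBytes := make([]byte, newBytesEnd*2)
		copy(newBytes, o.data)
		o.data = newBytes            // local copy only, lost after return
		o.api.Output.data = newBytes // write back through the API pointer
	}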
@@ -530,13 +534,13 @@ func (o Output) AddRunes(runes ...rune) {
 	// Grow the runes capacity when needed.
 	runesAsString := string(runes)
 	newBytesEnd := o.api.stackFrame.bytesEnd + len(runesAsString)
-	if cap(o.api.bytes) < newBytesEnd {
+	if cap(o.data) < newBytesEnd {
 		newBytes := make([]byte, newBytesEnd*2)
-		copy(newBytes, o.api.bytes)
-		o.api.bytes = newBytes
+		copy(newBytes, o.data)
+		o.data = newBytes
 	}
 
-	copy(o.api.bytes[o.api.stackFrame.bytesEnd:], runesAsString)
+	copy(o.data[o.api.stackFrame.bytesEnd:], runesAsString)
 	o.api.stackFrame.bytesEnd = newBytesEnd
 }
 
@@ -550,15 +554,15 @@ func (o Output) SetString(s string) {
 }
 
 func (o Output) Tokens() []Token {
-	return o.api.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
+	return o.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
 }
 
 func (o Output) Token(offset int) Token {
-	return o.api.tokens[o.api.stackFrame.tokenStart+offset]
+	return o.tokens[o.api.stackFrame.tokenStart+offset]
 }
 
 func (o Output) TokenValue(offset int) interface{} {
-	return o.api.tokens[o.api.stackFrame.tokenStart+offset].Value
+	return o.tokens[o.api.stackFrame.tokenStart+offset].Value
 }
 
 func (o Output) ClearTokens() {
@@ -573,14 +577,14 @@ func (o Output) SetTokens(tokens ...Token) {
 func (o Output) AddTokens(tokens ...Token) {
 	// Grow the tokens capacity when needed.
 	newTokenEnd := o.api.stackFrame.tokenEnd + len(tokens)
-	if cap(o.api.tokens) < newTokenEnd {
+	if cap(o.tokens) < newTokenEnd {
 		newTokens := make([]Token, newTokenEnd*2)
-		copy(newTokens, o.api.tokens)
-		o.api.tokens = newTokens
+		copy(newTokens, o.tokens)
+		o.tokens = newTokens
 	}
 
 	for offset, t := range tokens {
-		o.api.tokens[o.api.stackFrame.tokenEnd+offset] = t
+		o.tokens[o.api.stackFrame.tokenEnd+offset] = t
 	}
 	o.api.stackFrame.tokenEnd = newTokenEnd
 }
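Finally, a hedged sketch of the token side of the relocated storage (the Token literal's field set is assumed; only the Value field is visible in this diff):

	api.Output.AddTokens(Token{Value: 42})
	v := api.Output.TokenValue(0) // 42, read from Output.tokens
	_ = v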