Moved Input and Output related fields from the API to their respective sub-structs.

This commit is contained in:
Maurice Makaay 2019-07-20 00:28:37 +00:00
parent 7d2d8dbed3
commit 93c75af87f
1 changed file with 78 additions and 74 deletions

View File

@ -72,9 +72,6 @@ import (
// can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type API struct {
reader *read.Buffer // the input data reader
bytes []byte // accepted bytes
tokens []Token // accepted tokens
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
@ -97,12 +94,15 @@ type stackFrame struct {
// Input provides input-related functionality for the tokenize API.
type Input struct {
api *API
api *API
reader *read.Buffer // the input data reader
}
// Output provides output-related functionality for the tokenize API.
type Output struct {
api *API
api *API
tokens []Token // accepted tokens
data []byte // accepted data
}
const initialStackDepth = 64
@ -114,13 +114,17 @@ const initialByteStoreLength = 1024
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
api := &API{
reader: read.New(input),
bytes: make([]byte, initialByteStoreLength),
tokens: make([]Token, initialTokenStoreLength),
stackFrames: make([]stackFrame, initialStackDepth),
}
api.Input = Input{api: api}
api.Output = Output{api: api}
api.Input = Input{
api: api,
reader: read.New(input),
}
api.Output = Output{
api: api,
data: make([]byte, initialByteStoreLength),
tokens: make([]Token, initialTokenStoreLength),
}
api.stackFrame = &api.stackFrames[0]
return api
@ -144,23 +148,23 @@ func NewAPI(input interface{}) *API {
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
newStackLevel := i.stackLevel + 1
func (tokenAPI *API) Fork() int {
newStackLevel := tokenAPI.stackLevel + 1
newStackSize := newStackLevel + 1
// Grow the stack frames capacity when needed.
if cap(i.stackFrames) < newStackSize {
if cap(tokenAPI.stackFrames) < newStackSize {
newFrames := make([]stackFrame, newStackSize*2)
copy(newFrames, i.stackFrames)
i.stackFrames = newFrames
copy(newFrames, tokenAPI.stackFrames)
tokenAPI.stackFrames = newFrames
}
i.stackLevel++
tokenAPI.stackLevel++
// This can be written in a shorter way, but this turned out to
// be the best way performance-wise.
parent := i.stackFrame
child := &i.stackFrames[i.stackLevel]
parent := tokenAPI.stackFrame
child := &tokenAPI.stackFrames[tokenAPI.stackLevel]
child.offset = parent.offset
child.column = parent.column
child.line = parent.line
@ -168,9 +172,9 @@ func (i *API) Fork() int {
child.bytesEnd = parent.bytesEnd
child.tokenStart = parent.tokenEnd
child.tokenEnd = parent.tokenEnd
i.stackFrame = child
tokenAPI.stackFrame = child
return i.stackLevel
return tokenAPI.stackLevel
}
// Merge appends the results of a forked child API (runes, tokens) to the
@ -184,18 +188,18 @@ func (i *API) Fork() int {
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (i *API) Merge(stackLevel int) {
func (tokenAPI *API) Merge(stackLevel int) {
if stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
if stackLevel != tokenAPI.stackLevel {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
parent := &i.stackFrames[stackLevel-1]
parent := &tokenAPI.stackFrames[stackLevel-1]
// The end of the parent slice aligns with the start of the child slice.
// Because of this, to merge the parent slice can simply be expanded
@ -205,33 +209,33 @@ func (i *API) Merge(stackLevel int) {
// After merge operation:
// parent: |-----------------|
// child: |---> continue reading from here
parent.bytesEnd = i.stackFrame.bytesEnd
i.stackFrame.bytesStart = i.stackFrame.bytesEnd
parent.bytesEnd = tokenAPI.stackFrame.bytesEnd
tokenAPI.stackFrame.bytesStart = tokenAPI.stackFrame.bytesEnd
// The same logic applies to tokens.
parent.tokenEnd = i.stackFrame.tokenEnd
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
parent.tokenEnd = tokenAPI.stackFrame.tokenEnd
tokenAPI.stackFrame.tokenStart = tokenAPI.stackFrame.tokenEnd
parent.offset = i.stackFrame.offset
parent.line = i.stackFrame.line
parent.column = i.stackFrame.column
parent.offset = tokenAPI.stackFrame.offset
parent.line = tokenAPI.stackFrame.line
parent.column = tokenAPI.stackFrame.column
i.stackFrame.err = nil
tokenAPI.stackFrame.err = nil
}
func (i *API) Dispose(stackLevel int) {
func (tokenAPI *API) Dispose(stackLevel int) {
if stackLevel == 0 {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
if stackLevel != tokenAPI.stackLevel {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
i.stackLevel = stackLevel - 1
i.stackFrame = &i.stackFrames[stackLevel-1]
tokenAPI.stackLevel = stackLevel - 1
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
}
// Reset moves the input cursor back to the beginning for the currently active API child.
@ -266,7 +270,7 @@ func (i Input) Cursor() string {
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i Input) PeekByte(offset int) (byte, error) {
return i.api.reader.ByteAt(i.api.stackFrame.offset + offset)
return i.reader.ByteAt(i.api.stackFrame.offset + offset)
}
// SkipByte is used to skip over a single bytes that was read from the input.
@ -317,13 +321,13 @@ func (i Input) AcceptByte(b byte) {
maxRequiredBytes := curBytesEnd + 1
// Grow the bytes capacity when needed.
if cap(i.api.bytes) < maxRequiredBytes {
if cap(i.api.Output.data) < maxRequiredBytes {
newBytes := make([]byte, maxRequiredBytes*2)
copy(newBytes, i.api.bytes)
i.api.bytes = newBytes
copy(newBytes, i.api.Output.data)
i.api.Output.data = newBytes
}
i.api.bytes[curBytesEnd] = b
i.api.Output.data[curBytesEnd] = b
i.api.stackFrame.moveCursorByByte(b)
i.api.stackFrame.bytesEnd++
i.api.stackFrame.offset++
@ -345,13 +349,13 @@ func (i Input) AcceptBytes(bytes ...byte) {
newBytesEnd := curBytesEnd + len(bytes)
// Grow the bytes capacity when needed.
if cap(i.api.bytes) < newBytesEnd {
if cap(i.api.Output.data) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.api.bytes)
i.api.bytes = newBytes
copy(newBytes, i.api.Output.data)
i.api.Output.data = newBytes
}
copy(i.api.bytes[curBytesEnd:], bytes)
copy(i.api.Output.data[curBytesEnd:], bytes)
for _, b := range bytes {
i.api.stackFrame.moveCursorByByte(b)
i.api.stackFrame.offset++
@ -373,7 +377,7 @@ func (i Input) AcceptBytes(bytes ...byte) {
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i Input) PeekRune(offset int) (rune, int, error) {
return i.api.reader.RuneAt(i.api.stackFrame.offset + offset)
return i.reader.RuneAt(i.api.stackFrame.offset + offset)
}
// SkipRune is used to skip over a single rune that was read from the input.
@ -424,14 +428,14 @@ func (i Input) AcceptRune(r rune) {
maxRequiredBytes := curBytesEnd + utf8.UTFMax
// Grow the runes capacity when needed.
if cap(i.api.bytes) < maxRequiredBytes {
if cap(i.api.Output.data) < maxRequiredBytes {
newBytes := make([]byte, maxRequiredBytes*2)
copy(newBytes, i.api.bytes)
i.api.bytes = newBytes
copy(newBytes, i.api.Output.data)
i.api.Output.data = newBytes
}
i.api.stackFrame.moveCursorByRune(r)
w := utf8.EncodeRune(i.api.bytes[curBytesEnd:], r)
w := utf8.EncodeRune(i.api.Output.data[curBytesEnd:], r)
i.api.stackFrame.bytesEnd += w
i.api.stackFrame.offset += w
}
@ -454,16 +458,16 @@ func (i Input) AcceptRunes(runes ...rune) {
newBytesEnd := curBytesEnd + byteLen
// Grow the runes capacity when needed.
if cap(i.api.bytes) < newBytesEnd {
if cap(i.api.Output.data) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.api.bytes)
i.api.bytes = newBytes
copy(newBytes, i.api.Output.data)
i.api.Output.data = newBytes
}
for _, r := range runes {
i.api.stackFrame.moveCursorByRune(r)
}
copy(i.api.bytes[curBytesEnd:], runesAsString)
copy(i.api.Output.data[curBytesEnd:], runesAsString)
i.api.stackFrame.bytesEnd = newBytesEnd
i.api.stackFrame.offset += byteLen
@ -477,7 +481,7 @@ func (i Input) AcceptRunes(runes ...rune) {
// method yourself. It is automatically called by parsekit when possible.
func (i Input) Flush() bool {
if i.api.stackFrame.offset > 0 {
i.api.reader.Flush(i.api.stackFrame.offset)
i.reader.Flush(i.api.stackFrame.offset)
i.api.stackFrame.offset = 0
return true
}
@ -485,17 +489,17 @@ func (i Input) Flush() bool {
}
func (o Output) String() string {
bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
return string(bytes)
}
func (o Output) Runes() []rune {
bytes := o.api.bytes[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
return []rune(string(bytes))
}
func (o Output) Rune(offset int) rune {
r, _ := utf8.DecodeRune(o.api.bytes[o.api.stackFrame.bytesStart+offset:])
r, _ := utf8.DecodeRune(o.data[o.api.stackFrame.bytesStart+offset:])
return r
}
@ -511,13 +515,13 @@ func (o Output) SetBytes(bytes ...byte) {
func (o Output) AddBytes(bytes ...byte) {
// Grow the runes capacity when needed.
newBytesEnd := o.api.stackFrame.bytesEnd + len(bytes)
if cap(o.api.bytes) < newBytesEnd {
if cap(o.data) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, o.api.bytes)
o.api.bytes = newBytes
copy(newBytes, o.data)
o.data = newBytes
}
copy(o.api.bytes[o.api.stackFrame.bytesEnd:], bytes)
copy(o.data[o.api.stackFrame.bytesEnd:], bytes)
o.api.stackFrame.bytesEnd = newBytesEnd
}
@ -530,13 +534,13 @@ func (o Output) AddRunes(runes ...rune) {
// Grow the runes capacity when needed.
runesAsString := string(runes)
newBytesEnd := o.api.stackFrame.bytesEnd + len(runesAsString)
if cap(o.api.bytes) < newBytesEnd {
if cap(o.data) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, o.api.bytes)
o.api.bytes = newBytes
copy(newBytes, o.data)
o.data = newBytes
}
copy(o.api.bytes[o.api.stackFrame.bytesEnd:], runesAsString)
copy(o.data[o.api.stackFrame.bytesEnd:], runesAsString)
o.api.stackFrame.bytesEnd = newBytesEnd
}
@ -550,15 +554,15 @@ func (o Output) SetString(s string) {
}
func (o Output) Tokens() []Token {
return o.api.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
return o.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
}
func (o Output) Token(offset int) Token {
return o.api.tokens[o.api.stackFrame.tokenStart+offset]
return o.tokens[o.api.stackFrame.tokenStart+offset]
}
func (o Output) TokenValue(offset int) interface{} {
return o.api.tokens[o.api.stackFrame.tokenStart+offset].Value
return o.tokens[o.api.stackFrame.tokenStart+offset].Value
}
func (o Output) ClearTokens() {
@ -573,14 +577,14 @@ func (o Output) SetTokens(tokens ...Token) {
func (o Output) AddTokens(tokens ...Token) {
// Grow the tokens capacity when needed.
newTokenEnd := o.api.stackFrame.tokenEnd + len(tokens)
if cap(o.api.tokens) < newTokenEnd {
if cap(o.tokens) < newTokenEnd {
newTokens := make([]Token, newTokenEnd*2)
copy(newTokens, o.api.tokens)
o.api.tokens = newTokens
copy(newTokens, o.tokens)
o.tokens = newTokens
}
for offset, t := range tokens {
o.api.tokens[o.api.stackFrame.tokenEnd+offset] = t
o.tokens[o.api.stackFrame.tokenEnd+offset] = t
}
o.api.stackFrame.tokenEnd = newTokenEnd
}