Work in progress on switching to byte stack. Committing to do some performance checks against master.
This commit is contained in:
parent
e659380a5f
commit
b9eeac3480
|
@ -2,6 +2,7 @@ package tokenize
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||
)
|
||||
|
@ -75,7 +76,7 @@ type API struct {
|
|||
lastRuneWidth int // the width in bytes of the last read rune
|
||||
lastRuneErr error // the error for the last NextRune() call
|
||||
runeRead bool // whether or not a rune was read using NextRune()
|
||||
runes []rune // accepted runes
|
||||
bytes []byte // accepted bytes
|
||||
tokens []Token // accepted tokens
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
|
@ -86,8 +87,8 @@ type stackFrame struct {
|
|||
offset int // current rune read offset relative to the Reader's sliding window
|
||||
column int // The column at which the cursor is (0-indexed)
|
||||
line int // The line at which the cursor is (0-indexed)
|
||||
runeStart int // the starting point in the API.runes slice for runes produced by this stack level
|
||||
runeEnd int // the end point in the API.runes slice for runes produced by this stack level
|
||||
runeStart int // the starting point in the API.bytes slice for runes produced by this stack level
|
||||
runeEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||
|
||||
|
@ -95,9 +96,9 @@ type stackFrame struct {
|
|||
err error // can be used by a Handler to report a specific issue with the input
|
||||
}
|
||||
|
||||
const initialStackDepth = 64
|
||||
const initialStackDepth = 32
|
||||
const initialTokenStoreLength = 32
|
||||
const initialRuneStoreLength = 128
|
||||
const initialByteStoreLength = 256
|
||||
|
||||
// NewAPI initializes a new API struct, wrapped around the provided input.
|
||||
// For an overview of allowed inputs, take a look at the documentation
|
||||
|
@ -105,7 +106,7 @@ const initialRuneStoreLength = 128
|
|||
func NewAPI(input interface{}) *API {
|
||||
api := &API{
|
||||
reader: read.New(input),
|
||||
runes: make([]rune, initialRuneStoreLength),
|
||||
bytes: make([]byte, initialByteStoreLength),
|
||||
tokens: make([]Token, initialTokenStoreLength),
|
||||
stackFrames: make([]stackFrame, initialStackDepth),
|
||||
}
|
||||
|
@ -179,21 +180,21 @@ func (i *API) skipBytes(bytes ...byte) {
|
|||
}
|
||||
|
||||
func (i *API) acceptBytes(bytes ...byte) {
|
||||
curRuneEnd := i.stackFrame.runeEnd
|
||||
newRuneEnd := curRuneEnd + len(bytes)
|
||||
curBytesEnd := i.stackFrame.runeEnd
|
||||
newBytesEnd := curBytesEnd + len(bytes)
|
||||
|
||||
// Grow the runes capacity when needed.
|
||||
if cap(i.runes) < newRuneEnd {
|
||||
newRunes := make([]rune, newRuneEnd*2)
|
||||
copy(newRunes, i.runes)
|
||||
i.runes = newRunes
|
||||
// Grow the bytes capacity when needed.
|
||||
if cap(i.bytes) < newBytesEnd {
|
||||
newBytes := make([]byte, newBytesEnd*2)
|
||||
copy(newBytes, i.bytes)
|
||||
i.bytes = newBytes
|
||||
}
|
||||
|
||||
for offset, b := range bytes {
|
||||
i.runes[curRuneEnd+offset] = rune(b)
|
||||
i.bytes[curBytesEnd+offset] = b
|
||||
i.stackFrame.moveCursorByByte(b)
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
i.stackFrame.runeEnd = newBytesEnd
|
||||
i.stackFrame.offset += len(bytes)
|
||||
i.runeRead = false
|
||||
}
|
||||
|
@ -207,21 +208,23 @@ func (i *API) skipRunes(width int, runes ...rune) {
|
|||
}
|
||||
|
||||
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||
curRuneEnd := i.stackFrame.runeEnd
|
||||
newRuneEnd := curRuneEnd + len(runes)
|
||||
runesAsString := string(runes)
|
||||
curBytesEnd := i.stackFrame.runeEnd
|
||||
newBytesEnd := curBytesEnd + len(runesAsString)
|
||||
|
||||
// Grow the runes capacity when needed.
|
||||
if cap(i.runes) < newRuneEnd {
|
||||
newRunes := make([]rune, newRuneEnd*2)
|
||||
copy(newRunes, i.runes)
|
||||
i.runes = newRunes
|
||||
if cap(i.bytes) < newBytesEnd {
|
||||
newBytes := make([]byte, newBytesEnd*2)
|
||||
copy(newBytes, i.bytes)
|
||||
i.bytes = newBytes
|
||||
}
|
||||
|
||||
for offset, r := range runes {
|
||||
i.runes[curRuneEnd+offset] = r
|
||||
for _, r := range runes {
|
||||
i.stackFrame.moveCursorByRune(r)
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
copy(i.bytes[curBytesEnd:], runesAsString)
|
||||
|
||||
i.stackFrame.runeEnd = newBytesEnd
|
||||
i.stackFrame.offset += width
|
||||
i.runeRead = false
|
||||
}
|
||||
|
@ -362,7 +365,6 @@ func (i *API) Reset() {
|
|||
// When writing your own TokenHandler, you normally won't have to call this
|
||||
// method yourself. It is automatically called by parsekit when needed.
|
||||
func (i *API) FlushInput() bool {
|
||||
// result := &(i.state.stack[i.stackLevel])
|
||||
if i.stackFrame.offset > 0 {
|
||||
i.reader.Flush(i.stackFrame.offset)
|
||||
i.stackFrame.offset = 0
|
||||
|
@ -372,15 +374,16 @@ func (i *API) FlushInput() bool {
|
|||
}
|
||||
|
||||
func (i *API) String() string {
|
||||
return string(i.Runes())
|
||||
return string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd])
|
||||
}
|
||||
|
||||
func (i *API) Runes() []rune {
|
||||
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
|
||||
return []rune(string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd]))
|
||||
}
|
||||
|
||||
func (i *API) Rune(offset int) rune {
|
||||
return i.runes[i.stackFrame.runeStart+offset]
|
||||
r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.runeStart+offset:])
|
||||
return r
|
||||
}
|
||||
|
||||
func (i *API) ClearRunes() {
|
||||
|
@ -389,32 +392,30 @@ func (i *API) ClearRunes() {
|
|||
|
||||
func (i *API) SetRunes(runes ...rune) {
|
||||
// Grow the bytes capacity when needed.
|
||||
newRuneEnd := i.stackFrame.runeStart + len(runes)
|
||||
if cap(i.runes) < newRuneEnd {
|
||||
newRunes := make([]rune, newRuneEnd*2)
|
||||
copy(newRunes, i.runes)
|
||||
i.runes = newRunes
|
||||
runesAsString := string(runes)
|
||||
newBytesEnd := i.stackFrame.runeStart + len(runesAsString)
|
||||
if cap(i.bytes) < newBytesEnd {
|
||||
newBytes := make([]byte, newBytesEnd*2)
|
||||
copy(newBytes, i.bytes)
|
||||
i.bytes = newBytes
|
||||
}
|
||||
|
||||
for offset, r := range runes {
|
||||
i.runes[i.stackFrame.runeStart+offset] = r
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
copy(i.bytes[i.stackFrame.runeStart:], runesAsString)
|
||||
i.stackFrame.runeEnd = newBytesEnd
|
||||
}
|
||||
|
||||
func (i *API) AddRunes(runes ...rune) {
|
||||
// Grow the bytes capacity when needed.
|
||||
newRuneEnd := i.stackFrame.runeEnd + len(runes)
|
||||
if cap(i.runes) < newRuneEnd {
|
||||
newRunes := make([]rune, newRuneEnd*2)
|
||||
copy(newRunes, i.runes)
|
||||
i.runes = newRunes
|
||||
runesAsString := string(runes)
|
||||
newBytesEnd := i.stackFrame.runeEnd + len(runesAsString)
|
||||
if cap(i.bytes) < newBytesEnd {
|
||||
newBytes := make([]byte, newBytesEnd*2)
|
||||
copy(newBytes, i.bytes)
|
||||
i.bytes = newBytes
|
||||
}
|
||||
|
||||
for offset, r := range runes {
|
||||
i.runes[i.stackFrame.runeEnd+offset] = r
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
copy(i.bytes[i.stackFrame.runeEnd:], runesAsString)
|
||||
i.stackFrame.runeEnd = newBytesEnd
|
||||
}
|
||||
|
||||
func (i *API) AddString(s string) {
|
||||
|
|
Loading…
Reference in New Issue