Work in progress on switching to a byte stack. Committing now so that performance can be checked against master.

This commit is contained in:
Maurice Makaay 2019-07-18 08:06:26 +00:00
parent e659380a5f
commit b9eeac3480
1 changed files with 47 additions and 46 deletions

View File

@ -2,6 +2,7 @@ package tokenize
import (
"fmt"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/read"
)
@ -75,7 +76,7 @@ type API struct {
lastRuneWidth int // the width in bytes of the last read rune
lastRuneErr error // the error for the last NextRune() call
runeRead bool // whether or not a rune was read using NextRune()
runes []rune // accepted runes
bytes []byte // accepted bytes
tokens []Token // accepted tokens
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
@ -86,8 +87,8 @@ type stackFrame struct {
offset int // current rune read offset relative to the Reader's sliding window
column int // The column at which the cursor is (0-indexed)
line int // The line at which the cursor is (0-indexed)
runeStart int // the starting point in the API.runes slice for runes produced by this stack level
runeEnd int // the end point in the API.runes slice for runes produced by this stack level
runeStart int // the starting point in the API.bytes slice for runes produced by this stack level
runeEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
@ -95,9 +96,9 @@ type stackFrame struct {
err error // can be used by a Handler to report a specific issue with the input
}
// Initial lengths of the slices that NewAPI allocates for a new API.
// NOTE(review): this is a diff hunk; a removed constant is shown directly
// above the constant that replaces it.

// initialStackDepth is the initial length of API.stackFrames.
const initialStackDepth = 64
const initialStackDepth = 32
// initialTokenStoreLength is the initial length of API.tokens.
const initialTokenStoreLength = 32
// initialRuneStoreLength was the initial length of the removed API.runes slice.
const initialRuneStoreLength = 128
// initialByteStoreLength is the initial length of API.bytes.
const initialByteStoreLength = 256
// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
@ -105,7 +106,7 @@ const initialRuneStoreLength = 128
func NewAPI(input interface{}) *API {
api := &API{
reader: read.New(input),
runes: make([]rune, initialRuneStoreLength),
bytes: make([]byte, initialByteStoreLength),
tokens: make([]Token, initialTokenStoreLength),
stackFrames: make([]stackFrame, initialStackDepth),
}
@ -179,21 +180,21 @@ func (i *API) skipBytes(bytes ...byte) {
}
// acceptBytes appends the provided bytes to the accepted output of the
// current stack frame. When the backing store's capacity is too small, it is
// replaced by a store of twice the required size. The frame's cursor is moved
// once per byte, the frame's end index and read offset are both advanced by
// len(bytes), and the runeRead flag is cleared.
//
// NOTE(review): this is a diff hunk; the removed []rune-based statements
// (curRuneEnd, newRuneEnd, i.runes) are shown next to the []byte-based
// statements that replace them.
func (i *API) acceptBytes(bytes ...byte) {
curRuneEnd := i.stackFrame.runeEnd
newRuneEnd := curRuneEnd + len(bytes)
curBytesEnd := i.stackFrame.runeEnd
newBytesEnd := curBytesEnd + len(bytes)
// Grow the runes capacity when needed.
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
// Grow the bytes capacity when needed.
if cap(i.bytes) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.bytes)
i.bytes = newBytes
}
// Store every byte and update the cursor position for it.
for offset, b := range bytes {
i.runes[curRuneEnd+offset] = rune(b)
i.bytes[curBytesEnd+offset] = b
i.stackFrame.moveCursorByByte(b)
}
i.stackFrame.runeEnd = newRuneEnd
i.stackFrame.runeEnd = newBytesEnd
i.stackFrame.offset += len(bytes)
i.runeRead = false
}
@ -207,21 +208,23 @@ func (i *API) skipRunes(width int, runes ...rune) {
}
// acceptRunes appends the provided runes, in their UTF-8 encoded form, to the
// accepted output of the current stack frame. When the backing store's
// capacity is too small, it is replaced by a store of twice the required
// size. The frame's cursor is moved once per rune, the frame's end index is
// advanced by the encoded byte length, the read offset is advanced by width,
// and the runeRead flag is cleared.
//
// NOTE(review): width is presumably the number of input bytes that the runes
// occupied in the reader — confirm against the callers.
// NOTE(review): this is a diff hunk; the removed []rune-based statements are
// shown next to the []byte-based statements that replace them.
func (i *API) acceptRunes(width int, runes ...rune) {
curRuneEnd := i.stackFrame.runeEnd
newRuneEnd := curRuneEnd + len(runes)
// The string conversion yields the UTF-8 encoding of the runes, so its
// length is the number of bytes that must be stored.
runesAsString := string(runes)
curBytesEnd := i.stackFrame.runeEnd
newBytesEnd := curBytesEnd + len(runesAsString)
// Grow the bytes capacity when needed.
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
if cap(i.bytes) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.bytes)
i.bytes = newBytes
}
for offset, r := range runes {
i.runes[curRuneEnd+offset] = r
for _, r := range runes {
i.stackFrame.moveCursorByRune(r)
}
i.stackFrame.runeEnd = newRuneEnd
// Write the encoded runes directly after the bytes accepted so far.
copy(i.bytes[curBytesEnd:], runesAsString)
i.stackFrame.runeEnd = newBytesEnd
i.stackFrame.offset += width
i.runeRead = false
}
@ -362,7 +365,6 @@ func (i *API) Reset() {
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
// result := &(i.state.stack[i.stackLevel])
if i.stackFrame.offset > 0 {
i.reader.Flush(i.stackFrame.offset)
i.stackFrame.offset = 0
@ -372,15 +374,16 @@ func (i *API) FlushInput() bool {
}
// String returns the output accepted by the current stack frame (the range
// runeStart up to runeEnd of the store) as a string.
//
// NOTE(review): diff hunk; the first return statement is the removed
// rune-based line, the second is its byte-based replacement.
func (i *API) String() string {
return string(i.Runes())
return string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd])
}
// Runes returns the output accepted by the current stack frame as a slice of
// runes.
//
// NOTE(review): diff hunk; the first return statement is the removed line,
// which returned a sub-slice of i.runes. The replacement converts the stored
// bytes via a string into a newly built []rune on every call.
func (i *API) Runes() []rune {
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
return []rune(string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd]))
}
// Rune returns the rune found at the given offset, counted from the start of
// the output accepted by the current stack frame.
//
// NOTE(review): diff hunk; the first return statement is the removed line,
// which used offset as an index into the rune slice. The replacement passes
// runeStart+offset as a byte position to utf8.DecodeRune, so offset now counts
// bytes rather than runes — confirm that callers expect this. The width
// returned by DecodeRune is discarded, and the slice handed to it is not
// limited to runeEnd.
func (i *API) Rune(offset int) rune {
return i.runes[i.stackFrame.runeStart+offset]
r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.runeStart+offset:])
return r
}
func (i *API) ClearRunes() {
@ -389,32 +392,30 @@ func (i *API) ClearRunes() {
// SetRunes replaces the output accepted by the current stack frame with the
// provided runes. The UTF-8 encoding of the runes is written into the store
// starting at the frame's runeStart, and runeEnd is set to the position
// directly after the written data. When the store's capacity is too small, it
// is replaced by a store of twice the required size.
//
// NOTE(review): this is a diff hunk; the removed []rune-based statements are
// shown next to the []byte-based statements that replace them.
func (i *API) SetRunes(runes ...rune) {
// Grow the bytes capacity when needed.
newRuneEnd := i.stackFrame.runeStart + len(runes)
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
runesAsString := string(runes)
newBytesEnd := i.stackFrame.runeStart + len(runesAsString)
if cap(i.bytes) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.bytes)
i.bytes = newBytes
}
for offset, r := range runes {
i.runes[i.stackFrame.runeStart+offset] = r
}
i.stackFrame.runeEnd = newRuneEnd
// Overwrite the frame's output from its start with the encoded runes.
copy(i.bytes[i.stackFrame.runeStart:], runesAsString)
i.stackFrame.runeEnd = newBytesEnd
}
// AddRunes appends the provided runes to the output accepted by the current
// stack frame. The UTF-8 encoding of the runes is written into the store
// starting at the frame's runeEnd, after which runeEnd is advanced by the
// encoded byte length. When the store's capacity is too small, it is replaced
// by a store of twice the required size. Unlike acceptRunes, the cursor and
// the read offset are left untouched.
//
// NOTE(review): this is a diff hunk; the removed []rune-based statements are
// shown next to the []byte-based statements that replace them.
func (i *API) AddRunes(runes ...rune) {
// Grow the bytes capacity when needed.
newRuneEnd := i.stackFrame.runeEnd + len(runes)
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
runesAsString := string(runes)
newBytesEnd := i.stackFrame.runeEnd + len(runesAsString)
if cap(i.bytes) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.bytes)
i.bytes = newBytes
}
for offset, r := range runes {
i.runes[i.stackFrame.runeEnd+offset] = r
}
i.stackFrame.runeEnd = newRuneEnd
// Write the encoded runes directly after the frame's current output.
copy(i.bytes[i.stackFrame.runeEnd:], runesAsString)
i.stackFrame.runeEnd = newBytesEnd
}
func (i *API) AddString(s string) {