Work in progress on switching to byte stack. Committing to do some performance checks against master.

This commit is contained in:
Maurice Makaay 2019-07-18 08:06:26 +00:00
parent e659380a5f
commit b9eeac3480
1 changed file with 47 additions and 46 deletions

View File

@ -2,6 +2,7 @@ package tokenize
import ( import (
"fmt" "fmt"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/read" "git.makaay.nl/mauricem/go-parsekit/read"
) )
@ -75,7 +76,7 @@ type API struct {
lastRuneWidth int // the width in bytes of the last read rune lastRuneWidth int // the width in bytes of the last read rune
lastRuneErr error // the error for the last NextRune() call lastRuneErr error // the error for the last NextRune() call
runeRead bool // whether or not a rune was read using NextRune() runeRead bool // whether or not a rune was read using NextRune()
runes []rune // accepted runes bytes []byte // accepted bytes
tokens []Token // accepted tokens tokens []Token // accepted tokens
stackFrames []stackFrame // the stack frames, containing stack level-specific data stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level stackLevel int // the current stack level
@ -86,8 +87,8 @@ type stackFrame struct {
offset int // current rune read offset relative to the Reader's sliding window offset int // current rune read offset relative to the Reader's sliding window
column int // The column at which the cursor is (0-indexed) column int // The column at which the cursor is (0-indexed)
line int // The line at which the cursor is (0-indexed) line int // The line at which the cursor is (0-indexed)
runeStart int // the starting point in the API.runes slice for runes produced by this stack level runeStart int // the starting point in the API.bytes slice for runes produced by this stack level
runeEnd int // the end point in the API.runes slice for runes produced by this stack level runeEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
@ -95,9 +96,9 @@ type stackFrame struct {
err error // can be used by a Handler to report a specific issue with the input err error // can be used by a Handler to report a specific issue with the input
} }
const initialStackDepth = 64 const initialStackDepth = 32
const initialTokenStoreLength = 32 const initialTokenStoreLength = 32
const initialRuneStoreLength = 128 const initialByteStoreLength = 256
// NewAPI initializes a new API struct, wrapped around the provided input. // NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation // For an overview of allowed inputs, take a look at the documentation
@ -105,7 +106,7 @@ const initialRuneStoreLength = 128
func NewAPI(input interface{}) *API { func NewAPI(input interface{}) *API {
api := &API{ api := &API{
reader: read.New(input), reader: read.New(input),
runes: make([]rune, initialRuneStoreLength), bytes: make([]byte, initialByteStoreLength),
tokens: make([]Token, initialTokenStoreLength), tokens: make([]Token, initialTokenStoreLength),
stackFrames: make([]stackFrame, initialStackDepth), stackFrames: make([]stackFrame, initialStackDepth),
} }
@ -179,21 +180,21 @@ func (i *API) skipBytes(bytes ...byte) {
} }
func (i *API) acceptBytes(bytes ...byte) { func (i *API) acceptBytes(bytes ...byte) {
curRuneEnd := i.stackFrame.runeEnd curBytesEnd := i.stackFrame.runeEnd
newRuneEnd := curRuneEnd + len(bytes) newBytesEnd := curBytesEnd + len(bytes)
// Grow the runes capacity when needed. // Grow the bytes capacity when needed.
if cap(i.runes) < newRuneEnd { if cap(i.bytes) < newBytesEnd {
newRunes := make([]rune, newRuneEnd*2) newBytes := make([]byte, newBytesEnd*2)
copy(newRunes, i.runes) copy(newBytes, i.bytes)
i.runes = newRunes i.bytes = newBytes
} }
for offset, b := range bytes { for offset, b := range bytes {
i.runes[curRuneEnd+offset] = rune(b) i.bytes[curBytesEnd+offset] = b
i.stackFrame.moveCursorByByte(b) i.stackFrame.moveCursorByByte(b)
} }
i.stackFrame.runeEnd = newRuneEnd i.stackFrame.runeEnd = newBytesEnd
i.stackFrame.offset += len(bytes) i.stackFrame.offset += len(bytes)
i.runeRead = false i.runeRead = false
} }
@ -207,21 +208,23 @@ func (i *API) skipRunes(width int, runes ...rune) {
} }
func (i *API) acceptRunes(width int, runes ...rune) { func (i *API) acceptRunes(width int, runes ...rune) {
curRuneEnd := i.stackFrame.runeEnd runesAsString := string(runes)
newRuneEnd := curRuneEnd + len(runes) curBytesEnd := i.stackFrame.runeEnd
newBytesEnd := curBytesEnd + len(runesAsString)
// Grow the runes capacity when needed. // Grow the runes capacity when needed.
if cap(i.runes) < newRuneEnd { if cap(i.bytes) < newBytesEnd {
newRunes := make([]rune, newRuneEnd*2) newBytes := make([]byte, newBytesEnd*2)
copy(newRunes, i.runes) copy(newBytes, i.bytes)
i.runes = newRunes i.bytes = newBytes
} }
for offset, r := range runes { for _, r := range runes {
i.runes[curRuneEnd+offset] = r
i.stackFrame.moveCursorByRune(r) i.stackFrame.moveCursorByRune(r)
} }
i.stackFrame.runeEnd = newRuneEnd copy(i.bytes[curBytesEnd:], runesAsString)
i.stackFrame.runeEnd = newBytesEnd
i.stackFrame.offset += width i.stackFrame.offset += width
i.runeRead = false i.runeRead = false
} }
@ -362,7 +365,6 @@ func (i *API) Reset() {
// When writing your own TokenHandler, you normally won't have to call this // When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed. // method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool { func (i *API) FlushInput() bool {
// result := &(i.state.stack[i.stackLevel])
if i.stackFrame.offset > 0 { if i.stackFrame.offset > 0 {
i.reader.Flush(i.stackFrame.offset) i.reader.Flush(i.stackFrame.offset)
i.stackFrame.offset = 0 i.stackFrame.offset = 0
@ -372,15 +374,16 @@ func (i *API) FlushInput() bool {
} }
func (i *API) String() string { func (i *API) String() string {
return string(i.Runes()) return string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd])
} }
func (i *API) Runes() []rune { func (i *API) Runes() []rune {
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd] return []rune(string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd]))
} }
func (i *API) Rune(offset int) rune { func (i *API) Rune(offset int) rune {
return i.runes[i.stackFrame.runeStart+offset] r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.runeStart+offset:])
return r
} }
func (i *API) ClearRunes() { func (i *API) ClearRunes() {
@ -389,32 +392,30 @@ func (i *API) ClearRunes() {
func (i *API) SetRunes(runes ...rune) { func (i *API) SetRunes(runes ...rune) {
// Grow the runes capacity when needed. // Grow the runes capacity when needed.
newRuneEnd := i.stackFrame.runeStart + len(runes) runesAsString := string(runes)
if cap(i.runes) < newRuneEnd { newBytesEnd := i.stackFrame.runeStart + len(runesAsString)
newRunes := make([]rune, newRuneEnd*2) if cap(i.bytes) < newBytesEnd {
copy(newRunes, i.runes) newBytes := make([]byte, newBytesEnd*2)
i.runes = newRunes copy(newBytes, i.bytes)
i.bytes = newBytes
} }
for offset, r := range runes { copy(i.bytes[i.stackFrame.runeStart:], runesAsString)
i.runes[i.stackFrame.runeStart+offset] = r i.stackFrame.runeEnd = newBytesEnd
}
i.stackFrame.runeEnd = newRuneEnd
} }
func (i *API) AddRunes(runes ...rune) { func (i *API) AddRunes(runes ...rune) {
// Grow the runes capacity when needed. // Grow the runes capacity when needed.
newRuneEnd := i.stackFrame.runeEnd + len(runes) runesAsString := string(runes)
if cap(i.runes) < newRuneEnd { newBytesEnd := i.stackFrame.runeEnd + len(runesAsString)
newRunes := make([]rune, newRuneEnd*2) if cap(i.bytes) < newBytesEnd {
copy(newRunes, i.runes) newBytes := make([]byte, newBytesEnd*2)
i.runes = newRunes copy(newBytes, i.bytes)
i.bytes = newBytes
} }
for offset, r := range runes { copy(i.bytes[i.stackFrame.runeEnd:], runesAsString)
i.runes[i.stackFrame.runeEnd+offset] = r i.stackFrame.runeEnd = newBytesEnd
}
i.stackFrame.runeEnd = newRuneEnd
} }
func (i *API) AddString(s string) { func (i *API) AddString(s string) {