Work in progress on switching to byte stack. Committing to do some performance checks against master.
This commit is contained in:
parent
e659380a5f
commit
b9eeac3480
|
@ -2,6 +2,7 @@ package tokenize
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||||
)
|
)
|
||||||
|
@ -75,7 +76,7 @@ type API struct {
|
||||||
lastRuneWidth int // the width in bytes of the last read rune
|
lastRuneWidth int // the width in bytes of the last read rune
|
||||||
lastRuneErr error // the error for the last NextRune() call
|
lastRuneErr error // the error for the last NextRune() call
|
||||||
runeRead bool // whether or not a rune was read using NextRune()
|
runeRead bool // whether or not a rune was read using NextRune()
|
||||||
runes []rune // accepted runes
|
bytes []byte // accepted bytes
|
||||||
tokens []Token // accepted tokens
|
tokens []Token // accepted tokens
|
||||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||||
stackLevel int // the current stack level
|
stackLevel int // the current stack level
|
||||||
|
@ -86,8 +87,8 @@ type stackFrame struct {
|
||||||
offset int // current rune read offset relative to the Reader's sliding window
|
offset int // current rune read offset relative to the Reader's sliding window
|
||||||
column int // The column at which the cursor is (0-indexed)
|
column int // The column at which the cursor is (0-indexed)
|
||||||
line int // The line at which the cursor is (0-indexed)
|
line int // The line at which the cursor is (0-indexed)
|
||||||
runeStart int // the starting point in the API.runes slice for runes produced by this stack level
|
runeStart int // the starting point in the API.bytes slice for runes produced by this stack level
|
||||||
runeEnd int // the end point in the API.runes slice for runes produced by this stack level
|
runeEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||||
|
|
||||||
|
@ -95,9 +96,9 @@ type stackFrame struct {
|
||||||
err error // can be used by a Handler to report a specific issue with the input
|
err error // can be used by a Handler to report a specific issue with the input
|
||||||
}
|
}
|
||||||
|
|
||||||
const initialStackDepth = 64
|
const initialStackDepth = 32
|
||||||
const initialTokenStoreLength = 32
|
const initialTokenStoreLength = 32
|
||||||
const initialRuneStoreLength = 128
|
const initialByteStoreLength = 256
|
||||||
|
|
||||||
// NewAPI initializes a new API struct, wrapped around the provided input.
|
// NewAPI initializes a new API struct, wrapped around the provided input.
|
||||||
// For an overview of allowed inputs, take a look at the documentation
|
// For an overview of allowed inputs, take a look at the documentation
|
||||||
|
@ -105,7 +106,7 @@ const initialRuneStoreLength = 128
|
||||||
func NewAPI(input interface{}) *API {
|
func NewAPI(input interface{}) *API {
|
||||||
api := &API{
|
api := &API{
|
||||||
reader: read.New(input),
|
reader: read.New(input),
|
||||||
runes: make([]rune, initialRuneStoreLength),
|
bytes: make([]byte, initialByteStoreLength),
|
||||||
tokens: make([]Token, initialTokenStoreLength),
|
tokens: make([]Token, initialTokenStoreLength),
|
||||||
stackFrames: make([]stackFrame, initialStackDepth),
|
stackFrames: make([]stackFrame, initialStackDepth),
|
||||||
}
|
}
|
||||||
|
@ -179,21 +180,21 @@ func (i *API) skipBytes(bytes ...byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) acceptBytes(bytes ...byte) {
|
func (i *API) acceptBytes(bytes ...byte) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
curBytesEnd := i.stackFrame.runeEnd
|
||||||
newRuneEnd := curRuneEnd + len(bytes)
|
newBytesEnd := curBytesEnd + len(bytes)
|
||||||
|
|
||||||
// Grow the runes capacity when needed.
|
// Grow the bytes capacity when needed.
|
||||||
if cap(i.runes) < newRuneEnd {
|
if cap(i.bytes) < newBytesEnd {
|
||||||
newRunes := make([]rune, newRuneEnd*2)
|
newBytes := make([]byte, newBytesEnd*2)
|
||||||
copy(newRunes, i.runes)
|
copy(newBytes, i.bytes)
|
||||||
i.runes = newRunes
|
i.bytes = newBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
for offset, b := range bytes {
|
for offset, b := range bytes {
|
||||||
i.runes[curRuneEnd+offset] = rune(b)
|
i.bytes[curBytesEnd+offset] = b
|
||||||
i.stackFrame.moveCursorByByte(b)
|
i.stackFrame.moveCursorByByte(b)
|
||||||
}
|
}
|
||||||
i.stackFrame.runeEnd = newRuneEnd
|
i.stackFrame.runeEnd = newBytesEnd
|
||||||
i.stackFrame.offset += len(bytes)
|
i.stackFrame.offset += len(bytes)
|
||||||
i.runeRead = false
|
i.runeRead = false
|
||||||
}
|
}
|
||||||
|
@ -207,21 +208,23 @@ func (i *API) skipRunes(width int, runes ...rune) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) acceptRunes(width int, runes ...rune) {
|
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
runesAsString := string(runes)
|
||||||
newRuneEnd := curRuneEnd + len(runes)
|
curBytesEnd := i.stackFrame.runeEnd
|
||||||
|
newBytesEnd := curBytesEnd + len(runesAsString)
|
||||||
|
|
||||||
// Grow the runes capacity when needed.
|
// Grow the bytes capacity when needed.
|
||||||
if cap(i.runes) < newRuneEnd {
|
if cap(i.bytes) < newBytesEnd {
|
||||||
newRunes := make([]rune, newRuneEnd*2)
|
newBytes := make([]byte, newBytesEnd*2)
|
||||||
copy(newRunes, i.runes)
|
copy(newBytes, i.bytes)
|
||||||
i.runes = newRunes
|
i.bytes = newBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
for offset, r := range runes {
|
for _, r := range runes {
|
||||||
i.runes[curRuneEnd+offset] = r
|
|
||||||
i.stackFrame.moveCursorByRune(r)
|
i.stackFrame.moveCursorByRune(r)
|
||||||
}
|
}
|
||||||
i.stackFrame.runeEnd = newRuneEnd
|
copy(i.bytes[curBytesEnd:], runesAsString)
|
||||||
|
|
||||||
|
i.stackFrame.runeEnd = newBytesEnd
|
||||||
i.stackFrame.offset += width
|
i.stackFrame.offset += width
|
||||||
i.runeRead = false
|
i.runeRead = false
|
||||||
}
|
}
|
||||||
|
@ -362,7 +365,6 @@ func (i *API) Reset() {
|
||||||
// When writing your own TokenHandler, you normally won't have to call this
|
// When writing your own TokenHandler, you normally won't have to call this
|
||||||
// method yourself. It is automatically called by parsekit when needed.
|
// method yourself. It is automatically called by parsekit when needed.
|
||||||
func (i *API) FlushInput() bool {
|
func (i *API) FlushInput() bool {
|
||||||
// result := &(i.state.stack[i.stackLevel])
|
|
||||||
if i.stackFrame.offset > 0 {
|
if i.stackFrame.offset > 0 {
|
||||||
i.reader.Flush(i.stackFrame.offset)
|
i.reader.Flush(i.stackFrame.offset)
|
||||||
i.stackFrame.offset = 0
|
i.stackFrame.offset = 0
|
||||||
|
@ -372,15 +374,16 @@ func (i *API) FlushInput() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) String() string {
|
func (i *API) String() string {
|
||||||
return string(i.Runes())
|
return string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) Runes() []rune {
|
func (i *API) Runes() []rune {
|
||||||
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
|
return []rune(string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd]))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) Rune(offset int) rune {
|
func (i *API) Rune(offset int) rune {
|
||||||
return i.runes[i.stackFrame.runeStart+offset]
|
r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.runeStart+offset:])
|
||||||
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) ClearRunes() {
|
func (i *API) ClearRunes() {
|
||||||
|
@ -389,32 +392,30 @@ func (i *API) ClearRunes() {
|
||||||
|
|
||||||
func (i *API) SetRunes(runes ...rune) {
|
func (i *API) SetRunes(runes ...rune) {
|
||||||
// Grow the runes capacity when needed.
|
// Grow the bytes capacity when needed.
|
||||||
newRuneEnd := i.stackFrame.runeStart + len(runes)
|
runesAsString := string(runes)
|
||||||
if cap(i.runes) < newRuneEnd {
|
newBytesEnd := i.stackFrame.runeStart + len(runesAsString)
|
||||||
newRunes := make([]rune, newRuneEnd*2)
|
if cap(i.bytes) < newBytesEnd {
|
||||||
copy(newRunes, i.runes)
|
newBytes := make([]byte, newBytesEnd*2)
|
||||||
i.runes = newRunes
|
copy(newBytes, i.bytes)
|
||||||
|
i.bytes = newBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
for offset, r := range runes {
|
copy(i.bytes[i.stackFrame.runeStart:], runesAsString)
|
||||||
i.runes[i.stackFrame.runeStart+offset] = r
|
i.stackFrame.runeEnd = newBytesEnd
|
||||||
}
|
|
||||||
i.stackFrame.runeEnd = newRuneEnd
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) AddRunes(runes ...rune) {
|
func (i *API) AddRunes(runes ...rune) {
|
||||||
// Grow the runes capacity when needed.
|
// Grow the bytes capacity when needed.
|
||||||
newRuneEnd := i.stackFrame.runeEnd + len(runes)
|
runesAsString := string(runes)
|
||||||
if cap(i.runes) < newRuneEnd {
|
newBytesEnd := i.stackFrame.runeEnd + len(runesAsString)
|
||||||
newRunes := make([]rune, newRuneEnd*2)
|
if cap(i.bytes) < newBytesEnd {
|
||||||
copy(newRunes, i.runes)
|
newBytes := make([]byte, newBytesEnd*2)
|
||||||
i.runes = newRunes
|
copy(newBytes, i.bytes)
|
||||||
|
i.bytes = newBytes
|
||||||
}
|
}
|
||||||
|
|
||||||
for offset, r := range runes {
|
copy(i.bytes[i.stackFrame.runeEnd:], runesAsString)
|
||||||
i.runes[i.stackFrame.runeEnd+offset] = r
|
i.stackFrame.runeEnd = newBytesEnd
|
||||||
}
|
|
||||||
i.stackFrame.runeEnd = newRuneEnd
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) AddString(s string) {
|
func (i *API) AddString(s string) {
|
||||||
|
|
Loading…
Reference in New Issue