From 1771e237c0cda44e39c0d4c434e40dcc9fa5e029 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Thu, 18 Jul 2019 09:26:11 +0000 Subject: [PATCH] Switched to a []byte backing store instead of []rune for collecting input data (we can use both bytes and runes for input in an easy way now) --- parse/api.go | 2 +- tokenize/api.go | 79 ++++++++++++++---------------------- tokenize/handlers_builtin.go | 4 +- 3 files changed, 33 insertions(+), 52 deletions(-) diff --git a/parse/api.go b/parse/api.go index 01a0785..90dd2b4 100644 --- a/parse/api.go +++ b/parse/api.go @@ -191,7 +191,7 @@ func (p *API) ExpectEndOfFile() { // • there was an error while reading the input. func (p *API) Expected(expected string) { p.panicWhenStoppedOrInError("Expected") - _, err := p.tokenAPI.NextRune() + _, err := p.tokenAPI.PeekByte(0) switch { case err == nil: p.Error("unexpected input%s", fmtExpects(expected)) diff --git a/tokenize/api.go b/tokenize/api.go index 74eca7c..50f1cba 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -87,8 +87,8 @@ type stackFrame struct { offset int // current rune read offset relative to the Reader's sliding window column int // The column at which the cursor is (0-indexed) line int // The line at which the cursor is (0-indexed) - runeStart int // the starting point in the APi.bytes slice for runes produced by this stack level - runeEnd int // the end point in the APi.bytes slice for runes produced by this stack level + bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level + bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level @@ -96,9 +96,9 @@ type stackFrame struct { err error // can be used by a Handler to report a specific issue with the input } -const initialStackDepth = 32 -const initialTokenStoreLength = 32 -const initialByteStoreLength = 256 +const initialStackDepth = 64 +const initialTokenStoreLength = 64 +const initialByteStoreLength = 1024 // NewAPI initializes a new API struct, wrapped around the provided input. // For an overview of allowed inputs, take a look at the documentation @@ -174,13 +174,13 @@ func (i *API) Accept() { func (i *API) skipBytes(bytes ...byte) { for _, b := range bytes { i.stackFrame.moveCursorByByte(b) + i.stackFrame.offset++ } - i.stackFrame.offset += len(bytes) i.runeRead = false } func (i *API) acceptBytes(bytes ...byte) { - curBytesEnd := i.stackFrame.runeEnd + curBytesEnd := i.stackFrame.bytesEnd newBytesEnd := curBytesEnd + len(bytes) // Grow the bytes capacity when needed. @@ -190,12 +190,12 @@ func (i *API) acceptBytes(bytes ...byte) { i.bytes = newBytes } - for offset, b := range bytes { - i.bytes[curBytesEnd+offset] = b + copy(i.bytes[curBytesEnd:], bytes) + for _, b := range bytes { i.stackFrame.moveCursorByByte(b) + i.stackFrame.offset++ } - i.stackFrame.runeEnd = newBytesEnd - i.stackFrame.offset += len(bytes) + i.stackFrame.bytesEnd = newBytesEnd i.runeRead = false } @@ -209,7 +209,7 @@ func (i *API) skipRunes(width int, runes ...rune) { func (i *API) acceptRunes(width int, runes ...rune) { runesAsString := string(runes) - curBytesEnd := i.stackFrame.runeEnd + curBytesEnd := i.stackFrame.bytesEnd newBytesEnd := curBytesEnd + len(runesAsString) // Grow the runes capacity when needed. @@ -224,8 +224,8 @@ func (i *API) acceptRunes(width int, runes ...rune) { } copy(i.bytes[curBytesEnd:], runesAsString) - i.stackFrame.runeEnd = newBytesEnd - i.stackFrame.offset += width + i.stackFrame.bytesEnd = newBytesEnd + i.stackFrame.offset += len(runesAsString) i.runeRead = false } @@ -267,8 +267,8 @@ func (i *API) Fork() int { child.offset = parent.offset child.column = parent.column child.line = parent.line - child.runeStart = parent.runeEnd - child.runeEnd = parent.runeEnd + child.bytesStart = parent.bytesEnd + child.bytesEnd = parent.bytesEnd child.tokenStart = parent.tokenEnd child.tokenEnd = parent.tokenEnd i.stackFrame = child @@ -308,8 +308,8 @@ func (i *API) Merge(stackLevel int) { // After merge operation: // parent: |-----------------| // child: |---> continue reading from here - parent.runeEnd = i.stackFrame.runeEnd - i.stackFrame.runeStart = i.stackFrame.runeEnd + parent.bytesEnd = i.stackFrame.bytesEnd + i.stackFrame.bytesStart = i.stackFrame.bytesEnd // The same logic applies to tokens. parent.tokenEnd = i.stackFrame.tokenEnd @@ -352,7 +352,7 @@ func (i *API) Reset() { i.stackFrame.line = parent.line i.stackFrame.offset = parent.offset } - i.stackFrame.runeEnd = i.stackFrame.runeStart + i.stackFrame.bytesEnd = i.stackFrame.bytesStart i.stackFrame.tokenEnd = i.stackFrame.tokenStart i.stackFrame.err = nil } @@ -374,48 +374,39 @@ func (i *API) FlushInput() bool { } func (i *API) String() string { - return string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd]) + return string(i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd]) } func (i *API) Runes() []rune { - return []rune(string(i.bytes[i.stackFrame.runeStart:i.stackFrame.runeEnd])) + return []rune(string(i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd])) } func (i *API) Rune(offset int) rune { - r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.runeStart+offset:]) + r, _ := utf8.DecodeRune(i.bytes[i.stackFrame.bytesStart+offset:]) return r } func (i *API) ClearRunes() { - i.stackFrame.runeEnd = i.stackFrame.runeStart + i.stackFrame.bytesEnd = i.stackFrame.bytesStart } func (i *API) SetRunes(runes ...rune) { - // Grow the runes capacity when needed. - runesAsString := string(runes) - newBytesEnd := i.stackFrame.runeStart + len(runesAsString) - if cap(i.bytes) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, i.bytes) - i.bytes = newBytes - } - - copy(i.bytes[i.stackFrame.runeStart:], runesAsString) - i.stackFrame.runeEnd = newBytesEnd + i.ClearRunes() + i.AddRunes(runes...) } func (i *API) AddRunes(runes ...rune) { // Grow the runes capacity when needed. runesAsString := string(runes) - newBytesEnd := i.stackFrame.runeEnd + len(runesAsString) + newBytesEnd := i.stackFrame.bytesEnd + len(runesAsString) if cap(i.bytes) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) copy(newBytes, i.bytes) i.bytes = newBytes } - copy(i.bytes[i.stackFrame.runeEnd:], runesAsString) - i.stackFrame.runeEnd = newBytesEnd + copy(i.bytes[i.stackFrame.bytesEnd:], runesAsString) + i.stackFrame.bytesEnd = newBytesEnd } func (i *API) AddString(s string) { @@ -450,18 +441,8 @@ func (i *API) ClearTokens() { } func (i *API) SetTokens(tokens ...Token) { - // Grow the tokens capacity when needed. - newTokenEnd := i.stackFrame.tokenStart + len(tokens) - if cap(i.tokens) < newTokenEnd { - newTokens := make([]Token, newTokenEnd*2) - copy(newTokens, tokens) - i.tokens = newTokens - } - - for offset, t := range tokens { - i.tokens[i.stackFrame.tokenStart+offset] = t - } - i.stackFrame.tokenEnd = newTokenEnd + i.ClearTokens() + i.AddTokens(tokens...) } func (i *API) AddTokens(tokens ...Token) { diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 3a7ad41..2b4d624 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -1538,12 +1538,12 @@ func MatchIPv6Net(normalize bool) Handler { // In both cases, it would match the first form. func ModifyDrop(handler Handler) Handler { return func(t *API) bool { - runeEnd := t.stackFrame.runeEnd + runeEnd := t.stackFrame.bytesEnd tokenEnd := t.stackFrame.tokenEnd if handler(t) { // We keep offset and cursor updates, but rollback any runes / tokens // that were added by the handler. - t.stackFrame.runeEnd = runeEnd + t.stackFrame.bytesEnd = runeEnd t.stackFrame.tokenEnd = tokenEnd return true }