diff --git a/parse/api.go b/parse/api.go index 14c7016..1a8d758 100644 --- a/parse/api.go +++ b/parse/api.go @@ -26,7 +26,7 @@ type API struct { // If it does, then true will be returned, false otherwise. The read cursor // will be kept at the same position, so the next call to Peek() or Accept() // will start from the same cursor position. -func (p *API) Peek(tokenHandler tokenize.Handler) bool { +func (p *API) PeekWithResult(tokenHandler tokenize.Handler) bool { forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) t := p.tokenAPI if ok { @@ -37,6 +37,15 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool { return ok } +func (p *API) Peek(tokenHandler tokenize.Handler) bool { + forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) + t := p.tokenAPI + p.Result.Tokens = nil + p.Result.Runes = nil + t.Dispose(forkedAPI) + return ok +} + // Accept checks if the upcoming input data matches the provided tokenize.Handler. // If it does, then true will be returned and the read cursor will be moved // forward to beyond the match that was found. Otherwise false will be diff --git a/tokenize/api.go b/tokenize/api.go index 580889c..d0192cf 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -136,22 +136,21 @@ func NewAPI(input interface{}) *API { // The parent API was never modified, so it can safely be used after disposal // as if the lookahead never happened. func (tokenAPI *API) Fork() int { - newStackLevel := tokenAPI.stackLevel + 1 - newStackSize := newStackLevel + 1 + tokenAPI.stackLevel++ + newStackLevel := tokenAPI.stackLevel // Grow the stack frames capacity when needed. - if cap(tokenAPI.stackFrames) < newStackSize { - newFrames := make([]stackFrame, newStackSize*2) - copy(newFrames, tokenAPI.stackFrames) - tokenAPI.stackFrames = newFrames + frames := tokenAPI.stackFrames + if cap(frames) < (newStackLevel + 1) { + newFrames := make([]stackFrame, (newStackLevel+1)*2) + copy(newFrames, frames) + frames = newFrames } - tokenAPI.stackLevel++ - // This can be written in a shorter way, but this turned out to // be the best way performance-wise. parent := tokenAPI.stackFrame - child := &tokenAPI.stackFrames[tokenAPI.stackLevel] + child := &frames[newStackLevel] child.offset = parent.offset child.column = parent.column child.line = parent.line @@ -161,7 +160,7 @@ func (tokenAPI *API) Fork() int { child.tokenEnd = parent.tokenEnd tokenAPI.stackFrame = child - return tokenAPI.stackLevel + return newStackLevel } // Merge appends the results of a forked child API (runes, tokens) to the @@ -187,6 +186,7 @@ func (tokenAPI *API) Merge(stackLevel int) { } parent := &tokenAPI.stackFrames[stackLevel-1] + f := tokenAPI.stackFrame // The end of the parent slice aligns with the start of the child slice. // Because of this, to merge the parent slice can simply be expanded @@ -196,37 +196,38 @@ func (tokenAPI *API) Merge(stackLevel int) { // After merge operation: // parent: |-----------------| // child: |---> continue reading from here - parent.bytesEnd = tokenAPI.stackFrame.bytesEnd - tokenAPI.stackFrame.bytesStart = tokenAPI.stackFrame.bytesEnd + parent.bytesEnd = f.bytesEnd + f.bytesStart = f.bytesEnd // The same logic applies to tokens. - parent.tokenEnd = tokenAPI.stackFrame.tokenEnd - tokenAPI.stackFrame.tokenStart = tokenAPI.stackFrame.tokenEnd + parent.tokenEnd = f.tokenEnd + f.tokenStart = f.tokenEnd - parent.offset = tokenAPI.stackFrame.offset - parent.line = tokenAPI.stackFrame.line - parent.column = tokenAPI.stackFrame.column + parent.offset = f.offset + parent.line = f.line + parent.column = f.column - tokenAPI.stackFrame.err = nil + f.err = nil } // Reset moves the input cursor back to the beginning for the currently active API child. // Aditionally, any output (bytes and tokens) that was emitted from the API child are // cleared as well. func (api *API) Reset() { + f := api.stackFrame if api.stackLevel == 0 { - api.stackFrame.column = 0 - api.stackFrame.line = 0 - api.stackFrame.offset = 0 + f.column = 0 + f.line = 0 + f.offset = 0 } else { parent := api.stackFrames[api.stackLevel-1] - api.stackFrame.column = parent.column - api.stackFrame.line = parent.line - api.stackFrame.offset = parent.offset + f.column = parent.column + f.line = parent.line + f.offset = parent.offset } - api.stackFrame.bytesEnd = api.stackFrame.bytesStart - api.stackFrame.tokenEnd = api.stackFrame.tokenStart - api.stackFrame.err = nil + f.bytesEnd = f.bytesStart + f.tokenEnd = f.tokenStart + f.err = nil } func (tokenAPI *API) Dispose(stackLevel int) { diff --git a/tokenize/api_bytemode.go b/tokenize/api_bytemode.go index 8e13b90..7836984 100644 --- a/tokenize/api_bytemode.go +++ b/tokenize/api_bytemode.go @@ -40,18 +40,10 @@ func (byteMode ByteMode) SkipMulti(bytes ...byte) { } func (byteMode ByteMode) Accept(b byte) { - byteMode.Append(b) + byteMode.api.Output.AddByte(b) byteMode.Skip(b) } -func (byteMode ByteMode) Append(b byte) { - a := byteMode.api - f := a.stackFrame - a.growOutputData(f.bytesEnd + 1) - a.outputData[f.bytesEnd] = b - f.bytesEnd++ -} - // AcceptMulti is used to accept one or more bytes that were read from the input. // This tells the tokenizer: "I've seen these bytes. I want to make use of them // for the final output, so please remember them for me. I will now continue @@ -64,29 +56,6 @@ func (byteMode ByteMode) Append(b byte) { // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted bytes. func (byteMode ByteMode) AcceptMulti(bytes ...byte) { - byteMode.AppendMulti(bytes...) + byteMode.api.Output.AddBytes(bytes...) byteMode.SkipMulti(bytes...) } - -func (byteMode ByteMode) AppendMulti(bytes ...byte) { - a := byteMode.api - f := a.stackFrame - curBytesEnd := f.bytesEnd - newBytesEnd := curBytesEnd + len(bytes) - - a.growOutputData(newBytesEnd) - copy(a.outputData[curBytesEnd:], bytes) - f.bytesEnd = newBytesEnd -} - -func (api *API) dataAddByte(b byte) { - curBytesEnd := api.stackFrame.bytesEnd - api.growOutputData(curBytesEnd + 1) - api.outputData[curBytesEnd] = b - api.stackFrame.bytesEnd++ -} - -func (api *API) dataSetBytes(bytes ...byte) { - api.Output.ClearData() - api.Byte.AppendMulti(bytes...) -} diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 1bfd86f..1684bdb 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -33,15 +33,16 @@ func (o Output) ClearData() { func (o Output) SetBytes(bytes ...byte) { o.ClearData() - o.api.dataSetBytes(bytes...) + o.AddBytes(bytes...) } func (o Output) AddByte(b byte) { - o.api.dataAddByte(b) -} - -func (o Output) AddBytes(bytes ...byte) { - o.api.Byte.AppendMulti(bytes...) + a := o.api + f := a.stackFrame + curBytesEnd := f.bytesEnd + a.growOutputData(curBytesEnd + 1) + a.outputData[curBytesEnd] = b + f.bytesEnd++ } func (o Output) SetRunes(runes ...rune) { @@ -49,6 +50,16 @@ func (o Output) SetRunes(runes ...rune) { o.AddRunes(runes...) } +func (o Output) AddBytes(bytes ...byte) { + a := o.api + f := a.stackFrame + curBytesEnd := f.bytesEnd + newBytesEnd := curBytesEnd + len(bytes) + a.growOutputData(newBytesEnd) + copy(a.outputData[curBytesEnd:], bytes) + f.bytesEnd = newBytesEnd +} + func (o Output) AddRunes(runes ...rune) { a := o.api f := a.stackFrame @@ -60,7 +71,7 @@ func (o Output) AddRunes(runes ...rune) { } func (o Output) AddString(s string) { - o.api.Byte.AppendMulti([]byte(s)...) + o.AddBytes([]byte(s)...) } func (o Output) SetString(s string) { diff --git a/tokenize/api_runemode.go b/tokenize/api_runemode.go index 2fcd4be..819e981 100644 --- a/tokenize/api_runemode.go +++ b/tokenize/api_runemode.go @@ -37,7 +37,7 @@ func (runeMode RuneMode) Peek(offset int) (rune, int, error) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the skipped rune. -func (runeMode *RuneMode) Skip(r rune) { +func (runeMode RuneMode) Skip(r rune) { f := runeMode.api.stackFrame f.moveCursorByRune(r) f.offset += utf8.RuneLen(r) @@ -53,7 +53,7 @@ func (runeMode *RuneMode) Skip(r rune) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the skipped runes. -func (runeMode *RuneMode) SkipMulti(runes ...rune) { +func (runeMode RuneMode) SkipMulti(runes ...rune) { f := runeMode.api.stackFrame for _, r := range runes { f.moveCursorByRune(r) @@ -72,7 +72,7 @@ func (runeMode *RuneMode) SkipMulti(runes ...rune) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted rune. -func (runeMode *RuneMode) Accept(r rune) { +func (runeMode RuneMode) Accept(r rune) { a := runeMode.api f := a.stackFrame curBytesEnd := f.bytesEnd @@ -97,7 +97,7 @@ func (runeMode *RuneMode) Accept(r rune) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted runes. -func (runeMode *RuneMode) AcceptMulti(runes ...rune) { +func (runeMode RuneMode) AcceptMulti(runes ...rune) { a := runeMode.api f := a.stackFrame runesAsString := string(runes) diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go index bcdf512..c190899 100644 --- a/tokenize/handlers_builtin_test.go +++ b/tokenize/handlers_builtin_test.go @@ -7,16 +7,6 @@ import ( tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize" ) -// TODO cleanup when I'm finished with these. -func TestCombinatorsTempDebug(t *testing.T) { - var a = tokenize.A - AssertHandlers(t, []HandlerT{ - // {"024", a.IPv4CIDRMask, true, "24"}, - // {"024", a.Octet, true, "24"}, - {"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"}, - }) -} - func TestCombinators(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M AssertHandlers(t, []HandlerT{