diff --git a/parse/api.go b/parse/api.go index 64fcc22..1366e7c 100644 --- a/parse/api.go +++ b/parse/api.go @@ -30,14 +30,17 @@ type API struct { // On a successful peek, the results (data + tokens) are returned by the peek. // They are availablel (as with Accept()) through parse.API.Result. func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool { - _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler) tokenAPI := parseAPI.tokenAPI + snap := tokenAPI.MakeSnapshot() + _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler) if ok { parseAPI.Result.Tokens = tokenAPI.Output.Tokens() - parseAPI.Result.Runes = tokenAPI.Output.Runes() + parseAPI.Result.Runes = tokenAPI.Output.Runes() // TODO use bytes! + } else { + parseAPI.Result.Tokens = nil + parseAPI.Result.Runes = nil } - tokenAPI.Input.Reset() - tokenAPI.Output.Reset() + tokenAPI.RestoreSnapshot(snap) return ok } @@ -49,12 +52,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool { // No results (data + tokens) are returned by Peek(). If want access to the data // through parse.API.Result, make use of PeekWithResult() instead. func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool { - _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler) tokenAPI := parseAPI.tokenAPI + snap := tokenAPI.MakeSnapshot() + _, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler) parseAPI.Result.Tokens = nil parseAPI.Result.Runes = nil - tokenAPI.Input.Reset() - tokenAPI.Output.Reset() + tokenAPI.RestoreSnapshot(snap) return ok } @@ -67,8 +70,10 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool { func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { tokenAPI := parseAPI.tokenAPI _, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler) + snap := tokenAPI.MakeSnapshot() if ok { // Keep track of the results as produced by this child. + // TODO put in function and also in Peek() Record Cursor() / error too? parseAPI.Result.Tokens = tokenAPI.Output.Tokens() parseAPI.Result.Runes = tokenAPI.Output.Runes() @@ -79,13 +84,13 @@ func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { tokenAPI.Input.Flush() } else { // No match, so reset the tokenize.API for the next handler. - // This moves back the read cursor to the start and clears all results. - tokenAPI.Input.Reset() - tokenAPI.Output.Reset() + tokenAPI.RestoreSnapshot(snap) } return ok } +// TODO make a func Skip() which is like Accept() but without storing results. + // invokeTokenizeHandler forks the tokenize.API, and invokes the tokenize.Handler // in the context of the created child. The child is returned, so the caller // has full control over merging and disposing the child. diff --git a/tokenize/api.go b/tokenize/api.go index 4fc734d..82ed102 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -83,6 +83,8 @@ type API struct { Output Output // provides output-related functionality outputTokens []Token // accepted tokens outputBytes []byte // accepted bytes + + snapshot [9]int // storage for the Snapshot() / RestoreSnapshot() feature } type stackFrame struct { @@ -119,6 +121,7 @@ func NewAPI(input interface{}) *API { tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader} tokenAPI.Output = Output{api: tokenAPI} tokenAPI.stackFrame = &tokenAPI.stackFrames[0] + tokenAPI.snapshot[0] = -1 return tokenAPI } @@ -231,3 +234,40 @@ func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) { "(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel) } } + +type Snapshot [9]int + +func (tokenAPI *API) MakeSnapshot() Snapshot { + f := tokenAPI.stackFrame + + return Snapshot{ + tokenAPI.stackLevel, + f.bytesStart, + f.bytesEnd, + f.tokenStart, + f.tokenEnd, + f.offset, + f.offsetLocal, + f.line, + f.column, + } +} + +func (tokenAPI *API) RestoreSnapshot(snap Snapshot) { + f := tokenAPI.stackFrame + + if snap[0] != tokenAPI.stackLevel { + callerPanic("RestoreSnapshot", "tokenize.API.{name}(): {name}() called at {caller} "+ + "on API stack level %d, but the provided snapshot was created for stack level %d", + tokenAPI.stackLevel, snap[0]) + } + + f.bytesStart = snap[1] + f.bytesEnd = snap[2] + f.tokenStart = snap[3] + f.tokenEnd = snap[4] + f.offset = snap[5] + f.offsetLocal = snap[6] + f.line = snap[7] + f.column = snap[8] +} diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 786029d..1b315dc 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -26,6 +26,22 @@ func (o Output) Rune(offset int) rune { return r } +type Split [2]int + +func (o Output) Split() Split { + f := o.api.stackFrame + split := Split{f.bytesStart, f.tokenStart} + f.bytesStart = f.bytesEnd + f.tokenStart = f.tokenEnd + return split +} + +func (o Output) MergeSplit(split Split) { + f := o.api.stackFrame + f.bytesStart = split[0] + f.tokenStart = split[1] +} + func (o Output) Reset() { f := o.api.stackFrame f.bytesEnd = f.bytesStart @@ -121,6 +137,21 @@ func (o Output) AddToken(token Token) { f.tokenEnd++ } +func (o Output) InsertTokenAtStart(token Token) { + a := o.api + f := a.stackFrame + tokenEnd := f.tokenEnd + tokenStart := f.tokenStart + a.growOutputTokens(tokenEnd + 1) + if tokenStart == tokenEnd { + a.outputTokens[tokenEnd] = token + } else { + copy(a.outputTokens[tokenStart+1:], a.outputTokens[tokenStart:tokenEnd]) + a.outputTokens[tokenStart] = token + } + f.tokenEnd++ +} + func (o Output) AddTokens(tokens ...Token) { a := o.api f := a.stackFrame diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 42893b0..01bb8e4 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -180,8 +180,9 @@ func ExampleAPI_Reset() { tokenAPI.Rune.Accept(r) fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) - // Reset clears the results. - tokenAPI.Reset() + // Reset input and output. + tokenAPI.Input.Reset() + tokenAPI.Output.Reset() fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // So then doing the same read operations, the same data are read. @@ -403,7 +404,7 @@ func TestMergeScenariosForTokens(t *testing.T) { child = tokenAPI.Fork() tokenAPI.Output.AddToken(token3) - tokenAPI.Reset() + tokenAPI.Output.Reset() tokenAPI.Output.AddToken(token4) tokenAPI.Merge(child) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index b4b9576..069d721 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -694,11 +694,10 @@ func MatchStrNoCase(expected string) Handler { // result will be empty). func MatchOptional(handler Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() - if handler(tokenAPI) { - tokenAPI.Merge(child) + snap := tokenAPI.MakeSnapshot() + if !handler(tokenAPI) { + tokenAPI.RestoreSnapshot(snap) } - tokenAPI.Dispose(child) return true } } @@ -708,19 +707,27 @@ func MatchOptional(handler Handler) Handler { // reports successful match. func MatchSeq(handlers ...Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + f := tokenAPI.stackFrame + snap := tokenAPI.MakeSnapshot() for _, handler := range handlers { - subchild := tokenAPI.Fork() + tokenAPI.Output.Split() + // Move forward the output pointers, so the handler that we're about + // to call will make use of a fresh output buffer. + f.bytesStart = f.bytesEnd + f.tokenStart = f.tokenEnd + if !handler(tokenAPI) { - tokenAPI.Dispose(subchild) - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } - tokenAPI.Merge(subchild) - tokenAPI.Dispose(subchild) } - tokenAPI.Merge(child) - tokenAPI.Dispose(child) + + // Move back the output pointers to where they were originally. This + // stiches together all the pieces of output that were genarated by + // the individual handlers in the sequence. + f.bytesStart = snap[1] + f.tokenStart = snap[3] + return true } } @@ -730,18 +737,13 @@ func MatchSeq(handlers ...Handler) Handler { // that applies is used for reporting back a match. func MatchAny(handlers ...Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() for _, handler := range handlers { if handler(tokenAPI) { - tokenAPI.Merge(child) - tokenAPI.Dispose(child) return true } - tokenAPI.Input.Reset() - tokenAPI.Output.Reset() + tokenAPI.RestoreSnapshot(snap) } - tokenAPI.Dispose(child) - return false } } @@ -751,12 +753,11 @@ func MatchAny(handlers ...Handler) Handler { // does not, then the next rune from the input will be reported as a match. func MatchNot(handler Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() if handler(tokenAPI) { - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } - tokenAPI.Dispose(child) r, _, err := tokenAPI.Rune.Peek(0) if err == nil { tokenAPI.Rune.Accept(r) @@ -838,11 +839,11 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { total := 0 // Check for the minimum required amount of matches. - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() for total < min { total++ if !handler(tokenAPI) { - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } } @@ -856,8 +857,6 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { break } } - tokenAPI.Merge(child) - tokenAPI.Dispose(child) return true } } @@ -876,12 +875,11 @@ func MatchSeparated(separator Handler, separated Handler) Handler { // as a whole will be treated as a mismatch. func MatchExcept(handler Handler, except Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() if except(tokenAPI) { - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } - tokenAPI.Dispose(child) return handler(tokenAPI) } } @@ -893,10 +891,10 @@ func MatchExcept(handler Handler, except Handler) Handler { func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { - child := tokenAPI.Fork() - result := lookAhead(tokenAPI) - tokenAPI.Dispose(child) - return result + snap := tokenAPI.MakeSnapshot() + ok := lookAhead(tokenAPI) + tokenAPI.RestoreSnapshot(snap) + return ok } return false } @@ -909,10 +907,10 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { return func(tokenAPI *API) bool { if handler(tokenAPI) { - child := tokenAPI.Fork() - result := !lookAhead(tokenAPI) - tokenAPI.Dispose(child) - return result + snap := tokenAPI.MakeSnapshot() + ok := !lookAhead(tokenAPI) + tokenAPI.RestoreSnapshot(snap) + return ok } return false } @@ -950,21 +948,18 @@ func MakeInputFlusher(handler Handler) Handler { // C.Signed(A.Integer) func MatchSigned(handler Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() b, err := tokenAPI.Byte.Peek(0) if err != nil { - tokenAPI.Dispose(child) return false } + snap := tokenAPI.MakeSnapshot() if b == '-' || b == '+' { tokenAPI.Byte.Accept(b) } if handler(tokenAPI) { - tokenAPI.Merge(child) - tokenAPI.Dispose(child) return true } - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } } @@ -996,9 +991,7 @@ func MatchIntegerBetween(min int64, max int64) Handler { // a successful or a failing match through its boolean return value. func MatchEndOfFile() Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() _, err := tokenAPI.Byte.Peek(0) - tokenAPI.Dispose(child) return err == io.EOF } } @@ -1462,7 +1455,8 @@ func MatchIPv6(normalize bool) Handler { } // Invalid IPv6, when net.ParseIP() cannot handle it. - parsed := net.ParseIP(tokenAPI.Output.String()) + input := tokenAPI.Output.String() + parsed := net.ParseIP(input) if parsed == nil { return false } @@ -1491,7 +1485,8 @@ func matchCIDRMask(bits int64, normalize bool) Handler { if !mask(tokenAPI) { return false } - bits, _ := strconv.Atoi(tokenAPI.Output.String()) + maskStr := tokenAPI.Output.String() + bits, _ := strconv.Atoi(maskStr) tokenAPI.Output.SetString(fmt.Sprintf("%d", bits)) return true } @@ -1631,18 +1626,18 @@ func ModifyReplace(handler Handler, replaceWith string) Handler { // resulting output. func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() + split := tokenAPI.Output.Split() if handler(tokenAPI) { origS := tokenAPI.Output.String() s := modfunc(origS) if s != origS { tokenAPI.Output.SetString(s) } - tokenAPI.Merge(child) - tokenAPI.Dispose(child) + tokenAPI.Output.MergeSplit(split) return true } - tokenAPI.Dispose(child) + tokenAPI.RestoreSnapshot(snap) return false } } @@ -1926,23 +1921,21 @@ func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) H // its input and must return the token value. func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() + snap := tokenAPI.MakeSnapshot() + split := tokenAPI.Output.Split() if handler(tokenAPI) { - // The token is not added to the child here. The child might have produced its own - // tokens and we want those to come after the token for the current parsing level. - // By adding the token to the input API and then merging the child tokens, the order - // of the tokens will match the expectations. - // e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the - // tokens will end up in the order "date", "year", "month", "day". When we'd add the - // token to the child here, the order would have been "year", "month", "day", "date". + // When a parsing hierarchy looks like ("date" ("year", "month" "day")), the + // tokens must end up in the order "date", "year", "month", "day" and not + // "year", "month", "day", "date". Therefore (since the inner tokens have already + // been produced at this point) we have to insert this token before any tokens + // that were already created by the handler call. token := Token{Type: toktype, Value: makeValue(tokenAPI)} - tokenAPI.Output.AddToken(token) - tokenAPI.Merge(child) - tokenAPI.Dispose(child) - + tokenAPI.Output.InsertTokenAtStart(token) + tokenAPI.Output.MergeSplit(split) return true } - tokenAPI.Dispose(child) + + tokenAPI.RestoreSnapshot(snap) return false } } @@ -1951,17 +1944,13 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(to // take the tokens as produced by the handler and group them together in a single token. func MakeTokenGroup(toktype interface{}, handler Handler) Handler { return func(tokenAPI *API) bool { - child := tokenAPI.Fork() if handler(tokenAPI) { tokens := tokenAPI.Output.Tokens() tokensCopy := make([]Token, len(tokens)) copy(tokensCopy, tokens) tokenAPI.Output.SetTokens(Token{Type: toktype, Value: tokensCopy}) - tokenAPI.Merge(child) - tokenAPI.Dispose(child) return true } - tokenAPI.Dispose(child) return false } }