Hmm... this whole snapshot idea seems to work and to be a valid replacement for the forking method.

This commit is contained in:
Maurice Makaay 2019-07-26 08:02:37 +00:00
parent bc9e718e47
commit 87cdadae78
5 changed files with 146 additions and 80 deletions

View File

@ -30,14 +30,17 @@ type API struct {
// On a successful peek, the results (data + tokens) are returned by the peek.
// They are available (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
snap := tokenAPI.MakeSnapshot()
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
if ok {
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
parseAPI.Result.Runes = tokenAPI.Output.Runes() // TODO use bytes!
} else {
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
}
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
tokenAPI.RestoreSnapshot(snap)
return ok
}
@ -49,12 +52,12 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
// No results (data + tokens) are returned by Peek(). If you want access to the data
// through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
snap := tokenAPI.MakeSnapshot()
_, ok := parseAPI.invokeTokenizeHandler("Peek", tokenHandler)
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
tokenAPI.RestoreSnapshot(snap)
return ok
}
@ -67,8 +70,10 @@ func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
tokenAPI := parseAPI.tokenAPI
_, ok := parseAPI.invokeTokenizeHandler("Accept", tokenHandler)
snap := tokenAPI.MakeSnapshot()
if ok {
// Keep track of the results as produced by this child.
// TODO put in function and also in Peek() Record Cursor() / error too?
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
@ -79,13 +84,13 @@ func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
tokenAPI.Input.Flush()
} else {
// No match, so reset the tokenize.API for the next handler.
// This moves back the read cursor to the start and clears all results.
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
tokenAPI.RestoreSnapshot(snap)
}
return ok
}
// TODO make a func Skip() which is like Accept() but without storing results.
// invokeTokenizeHandler forks the tokenize.API, and invokes the tokenize.Handler
// in the context of the created child. The child is returned, so the caller
// has full control over merging and disposing the child.

View File

@ -83,6 +83,8 @@ type API struct {
Output Output // provides output-related functionality
outputTokens []Token // accepted tokens
outputBytes []byte // accepted bytes
snapshot [9]int // storage for the Snapshot() / RestoreSnapshot() feature
}
type stackFrame struct {
@ -119,6 +121,7 @@ func NewAPI(input interface{}) *API {
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
tokenAPI.Output = Output{api: tokenAPI}
tokenAPI.stackFrame = &tokenAPI.stackFrames[0]
tokenAPI.snapshot[0] = -1
return tokenAPI
}
@ -231,3 +234,40 @@ func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
}
}
// Snapshot holds a copy of tokenize.API state as captured by
// API.MakeSnapshot(). The slots are, in order: the API stack level, followed
// by the active stack frame's bytesStart, bytesEnd, tokenStart, tokenEnd,
// offset, offsetLocal, line and column.
type Snapshot [9]int

// MakeSnapshot captures the current stack level together with the positional
// fields of the active stack frame. The returned Snapshot is a plain value
// copy; it can be passed to RestoreSnapshot() to write the captured values
// back into the stack frame.
func (tokenAPI *API) MakeSnapshot() Snapshot {
	var snap Snapshot
	frame := tokenAPI.stackFrame

	snap[0] = tokenAPI.stackLevel
	snap[1] = frame.bytesStart
	snap[2] = frame.bytesEnd
	snap[3] = frame.tokenStart
	snap[4] = frame.tokenEnd
	snap[5] = frame.offset
	snap[6] = frame.offsetLocal
	snap[7] = frame.line
	snap[8] = frame.column

	return snap
}
// RestoreSnapshot writes the values from the provided snapshot back into the
// active stack frame (bytesStart, bytesEnd, tokenStart, tokenEnd, offset,
// offsetLocal, line and column).
//
// The snapshot must have been made on the same stack level as the one that is
// currently active. When the levels differ, callerPanic() is invoked to report
// the mismatch.
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) {
	// Slot 0 of the snapshot carries the stack level it was created on.
	if tokenAPI.stackLevel != snap[0] {
		callerPanic("RestoreSnapshot", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on API stack level %d, but the provided snapshot was created for stack level %d",
			tokenAPI.stackLevel, snap[0])
	}

	frame := tokenAPI.stackFrame
	frame.bytesStart, frame.bytesEnd = snap[1], snap[2]
	frame.tokenStart, frame.tokenEnd = snap[3], snap[4]
	frame.offset, frame.offsetLocal = snap[5], snap[6]
	frame.line, frame.column = snap[7], snap[8]
}

View File

@ -26,6 +26,22 @@ func (o Output) Rune(offset int) rune {
return r
}
// Split holds the output start markers of a stack frame as they were before
// a call to Output.Split(): slot 0 is bytesStart, slot 1 is tokenStart.
type Split [2]int

// Split moves the start markers of the active stack frame's output (bytes and
// tokens) forward to the corresponding end markers, so output produced after
// this call begins at an empty range. The start markers as they were before
// the call are returned, so they can be put back using MergeSplit().
func (o Output) Split() Split {
	frame := o.api.stackFrame

	var before Split
	before[0], before[1] = frame.bytesStart, frame.tokenStart

	frame.bytesStart, frame.tokenStart = frame.bytesEnd, frame.tokenEnd
	return before
}
// MergeSplit puts the output start markers of the active stack frame back to
// the values held by the provided split: slot 0 becomes bytesStart and slot 1
// becomes tokenStart. The end markers are left as they are.
func (o Output) MergeSplit(split Split) {
	frame := o.api.stackFrame
	frame.bytesStart, frame.tokenStart = split[0], split[1]
}
func (o Output) Reset() {
f := o.api.stackFrame
f.bytesEnd = f.bytesStart
@ -121,6 +137,21 @@ func (o Output) AddToken(token Token) {
f.tokenEnd++
}
// InsertTokenAtStart places the provided token at the front of the token range
// of the active stack frame (index tokenStart). Tokens that are already in the
// range are shifted one position towards the end, and tokenEnd is incremented.
func (o Output) InsertTokenAtStart(token Token) {
	api := o.api
	frame := api.stackFrame
	first, last := frame.tokenStart, frame.tokenEnd

	// Presumably makes sure the token store can be indexed up to and
	// including position tokenEnd -- TODO confirm against growOutputTokens().
	api.growOutputTokens(last + 1)

	// With an empty range (first == last) there is nothing to shift; the
	// token simply lands on the first (and only) position.
	if first != last {
		copy(api.outputTokens[first+1:], api.outputTokens[first:last])
	}
	api.outputTokens[first] = token

	frame.tokenEnd++
}
func (o Output) AddTokens(tokens ...Token) {
a := o.api
f := a.stackFrame

View File

@ -180,8 +180,9 @@ func ExampleAPI_Reset() {
tokenAPI.Rune.Accept(r)
fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// Reset clears the results.
tokenAPI.Reset()
// Reset input and output.
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// So then doing the same read operations, the same data are read.
@ -403,7 +404,7 @@ func TestMergeScenariosForTokens(t *testing.T) {
child = tokenAPI.Fork()
tokenAPI.Output.AddToken(token3)
tokenAPI.Reset()
tokenAPI.Output.Reset()
tokenAPI.Output.AddToken(token4)
tokenAPI.Merge(child)

View File

@ -694,11 +694,10 @@ func MatchStrNoCase(expected string) Handler {
// result will be empty).
func MatchOptional(handler Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
if handler(tokenAPI) {
tokenAPI.Merge(child)
snap := tokenAPI.MakeSnapshot()
if !handler(tokenAPI) {
tokenAPI.RestoreSnapshot(snap)
}
tokenAPI.Dispose(child)
return true
}
}
@ -708,19 +707,27 @@ func MatchOptional(handler Handler) Handler {
// reports successful match.
func MatchSeq(handlers ...Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
f := tokenAPI.stackFrame
snap := tokenAPI.MakeSnapshot()
for _, handler := range handlers {
subchild := tokenAPI.Fork()
tokenAPI.Output.Split()
// Move forward the output pointers, so the handler that we're about
// to call will make use of a fresh output buffer.
f.bytesStart = f.bytesEnd
f.tokenStart = f.tokenEnd
if !handler(tokenAPI) {
tokenAPI.Dispose(subchild)
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
tokenAPI.Merge(subchild)
tokenAPI.Dispose(subchild)
}
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
// Move back the output pointers to where they were originally. This
// stitches together all the pieces of output that were generated by
// the individual handlers in the sequence.
f.bytesStart = snap[1]
f.tokenStart = snap[3]
return true
}
}
@ -730,18 +737,13 @@ func MatchSeq(handlers ...Handler) Handler {
// that applies is used for reporting back a match.
func MatchAny(handlers ...Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
for _, handler := range handlers {
if handler(tokenAPI) {
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
return true
}
tokenAPI.Input.Reset()
tokenAPI.Output.Reset()
tokenAPI.RestoreSnapshot(snap)
}
tokenAPI.Dispose(child)
return false
}
}
@ -751,12 +753,11 @@ func MatchAny(handlers ...Handler) Handler {
// does not, then the next rune from the input will be reported as a match.
func MatchNot(handler Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
if handler(tokenAPI) {
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
tokenAPI.Dispose(child)
r, _, err := tokenAPI.Rune.Peek(0)
if err == nil {
tokenAPI.Rune.Accept(r)
@ -838,11 +839,11 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
total := 0
// Check for the minimum required amount of matches.
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
for total < min {
total++
if !handler(tokenAPI) {
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
}
@ -856,8 +857,6 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
break
}
}
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
return true
}
}
@ -876,12 +875,11 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
// as a whole will be treated as a mismatch.
func MatchExcept(handler Handler, except Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
if except(tokenAPI) {
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
tokenAPI.Dispose(child)
return handler(tokenAPI)
}
}
@ -893,10 +891,10 @@ func MatchExcept(handler Handler, except Handler) Handler {
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(tokenAPI *API) bool {
if handler(tokenAPI) {
child := tokenAPI.Fork()
result := lookAhead(tokenAPI)
tokenAPI.Dispose(child)
return result
snap := tokenAPI.MakeSnapshot()
ok := lookAhead(tokenAPI)
tokenAPI.RestoreSnapshot(snap)
return ok
}
return false
}
@ -909,10 +907,10 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(tokenAPI *API) bool {
if handler(tokenAPI) {
child := tokenAPI.Fork()
result := !lookAhead(tokenAPI)
tokenAPI.Dispose(child)
return result
snap := tokenAPI.MakeSnapshot()
ok := !lookAhead(tokenAPI)
tokenAPI.RestoreSnapshot(snap)
return ok
}
return false
}
@ -950,21 +948,18 @@ func MakeInputFlusher(handler Handler) Handler {
// C.Signed(A.Integer)
func MatchSigned(handler Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
b, err := tokenAPI.Byte.Peek(0)
if err != nil {
tokenAPI.Dispose(child)
return false
}
snap := tokenAPI.MakeSnapshot()
if b == '-' || b == '+' {
tokenAPI.Byte.Accept(b)
}
if handler(tokenAPI) {
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
return true
}
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
}
@ -996,9 +991,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
// a successful or a failing match through its boolean return value.
func MatchEndOfFile() Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
_, err := tokenAPI.Byte.Peek(0)
tokenAPI.Dispose(child)
return err == io.EOF
}
}
@ -1462,7 +1455,8 @@ func MatchIPv6(normalize bool) Handler {
}
// Invalid IPv6, when net.ParseIP() cannot handle it.
parsed := net.ParseIP(tokenAPI.Output.String())
input := tokenAPI.Output.String()
parsed := net.ParseIP(input)
if parsed == nil {
return false
}
@ -1491,7 +1485,8 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
if !mask(tokenAPI) {
return false
}
bits, _ := strconv.Atoi(tokenAPI.Output.String())
maskStr := tokenAPI.Output.String()
bits, _ := strconv.Atoi(maskStr)
tokenAPI.Output.SetString(fmt.Sprintf("%d", bits))
return true
}
@ -1631,18 +1626,18 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
// resulting output.
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
split := tokenAPI.Output.Split()
if handler(tokenAPI) {
origS := tokenAPI.Output.String()
s := modfunc(origS)
if s != origS {
tokenAPI.Output.SetString(s)
}
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
tokenAPI.Output.MergeSplit(split)
return true
}
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
}
@ -1926,23 +1921,21 @@ func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) H
// its input and must return the token value.
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
snap := tokenAPI.MakeSnapshot()
split := tokenAPI.Output.Split()
if handler(tokenAPI) {
// The token is not added to the child here. The child might have produced its own
// tokens and we want those to come after the token for the current parsing level.
// By adding the token to the input API and then merging the child tokens, the order
// of the tokens will match the expectations.
// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
// token to the child here, the order would have been "year", "month", "day", "date".
// When a parsing hierarchy looks like ("date" ("year", "month" "day")), the
// tokens must end up in the order "date", "year", "month", "day" and not
// "year", "month", "day", "date". Therefore (since the inner tokens have already
// been produced at this point) we have to insert this token before any tokens
// that were already created by the handler call.
token := Token{Type: toktype, Value: makeValue(tokenAPI)}
tokenAPI.Output.AddToken(token)
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
tokenAPI.Output.InsertTokenAtStart(token)
tokenAPI.Output.MergeSplit(split)
return true
}
tokenAPI.Dispose(child)
tokenAPI.RestoreSnapshot(snap)
return false
}
}
@ -1951,17 +1944,13 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(to
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(tokenAPI *API) bool {
child := tokenAPI.Fork()
if handler(tokenAPI) {
tokens := tokenAPI.Output.Tokens()
tokensCopy := make([]Token, len(tokens))
copy(tokensCopy, tokens)
tokenAPI.Output.SetTokens(Token{Type: toktype, Value: tokensCopy})
tokenAPI.Merge(child)
tokenAPI.Dispose(child)
return true
}
tokenAPI.Dispose(child)
return false
}
}