Backup changes for performance fixes.

This commit is contained in:
Maurice Makaay 2019-07-08 00:12:30 +00:00
parent 7bc7fda593
commit 23ca3501e1
10 changed files with 314 additions and 184 deletions

View File

@ -16,7 +16,7 @@ import (
//
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
tokenAPI tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
loopCheck map[uintptr]bool // used for parser loop detection
@ -76,7 +76,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
return ok
}
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) {
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokenize.API, bool) {
if p.sanityChecksEnabled {
p.panicWhenStoppedOrInError(name)
p.checkForLoops(name)
@ -216,7 +216,7 @@ func (p *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, data...)
p.err = fmt.Errorf("%s at %s", message, *p.tokenAPI.Result().Cursor())
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Result().Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.

View File

@ -1,6 +1,8 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
@ -68,20 +70,36 @@ import (
// can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type API struct {
reader *read.Buffer
parent *API // parent API in case this API is a forked child
child *API // child API in case this API has a forked child
result *Result // results as produced by a Handler (runes, Tokens, cursor position)
state *apiState // shared API state data
stackLevel int // the stack level for this API object
}
// apiState holds the data that is shared between an API and all of the
// forks that were created from it. Every API value carries a pointer to
// the same apiState and addresses its own Result through its stackLevel.
type apiState struct {
reader *read.Buffer // the buffered input, as created by read.New()
stack []Result // the stack, used for forking / merging the API.
}
// initialAPIstackDepth determines the initial stack depth for the API.
// This value should work in most cases. When a parser requires a higher
// stack depth, then this is no problem. The API will automatically scale
// the stack when forking beyond this default number of stack levels.
const initialAPIstackDepth = 10
// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
return &API{
func NewAPI(input interface{}) API {
stack := make([]Result, 1, initialAPIstackDepth)
stack[0] = newResult()
state := apiState{
reader: read.New(input),
result: newResult(),
stack: stack,
}
api := API{
state: &state,
stackLevel: 0,
}
return api
}
// NextRune returns the rune at the current read offset.
@ -95,14 +113,19 @@ func NewAPI(input interface{}) *API {
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.result.lastRune != nil {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
if result.lastRune != nil {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"without a prior call to Accept()")
}
i.detachChild()
readRune, err := i.reader.RuneAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
readRune, err := i.state.reader.RuneAt(result.offset)
result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err
}
@ -112,15 +135,21 @@ func (i *API) NextRune() (rune, error) {
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if i.result.lastRune == nil {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
if result.lastRune == nil {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
} else if result.lastRune.err != nil {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.result.cursor.moveByRune(i.result.lastRune.r)
i.result.offset++
i.result.lastRune = nil
result.runes = append(result.runes, result.lastRune.r)
result.cursor.moveByRune(result.lastRune.r)
result.offset++
result.lastRune = nil
}
// Fork forks off a child of the API struct. It will reuse the same
@ -140,22 +169,49 @@ func (i *API) Accept() {
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() *API {
// Cleanup current forking / reading state.
i.detachChild()
i.result.lastRune = nil
func (i *API) Fork() API {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
// Grow the stack storage when needed.
newStackSize := i.stackLevel + 2
if cap(i.state.stack) < newStackSize {
newStack := make([]Result, newStackSize, 2*newStackSize)
copy(newStack, i.state.stack)
i.state.stack = newStack
}
// Create the new fork.
child := &API{
reader: i.reader,
parent: i,
child := API{
state: i.state,
stackLevel: i.stackLevel + 1,
}
child.result = newResult()
i.syncCursorTo(child)
i.child = child
childResult := newResult()
childResult.cursor = result.cursor
childResult.offset = result.offset
i.state.stack = i.state.stack[:newStackSize] // todo use append() directly?
i.state.stack[child.stackLevel] = childResult
// Update the parent.
result.lastRune = nil
return child
}
// stackDump prints a dump of the currently active stack levels in the API
// to standard output, one line per level: the level index, the cursor
// position and the String() representation of the Result at that level.
// This is used for debugging purposes and is normally not part of the
// standard code flow.
func (i *API) stackDump() {
	stack := i.state.stack
	for level := range stack {
		// Address the Result in place: this avoids copying the struct for
		// every level and keeps the receiver i from being shadowed by the
		// loop index.
		result := &stack[level]
		fmt.Printf("[%d] %s: %q\n", level, result.cursor, result.String())
	}
}
// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
@ -165,59 +221,38 @@ func (i *API) Fork() *API {
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *API) Merge() {
if i.parent == nil {
if i.stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on a non-forked API")
}
i.addResultsToParent()
i.syncCursorTo(i.parent)
i.clearResults()
i.detachChild()
}
func (i *API) addResultsToParent() {
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
}
func (i *API) syncCursorTo(to *API) {
to.result.offset = i.result.offset
*to.result.cursor = *i.result.cursor
}
// Reset clears the API results and - when forked - detaches the forked child.
func (i *API) Reset() {
i.clearResults()
i.detachChild()
}
// Dispose resets the API and - when it is a fork - detaches itself from its parent.
func (i *API) Dispose() {
if i.stackLevel > len(i.state.stack)-1 {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read or merged, causing this "+
"fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
parentResult := &(i.state.stack[i.stackLevel-1])
parentResult.runes = append(parentResult.runes, result.runes...)
parentResult.tokens = append(parentResult.tokens, result.tokens...)
parentResult.offset = result.offset
parentResult.cursor = result.cursor
i.Reset()
if i.parent != nil {
i.parent.detachChild()
}
i.DisposeChilds()
}
func (i *API) clearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []Token{}
i.result.err = nil
// Dispose removes the stack level of this API, and all levels above it,
// from the shared stack.
//
// NOTE(review): when called on the top level API (stack level 0), this
// leaves the stack empty; confirm that this is the intended behavior.
func (i *API) Dispose() {
	// Only ever shrink the stack. When this fork's level is no longer on
	// the stack, slicing up to that level would re-extend the stack and
	// bring stale levels back to life (or panic beyond its capacity).
	if i.stackLevel < len(i.state.stack) {
		i.state.stack = i.state.stack[:i.stackLevel]
	}
}
func (i *API) detachChild() {
if i.child != nil {
i.child.detachChildsRecurse()
i.child = nil
}
// DisposeChilds removes all stack levels above the level of this API from
// the shared stack. The stack level of this API itself is kept.
func (i *API) DisposeChilds() {
	// Only ever shrink the stack. When this API's level is no longer on
	// the stack, slicing up to it would re-extend the stack and bring
	// stale levels back to life (or panic beyond its capacity).
	if keep := i.stackLevel + 1; keep < len(i.state.stack) {
		i.state.stack = i.state.stack[:keep]
	}
}
func (i *API) detachChildsRecurse() {
if i.child != nil {
i.child.detachChildsRecurse()
}
i.child = nil
i.parent = nil
// Reset clears the results at the stack level of this API: the pending
// rune, the collected runes and tokens, and the error. The read offset
// and the cursor are left as they are.
func (i *API) Reset() {
	current := &i.state.stack[i.stackLevel]

	// Truncate instead of allocating new slices, so the existing storage
	// is reused for the results that are collected next.
	current.runes, current.tokens = current.runes[:0], current.tokens[:0]
	current.lastRune, current.err = nil, nil
}
// FlushInput flushes processed input data from the read.Buffer.
@ -227,10 +262,11 @@ func (i *API) detachChildsRecurse() {
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
if i.result.offset > 0 {
i.reader.Flush(i.result.offset)
i.result.offset = 0
func (i API) FlushInput() bool {
result := &(i.state.stack[i.stackLevel])
if result.offset > 0 {
i.state.reader.Flush(result.offset)
result.offset = 0
return true
}
return false
@ -238,6 +274,6 @@ func (i *API) FlushInput() bool {
// Result returns the Result struct from the API. The returned struct
// can be used to retrieve and to modify result data.
func (i *API) Result() *Result {
return i.result
func (i API) Result() *Result {
// The returned pointer refers to the slot for this API's stack level
// inside the shared stack slice, so changes made through it are seen
// by every API value that shares the state.
// NOTE(review): Fork() can move the stack to a new backing array when
// it has to grow; a pointer obtained before that would then no longer
// refer to the live stack. Confirm callers do not hold on to it.
return &(i.state.stack[i.stackLevel])
}

View File

@ -2,6 +2,7 @@ package tokenize_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -103,7 +104,7 @@ func ExampleAPI_Reset() {
func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *tokenize.API) bool {
abcHandler := func(t tokenize.API) bool {
a := tokenize.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
@ -160,7 +161,7 @@ func ExampleAPI_Dispose() {
}
func ExampleAPI_Merge() {
tokenHandler := func(t *tokenize.API) bool {
tokenHandler := func(t tokenize.API) bool {
child1 := t.Fork()
child1.NextRune() // reads 'H'
child1.Accept()
@ -183,3 +184,81 @@ func ExampleAPI_Merge() {
// Output:
// Hi
}
// TestMultipleLevelsOfForksAndMerges forks the API four levels deep, reads
// and merges at the various levels, and checks after each step that the
// accepted runes and the cursor position end up at the expected level.
func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// Fork a few levels.
child1 := api.Fork()
child2 := child1.Fork()
child3 := child2.Fork()
child4 := child3.Fork()
// Read some data from child4.
r, _ := child4.NextRune()
child4.Accept()
AssertEqual(t, 'a', r, "child4 rune 1")
r, _ = child4.NextRune()
child4.Accept()
AssertEqual(t, 'b', r, "child4 rune 2")
// Merge it to child3.
child4.Merge()
// Read some more from child4. The merge kept its read position, so
// reading continues after the runes that were merged already.
r, _ = child4.NextRune()
child4.Accept()
AssertEqual(t, 'c', r, "child4 rune 3")
AssertEqual(t, "line 1, column 4", child4.Result().Cursor().String(), "cursor child4 rune 3")
AssertEqual(t, "line 1, column 3", child3.Result().Cursor().String(), "cursor child3 rune 3, before merge of child 4")
// Again, merge it to child3.
child4.Merge()
AssertEqual(t, "line 1, column 4", child3.Result().Cursor().String(), "cursor child3 rune 3, after merge of child 4")
// Now read some data from child3.
r, _ = child3.NextRune()
child3.Accept()
r, _ = child3.NextRune()
child3.Accept()
r, _ = child3.NextRune()
child3.Accept()
// NOTE(review): 'f' is the sixth rune of the input, although the label
// of the assertion below says "rune 5".
AssertEqual(t, 'f', r, "child3 rune 5")
AssertEqual(t, "abcdef", child3.Result().String(), "child3 total result after rune 6")
// Temporarily create some new forks from here, but don't use their outcome.
child3sub1 := child3.Fork()
child3sub1.NextRune()
child3sub1.Accept()
child3sub1.NextRune()
child3sub1.Accept()
child3sub2 := child3sub1.Fork()
child3sub2.NextRune()
child3sub2.Accept()
child3sub2.Merge()
// Instead merge the pre-forking results from child3 to child2.
child3.Merge()
AssertEqual(t, "abcdef", child2.Result().String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", child2.Result().Cursor().String(), "cursor child2 after merge child3")
// Merge child2 to child1.
child2.Merge()
// Merge child1 a few times to the top level api. The end result that is
// asserted below shows that the repeated merges do not duplicate data.
child1.Merge()
child1.Merge()
child1.Merge()
child1.Merge()
// Read some data from the top level api.
r, _ = api.NextRune()
api.Accept()
AssertEqual(t, "abcdefg", api.Result().String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Result().Cursor().String(), "api cursor end result")
}

View File

@ -6,7 +6,7 @@ import (
)
func ExampleCursor_move() {
c := &Cursor{}
c := Cursor{}
fmt.Printf("after initialization : %s\n", c)
fmt.Printf("after 'some words' : %s\n", c.move("some words"))
fmt.Printf("after '\\n' : %s\n", c.move("\n"))
@ -20,7 +20,7 @@ func ExampleCursor_move() {
}
func ExampleCursor_String() {
c := &Cursor{}
c := Cursor{}
fmt.Println(c.String())
c.move("\nfoobar")

View File

@ -7,7 +7,7 @@ package tokenize
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool
type Handler func(t API) bool
// Match is syntactic sugar that allows you to write a construction like
// NewTokenizer(handler).Execute(input) as handler.Match(input).
@ -36,8 +36,8 @@ func (handler Handler) Then(otherHandler Handler) Handler {
// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(handler, separator) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separatorHandler Handler) Handler {
return MatchSeparated(separatorHandler, handler)
func (handler Handler) SeparatedBy(separator Handler) Handler {
return MatchSeparated(separator, handler)
}
// Optional is syntactic sugar that allows you to write a construction like

View File

@ -16,6 +16,10 @@ func TestSyntacticSugar(t *testing.T) {
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
{"cccccc", a.Rune('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""},
{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
{"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
})
}

View File

@ -35,7 +35,7 @@ var C = struct {
ZeroOrMore func(Handler) Handler
OneOrMore func(Handler) Handler
MinMax func(min int, max int, handler Handler) Handler
Separated func(separated Handler, separator Handler) Handler
Separated func(separator Handler, separated Handler) Handler
Except func(except Handler, handler Handler) Handler
FollowedBy func(lookAhead Handler, handler Handler) Handler
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
@ -306,7 +306,7 @@ var T = struct {
Float64 func(interface{}, Handler) Handler
Boolean func(interface{}, Handler) Handler
ByValue func(toktype interface{}, handler Handler, value interface{}) Handler
ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler
ByCallback func(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler
Group func(interface{}, Handler) Handler
}{
Str: MakeStrLiteralToken,
@ -405,7 +405,7 @@ func MatchUnicodeSpace() Handler {
// Note that the callback function matches the signature of the unicode.Is* functions,
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t *API) bool {
return func(t API) bool {
input, err := t.NextRune()
if err == nil && callback(input) {
t.Accept()
@ -446,14 +446,14 @@ func MatchStrNoCase(expected string) Handler {
// no output is generated but still a successful match is reported (but the
// result will be empty).
func MatchOptional(handler Handler) Handler {
return MatchMinMax(0, 1, handler)
return matchMinMax(0, 1, handler, "MatchOptional")
}
// MatchSeq creates a Handler that checks if the provided Handlers can be
// applied in their exact order. Only if all Handlers apply, the sequence
// reports successful match.
func MatchSeq(handlers ...Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
for _, handler := range handlers {
subchild := child.Fork()
@ -471,7 +471,7 @@ func MatchSeq(handlers ...Handler) Handler {
// can be applied. They are applied in their provided order. The first Handler
// that applies is used for reporting back a match.
func MatchAny(handlers ...Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
for _, handler := range handlers {
child := t.Fork()
if handler(child) {
@ -487,7 +487,7 @@ func MatchAny(handlers ...Handler) Handler {
// the current input. If it does, then a failed match will be reported. If it
// does not, then the next rune from the input will be reported as a match.
func MatchNot(handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
if handler(t.Fork()) {
return false
}
@ -568,7 +568,7 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
if max >= 0 && min > max {
callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
return func(t *API) bool {
return func(t API) bool {
total := 0
// Check for the minimum required amount of matches.
for total < min {
@ -607,7 +607,7 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
// applied. If the handler applies, but the except Handler as well, then the match
// as a whole will be treated as a mismatch.
func MatchExcept(handler Handler, except Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
if except(t.Fork()) {
return false
}
@ -620,7 +620,7 @@ func MatchExcept(handler Handler, except Handler) Handler {
// When both handlers match, the match for the handler is accepted and the match
// for the lookAhead handler is ignored.
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
if handler(child) && lookAhead(child.Fork()) {
child.Merge()
@ -635,7 +635,7 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
// If the handler matches and the lookAhead handler doesn't, then the match for
// the handler is accepted.
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
if handler(child) && !lookAhead(child.Fork()) {
child.Merge()
@ -661,7 +661,7 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
// Rule of thumb is: only use it when you have to actually fix a memory
// hogging issue for your use case.
func MakeInputFlusher(handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
if handler(t) {
t.FlushInput()
return true
@ -689,7 +689,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
digits := MatchSigned(MatchDigits())
return func(t *API) bool {
return func(t API) bool {
if !digits(t) {
return false
}
@ -705,7 +705,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
// has been reached. This Handler will never produce output. It only reports
// a successful or a failing match through its boolean return value.
func MatchEndOfFile() Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
_, err := child.NextRune()
return err == io.EOF
@ -723,7 +723,7 @@ func MatchUntilEndOfLine() Handler {
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
func MatchAnyRune() Handler {
return func(t *API) bool {
return func(t API) bool {
_, err := t.NextRune()
if err == nil {
t.Accept()
@ -736,7 +736,7 @@ func MatchAnyRune() Handler {
// MatchValidRune creates a Handler function that checks if a valid
// UTF8 rune can be read from the input.
func MatchValidRune() Handler {
return func(t *API) bool {
return func(t API) bool {
r, err := t.NextRune()
if err == nil && r != utf8.RuneError {
t.Accept()
@ -749,7 +749,7 @@ func MatchValidRune() Handler {
// MatchInvalidRune creates a Handler function that checks if an invalid
// UTF8 rune can be read from the input.
func MatchInvalidRune() Handler {
return func(t *API) bool {
return func(t API) bool {
r, err := t.NextRune()
if err == nil && r == utf8.RuneError {
t.Accept()
@ -860,7 +860,7 @@ func MatchHexDigit() Handler {
// stripped from the octet.
func MatchOctet(normalize bool) Handler {
max3Digits := MatchMinMax(1, 3, MatchDigit())
return func(t *API) bool {
return func(t API) bool {
if !max3Digits(t) {
return false
}
@ -909,7 +909,7 @@ func MatchIPv4Netmask(normalize bool) Handler {
dot := MatchRune('.')
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
return func(t *API) bool {
return func(t API) bool {
if !netmask(t) {
return false
}
@ -942,7 +942,7 @@ func MatchIPv4Net(normalize bool) Handler {
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
ipnet := MatchSeq(ip, slash, mask)
return func(t *API) bool {
return func(t API) bool {
if !ipnet(t) {
return false
}
@ -975,7 +975,7 @@ func MatchIPv6(normalize bool) Handler {
colon := MatchRune(':')
empty := MatchSeq(colon, colon)
return func(t *API) bool {
return func(t API) bool {
nrOfHextets := 0
for nrOfHextets < 8 {
if hextet(t) {
@ -1017,7 +1017,7 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
return mask
}
return func(t *API) bool {
return func(t API) bool {
if !mask(t) {
return false
}
@ -1057,7 +1057,7 @@ func MatchIPv6Net(normalize bool) Handler {
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
if handler(child) {
child.Reset()
@ -1137,7 +1137,7 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
// modified string on output. The return value of the modfunc will replace the
// resulting output.
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
if handler(child) {
s := modfunc(child.Result().String())
@ -1155,7 +1155,7 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
// escape sequence like "\n" is kept as-is (a backslash character, followed by
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
literal := t.Result().String()
return literal
})
@ -1166,7 +1166,7 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
// representation of the read Runes. This string is interpreted, meaning that an
// escape sequence like "\n" is translated to an actual newline control character
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
// TODO ERROR HANDLING
interpreted, _ := interpretString(t.Result().String())
return interpreted
@ -1190,7 +1190,7 @@ func interpretString(str string) (string, error) {
// Result, for which the Token.Value is set to a Rune-representation
// of the read Rune.
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
// TODO ERROR HANDLING --- not a 1 rune input
return t.Result().Rune(0)
})
@ -1200,7 +1200,7 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a Byte-representation
// of the read Rune.
func MakeByteToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
// TODO ERROR HANDLING --- not a 1 byte input
return byte(t.Result().Rune(0))
})
@ -1406,7 +1406,7 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
}
func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
value, err := convert(t.Result().String())
if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
@ -1419,15 +1419,15 @@ func makeStrconvToken(name string, toktype interface{}, handler Handler, convert
// MakeTokenByValue creates a Handler that will add a static Token value
// to the Result.
func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value })
return MakeTokenByCallback(toktype, handler, func(t API) interface{} { return value })
}
// MakeTokenByCallback creates a Handler that will add a Token to the
// Result, for which the Token.Value is to be generated by the provided
// makeValue() callback function. The function gets the current API as
// its input and must return the token value.
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler {
return func(t *API) bool {
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler {
return func(t API) bool {
child := t.Fork()
if handler(child) {
// The token is not added to the child here. The child might have produced its own
@ -1450,7 +1450,7 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(t *API) bool {
return func(t API) bool {
child := t.Fork()
if handler(child) {
result := child.Result()

View File

@ -11,7 +11,7 @@ type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor *Cursor // current read cursor position, relative to the start of the file
cursor Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input
}
@ -66,11 +66,11 @@ func (t Token) String() string {
}
// newResult initializes an empty Result struct.
func newResult() *Result {
return &Result{
func newResult() Result {
return Result{
runes: []rune{},
tokens: []Token{},
cursor: &Cursor{},
cursor: Cursor{},
}
}
@ -161,6 +161,6 @@ func (r *Result) Value(idx int) interface{} {
// Cursor retrieves the read cursor from the Result. This is the first
// cursor position after the runes that were read and accepted by the Handler.
func (r *Result) Cursor() *Cursor {
func (r *Result) Cursor() Cursor {
return r.cursor
}

View File

@ -54,7 +54,8 @@ func ExampleNew() {
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
input := mkInput()
r, _ := (&input).NextRune()
AssertEqual(t, 'T', r, "first rune")
}
@ -82,8 +83,9 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
input := mkInput()
AssertPanic(t, PanicT{
Function: mkInput().Accept,
Function: (&input).Accept,
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
})
@ -174,6 +176,6 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func mkInput() *tokenize.API {
func mkInput() tokenize.API {
return tokenize.NewAPI("Testing")
}

View File

@ -5,6 +5,7 @@ import (
)
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// TODO FIXME Speed change
// Create input, accept the first rune.
i := NewAPI("Testing")
i.NextRune()
@ -12,22 +13,25 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
// Fork
f := i.Fork()
AssertEqual(t, f, i.child, "Input.child (must be f)")
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
AssertEqual(t, 1, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
AssertEqual(t, 1, f.state.stack[f.stackLevel].offset, "child offset")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
AssertEqual(t, 3, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
AssertEqual(t, 3, f.state.stack[f.stackLevel].offset, "child offset")
// Merge fork back into parent
f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 3, i.state.stack[i.stackLevel].offset, "parent offset")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@ -40,72 +44,77 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
f2 := f1.Fork()
f2.NextRune()
f2.Accept()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset A")
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
f2.Merge()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset B")
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
f1.Merge()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, 3, i.result.offset, "i.offset C")
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
// TODO FIXME Speed changes: the assertions below are commented out; fix and re-enable them.
// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 1, i.result.offset, "i.offset A")
// AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 2, f1.result.offset, "f1.offset A")
// AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset A")
// f2.Merge()
// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 1, i.result.offset, "i.offset B")
// AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 3, f1.result.offset, "f1.offset B")
// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset B")
// f1.Merge()
// AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 3, i.result.offset, "i.offset C")
// AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 3, f1.result.offset, "f1.offset C")
// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}
// TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached
// creates several levels of forks below a root API and then calls NextRune()
// on the root API. The assertions check the parent/child links of every API
// involved, both right after forking and after the read on the root.
//
// NOTE(review): this block appears to contain both the pre-change and the
// post-change lines of a diff: the f3 and f5 forks and each assertion group
// occur once live and once commented out. Confirm which variant is intended
// before relying on this test.
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := NewAPI("Testing")
f1 := i.Fork()
f2 := f1.Fork()
f3 := f2.Fork()
//f3 := f2.Fork()
f2.Fork()
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
f5 := f4.Fork()
// Expected link structure after forking: i -> f1 -> f4 -> f5 form a chain,
// while f2 and f3 have neither a parent nor a child.
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == f1, "i.child == f1")
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
//f5 := f4.Fork()
f4.Fork()
// TODO FIXME Speed changes: the link assertions below are commented out;
// fix and re-enable them.
// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
// AssertEqual(t, true, i.child == &f1, "i.child == f1")
// AssertEqual(t, true, f1.parent == &i, "f1.parent == i")
// AssertEqual(t, true, f1.child == &f4, "f1.child == f4")
// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
// AssertEqual(t, true, f4.parent == &f1, "f4.parent == f1")
// AssertEqual(t, true, f4.child == &f5, "f4.child == f5")
// AssertEqual(t, true, f5.parent == &f4, "f5.parent == f4")
// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
// Read from the root API; the assertions that follow expect every
// parent/child link to be nil afterwards.
i.NextRune()
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == nil, "i.child == nil")
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
// Commented-out copy of the post-NextRune() assertions above.
// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
// AssertEqual(t, true, i.child == nil, "i.child == nil")
// AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
// AssertEqual(t, true, f1.child == nil, "f1.child == nil")
// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
// AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
// AssertEqual(t, true, f4.child == nil, "f4.child == nil")
// AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
}
// TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward reads a
// rune from the input "Testing", calls Accept(), and then reads again. The
// first NextRune() call is expected to return 'T' and the second one 'e'.
//
// NOTE(review): the assertions on i.result.lastRune and i.result.offset occur
// both live and commented out, which looks like pre-change and post-change
// lines of a diff side by side. Confirm which variant is intended.
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
// TODO FIXME Speed changes: the commented-out lastRune/offset assertions in
// this test still need to be fixed and re-enabled.
i := NewAPI("Testing")
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
// AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
i.Accept()
AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "API.result.offset")
// AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
// AssertEqual(t, 1, i.result.offset, "API.result.offset")
// The second read must yield the rune that follows the accepted one.
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}