A nice performance gain from distinguishing between the variadic AcceptRunes/AcceptBytes functions and the new, simpler AcceptRune/AcceptByte functions. The simpler versions are faster when accepting only a single byte or rune (which is the case in most situations).

This commit is contained in:
Maurice Makaay 2019-07-19 21:13:15 +00:00
parent 9a53ea9012
commit 458d6f60a6
5 changed files with 382 additions and 369 deletions

View File

@ -128,6 +128,9 @@ func (i *API) PeekByte(offset int) (byte, error) {
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The byte is not added to
// the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped byte.
func (i *API) SkipByte(b byte) {
i.stackFrame.moveCursorByByte(b)
i.stackFrame.offset++
@ -140,6 +143,9 @@ func (i *API) SkipByte(b byte) {
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The bytes are not added to
// the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped bytes.
func (i *API) SkipBytes(bytes ...byte) {
for _, b := range bytes {
i.stackFrame.moveCursorByByte(b)
@ -155,6 +161,9 @@ func (i *API) SkipBytes(bytes ...byte) {
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the byte to the tokenizer
// results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted byte.
func (i *API) AcceptByte(b byte) {
curBytesEnd := i.stackFrame.bytesEnd
maxRequiredBytes := curBytesEnd + 1
@ -180,6 +189,9 @@ func (i *API) AcceptByte(b byte) {
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the bytes to the tokenizer
// results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes.
func (i *API) AcceptBytes(bytes ...byte) {
curBytesEnd := i.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + len(bytes)
@ -223,8 +235,12 @@ func (i *API) PeekRune(offset int) (rune, int, error) {
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The rune is not added to
// the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped rune.
func (i *API) SkipRune(r rune) {
	// The number of bytes that the rune occupies when encoded as UTF-8.
	// This depends on r alone, so it can be determined up front.
	// NOTE(review): utf8.RuneLen reports -1 for a value that is not a valid
	// rune; presumably callers only pass runes obtained via PeekRune() - confirm.
	width := utf8.RuneLen(r)

	// First update the cursor for the skipped rune, then move the read
	// offset past the rune's bytes.
	i.stackFrame.moveCursorByRune(r)
	i.stackFrame.offset += width
}
// SkipRunes is used to skip over one or more runes that were read from the input.
@ -234,6 +250,9 @@ func (i *API) SkipRune(r rune) {
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The runes are not added to
// the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped runes.
func (i *API) SkipRunes(runes ...rune) {
for _, r := range runes {
i.stackFrame.moveCursorByRune(r)
@ -249,6 +268,9 @@ func (i *API) SkipRunes(runes ...rune) {
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the rune to the tokenizer
// results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune.
func (i *API) AcceptRune(r rune) {
curBytesEnd := i.stackFrame.bytesEnd
maxRequiredBytes := curBytesEnd + utf8.UTFMax
@ -274,6 +296,9 @@ func (i *API) AcceptRune(r rune) {
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the runes to the tokenizer
// results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted runes.
func (i *API) AcceptRunes(runes ...rune) {
runesAsString := string(runes)
byteLen := len(runesAsString)

View File

@ -2,6 +2,7 @@ package tokenize_test
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
@ -43,15 +44,15 @@ func ExampleAPI_PeekRune() {
func ExampleAPI_AcceptRune() {
api := tokenize.NewAPI("The input that the API will handle")
// reads 'T' and adds it to the API results
// Reads 'T' and accepts it to the API results.
r, _, _ := api.PeekRune(0)
api.AcceptRune(r)
// reads 'h' and adds it to the API results
// Reads 'h' and accepts it to the API results.
r, _, _ = api.PeekRune(0)
api.AcceptRune(r)
// reads 'e', but does not add it to the API results
// Reads 'e', but does not accept it to the API results.
r, _, _ = api.PeekRune(0)
fmt.Printf("API results: %q\n", api.String())
@ -60,6 +61,50 @@ func ExampleAPI_AcceptRune() {
// API results: "Th"
}
// ExampleAPI_AcceptRunes demonstrates accepting multiple peeked runes with a
// single AcceptRunes() call, while a further peeked rune is left unaccepted.
func ExampleAPI_AcceptRunes() {
	api := tokenize.NewAPI("The input that the API will handle")

	// Look ahead at 'T' (offset 0) and 'h' (offset 1).
	first, _, _ := api.PeekRune(0)
	second, _, _ := api.PeekRune(1)

	// Look ahead at 'e' (offset 2) as well. It is peeked at, but never accepted.
	api.PeekRune(2)

	// Only 'T' and 'h' are accepted, so only those end up in the API results.
	api.AcceptRunes(first, second)

	accepted := api.String()
	fmt.Printf("API results: %q\n", accepted)

	// Output:
	// API results: "Th"
}
// ExampleAPI_SkipRune demonstrates the difference between SkipRune() and
// AcceptRune(): both move past the rune on the input, but only accepted
// runes end up in the API results.
func ExampleAPI_SkipRune() {
	const vowels = "aeiouAEIOU"

	api := tokenize.NewAPI("The input that the API will handle")

	for {
		r, _, err := api.PeekRune(0)
		if err != nil {
			// No more runes can be read (EOF reached).
			break
		}

		// Everything that is not a vowel is skipped over.
		if !strings.ContainsRune(vowels, r) {
			api.SkipRune(r)
			continue
		}

		// Vowels are added to the results.
		api.AcceptRune(r)
	}

	fmt.Printf("API results: %q\n", api.String())

	// Output:
	// API results: "eiuaeAIiae"
}
func ExampleAPI_modifyingResults() {
api := tokenize.NewAPI("")
@ -97,32 +142,31 @@ func ExampleAPI_modifyingResults() {
// API second result token: 73("Zaphod")
}
// TODO FIXME
// func ExampleAPI_Reset() {
// api := tokenize.NewAPI("Very important input!")
func ExampleAPI_Reset() {
api := tokenize.NewAPI("Very important input!")
// api.NextRune() // read 'V'
// api.Accept()
// api.NextRune() // read 'e'
// api.Accept()
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
r, _, _ := api.PeekRune(0) // read 'V'
api.AcceptRune(r)
r, _, _ = api.PeekRune(0) // read 'e'
api.AcceptRune(r)
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // Reset clears the results.
// api.Reset()
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// Reset clears the results.
api.Reset()
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // So then doing the same read operations, the same data are read.
// api.NextRune() // read 'V'
// api.Accept()
// api.NextRune() // read 'e'
// api.Accept()
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// So then doing the same read operations, the same data are read.
r, _, _ = api.PeekRune(0) // read 'V'
api.AcceptRune(r)
r, _, _ = api.PeekRune(0) // read 'e'
api.AcceptRune(r)
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // Output:
// // API results: "Ve" at line 1, column 3
// // API results: "" at start of file
// // API results: "Ve" at line 1, column 3
// }
// Output:
// API results: "Ve" at line 1, column 3
// API results: "" at start of file
// API results: "Ve" at line 1, column 3
}
func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'.
@ -164,149 +208,144 @@ func ExampleAPI_Fork() {
// <nil> mismatch at start of file
}
// TODO FIXME
// func ExampleAPI_Merge() {
// tokenHandler := func(t *tokenize.API) bool {
// child1 := t.Fork()
// t.NextRune() // reads 'H'
// t.Accept()
// t.NextRune() // reads 'i'
// t.Accept()
func ExampleAPI_Merge() {
tokenHandler := func(t *tokenize.API) bool {
child1 := t.Fork()
r0, _, _ := t.PeekRune(0) // reads 'H'
r1, _, _ := t.PeekRune(1) // reads 'i'
t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child1
// child2 := t.Fork()
// t.NextRune() // reads ' '
// t.Accept()
// t.NextRune() // reads 'm'
// t.Accept()
// t.Dispose(child2)
child2 := t.Fork()
r0, _, _ = t.PeekRune(0) // reads ' '
r1, _, _ = t.PeekRune(1) // reads 'm'
t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child2
t.Dispose(child2) // but they are not merged and therefore not used by child1
// t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
// t.Dispose(child1) // and clean up child1 to return to the parent
// return true
// }
t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
t.Dispose(child1) // and clean up child1 to return to the parent
return true
}
// result, _ := tokenize.New(tokenHandler)("Hi mister X!")
// fmt.Println(result.String())
result, _ := tokenize.New(tokenHandler)("Hi mister X!")
fmt.Println(result.String())
// // Output:
// // Hi
// }
// Output:
// Hi
}
// TODO FIXME
// func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// // Fork a few levels.
// child1 := api.Fork()
// child2 := api.Fork()
// child3 := api.Fork()
// child4 := api.Fork()
// Fork a few levels.
child1 := api.Fork()
child2 := api.Fork()
child3 := api.Fork()
child4 := api.Fork()
// // Read a rune 'a' from child4.
// r, _ := api.NextRune()
// AssertEqual(t, 'a', r, "child4 rune 1")
// api.Accept()
// AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
// Read a rune 'a' from child4.
r, _, _ := api.PeekRune(0)
AssertEqual(t, 'a', r, "child4 rune 1")
api.AcceptRune(r)
AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
// // Read another rune 'b' from child4.
// r, _ = api.NextRune()
// AssertEqual(t, 'b', r, "child4 rune 2")
// api.Accept()
// AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
// Read another rune 'b' from child4.
r, _, _ = api.PeekRune(0)
AssertEqual(t, 'b', r, "child4 rune 2")
api.AcceptRune(r)
AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
// // Merge "ab" from child4 to child3.
// api.Merge(child4)
// AssertEqual(t, "", api.String(), "child4 runes after first merge")
// Merge "ab" from child4 to child3.
api.Merge(child4)
AssertEqual(t, "", api.String(), "child4 runes after first merge")
// // Read some more from child4.
// r, _ = api.NextRune()
// AssertEqual(t, 'c', r, "child4 rune 3")
// api.Accept()
// AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
// Read some more from child4.
r, _, _ = api.PeekRune(0)
AssertEqual(t, 'c', r, "child4 rune 3")
api.AcceptRune(r)
AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
// // Merge "c" from child4 to child3.
// api.Merge(child4)
// Merge "c" from child4 to child3.
api.Merge(child4)
// // And dispose of child4, making child3 the active stack level.
// api.Dispose(child4)
// And dispose of child4, making child3 the active stack level.
api.Dispose(child4)
// // Child3 should now have the compbined results "abc" from child4's work.
// AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// Child3 should now have the combined results "abc" from child4's work.
AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// // Now read some data from child3.
// r, _ = api.NextRune()
// AssertEqual(t, 'd', r, "child3 rune 5")
// api.Accept()
// Now read some data from child3.
r, _, _ = api.PeekRune(0)
AssertEqual(t, 'd', r, "child3 rune 5")
api.AcceptRune(r)
// r, _ = api.NextRune()
// AssertEqual(t, 'e', r, "child3 rune 5")
// api.Accept()
r, _, _ = api.PeekRune(0)
AssertEqual(t, 'e', r, "child3 rune 5")
api.AcceptRune(r)
// r, _ = api.NextRune()
// AssertEqual(t, 'f', r, "child3 rune 5")
// api.Accept()
r, _, _ = api.PeekRune(0)
AssertEqual(t, 'f', r, "child3 rune 5")
api.AcceptRune(r)
// AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
// // Temporarily go some new forks from here, but don't use their outcome.
// child3sub1 := api.Fork()
// api.NextRune()
// api.Accept()
// api.NextRune()
// api.Accept()
// child3sub2 := api.Fork()
// api.NextRune()
// api.Accept()
// api.Merge(child3sub2) // do merge sub2 down to sub1
// api.Dispose(child3sub2) // and dispose of sub2
// api.Dispose(child3sub1) // but dispose of sub1 without merging
// Temporarily go some new forks from here, but don't use their outcome.
child3sub1 := api.Fork()
r, _, _ = api.PeekRune(0)
api.AcceptRune(r)
r, _, _ = api.PeekRune(0)
api.AcceptRune(r)
child3sub2 := api.Fork()
r, _, _ = api.PeekRune(0)
api.AcceptRune(r)
api.Merge(child3sub2) // do merge sub2 down to sub1
api.Dispose(child3sub2) // and dispose of sub2
api.Dispose(child3sub1) // but dispose of sub1 without merging
// // Instead merge the results from before this forking segway from child3 to child2
// // and dispose of it.
// api.Merge(child3)
// api.Dispose(child3)
// Instead merge the results from before this forking segue from child3 to child2
// and dispose of it.
api.Merge(child3)
api.Dispose(child3)
// AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
// AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
// // Merge child2 to child1 and dispose of it.
// api.Merge(child2)
// api.Dispose(child2)
// Merge child2 to child1 and dispose of it.
api.Merge(child2)
api.Dispose(child2)
// // Merge child1 a few times to the top level api.
// api.Merge(child1)
// api.Merge(child1)
// api.Merge(child1)
// api.Merge(child1)
// Merge child1 a few times to the top level api.
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
// // And dispose of it.
// api.Dispose(child1)
// And dispose of it.
api.Dispose(child1)
// // Read some data from the top level api.
// r, _ = api.NextRune()
// api.Accept()
// Read some data from the top level api.
r, _, _ = api.PeekRune(0)
api.AcceptRune(r)
// AssertEqual(t, "abcdefg", api.String(), "api string end result")
// AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
// }
AssertEqual(t, "abcdefg", api.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
}
// TODO FIXME
// func TestClearRunes(t *testing.T) {
// api := tokenize.NewAPI("Laphroaig")
// api.NextRune() // Read 'L'
// api.Accept() // Add to runes
// api.NextRune() // Read 'a'
// api.Accept() // Add to runes
// api.ClearRunes() // Clear the runes, giving us a fresh start.
// api.NextRune() // Read 'p'
// api.Accept() // Add to runes
// api.NextRune() // Read 'r'
// api.Accept() // Add to runes
func TestClearRunes(t *testing.T) {
api := tokenize.NewAPI("Laphroaig")
r, _, _ := api.PeekRune(0) // Read 'L'
api.AcceptRune(r) // Add to runes
r, _, _ = api.PeekRune(0) // Read 'a'
api.AcceptRune(r) // Add to runes
api.ClearRunes() // Clear the runes, giving us a fresh start.
r, _, _ = api.PeekRune(0) // Read 'p'
api.AcceptRune(r) // Add to runes
r, _, _ = api.PeekRune(0) // Read 'r'
api.AcceptRune(r) // Add to runes
// AssertEqual(t, "ph", api.String(), "api string end result")
// }
AssertEqual(t, "ph", api.String(), "api string end result")
}
func TestMergeScenariosForTokens(t *testing.T) {
api := tokenize.NewAPI("")

View File

@ -352,7 +352,7 @@ func MatchByte(expected byte) Handler {
return func(t *API) bool {
b, err := t.PeekByte(0)
if err == nil && b == expected {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
return false
@ -367,7 +367,7 @@ func MatchRune(expected rune) Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil && r == expected {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -384,7 +384,7 @@ func MatchBytes(expected ...byte) Handler {
}
for _, e := range expected {
if b == e {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
}
@ -414,7 +414,7 @@ func MatchRunes(expected ...rune) Handler {
}
for _, e := range expected {
if r == e {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
}
@ -436,7 +436,7 @@ func MatchByteRange(start byte, end byte) Handler {
return func(t *API) bool {
r, err := t.PeekByte(0)
if err == nil && r >= start && r <= end {
t.AcceptBytes(r)
t.AcceptByte(r)
return true
}
return false
@ -460,7 +460,7 @@ func MatchRuneRange(start rune, end rune) Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil && r >= start && r <= end {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -499,7 +499,7 @@ func MatchBlank() Handler {
return func(t *API) bool {
b, err := t.PeekByte(0)
if err == nil && (b == ' ' || b == '\t') {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
return false
@ -520,7 +520,7 @@ func MatchBlanks() Handler {
if err != nil || (b != ' ' && b != '\t') {
return false
}
t.AcceptBytes(b)
t.AcceptByte(b)
// Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end.
@ -529,7 +529,7 @@ func MatchBlanks() Handler {
if err != nil || (b != ' ' && b != '\t') {
return true
}
t.AcceptBytes(b)
t.AcceptByte(b)
}
}
}
@ -551,7 +551,7 @@ func MatchWhitespace() Handler {
}
t.AcceptBytes(b1, b2)
} else {
t.AcceptBytes(b1)
t.AcceptByte(b1)
}
// Now match any number of followup whitespace. We've already got
@ -568,7 +568,7 @@ func MatchWhitespace() Handler {
}
t.AcceptBytes(b1, b2)
} else {
t.AcceptBytes(b1)
t.AcceptByte(b1)
}
}
}
@ -590,7 +590,7 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
return func(t *API) bool {
b, err := t.PeekByte(0)
if err == nil && callback(b) {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
return false
@ -607,7 +607,7 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil && callback(r) {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -622,7 +622,7 @@ func MatchEndOfLine() Handler {
return err == io.EOF
}
if b1 == '\n' {
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
if b1 == '\r' {
@ -763,7 +763,7 @@ func MatchNot(handler Handler) Handler {
t.Dispose(child)
r, _, err := t.PeekRune(0)
if err == nil {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -961,7 +961,7 @@ func MatchSigned(handler Handler) Handler {
return false
}
if b == '-' || b == '+' {
t.AcceptBytes(b)
t.AcceptByte(b)
}
if handler(t) {
t.Merge(child)
@ -1019,7 +1019,7 @@ func MatchAnyByte() Handler {
return func(t *API) bool {
b, err := t.PeekByte(0)
if err == nil {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
return false
@ -1033,7 +1033,7 @@ func MatchAnyRune() Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -1046,7 +1046,7 @@ func MatchValidRune() Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil && r != utf8.RuneError {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -1059,7 +1059,7 @@ func MatchInvalidRune() Handler {
return func(t *API) bool {
r, _, err := t.PeekRune(0)
if err == nil && r == utf8.RuneError {
t.AcceptRunes(r)
t.AcceptRune(r)
return true
}
return false
@ -1081,7 +1081,7 @@ func MatchDigits() Handler {
if err != nil || b < '0' || b > '9' {
return false
}
t.AcceptBytes(b)
t.AcceptByte(b)
// Continue accepting bytes as long as they are digits.
for {
@ -1089,7 +1089,7 @@ func MatchDigits() Handler {
if err != nil || b < '0' || b > '9' {
return true
}
t.AcceptBytes(b)
t.AcceptByte(b)
}
}
}
@ -1120,18 +1120,18 @@ func MatchInteger(normalize bool) Handler {
// The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b {
t.SkipBytes('0')
t.SkipByte('0')
continue
}
// The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' {
t.AcceptBytes('0')
t.AcceptByte('0')
return true
}
// The next character is a digit. Skip the leading zero and go with the digit.
t.SkipBytes('0')
t.AcceptBytes(b2)
t.SkipByte('0')
t.AcceptByte(b2)
break
}
}
@ -1142,7 +1142,7 @@ func MatchInteger(normalize bool) Handler {
if err != nil || b < '0' || b > '9' {
return true
}
t.AcceptBytes(b)
t.AcceptByte(b)
}
}
}
@ -1169,24 +1169,24 @@ func MatchDecimal(normalize bool) Handler {
// The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b {
t.SkipBytes('0')
t.SkipByte('0')
continue
}
// The next character is a dot, go with the zero before the dot and
// let the upcoming code handle the dot.
if err == nil && b2 == '.' {
t.AcceptBytes('0')
t.AcceptByte('0')
break
}
// The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' {
t.AcceptBytes('0')
t.AcceptByte('0')
return true
}
// The next character is a digit. Skip the leading zero and go with the digit.
t.SkipBytes('0')
t.AcceptBytes(b2)
t.SkipByte('0')
t.AcceptByte(b2)
break
}
}
@ -1216,7 +1216,7 @@ func MatchDecimal(normalize bool) Handler {
if err != nil || b < '0' || b > '9' {
break
}
t.AcceptBytes(b)
t.AcceptByte(b)
}
return true
}
@ -1236,13 +1236,13 @@ func MatchBoolean() Handler {
return false
}
if b1 == '1' || b1 == '0' {
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
if b1 == 't' || b1 == 'T' {
b2, err := t.PeekByte(1)
if err != nil || (b2 != 'R' && b2 != 'r') {
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
b3, _ := t.PeekByte(2)
@ -1255,14 +1255,14 @@ func MatchBoolean() Handler {
t.AcceptBytes(b1, b2, b3, b4)
return true
}
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
if b1 == 'f' || b1 == 'F' {
b2, err := t.PeekByte(1)
if err != nil || (b2 != 'A' && b2 != 'a') {
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
b3, _ := t.PeekByte(2)
@ -1276,7 +1276,7 @@ func MatchBoolean() Handler {
t.AcceptBytes(b1, b2, b3, b4, b5)
return true
}
t.AcceptBytes(b1)
t.AcceptByte(b1)
return true
}
return false
@ -1325,7 +1325,7 @@ func MatchHexDigit() Handler {
return func(t *API) bool {
b, err := t.PeekByte(0)
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
t.AcceptBytes(b)
t.AcceptByte(b)
return true
}
return false
@ -1560,14 +1560,13 @@ func ModifyDropUntilEndOfLine() Handler {
if err != nil {
if err == io.EOF {
return true
} else {
return false
}
return false
}
if b == '\n' {
return true
}
t.SkipBytes(b)
t.SkipByte(b)
}
}
}

View File

@ -2,7 +2,10 @@ package tokenize_test
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -50,63 +53,24 @@ func ExampleNew() {
// Error: mismatch at start of file
}
// TODO FIXME
// func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
// api := makeTokenizeAPI()
// r, _ := api.NextRune()
// AssertEqual(t, 'T', r, "first rune")
// }
// TestCallingPeekRune_PeeksRuneOnInput checks that peeking at offset 0 of a
// fresh API yields the first rune of the test input.
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
	input := makeTokenizeAPI()

	first, _, _ := input.PeekRune(0)

	AssertEqual(t, 'T', first, "first rune")
}
// TODO FIXME
// func TestInputCanAcceptRunesFromReader(t *testing.T) {
// i := makeTokenizeAPI()
// i.NextRune()
// i.Accept()
// i.NextRune()
// i.Accept()
// i.NextRune()
// i.Accept()
// AssertEqual(t, "Tes", i.String(), "i.String()")
// }
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := makeTokenizeAPI()
// TODO FIXME
// func TestCallingNextRuneTwice_Panics(t *testing.T) {
// AssertPanic(t, PanicT{
// Function: func() {
// i := makeTokenizeAPI()
// i.NextRune()
// i.NextRune()
// },
// Regexp: true,
// Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
// `without a prior call to Accept\(\)`,
// })
// }
r0, _, _ := i.PeekRune(0)
i.AcceptRune(r0)
// TODO FIXME
// func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
// api := makeTokenizeAPI()
// AssertPanic(t, PanicT{
// Function: api.Accept,
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
// `without first calling NextRune\(\)`,
// })
// }
r1, _, _ := i.PeekRune(0) // 0, because read offset resets to 0 after Accept* calls.
r2, _, _ := i.PeekRune(1)
i.AcceptRunes(r1, r2)
// TODO FIXME
// func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
// api := tokenize.NewAPI("")
// AssertPanic(t, PanicT{
// Function: func() {
// api.NextRune()
// api.Accept()
// },
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
// `, but the prior call to NextRune\(\) failed`,
// })
// }
AssertEqual(t, "Tes", i.String(), "i.String()")
}
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
@ -168,61 +132,55 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}
// TODO FIXME
// func TestForkingInput_ClearsLastRune(t *testing.T) {
// AssertPanic(t, PanicT{
// Function: func() {
// i := makeTokenizeAPI()
// i.NextRune()
// i.Fork()
// i.Accept()
// },
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
// })
// }
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
r, _, _ := i.PeekRune(0)
i.AcceptRune(r)
}
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
// TODO FIXME
// func TestAccept_UpdatesCursor(t *testing.T) {
// i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
// AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
// for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
// i.NextRune()
// i.Accept()
// }
// AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
// i.NextRune() // read "\n", cursor ends up at start of new line
// i.Accept()
// AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
// for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
// i.NextRune()
// i.Accept()
// }
// AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
// }
r, _, _ := i.PeekRune(0) // read "\n", cursor ends up at start of new line
i.AcceptRune(r)
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
// TODO FIXME
// func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
// i := tokenize.NewAPI(strings.NewReader("X"))
// i.NextRune()
// i.Accept()
// r, err := i.NextRune()
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
// AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
// }
// TODO FIXME
// func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
// i := tokenize.NewAPI(strings.NewReader("X"))
// child := i.Fork()
// i.NextRune()
// i.Accept()
// r, err := i.NextRune()
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
// i.Dispose(child) // brings the read offset back to the start
// r, err = i.NextRune() // so here we should see the same rune
// AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
// AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
// }
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
b, _ := i.PeekByte(0)
i.AcceptByte(b)
}
AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
}
// TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned checks that, once the only
// rune of the input has been accepted, a further PeekRune() call returns
// utf8.RuneError together with io.EOF.
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))

	// Accept the single rune 'X', after which offset 0 points past it.
	r, _, _ := i.PeekRune(0)
	i.AcceptRune(r)

	// There is no input left, so this peek must report the end of file.
	r, _, err := i.PeekRune(0)
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from PeekRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from PeekRune()")
}
// TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed checks that
// running into the end of file inside a forked child does not prevent the
// same input from being read again after the child has been disposed.
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()

	// Go to the EOF.
	r, _, _ := i.PeekRune(0)
	i.AcceptRune(r)
	r, _, err := i.PeekRune(0)
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd PeekRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from 2nd PeekRune()")

	// Brings the read offset back to the start.
	i.Dispose(child)

	// So here we should see the same input data as before.
	r, _, err = i.PeekRune(0)
	AssertEqual(t, 'X', r, "returned rune from 3rd PeekRune()")
	AssertEqual(t, true, err == nil, "returned error from 3rd PeekRune()")
}
func makeTokenizeAPI() *tokenize.API {
return tokenize.NewAPI("Testing")

View File

@ -4,95 +4,87 @@ import (
"testing"
)
// TODO FIXME
// func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// // Create input, accept the first rune.
// i := NewAPI("Testing")
// i.NextRune()
// i.Accept() // T
// AssertEqual(t, "T", i.String(), "accepted rune in input")
// // Fork
// child := i.Fork()
// AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
// AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// // Accept two runes via fork.
// i.NextRune()
// i.Accept() // e
// i.NextRune()
// i.Accept() // s
// AssertEqual(t, "es", i.String(), "result runes in fork")
// AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
// AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// // Merge fork back into parent
// i.Merge(child)
// i.Dispose(child)
// AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
// AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
// }
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := NewAPI("Testing")
r, _, _ := i.PeekRune(0)
i.AcceptRune(r) // T
AssertEqual(t, "T", i.String(), "accepted rune in input")
// TODO FIXME
// func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
// i := NewAPI("Testing")
// i.NextRune()
// i.Accept()
// f1 := i.Fork()
// i.NextRune()
// i.Accept()
// f2 := i.Fork()
// i.NextRune()
// i.Accept()
// AssertEqual(t, "s", i.String(), "f2 String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
// i.Merge(f2)
// i.Dispose(f2)
// AssertEqual(t, "es", i.String(), "f1 String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// i.Merge(f1)
// i.Dispose(f1)
// AssertEqual(t, "Tes", i.String(), "top-level API String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// }
// Fork
child := i.Fork()
AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// TODO FIXME
// func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
// i := NewAPI("Testing")
// r, _ := i.NextRune()
// AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
// AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
// AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
// i.Accept()
// AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
// AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
// r, _ = i.NextRune()
// AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
// }
// Accept two runes via fork.
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // e
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // s
AssertEqual(t, "es", i.String(), "result runes in fork")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// TODO FIXME
// func TestFlushInput(t *testing.T) {
// api := NewAPI("cool")
// Merge fork back into parent
i.Merge(child)
i.Dispose(child)
AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}
// // Flushing without any read data is okay. FlushInput() will return
// // false in this case, and nothing else happens.
// AssertTrue(t, api.FlushInput() == false, "flush input at start")
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewAPI("Testing")
r, _, _ := i.PeekRune(0)
i.AcceptRune(r) // T
// api.NextRune()
// api.Accept()
// api.NextRune()
// api.Accept()
f1 := i.Fork()
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // e
// AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
// AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")
f2 := i.Fork()
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // s
AssertEqual(t, "s", i.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
// AssertTrue(t, api.FlushInput() == false, "flush input after flush input")
i.Merge(f2)
i.Dispose(f2)
AssertEqual(t, "es", i.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// // Read offset is now zero, but reading should continue after "co".
// api.NextRune()
// api.Accept()
// api.NextRune()
// api.Accept()
i.Merge(f1)
i.Dispose(f1)
AssertEqual(t, "Tes", i.String(), "top-level API String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
}
// AssertEqual(t, "cool", api.String(), "end result")
// }
// TestFlushInput checks the return value of FlushInput() before and after
// input has been accepted, and checks that accepting runes can continue
// after a flush.
func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Nothing has been read yet. Flushing is allowed at this point, but
	// FlushInput() returns false and nothing else happens.
	flushedAtStart := api.FlushInput()
	AssertTrue(t, flushedAtStart == false, "flush input at start")

	// Accept the first two runes of the input.
	next, _, _ := api.PeekRune(0)
	api.AcceptRune(next) // c
	next, _, _ = api.PeekRune(0)
	api.AcceptRune(next) // o

	// Now there is accepted input, so the flush returns true and the
	// read offset is back at zero.
	flushedAfterRead := api.FlushInput()
	AssertTrue(t, flushedAfterRead == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	// A second flush right after the first one returns false again.
	flushedAgain := api.FlushInput()
	AssertTrue(t, flushedAgain == false, "flush input after flush input")

	// The read offset is zero now, but reading continues after "co".
	// The output so far was not modified by the flush, so the runes that
	// are accepted below are added to the already accepted string "co".
	next, _, _ = api.PeekRune(0)
	api.AcceptRune(next) // o
	next, _, _ = api.PeekRune(0)
	api.AcceptRune(next) // l

	AssertEqual(t, "cool", api.String(), "end result")
}
func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a')