A nice performance gain from distinguishing between AcceptRunes/AcceptBytes and the new, simpler AcceptRune/AcceptByte functions. The simpler versions are faster when accepting only a single byte or rune (which is the case in most situations).

This commit is contained in:
Maurice Makaay 2019-07-19 21:13:15 +00:00
parent 9a53ea9012
commit 458d6f60a6
5 changed files with 382 additions and 369 deletions

View File

@ -128,6 +128,9 @@ func (i *API) PeekByte(offset int) (byte, error) {
// This will merely update the position of the cursor (which keeps track of what // This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The byte is not added to // line and column we are on in the input data). The byte is not added to
// the results. // the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped byte.
func (i *API) SkipByte(b byte) { func (i *API) SkipByte(b byte) {
i.stackFrame.moveCursorByByte(b) i.stackFrame.moveCursorByByte(b)
i.stackFrame.offset++ i.stackFrame.offset++
@ -140,6 +143,9 @@ func (i *API) SkipByte(b byte) {
// This will merely update the position of the cursor (which keeps track of what // This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The bytes are not added to // line and column we are on in the input data). The bytes are not added to
// the results. // the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped bytes.
func (i *API) SkipBytes(bytes ...byte) { func (i *API) SkipBytes(bytes ...byte) {
for _, b := range bytes { for _, b := range bytes {
i.stackFrame.moveCursorByByte(b) i.stackFrame.moveCursorByByte(b)
@ -155,6 +161,9 @@ func (i *API) SkipBytes(bytes ...byte) {
// This will update the position of the cursor (which keeps track of what line // This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the byte to the tokenizer // and column we are on in the input data) and add the byte to the tokenizer
// results. // results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted byte.
func (i *API) AcceptByte(b byte) { func (i *API) AcceptByte(b byte) {
curBytesEnd := i.stackFrame.bytesEnd curBytesEnd := i.stackFrame.bytesEnd
maxRequiredBytes := curBytesEnd + 1 maxRequiredBytes := curBytesEnd + 1
@ -180,6 +189,9 @@ func (i *API) AcceptByte(b byte) {
// This will update the position of the cursor (which keeps track of what line // This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the bytes to the tokenizer // and column we are on in the input data) and add the bytes to the tokenizer
// results. // results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes.
func (i *API) AcceptBytes(bytes ...byte) { func (i *API) AcceptBytes(bytes ...byte) {
curBytesEnd := i.stackFrame.bytesEnd curBytesEnd := i.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + len(bytes) newBytesEnd := curBytesEnd + len(bytes)
@ -223,8 +235,12 @@ func (i *API) PeekRune(offset int) (rune, int, error) {
// This will merely update the position of the cursor (which keeps track of what // This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The rune is not added to // line and column we are on in the input data). The rune is not added to
// the results. // the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped rune.
func (i *API) SkipRune(r rune) { func (i *API) SkipRune(r rune) {
i.stackFrame.moveCursorByRune(r) i.stackFrame.moveCursorByRune(r)
i.stackFrame.offset += utf8.RuneLen(r)
} }
// SkipRunes is used to skip over one or more runes that were read from the input. // SkipRunes is used to skip over one or more runes that were read from the input.
@ -234,6 +250,9 @@ func (i *API) SkipRune(r rune) {
// This will merely update the position of the cursor (which keeps track of what // This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The runes are not added to // line and column we are on in the input data). The runes are not added to
// the results. // the results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped runes.
func (i *API) SkipRunes(runes ...rune) { func (i *API) SkipRunes(runes ...rune) {
for _, r := range runes { for _, r := range runes {
i.stackFrame.moveCursorByRune(r) i.stackFrame.moveCursorByRune(r)
@ -249,6 +268,9 @@ func (i *API) SkipRunes(runes ...rune) {
// This will update the position of the cursor (which keeps track of what line // This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the rune to the tokenizer // and column we are on in the input data) and add the rune to the tokenizer
// results. // results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune.
func (i *API) AcceptRune(r rune) { func (i *API) AcceptRune(r rune) {
curBytesEnd := i.stackFrame.bytesEnd curBytesEnd := i.stackFrame.bytesEnd
maxRequiredBytes := curBytesEnd + utf8.UTFMax maxRequiredBytes := curBytesEnd + utf8.UTFMax
@ -274,6 +296,9 @@ func (i *API) AcceptRune(r rune) {
// This will update the position of the cursor (which keeps track of what line // This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the runes to the tokenizer // and column we are on in the input data) and add the runes to the tokenizer
// results. // results.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted runes.
func (i *API) AcceptRunes(runes ...rune) { func (i *API) AcceptRunes(runes ...rune) {
runesAsString := string(runes) runesAsString := string(runes)
byteLen := len(runesAsString) byteLen := len(runesAsString)

View File

@ -2,6 +2,7 @@ package tokenize_test
import ( import (
"fmt" "fmt"
"strings"
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize" "git.makaay.nl/mauricem/go-parsekit/tokenize"
@ -43,15 +44,15 @@ func ExampleAPI_PeekRune() {
func ExampleAPI_AcceptRune() { func ExampleAPI_AcceptRune() {
api := tokenize.NewAPI("The input that the API will handle") api := tokenize.NewAPI("The input that the API will handle")
// reads 'T' and adds it to the API results // Reads 'T' and accepts it to the API results.
r, _, _ := api.PeekRune(0) r, _, _ := api.PeekRune(0)
api.AcceptRune(r) api.AcceptRune(r)
// reads 'h' and adds it to the API results // Reads 'h' and accepts it to the API results.
r, _, _ = api.PeekRune(0) r, _, _ = api.PeekRune(0)
api.AcceptRune(r) api.AcceptRune(r)
// reads 'e', but does not add it to the API results // Reads 'e', but does not accept it to the API results.
r, _, _ = api.PeekRune(0) r, _, _ = api.PeekRune(0)
fmt.Printf("API results: %q\n", api.String()) fmt.Printf("API results: %q\n", api.String())
@ -60,6 +61,50 @@ func ExampleAPI_AcceptRune() {
// API results: "Th" // API results: "Th"
} }
func ExampleAPI_AcceptRunes() {
api := tokenize.NewAPI("The input that the API will handle")
// Peeks at the first two runes 'T' and 'h'.
r0, _, _ := api.PeekRune(0)
r1, _, _ := api.PeekRune(1)
// Peeks at the third rune 'e'.
api.PeekRune(2)
// Accepts only 'T' and 'h' into the API results.
api.AcceptRunes(r0, r1)
fmt.Printf("API results: %q\n", api.String())
// Output:
// API results: "Th"
}
func ExampleAPI_SkipRune() {
api := tokenize.NewAPI("The input that the API will handle")
for {
r, _, err := api.PeekRune(0)
// EOF reached.
if err != nil {
break
}
// Only accept runes that are vowels.
if strings.ContainsRune("aeiouAEIOU", r) {
api.AcceptRune(r)
} else {
api.SkipRune(r)
}
}
fmt.Printf("API results: %q\n", api.String())
// Output:
// API results: "eiuaeAIiae"
}
func ExampleAPI_modifyingResults() { func ExampleAPI_modifyingResults() {
api := tokenize.NewAPI("") api := tokenize.NewAPI("")
@ -97,32 +142,31 @@ func ExampleAPI_modifyingResults() {
// API second result token: 73("Zaphod") // API second result token: 73("Zaphod")
} }
// TODO FIXME func ExampleAPI_Reset() {
// func ExampleAPI_Reset() { api := tokenize.NewAPI("Very important input!")
// api := tokenize.NewAPI("Very important input!")
// api.NextRune() // read 'V' r, _, _ := api.PeekRune(0) // read 'V'
// api.Accept() api.AcceptRune(r)
// api.NextRune() // read 'e' r, _, _ = api.PeekRune(0) // read 'e'
// api.Accept() api.AcceptRune(r)
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // Reset clears the results. // Reset clears the results.
// api.Reset() api.Reset()
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // So then doing the same read operations, the same data are read. // So then doing the same read operations, the same data are read.
// api.NextRune() // read 'V' r, _, _ = api.PeekRune(0) // read 'V'
// api.Accept() api.AcceptRune(r)
// api.NextRune() // read 'e' r, _, _ = api.PeekRune(0) // read 'e'
// api.Accept() api.AcceptRune(r)
// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// // Output: // Output:
// // API results: "Ve" at line 1, column 3 // API results: "Ve" at line 1, column 3
// // API results: "" at start of file // API results: "" at start of file
// // API results: "Ve" at line 1, column 3 // API results: "Ve" at line 1, column 3
// } }
func ExampleAPI_Fork() { func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'. // This custom Handler checks for input 'a', 'b' or 'c'.
@ -164,149 +208,144 @@ func ExampleAPI_Fork() {
// <nil> mismatch at start of file // <nil> mismatch at start of file
} }
// TODO FIXME func ExampleAPI_Merge() {
// func ExampleAPI_Merge() { tokenHandler := func(t *tokenize.API) bool {
// tokenHandler := func(t *tokenize.API) bool { child1 := t.Fork()
// child1 := t.Fork() r0, _, _ := t.PeekRune(0) // reads 'H'
// t.NextRune() // reads 'H' r1, _, _ := t.PeekRune(1) // reads 'i'
// t.Accept() t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child1
// t.NextRune() // reads 'i'
// t.Accept()
// child2 := t.Fork() child2 := t.Fork()
// t.NextRune() // reads ' ' r0, _, _ = t.PeekRune(0) // reads ' '
// t.Accept() r1, _, _ = t.PeekRune(1) // reads 'm'
// t.NextRune() // reads 'm' t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child2
// t.Accept() t.Dispose(child2) // but they are not merged and therefore not used by child1
// t.Dispose(child2)
// t.Merge(child1) // We merge child1, which has read 'H' and 'i' only. t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
// t.Dispose(child1) // and clean up child1 to return to the parent t.Dispose(child1) // and clean up child1 to return to the parent
// return true return true
// } }
// result, _ := tokenize.New(tokenHandler)("Hi mister X!") result, _ := tokenize.New(tokenHandler)("Hi mister X!")
// fmt.Println(result.String()) fmt.Println(result.String())
// // Output: // Output:
// // Hi // Hi
// } }
// TODO FIXME func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// func TestMultipleLevelsOfForksAndMerges(t *testing.T) { api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// // Fork a few levels. // Fork a few levels.
// child1 := api.Fork() child1 := api.Fork()
// child2 := api.Fork() child2 := api.Fork()
// child3 := api.Fork() child3 := api.Fork()
// child4 := api.Fork() child4 := api.Fork()
// // Read a rune 'a' from child4. // Read a rune 'a' from child4.
// r, _ := api.NextRune() r, _, _ := api.PeekRune(0)
// AssertEqual(t, 'a', r, "child4 rune 1") AssertEqual(t, 'a', r, "child4 rune 1")
// api.Accept() api.AcceptRune(r)
// AssertEqual(t, "a", api.String(), "child4 runes after rune 1") AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
// // Read another rune 'b' from child4. // Read another rune 'b' from child4.
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// AssertEqual(t, 'b', r, "child4 rune 2") AssertEqual(t, 'b', r, "child4 rune 2")
// api.Accept() api.AcceptRune(r)
// AssertEqual(t, "ab", api.String(), "child4 runes after rune 2") AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
// // Merge "ab" from child4 to child3. // Merge "ab" from child4 to child3.
// api.Merge(child4) api.Merge(child4)
// AssertEqual(t, "", api.String(), "child4 runes after first merge") AssertEqual(t, "", api.String(), "child4 runes after first merge")
// // Read some more from child4. // Read some more from child4.
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// AssertEqual(t, 'c', r, "child4 rune 3") AssertEqual(t, 'c', r, "child4 rune 3")
// api.Accept() api.AcceptRune(r)
// AssertEqual(t, "c", api.String(), "child4 runes after rune 1") AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
// // Merge "c" from child4 to child3. // Merge "c" from child4 to child3.
// api.Merge(child4) api.Merge(child4)
// // And dispose of child4, making child3 the active stack level. // And dispose of child4, making child3 the active stack level.
// api.Dispose(child4) api.Dispose(child4)
// // Child3 should now have the combined results "abc" from child4's work. // Child3 should now have the combined results "abc" from child4's work.
// AssertEqual(t, "abc", api.String(), "child3 after merge of child4") AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// // Now read some data from child3. // Now read some data from child3.
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// AssertEqual(t, 'd', r, "child3 rune 5") AssertEqual(t, 'd', r, "child3 rune 5")
// api.Accept() api.AcceptRune(r)
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// AssertEqual(t, 'e', r, "child3 rune 5") AssertEqual(t, 'e', r, "child3 rune 5")
// api.Accept() api.AcceptRune(r)
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// AssertEqual(t, 'f', r, "child3 rune 5") AssertEqual(t, 'f', r, "child3 rune 5")
// api.Accept() api.AcceptRune(r)
// AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6") AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
// // Temporarily go some new forks from here, but don't use their outcome. // Temporarily go some new forks from here, but don't use their outcome.
// child3sub1 := api.Fork() child3sub1 := api.Fork()
// api.NextRune() r, _, _ = api.PeekRune(0)
// api.Accept() api.AcceptRune(r)
// api.NextRune() r, _, _ = api.PeekRune(0)
// api.Accept() api.AcceptRune(r)
// child3sub2 := api.Fork() child3sub2 := api.Fork()
// api.NextRune() r, _, _ = api.PeekRune(0)
// api.Accept() api.AcceptRune(r)
// api.Merge(child3sub2) // do merge sub2 down to sub1 api.Merge(child3sub2) // do merge sub2 down to sub1
// api.Dispose(child3sub2) // and dispose of sub2 api.Dispose(child3sub2) // and dispose of sub2
// api.Dispose(child3sub1) // but dispose of sub1 without merging api.Dispose(child3sub1) // but dispose of sub1 without merging
// // Instead merge the results from before this forking segue from child3 to child2 // Instead merge the results from before this forking segue from child3 to child2
// // and dispose of it. // and dispose of it.
// api.Merge(child3) api.Merge(child3)
// api.Dispose(child3) api.Dispose(child3)
// AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3") AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
// AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
// // Merge child2 to child1 and dispose of it. // Merge child2 to child1 and dispose of it.
// api.Merge(child2) api.Merge(child2)
// api.Dispose(child2) api.Dispose(child2)
// // Merge child1 a few times to the top level api. // Merge child1 a few times to the top level api.
// api.Merge(child1) api.Merge(child1)
// api.Merge(child1) api.Merge(child1)
// api.Merge(child1) api.Merge(child1)
// api.Merge(child1) api.Merge(child1)
// // And dispose of it. // And dispose of it.
// api.Dispose(child1) api.Dispose(child1)
// // Read some data from the top level api. // Read some data from the top level api.
// r, _ = api.NextRune() r, _, _ = api.PeekRune(0)
// api.Accept() api.AcceptRune(r)
// AssertEqual(t, "abcdefg", api.String(), "api string end result") AssertEqual(t, "abcdefg", api.String(), "api string end result")
// AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
// } }
// TODO FIXME func TestClearRunes(t *testing.T) {
// func TestClearRunes(t *testing.T) { api := tokenize.NewAPI("Laphroaig")
// api := tokenize.NewAPI("Laphroaig") r, _, _ := api.PeekRune(0) // Read 'L'
// api.NextRune() // Read 'L' api.AcceptRune(r) // Add to runes
// api.Accept() // Add to runes r, _, _ = api.PeekRune(0) // Read 'a'
// api.NextRune() // Read 'a' api.AcceptRune(r) // Add to runes
// api.Accept() // Add to runes api.ClearRunes() // Clear the runes, giving us a fresh start.
// api.ClearRunes() // Clear the runes, giving us a fresh start. r, _, _ = api.PeekRune(0) // Read 'p'
// api.NextRune() // Read 'p' api.AcceptRune(r) // Add to runes
// api.Accept() // Add to runes r, _, _ = api.PeekRune(0) // Read 'h'
// api.NextRune() // Read 'h' api.AcceptRune(r) // Add to runes
// api.Accept() // Add to runes
// AssertEqual(t, "ph", api.String(), "api string end result") AssertEqual(t, "ph", api.String(), "api string end result")
// } }
func TestMergeScenariosForTokens(t *testing.T) { func TestMergeScenariosForTokens(t *testing.T) {
api := tokenize.NewAPI("") api := tokenize.NewAPI("")

View File

@ -352,7 +352,7 @@ func MatchByte(expected byte) Handler {
return func(t *API) bool { return func(t *API) bool {
b, err := t.PeekByte(0) b, err := t.PeekByte(0)
if err == nil && b == expected { if err == nil && b == expected {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
return false return false
@ -367,7 +367,7 @@ func MatchRune(expected rune) Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil && r == expected { if err == nil && r == expected {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -384,7 +384,7 @@ func MatchBytes(expected ...byte) Handler {
} }
for _, e := range expected { for _, e := range expected {
if b == e { if b == e {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
} }
@ -414,7 +414,7 @@ func MatchRunes(expected ...rune) Handler {
} }
for _, e := range expected { for _, e := range expected {
if r == e { if r == e {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
} }
@ -436,7 +436,7 @@ func MatchByteRange(start byte, end byte) Handler {
return func(t *API) bool { return func(t *API) bool {
r, err := t.PeekByte(0) r, err := t.PeekByte(0)
if err == nil && r >= start && r <= end { if err == nil && r >= start && r <= end {
t.AcceptBytes(r) t.AcceptByte(r)
return true return true
} }
return false return false
@ -460,7 +460,7 @@ func MatchRuneRange(start rune, end rune) Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil && r >= start && r <= end { if err == nil && r >= start && r <= end {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -499,7 +499,7 @@ func MatchBlank() Handler {
return func(t *API) bool { return func(t *API) bool {
b, err := t.PeekByte(0) b, err := t.PeekByte(0)
if err == nil && (b == ' ' || b == '\t') { if err == nil && (b == ' ' || b == '\t') {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
return false return false
@ -520,7 +520,7 @@ func MatchBlanks() Handler {
if err != nil || (b != ' ' && b != '\t') { if err != nil || (b != ' ' && b != '\t') {
return false return false
} }
t.AcceptBytes(b) t.AcceptByte(b)
// Now match any number of followup blanks. We've already got // Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end. // a successful match at this point, so we'll always return true at the end.
@ -529,7 +529,7 @@ func MatchBlanks() Handler {
if err != nil || (b != ' ' && b != '\t') { if err != nil || (b != ' ' && b != '\t') {
return true return true
} }
t.AcceptBytes(b) t.AcceptByte(b)
} }
} }
} }
@ -551,7 +551,7 @@ func MatchWhitespace() Handler {
} }
t.AcceptBytes(b1, b2) t.AcceptBytes(b1, b2)
} else { } else {
t.AcceptBytes(b1) t.AcceptByte(b1)
} }
// Now match any number of followup whitespace. We've already got // Now match any number of followup whitespace. We've already got
@ -568,7 +568,7 @@ func MatchWhitespace() Handler {
} }
t.AcceptBytes(b1, b2) t.AcceptBytes(b1, b2)
} else { } else {
t.AcceptBytes(b1) t.AcceptByte(b1)
} }
} }
} }
@ -590,7 +590,7 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
return func(t *API) bool { return func(t *API) bool {
b, err := t.PeekByte(0) b, err := t.PeekByte(0)
if err == nil && callback(b) { if err == nil && callback(b) {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
return false return false
@ -607,7 +607,7 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil && callback(r) { if err == nil && callback(r) {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -622,7 +622,7 @@ func MatchEndOfLine() Handler {
return err == io.EOF return err == io.EOF
} }
if b1 == '\n' { if b1 == '\n' {
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
if b1 == '\r' { if b1 == '\r' {
@ -763,7 +763,7 @@ func MatchNot(handler Handler) Handler {
t.Dispose(child) t.Dispose(child)
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil { if err == nil {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -961,7 +961,7 @@ func MatchSigned(handler Handler) Handler {
return false return false
} }
if b == '-' || b == '+' { if b == '-' || b == '+' {
t.AcceptBytes(b) t.AcceptByte(b)
} }
if handler(t) { if handler(t) {
t.Merge(child) t.Merge(child)
@ -1019,7 +1019,7 @@ func MatchAnyByte() Handler {
return func(t *API) bool { return func(t *API) bool {
b, err := t.PeekByte(0) b, err := t.PeekByte(0)
if err == nil { if err == nil {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
return false return false
@ -1033,7 +1033,7 @@ func MatchAnyRune() Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil { if err == nil {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -1046,7 +1046,7 @@ func MatchValidRune() Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil && r != utf8.RuneError { if err == nil && r != utf8.RuneError {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -1059,7 +1059,7 @@ func MatchInvalidRune() Handler {
return func(t *API) bool { return func(t *API) bool {
r, _, err := t.PeekRune(0) r, _, err := t.PeekRune(0)
if err == nil && r == utf8.RuneError { if err == nil && r == utf8.RuneError {
t.AcceptRunes(r) t.AcceptRune(r)
return true return true
} }
return false return false
@ -1081,7 +1081,7 @@ func MatchDigits() Handler {
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return false return false
} }
t.AcceptBytes(b) t.AcceptByte(b)
// Continue accepting bytes as long as they are digits. // Continue accepting bytes as long as they are digits.
for { for {
@ -1089,7 +1089,7 @@ func MatchDigits() Handler {
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return true return true
} }
t.AcceptBytes(b) t.AcceptByte(b)
} }
} }
} }
@ -1120,18 +1120,18 @@ func MatchInteger(normalize bool) Handler {
// The next character is a zero, skip the leading zero and check again. // The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b { if err == nil && b2 == b {
t.SkipBytes('0') t.SkipByte('0')
continue continue
} }
// The next character is not a zero, nor a digit at all. // The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here. // We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' { if err != nil || b2 < '1' || b2 > '9' {
t.AcceptBytes('0') t.AcceptByte('0')
return true return true
} }
// The next character is a digit. Skip the leading zero and go with the digit. // The next character is a digit. Skip the leading zero and go with the digit.
t.SkipBytes('0') t.SkipByte('0')
t.AcceptBytes(b2) t.AcceptByte(b2)
break break
} }
} }
@ -1142,7 +1142,7 @@ func MatchInteger(normalize bool) Handler {
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return true return true
} }
t.AcceptBytes(b) t.AcceptByte(b)
} }
} }
} }
@ -1169,24 +1169,24 @@ func MatchDecimal(normalize bool) Handler {
// The next character is a zero, skip the leading zero and check again. // The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b { if err == nil && b2 == b {
t.SkipBytes('0') t.SkipByte('0')
continue continue
} }
// The next character is a dot, go with the zero before the dot and // The next character is a dot, go with the zero before the dot and
// let the upcoming code handle the dot. // let the upcoming code handle the dot.
if err == nil && b2 == '.' { if err == nil && b2 == '.' {
t.AcceptBytes('0') t.AcceptByte('0')
break break
} }
// The next character is not a zero, nor a digit at all. // The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here. // We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' { if err != nil || b2 < '1' || b2 > '9' {
t.AcceptBytes('0') t.AcceptByte('0')
return true return true
} }
// The next character is a digit. Skip the leading zero and go with the digit. // The next character is a digit. Skip the leading zero and go with the digit.
t.SkipBytes('0') t.SkipByte('0')
t.AcceptBytes(b2) t.AcceptByte(b2)
break break
} }
} }
@ -1216,7 +1216,7 @@ func MatchDecimal(normalize bool) Handler {
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
break break
} }
t.AcceptBytes(b) t.AcceptByte(b)
} }
return true return true
} }
@ -1236,13 +1236,13 @@ func MatchBoolean() Handler {
return false return false
} }
if b1 == '1' || b1 == '0' { if b1 == '1' || b1 == '0' {
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
if b1 == 't' || b1 == 'T' { if b1 == 't' || b1 == 'T' {
b2, err := t.PeekByte(1) b2, err := t.PeekByte(1)
if err != nil || (b2 != 'R' && b2 != 'r') { if err != nil || (b2 != 'R' && b2 != 'r') {
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
b3, _ := t.PeekByte(2) b3, _ := t.PeekByte(2)
@ -1255,14 +1255,14 @@ func MatchBoolean() Handler {
t.AcceptBytes(b1, b2, b3, b4) t.AcceptBytes(b1, b2, b3, b4)
return true return true
} }
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
if b1 == 'f' || b1 == 'F' { if b1 == 'f' || b1 == 'F' {
b2, err := t.PeekByte(1) b2, err := t.PeekByte(1)
if err != nil || (b2 != 'A' && b2 != 'a') { if err != nil || (b2 != 'A' && b2 != 'a') {
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
b3, _ := t.PeekByte(2) b3, _ := t.PeekByte(2)
@ -1276,7 +1276,7 @@ func MatchBoolean() Handler {
t.AcceptBytes(b1, b2, b3, b4, b5) t.AcceptBytes(b1, b2, b3, b4, b5)
return true return true
} }
t.AcceptBytes(b1) t.AcceptByte(b1)
return true return true
} }
return false return false
@ -1325,7 +1325,7 @@ func MatchHexDigit() Handler {
return func(t *API) bool { return func(t *API) bool {
b, err := t.PeekByte(0) b, err := t.PeekByte(0)
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
t.AcceptBytes(b) t.AcceptByte(b)
return true return true
} }
return false return false
@ -1560,14 +1560,13 @@ func ModifyDropUntilEndOfLine() Handler {
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
return true return true
} else {
return false
} }
return false
} }
if b == '\n' { if b == '\n' {
return true return true
} }
t.SkipBytes(b) t.SkipByte(b)
} }
} }
} }

View File

@ -2,7 +2,10 @@ package tokenize_test
import ( import (
"fmt" "fmt"
"io"
"strings"
"testing" "testing"
"unicode/utf8"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize" tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
) )
@ -50,63 +53,24 @@ func ExampleNew() {
// Error: mismatch at start of file // Error: mismatch at start of file
} }
// TODO FIXME func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
// func TestCallingNextRune_ReturnsNextRune(t *testing.T) { api := makeTokenizeAPI()
// api := makeTokenizeAPI() r, _, _ := api.PeekRune(0)
// r, _ := api.NextRune() AssertEqual(t, 'T', r, "first rune")
// AssertEqual(t, 'T', r, "first rune") }
// }
// TODO FIXME func TestInputCanAcceptRunesFromReader(t *testing.T) {
// func TestInputCanAcceptRunesFromReader(t *testing.T) { i := makeTokenizeAPI()
// i := makeTokenizeAPI()
// i.NextRune()
// i.Accept()
// i.NextRune()
// i.Accept()
// i.NextRune()
// i.Accept()
// AssertEqual(t, "Tes", i.String(), "i.String()")
// }
// TODO FIXME r0, _, _ := i.PeekRune(0)
// func TestCallingNextRuneTwice_Panics(t *testing.T) { i.AcceptRune(r0)
// AssertPanic(t, PanicT{
// Function: func() {
// i := makeTokenizeAPI()
// i.NextRune()
// i.NextRune()
// },
// Regexp: true,
// Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
// `without a prior call to Accept\(\)`,
// })
// }
// TODO FIXME r1, _, _ := i.PeekRune(0) // 0, because read offset resets to 0 after Accept* calls.
// func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { r2, _, _ := i.PeekRune(1)
// api := makeTokenizeAPI() i.AcceptRunes(r1, r2)
// AssertPanic(t, PanicT{
// Function: api.Accept,
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
// `without first calling NextRune\(\)`,
// })
// }
// TODO FIXME AssertEqual(t, "Tes", i.String(), "i.String()")
// func TestCallingAcceptAfterReadError_Panics(t *testing.T) { }
// api := tokenize.NewAPI("")
// AssertPanic(t, PanicT{
// Function: func() {
// api.NextRune()
// api.Accept()
// },
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
// `, but the prior call to NextRune\(\) failed`,
// })
// }
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) { func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
@ -168,61 +132,55 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`}) `on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
} }
// TODO FIXME func TestAccept_UpdatesCursor(t *testing.T) {
// func TestForkingInput_ClearsLastRune(t *testing.T) { i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
// AssertPanic(t, PanicT{ AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
// Function: func() { for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
// i := makeTokenizeAPI() r, _, _ := i.PeekRune(0)
// i.NextRune() i.AcceptRune(r)
// i.Fork() }
// i.Accept() AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
// },
// Regexp: true,
// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
// })
// }
// TODO FIXME r, _, _ := i.PeekRune(0) // read "\n", cursor ends up at start of new line
// func TestAccept_UpdatesCursor(t *testing.T) { i.AcceptRune(r)
// i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
// AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
// for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
// i.NextRune()
// i.Accept()
// }
// AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
// i.NextRune() // read "\n", cursor ends up at start of new line
// i.Accept()
// AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
// for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
// i.NextRune()
// i.Accept()
// }
// AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
// }
// TODO FIXME for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
// func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { b, _ := i.PeekByte(0)
// i := tokenize.NewAPI(strings.NewReader("X")) i.AcceptByte(b)
// i.NextRune() }
// i.Accept() AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
// r, err := i.NextRune() }
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
// AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
// } i := tokenize.NewAPI(strings.NewReader("X"))
// TODO FIXME r, _, _ := i.PeekRune(0)
// func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { i.AcceptRune(r)
// i := tokenize.NewAPI(strings.NewReader("X")) r, _, err := i.PeekRune(0)
// child := i.Fork()
// i.NextRune() AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
// i.Accept() AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
// r, err := i.NextRune() }
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
// i.Dispose(child) // brings the read offset back to the start func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
// r, err = i.NextRune() // so here we should see the same rune i := tokenize.NewAPI(strings.NewReader("X"))
// AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") child := i.Fork()
// AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
 // Go to the EOF.
r, _, _ := i.PeekRune(0)
i.AcceptRune(r)
r, _, err := i.PeekRune(0)
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()")
// Brings the read offset back to the start.
i.Dispose(child)
// So here we should see the same input data as before.
r, _, err = i.PeekRune(0)
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func makeTokenizeAPI() *tokenize.API { func makeTokenizeAPI() *tokenize.API {
return tokenize.NewAPI("Testing") return tokenize.NewAPI("Testing")

View File

@ -4,95 +4,87 @@ import (
"testing" "testing"
) )
// TODO FIXME func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { // Create input, accept the first rune.
// // Create input, accept the first rune. i := NewAPI("Testing")
// i := NewAPI("Testing") r, _, _ := i.PeekRune(0)
// i.NextRune() i.AcceptRune(r) // T
// i.Accept() // T AssertEqual(t, "T", i.String(), "accepted rune in input")
// AssertEqual(t, "T", i.String(), "accepted rune in input")
// // Fork
// child := i.Fork()
// AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
// AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// // Accept two runes via fork.
// i.NextRune()
// i.Accept() // e
// i.NextRune()
// i.Accept() // s
// AssertEqual(t, "es", i.String(), "result runes in fork")
// AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
// AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// // Merge fork back into parent
// i.Merge(child)
// i.Dispose(child)
// AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
// AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
// }
// TODO FIXME // Fork
// func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { child := i.Fork()
// i := NewAPI("Testing") AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
// i.NextRune() AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// i.Accept()
// f1 := i.Fork()
// i.NextRune()
// i.Accept()
// f2 := i.Fork()
// i.NextRune()
// i.Accept()
// AssertEqual(t, "s", i.String(), "f2 String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
// i.Merge(f2)
// i.Dispose(f2)
// AssertEqual(t, "es", i.String(), "f1 String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// i.Merge(f1)
// i.Dispose(f1)
// AssertEqual(t, "Tes", i.String(), "top-level API String()")
// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// }
// TODO FIXME // Accept two runes via fork.
// func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { r, _, _ = i.PeekRune(0)
// i := NewAPI("Testing") i.AcceptRune(r) // e
// r, _ := i.NextRune() r, _, _ = i.PeekRune(0)
// AssertEqual(t, 'T', r, "result from 1st call to NextRune()") i.AcceptRune(r) // s
// AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'") AssertEqual(t, "es", i.String(), "result runes in fork")
// AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true") AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
// i.Accept() AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
// AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
// r, _ = i.NextRune()
// AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
// }
// TODO FIXME // Merge fork back into parent
// func TestFlushInput(t *testing.T) { i.Merge(child)
// api := NewAPI("cool") i.Dispose(child)
AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}
// // Flushing without any read data is okay. FlushInput() will return func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
// // false in this case, and nothing else happens. i := NewAPI("Testing")
// AssertTrue(t, api.FlushInput() == false, "flush input at start") r, _, _ := i.PeekRune(0)
i.AcceptRune(r) // T
// api.NextRune() f1 := i.Fork()
// api.Accept() r, _, _ = i.PeekRune(0)
// api.NextRune() i.AcceptRune(r) // e
// api.Accept()
// AssertTrue(t, api.FlushInput() == true, "flush input after reading some data") f2 := i.Fork()
// AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input") r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // s
AssertEqual(t, "s", i.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
// AssertTrue(t, api.FlushInput() == false, "flush input after flush input") i.Merge(f2)
i.Dispose(f2)
AssertEqual(t, "es", i.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// // Read offset is now zero, but reading should continue after "co". i.Merge(f1)
// api.NextRune() i.Dispose(f1)
// api.Accept() AssertEqual(t, "Tes", i.String(), "top-level API String()")
// api.NextRune() AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
// api.Accept() }
// AssertEqual(t, "cool", api.String(), "end result") func TestFlushInput(t *testing.T) {
// } i := NewAPI("cool")
// Flushing without any read data is okay. FlushInput() will return
// false in this case, and nothing else happens.
AssertTrue(t, i.FlushInput() == false, "flush input at start")
r, _, _ := i.PeekRune(0)
i.AcceptRune(r) // c
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // o
AssertTrue(t, i.FlushInput() == true, "flush input after reading some data")
AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input")
AssertTrue(t, i.FlushInput() == false, "flush input after flush input")
// Read offset is now zero, but reading should continue after "co".
// The output so far isn't modified, so the following accept calls
// will add their runes to the already accepted string "co".
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // o
r, _, _ = i.PeekRune(0)
i.AcceptRune(r) // o
AssertEqual(t, "cool", i.String(), "end result")
}
func TestInputFlusherWrapper(t *testing.T) { func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a') runeA := A.Rune('a')