diff --git a/tokenize/api.go b/tokenize/api.go index 4c19dbf..ecf0985 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -128,6 +128,9 @@ func (i *API) PeekByte(offset int) (byte, error) { // This will merely update the position of the cursor (which keeps track of what // line and column we are on in the input data). The byte is not added to // the results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the skipped byte. func (i *API) SkipByte(b byte) { i.stackFrame.moveCursorByByte(b) i.stackFrame.offset++ @@ -140,6 +143,9 @@ func (i *API) SkipByte(b byte) { // This will merely update the position of the cursor (which keeps track of what // line and column we are on in the input data). The bytes are not added to // the results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the skipped bytes. func (i *API) SkipBytes(bytes ...byte) { for _, b := range bytes { i.stackFrame.moveCursorByByte(b) @@ -155,6 +161,9 @@ func (i *API) SkipBytes(bytes ...byte) { // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the byte to the tokenizer // results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the accepted byte. func (i *API) AcceptByte(b byte) { curBytesEnd := i.stackFrame.bytesEnd maxRequiredBytes := curBytesEnd + 1 @@ -180,6 +189,9 @@ func (i *API) AcceptByte(b byte) { // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the bytes to the tokenizer // results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the accepted bytes. 
func (i *API) AcceptBytes(bytes ...byte) { curBytesEnd := i.stackFrame.bytesEnd newBytesEnd := curBytesEnd + len(bytes) @@ -223,8 +235,12 @@ func (i *API) PeekRune(offset int) (rune, int, error) { // This will merely update the position of the cursor (which keeps track of what // line and column we are on in the input data). The rune is not added to // the results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the skipped rune. func (i *API) SkipRune(r rune) { i.stackFrame.moveCursorByRune(r) + i.stackFrame.offset += utf8.RuneLen(r) } // SkipRunes is used to skip over one or more runes that were read from the input. @@ -234,6 +250,9 @@ func (i *API) SkipRune(r rune) { // This will merely update the position of the cursor (which keeps track of what // line and column we are on in the input data). The runes are not added to // the results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the skipped runes. func (i *API) SkipRunes(runes ...rune) { for _, r := range runes { i.stackFrame.moveCursorByRune(r) @@ -249,6 +268,9 @@ func (i *API) SkipRunes(runes ...rune) { // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the rune to the tokenizer // results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the accepted rune. func (i *API) AcceptRune(r rune) { curBytesEnd := i.stackFrame.bytesEnd maxRequiredBytes := curBytesEnd + utf8.UTFMax @@ -274,6 +296,9 @@ func (i *API) AcceptRune(r rune) { // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the runes to the tokenizer // results. +// +// After the call, byte offset 0 for PeekByte() and PeekRune() will point at +// the first byte after the accepted runes. 
func (i *API) AcceptRunes(runes ...rune) { runesAsString := string(runes) byteLen := len(runesAsString) diff --git a/tokenize/api_test.go b/tokenize/api_test.go index e148083..7c917dc 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -2,6 +2,7 @@ package tokenize_test import ( "fmt" + "strings" "testing" "git.makaay.nl/mauricem/go-parsekit/tokenize" @@ -43,15 +44,15 @@ func ExampleAPI_PeekRune() { func ExampleAPI_AcceptRune() { api := tokenize.NewAPI("The input that the API will handle") - // reads 'T' and adds it to the API results + // Reads 'T' and accepts it to the API results. r, _, _ := api.PeekRune(0) api.AcceptRune(r) - // reads 'h' and adds it to the API results + // Reads 'h' and accepts it to the API results. r, _, _ = api.PeekRune(0) api.AcceptRune(r) - // reads 'e', but does not add it to the API results + // Reads 'e', but does not accept it to the API results. r, _, _ = api.PeekRune(0) fmt.Printf("API results: %q\n", api.String()) @@ -60,6 +61,50 @@ func ExampleAPI_AcceptRune() { // API results: "Th" } +func ExampleAPI_AcceptRunes() { + api := tokenize.NewAPI("The input that the API will handle") + + // Peeks at the first two runes 'T' and 'h'. + r0, _, _ := api.PeekRune(0) + r1, _, _ := api.PeekRune(1) + + // Peeks at the third rune 'e'. + api.PeekRune(2) + + // Accepts only 'T' and 'h' into the API results. + api.AcceptRunes(r0, r1) + + fmt.Printf("API results: %q\n", api.String()) + + // Output: + // API results: "Th" +} + +func ExampleAPI_SkipRune() { + api := tokenize.NewAPI("The input that the API will handle") + + for { + r, _, err := api.PeekRune(0) + + // EOF reached. + if err != nil { + break + } + + // Only accept runes that are vowels. 
+ if strings.ContainsRune("aeiouAEIOU", r) { + api.AcceptRune(r) + } else { + api.SkipRune(r) + } + } + + fmt.Printf("API results: %q\n", api.String()) + + // Output: + // API results: "eiuaeAIiae" +} + func ExampleAPI_modifyingResults() { api := tokenize.NewAPI("") @@ -97,32 +142,31 @@ func ExampleAPI_modifyingResults() { // API second result token: 73("Zaphod") } -// TODO FIXME -// func ExampleAPI_Reset() { -// api := tokenize.NewAPI("Very important input!") +func ExampleAPI_Reset() { + api := tokenize.NewAPI("Very important input!") -// api.NextRune() // read 'V' -// api.Accept() -// api.NextRune() // read 'e' -// api.Accept() -// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) + r, _, _ := api.PeekRune(0) // read 'V' + api.AcceptRune(r) + r, _, _ = api.PeekRune(0) // read 'e' + api.AcceptRune(r) + fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) -// // Reset clears the results. -// api.Reset() -// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) + // Reset clears the results. + api.Reset() + fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) -// // So then doing the same read operations, the same data are read. -// api.NextRune() // read 'V' -// api.Accept() -// api.NextRune() // read 'e' -// api.Accept() -// fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) + // So then doing the same read operations, the same data are read. + r, _, _ = api.PeekRune(0) // read 'V' + api.AcceptRune(r) + r, _, _ = api.PeekRune(0) // read 'e' + api.AcceptRune(r) + fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor()) -// // Output: -// // API results: "Ve" at line 1, column 3 -// // API results: "" at start of file -// // API results: "Ve" at line 1, column 3 -// } + // Output: + // API results: "Ve" at line 1, column 3 + // API results: "" at start of file + // API results: "Ve" at line 1, column 3 +} func ExampleAPI_Fork() { // This custom Handler checks for input 'a', 'b' or 'c'. 
@@ -164,149 +208,144 @@ func ExampleAPI_Fork() { // mismatch at start of file } -// TODO FIXME -// func ExampleAPI_Merge() { -// tokenHandler := func(t *tokenize.API) bool { -// child1 := t.Fork() -// t.NextRune() // reads 'H' -// t.Accept() -// t.NextRune() // reads 'i' -// t.Accept() +func ExampleAPI_Merge() { + tokenHandler := func(t *tokenize.API) bool { + child1 := t.Fork() + r0, _, _ := t.PeekRune(0) // reads 'H' + r1, _, _ := t.PeekRune(1) // reads 'i' + t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child1 -// child2 := t.Fork() -// t.NextRune() // reads ' ' -// t.Accept() -// t.NextRune() // reads 'm' -// t.Accept() -// t.Dispose(child2) + child2 := t.Fork() + r0, _, _ = t.PeekRune(0) // reads ' ' + r1, _, _ = t.PeekRune(1) // reads 'm' + t.AcceptRunes(r0, r1) // these runes are accepted in the API results for child2 + t.Dispose(child2) // but they are not merged and therefore not used by child1 -// t.Merge(child1) // We merge child1, which has read 'H' and 'i' only. -// t.Dispose(child1) // and clean up child1 to return to the parent -// return true -// } + t.Merge(child1) // We merge child1, which has read 'H' and 'i' only. + t.Dispose(child1) // and clean up child1 to return to the parent + return true + } -// result, _ := tokenize.New(tokenHandler)("Hi mister X!") -// fmt.Println(result.String()) + result, _ := tokenize.New(tokenHandler)("Hi mister X!") + fmt.Println(result.String()) -// // Output: -// // Hi -// } + // Output: + // Hi +} -// TODO FIXME -// func TestMultipleLevelsOfForksAndMerges(t *testing.T) { -// api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz") +func TestMultipleLevelsOfForksAndMerges(t *testing.T) { + api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz") -// // Fork a few levels. -// child1 := api.Fork() -// child2 := api.Fork() -// child3 := api.Fork() -// child4 := api.Fork() + // Fork a few levels. 
+ child1 := api.Fork() + child2 := api.Fork() + child3 := api.Fork() + child4 := api.Fork() -// // Read a rune 'a' from child4. -// r, _ := api.NextRune() -// AssertEqual(t, 'a', r, "child4 rune 1") -// api.Accept() -// AssertEqual(t, "a", api.String(), "child4 runes after rune 1") + // Read a rune 'a' from child4. + r, _, _ := api.PeekRune(0) + AssertEqual(t, 'a', r, "child4 rune 1") + api.AcceptRune(r) + AssertEqual(t, "a", api.String(), "child4 runes after rune 1") -// // Read another rune 'b' from child4. -// r, _ = api.NextRune() -// AssertEqual(t, 'b', r, "child4 rune 2") -// api.Accept() -// AssertEqual(t, "ab", api.String(), "child4 runes after rune 2") + // Read another rune 'b' from child4. + r, _, _ = api.PeekRune(0) + AssertEqual(t, 'b', r, "child4 rune 2") + api.AcceptRune(r) + AssertEqual(t, "ab", api.String(), "child4 runes after rune 2") -// // Merge "ab" from child4 to child3. -// api.Merge(child4) -// AssertEqual(t, "", api.String(), "child4 runes after first merge") + // Merge "ab" from child4 to child3. + api.Merge(child4) + AssertEqual(t, "", api.String(), "child4 runes after first merge") -// // Read some more from child4. -// r, _ = api.NextRune() -// AssertEqual(t, 'c', r, "child4 rune 3") -// api.Accept() -// AssertEqual(t, "c", api.String(), "child4 runes after rune 1") -// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") + // Read some more from child4. + r, _, _ = api.PeekRune(0) + AssertEqual(t, 'c', r, "child4 rune 3") + api.AcceptRune(r) + AssertEqual(t, "c", api.String(), "child4 runes after rune 1") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") -// // Merge "c" from child4 to child3. -// api.Merge(child4) + // Merge "c" from child4 to child3. + api.Merge(child4) -// // And dispose of child4, making child3 the active stack level. -// api.Dispose(child4) + // And dispose of child4, making child3 the active stack level. 
+ api.Dispose(child4) -// // Child3 should now have the compbined results "abc" from child4's work. -// AssertEqual(t, "abc", api.String(), "child3 after merge of child4") -// AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") + // Child3 should now have the combined results "abc" from child4's work. + AssertEqual(t, "abc", api.String(), "child3 after merge of child4") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") -// // Now read some data from child3. -// r, _ = api.NextRune() -// AssertEqual(t, 'd', r, "child3 rune 5") -// api.Accept() + // Now read some data from child3. + r, _, _ = api.PeekRune(0) + AssertEqual(t, 'd', r, "child3 rune 5") + api.AcceptRune(r) -// r, _ = api.NextRune() -// AssertEqual(t, 'e', r, "child3 rune 5") -// api.Accept() + r, _, _ = api.PeekRune(0) + AssertEqual(t, 'e', r, "child3 rune 5") + api.AcceptRune(r) -// r, _ = api.NextRune() -// AssertEqual(t, 'f', r, "child3 rune 5") -// api.Accept() + r, _, _ = api.PeekRune(0) + AssertEqual(t, 'f', r, "child3 rune 5") + api.AcceptRune(r) -// AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6") + AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6") -// // Temporarily go some new forks from here, but don't use their outcome. -// child3sub1 := api.Fork() -// api.NextRune() -// api.Accept() -// api.NextRune() -// api.Accept() -// child3sub2 := api.Fork() -// api.NextRune() -// api.Accept() -// api.Merge(child3sub2) // do merge sub2 down to sub1 -// api.Dispose(child3sub2) // and dispose of sub2 -// api.Dispose(child3sub1) // but dispose of sub1 without merging + // Temporarily go some new forks from here, but don't use their outcome. 
+ child3sub1 := api.Fork() + r, _, _ = api.PeekRune(0) + api.AcceptRune(r) + r, _, _ = api.PeekRune(0) + api.AcceptRune(r) + child3sub2 := api.Fork() + r, _, _ = api.PeekRune(0) + api.AcceptRune(r) + api.Merge(child3sub2) // do merge sub2 down to sub1 + api.Dispose(child3sub2) // and dispose of sub2 + api.Dispose(child3sub1) // but dispose of sub1 without merging -// // Instead merge the results from before this forking segway from child3 to child2 -// // and dispose of it. -// api.Merge(child3) -// api.Dispose(child3) + // Instead merge the results from before this forking segway from child3 to child2 + // and dispose of it. + api.Merge(child3) + api.Dispose(child3) -// AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3") -// AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") + AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3") + AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") -// // Merge child2 to child1 and dispose of it. -// api.Merge(child2) -// api.Dispose(child2) + // Merge child2 to child1 and dispose of it. + api.Merge(child2) + api.Dispose(child2) -// // Merge child1 a few times to the top level api. -// api.Merge(child1) -// api.Merge(child1) -// api.Merge(child1) -// api.Merge(child1) + // Merge child1 a few times to the top level api. + api.Merge(child1) + api.Merge(child1) + api.Merge(child1) + api.Merge(child1) -// // And dispose of it. -// api.Dispose(child1) + // And dispose of it. + api.Dispose(child1) -// // Read some data from the top level api. -// r, _ = api.NextRune() -// api.Accept() + // Read some data from the top level api. 
+ r, _, _ = api.PeekRune(0) + api.AcceptRune(r) -// AssertEqual(t, "abcdefg", api.String(), "api string end result") -// AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") -// } + AssertEqual(t, "abcdefg", api.String(), "api string end result") + AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") +} -// TODO FIXME -// func TestClearRunes(t *testing.T) { -// api := tokenize.NewAPI("Laphroaig") -// api.NextRune() // Read 'L' -// api.Accept() // Add to runes -// api.NextRune() // Read 'a' -// api.Accept() // Add to runes -// api.ClearRunes() // Clear the runes, giving us a fresh start. -// api.NextRune() // Read 'p' -// api.Accept() // Add to runes -// api.NextRune() // Read 'r' -// api.Accept() // Add to runes +func TestClearRunes(t *testing.T) { + api := tokenize.NewAPI("Laphroaig") + r, _, _ := api.PeekRune(0) // Read 'L' + api.AcceptRune(r) // Add to runes + r, _, _ = api.PeekRune(0) // Read 'a' + api.AcceptRune(r) // Add to runes + api.ClearRunes() // Clear the runes, giving us a fresh start. 
+ r, _, _ = api.PeekRune(0) // Read 'p' + api.AcceptRune(r) // Add to runes + r, _, _ = api.PeekRune(0) // Read 'h' + api.AcceptRune(r) // Add to runes -// AssertEqual(t, "ph", api.String(), "api string end result") -// } + AssertEqual(t, "ph", api.String(), "api string end result") +} func TestMergeScenariosForTokens(t *testing.T) { api := tokenize.NewAPI("") diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index a3e44e1..e848268 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -352,7 +352,7 @@ func MatchByte(expected byte) Handler { return func(t *API) bool { b, err := t.PeekByte(0) if err == nil && b == expected { - t.AcceptBytes(b) + t.AcceptByte(b) return true } return false @@ -367,7 +367,7 @@ func MatchRune(expected rune) Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil && r == expected { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -384,7 +384,7 @@ func MatchBytes(expected ...byte) Handler { } for _, e := range expected { if b == e { - t.AcceptBytes(b) + t.AcceptByte(b) return true } } @@ -414,7 +414,7 @@ func MatchRunes(expected ...rune) Handler { } for _, e := range expected { if r == e { - t.AcceptRunes(r) + t.AcceptRune(r) return true } } @@ -436,7 +436,7 @@ func MatchByteRange(start byte, end byte) Handler { return func(t *API) bool { r, err := t.PeekByte(0) if err == nil && r >= start && r <= end { - t.AcceptBytes(r) + t.AcceptByte(r) return true } return false @@ -460,7 +460,7 @@ func MatchRuneRange(start rune, end rune) Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil && r >= start && r <= end { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -499,7 +499,7 @@ func MatchBlank() Handler { return func(t *API) bool { b, err := t.PeekByte(0) if err == nil && (b == ' ' || b == '\t') { - t.AcceptBytes(b) + t.AcceptByte(b) return true } return false @@ -520,7 +520,7 @@ func MatchBlanks() Handler { if err 
!= nil || (b != ' ' && b != '\t') { return false } - t.AcceptBytes(b) + t.AcceptByte(b) // Now match any number of followup blanks. We've already got // a successful match at this point, so we'll always return true at the end. @@ -529,7 +529,7 @@ func MatchBlanks() Handler { if err != nil || (b != ' ' && b != '\t') { return true } - t.AcceptBytes(b) + t.AcceptByte(b) } } } @@ -551,7 +551,7 @@ func MatchWhitespace() Handler { } t.AcceptBytes(b1, b2) } else { - t.AcceptBytes(b1) + t.AcceptByte(b1) } // Now match any number of followup whitespace. We've already got @@ -568,7 +568,7 @@ func MatchWhitespace() Handler { } t.AcceptBytes(b1, b2) } else { - t.AcceptBytes(b1) + t.AcceptByte(b1) } } } @@ -590,7 +590,7 @@ func MatchByteByCallback(callback func(byte) bool) Handler { return func(t *API) bool { b, err := t.PeekByte(0) if err == nil && callback(b) { - t.AcceptBytes(b) + t.AcceptByte(b) return true } return false @@ -607,7 +607,7 @@ func MatchRuneByCallback(callback func(rune) bool) Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil && callback(r) { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -622,7 +622,7 @@ func MatchEndOfLine() Handler { return err == io.EOF } if b1 == '\n' { - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } if b1 == '\r' { @@ -763,7 +763,7 @@ func MatchNot(handler Handler) Handler { t.Dispose(child) r, _, err := t.PeekRune(0) if err == nil { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -961,7 +961,7 @@ func MatchSigned(handler Handler) Handler { return false } if b == '-' || b == '+' { - t.AcceptBytes(b) + t.AcceptByte(b) } if handler(t) { t.Merge(child) @@ -1019,7 +1019,7 @@ func MatchAnyByte() Handler { return func(t *API) bool { b, err := t.PeekByte(0) if err == nil { - t.AcceptBytes(b) + t.AcceptByte(b) return true } return false @@ -1033,7 +1033,7 @@ func MatchAnyRune() Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil { - 
t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -1046,7 +1046,7 @@ func MatchValidRune() Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil && r != utf8.RuneError { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -1059,7 +1059,7 @@ func MatchInvalidRune() Handler { return func(t *API) bool { r, _, err := t.PeekRune(0) if err == nil && r == utf8.RuneError { - t.AcceptRunes(r) + t.AcceptRune(r) return true } return false @@ -1081,7 +1081,7 @@ func MatchDigits() Handler { if err != nil || b < '0' || b > '9' { return false } - t.AcceptBytes(b) + t.AcceptByte(b) // Continue accepting bytes as long as they are digits. for { @@ -1089,7 +1089,7 @@ func MatchDigits() Handler { if err != nil || b < '0' || b > '9' { return true } - t.AcceptBytes(b) + t.AcceptByte(b) } } } @@ -1120,18 +1120,18 @@ func MatchInteger(normalize bool) Handler { // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.SkipBytes('0') + t.SkipByte('0') continue } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.AcceptBytes('0') + t.AcceptByte('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.SkipBytes('0') - t.AcceptBytes(b2) + t.SkipByte('0') + t.AcceptByte(b2) break } } @@ -1142,7 +1142,7 @@ func MatchInteger(normalize bool) Handler { if err != nil || b < '0' || b > '9' { return true } - t.AcceptBytes(b) + t.AcceptByte(b) } } } @@ -1169,24 +1169,24 @@ func MatchDecimal(normalize bool) Handler { // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.SkipBytes('0') + t.SkipByte('0') continue } // The next character is a dot, go with the zero before the dot and // let the upcoming code handle the dot. if err == nil && b2 == '.' 
{ - t.AcceptBytes('0') + t.AcceptByte('0') break } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.AcceptBytes('0') + t.AcceptByte('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.SkipBytes('0') - t.AcceptBytes(b2) + t.SkipByte('0') + t.AcceptByte(b2) break } } @@ -1216,7 +1216,7 @@ func MatchDecimal(normalize bool) Handler { if err != nil || b < '0' || b > '9' { break } - t.AcceptBytes(b) + t.AcceptByte(b) } return true } @@ -1236,13 +1236,13 @@ func MatchBoolean() Handler { return false } if b1 == '1' || b1 == '0' { - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } if b1 == 't' || b1 == 'T' { b2, err := t.PeekByte(1) if err != nil || (b2 != 'R' && b2 != 'r') { - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } b3, _ := t.PeekByte(2) @@ -1255,14 +1255,14 @@ func MatchBoolean() Handler { t.AcceptBytes(b1, b2, b3, b4) return true } - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } if b1 == 'f' || b1 == 'F' { b2, err := t.PeekByte(1) if err != nil || (b2 != 'A' && b2 != 'a') { - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } b3, _ := t.PeekByte(2) @@ -1276,7 +1276,7 @@ func MatchBoolean() Handler { t.AcceptBytes(b1, b2, b3, b4, b5) return true } - t.AcceptBytes(b1) + t.AcceptByte(b1) return true } return false @@ -1325,7 +1325,7 @@ func MatchHexDigit() Handler { return func(t *API) bool { b, err := t.PeekByte(0) if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { - t.AcceptBytes(b) + t.AcceptByte(b) return true } return false @@ -1560,14 +1560,13 @@ func ModifyDropUntilEndOfLine() Handler { if err != nil { if err == io.EOF { return true - } else { - return false } + return false } if b == '\n' { return true } - t.SkipBytes(b) + t.SkipByte(b) } } } diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index 9751e27..0706a0f 100644 --- 
a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -2,7 +2,10 @@ package tokenize_test import ( "fmt" + "io" + "strings" "testing" + "unicode/utf8" tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize" ) @@ -50,63 +53,24 @@ func ExampleNew() { // Error: mismatch at start of file } -// TODO FIXME -// func TestCallingNextRune_ReturnsNextRune(t *testing.T) { -// api := makeTokenizeAPI() -// r, _ := api.NextRune() -// AssertEqual(t, 'T', r, "first rune") -// } +func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) { + api := makeTokenizeAPI() + r, _, _ := api.PeekRune(0) + AssertEqual(t, 'T', r, "first rune") +} -// TODO FIXME -// func TestInputCanAcceptRunesFromReader(t *testing.T) { -// i := makeTokenizeAPI() -// i.NextRune() -// i.Accept() -// i.NextRune() -// i.Accept() -// i.NextRune() -// i.Accept() -// AssertEqual(t, "Tes", i.String(), "i.String()") -// } +func TestInputCanAcceptRunesFromReader(t *testing.T) { + i := makeTokenizeAPI() -// TODO FIXME -// func TestCallingNextRuneTwice_Panics(t *testing.T) { -// AssertPanic(t, PanicT{ -// Function: func() { -// i := makeTokenizeAPI() -// i.NextRune() -// i.NextRune() -// }, -// Regexp: true, -// Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` + -// `without a prior call to Accept\(\)`, -// }) -// } + r0, _, _ := i.PeekRune(0) + i.AcceptRune(r0) -// TODO FIXME -// func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { -// api := makeTokenizeAPI() -// AssertPanic(t, PanicT{ -// Function: api.Accept, -// Regexp: true, -// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` + -// `without first calling NextRune\(\)`, -// }) -// } + r1, _, _ := i.PeekRune(0) // 0, because read offset resets to 0 after Accept* calls. 
+ r2, _, _ := i.PeekRune(1) + i.AcceptRunes(r1, r2) -// TODO FIXME -// func TestCallingAcceptAfterReadError_Panics(t *testing.T) { -// api := tokenize.NewAPI("") -// AssertPanic(t, PanicT{ -// Function: func() { -// api.NextRune() -// api.Accept() -// }, -// Regexp: true, -// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` + -// `, but the prior call to NextRune\(\) failed`, -// }) -// } + AssertEqual(t, "Tes", i.String(), "i.String()") +} func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ @@ -168,61 +132,55 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) { `on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`}) } -// TODO FIXME -// func TestForkingInput_ClearsLastRune(t *testing.T) { -// AssertPanic(t, PanicT{ -// Function: func() { -// i := makeTokenizeAPI() -// i.NextRune() -// i.Fork() -// i.Accept() -// }, -// Regexp: true, -// Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`, -// }) -// } +func TestAccept_UpdatesCursor(t *testing.T) { + i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) + AssertEqual(t, "start of file", i.Cursor(), "cursor 1") + for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" + r, _, _ := i.PeekRune(0) + i.AcceptRune(r) + } + AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") -// TODO FIXME -// func TestAccept_UpdatesCursor(t *testing.T) { -// i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) -// AssertEqual(t, "start of file", i.Cursor(), "cursor 1") -// for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" -// i.NextRune() -// i.Accept() -// } -// AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") -// i.NextRune() // read "\n", cursor ends up at start of new line -// i.Accept() -// AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") -// for j := 0; j < 10; j++ { // read 
"with\r\nnewl", cursor end up at "i" -// i.NextRune() -// i.Accept() -// } -// AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") -// } + r, _, _ := i.PeekRune(0) // read "\n", cursor ends up at start of new line + i.AcceptRune(r) + AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") -// TODO FIXME -// func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { -// i := tokenize.NewAPI(strings.NewReader("X")) -// i.NextRune() -// i.Accept() -// r, err := i.NextRune() -// AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") -// AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") -// } -// TODO FIXME -// func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { -// i := tokenize.NewAPI(strings.NewReader("X")) -// child := i.Fork() -// i.NextRune() -// i.Accept() -// r, err := i.NextRune() -// AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") -// i.Dispose(child) // brings the read offset back to the start -// r, err = i.NextRune() // so here we should see the same rune -// AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") -// AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") -// } + for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" + b, _ := i.PeekByte(0) + i.AcceptByte(b) + } + AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") +} + +func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) { + i := tokenize.NewAPI(strings.NewReader("X")) + r, _, _ := i.PeekRune(0) + i.AcceptRune(r) + r, _, err := i.PeekRune(0) + + AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") + AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") +} + +func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { + i := tokenize.NewAPI(strings.NewReader("X")) + child := i.Fork() + + // To to the EOF. 
+ r, _, _ := i.PeekRune(0) + i.AcceptRune(r) + r, _, err := i.PeekRune(0) + AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") + AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()") + + // Brings the read offset back to the start. + i.Dispose(child) + + // So here we should see the same input data as before. + r, _, err = i.PeekRune(0) + AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") + AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") +} func makeTokenizeAPI() *tokenize.API { return tokenize.NewAPI("Testing") diff --git a/tokenize/tokenizer_whitebox_test.go b/tokenize/tokenizer_whitebox_test.go index a9f7265..9c9e715 100644 --- a/tokenize/tokenizer_whitebox_test.go +++ b/tokenize/tokenizer_whitebox_test.go @@ -4,95 +4,87 @@ import ( "testing" ) -// TODO FIXME -// func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { -// // Create input, accept the first rune. -// i := NewAPI("Testing") -// i.NextRune() -// i.Accept() // T -// AssertEqual(t, "T", i.String(), "accepted rune in input") -// // Fork -// child := i.Fork() -// AssertEqual(t, 1, i.stackFrame.offset, "parent offset") -// AssertEqual(t, 1, i.stackFrame.offset, "child offset") -// // Accept two runes via fork. -// i.NextRune() -// i.Accept() // e -// i.NextRune() -// i.Accept() // s -// AssertEqual(t, "es", i.String(), "result runes in fork") -// AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset") -// AssertEqual(t, 3, i.stackFrame.offset, "child offset") -// // Merge fork back into parent -// i.Merge(child) -// i.Dispose(child) -// AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()") -// AssertEqual(t, 3, i.stackFrame.offset, "parent offset") -// } +func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { + // Create input, accept the first rune.
+ i := NewAPI("Testing") + r, _, _ := i.PeekRune(0) + i.AcceptRune(r) // T + AssertEqual(t, "T", i.String(), "accepted rune in input") -// TODO FIXME -// func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { -// i := NewAPI("Testing") -// i.NextRune() -// i.Accept() -// f1 := i.Fork() -// i.NextRune() -// i.Accept() -// f2 := i.Fork() -// i.NextRune() -// i.Accept() -// AssertEqual(t, "s", i.String(), "f2 String()") -// AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A") -// i.Merge(f2) -// i.Dispose(f2) -// AssertEqual(t, "es", i.String(), "f1 String()") -// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") -// i.Merge(f1) -// i.Dispose(f1) -// AssertEqual(t, "Tes", i.String(), "top-level API String()") -// AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") -// } + // Fork: the child becomes the active stack frame; the parent sits one level below. + child := i.Fork() + AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset") + AssertEqual(t, 1, i.stackFrame.offset, "child offset") -// TODO FIXME -// func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { -// i := NewAPI("Testing") -// r, _ := i.NextRune() -// AssertEqual(t, 'T', r, "result from 1st call to NextRune()") -// AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'") -// AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true") -// i.Accept() -// AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false") -// AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset") -// r, _ = i.NextRune() -// AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") -// } + // Accept two runes via fork.
+ r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // e + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // s + AssertEqual(t, "es", i.String(), "result runes in fork") + AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset") + AssertEqual(t, 3, i.stackFrame.offset, "child offset") -// TODO FIXME -// func TestFlushInput(t *testing.T) { -// api := NewAPI("cool") + // Merge fork back into parent + i.Merge(child) + i.Dispose(child) + AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()") + AssertEqual(t, 3, i.stackFrame.offset, "parent offset") +} -// // Flushing without any read data is okay. FlushInput() will return -// // false in this case, and nothing else happens. -// AssertTrue(t, api.FlushInput() == false, "flush input at start") +func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { + i := NewAPI("Testing") + r, _, _ := i.PeekRune(0) + i.AcceptRune(r) // T -// api.NextRune() -// api.Accept() -// api.NextRune() -// api.Accept() + f1 := i.Fork() + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // e -// AssertTrue(t, api.FlushInput() == true, "flush input after reading some data") -// AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input") + f2 := i.Fork() + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // s + AssertEqual(t, "s", i.String(), "f2 String()") + AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A") -// AssertTrue(t, api.FlushInput() == false, "flush input after flush input") + i.Merge(f2) + i.Dispose(f2) + AssertEqual(t, "es", i.String(), "f1 String()") + AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") -// // Read offset is now zero, but reading should continue after "co".
-// api.NextRune() -// api.Accept() -// api.NextRune() -// api.Accept() + i.Merge(f1) + i.Dispose(f1) + AssertEqual(t, "Tes", i.String(), "top-level API String()") + AssertEqual(t, 3, i.stackFrame.offset, "top-level API offset") +} -// AssertEqual(t, "cool", api.String(), "end result") -// } +func TestFlushInput(t *testing.T) { + i := NewAPI("cool") + + // Flushing without any read data is okay. FlushInput() will return + // false in this case, and nothing else happens. + AssertTrue(t, i.FlushInput() == false, "flush input at start") + + r, _, _ := i.PeekRune(0) + i.AcceptRune(r) // c + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // o + + AssertTrue(t, i.FlushInput() == true, "flush input after reading some data") + AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input") + + AssertTrue(t, i.FlushInput() == false, "flush input after flush input") + + // Read offset is now zero, but reading should continue after "co". + // The output so far isn't modified, so the following accept calls + // will add their runes to the already accepted string "co". + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // o + r, _, _ = i.PeekRune(0) + i.AcceptRune(r) // l + + AssertEqual(t, "cool", i.String(), "end result") +} func TestInputFlusherWrapper(t *testing.T) { runeA := A.Rune('a')