Made a big jump in performance on big files with lots of comments, by reading in chunks till end of line, instead of byte-by-byte.

This commit is contained in:
Maurice Makaay 2019-07-28 23:50:58 +00:00
parent 53ae659ef6
commit e0b1039abd
8 changed files with 270 additions and 262 deletions

View File

@ -199,7 +199,7 @@ func (parseAPI *API) ExpectEndOfFile() {
// • there was an error while reading the input. // • there was an error while reading the input.
func (parseAPI *API) Expected(expected string) { func (parseAPI *API) Expected(expected string) {
parseAPI.panicWhenStoppedOrInError("Expected") parseAPI.panicWhenStoppedOrInError("Expected")
_, err := parseAPI.tokenAPI.Byte.Peek(0) _, err := parseAPI.tokenAPI.Input.Byte.Peek(0)
switch { switch {
case err == nil: case err == nil:
parseAPI.SetError("unexpected input%s", fmtExpects(expected)) parseAPI.SetError("unexpected input%s", fmtExpects(expected))

View File

@ -74,8 +74,6 @@ type API struct {
reader read.Buffer // the buffered input reader reader read.Buffer // the buffered input reader
pointers stackFrame // various values for keeping track of input, output, cursor. pointers stackFrame // various values for keeping track of input, output, cursor.
Input Input // access to a set of general input-related methods Input Input // access to a set of general input-related methods
Byte InputByteMode // access to a set of byte-based input methods
Rune InputRuneMode // access to a set of UTF8 rune-based input methods
Output Output // access to a set of output-related methods Output Output // access to a set of output-related methods
Result Result // access to a set of result retrieval methods Result Result // access to a set of result retrieval methods
outputTokens []Token // storage for accepted tokens outputTokens []Token // storage for accepted tokens

View File

@ -4,13 +4,13 @@ import "testing"
func TestMoveCursorByBytes(t *testing.T) { func TestMoveCursorByBytes(t *testing.T) {
tokenAPI := NewAPI("") tokenAPI := NewAPI("")
tokenAPI.Byte.MoveCursor('a') tokenAPI.Input.Byte.MoveCursor('a')
tokenAPI.Byte.MoveCursor('b') tokenAPI.Input.Byte.MoveCursor('b')
tokenAPI.Byte.MoveCursor('c') tokenAPI.Input.Byte.MoveCursor('c')
tokenAPI.Byte.MoveCursor('\r') tokenAPI.Input.Byte.MoveCursor('\r')
tokenAPI.Byte.MoveCursor('\n') tokenAPI.Input.Byte.MoveCursor('\n')
tokenAPI.Byte.MoveCursor('a') tokenAPI.Input.Byte.MoveCursor('a')
tokenAPI.Byte.MoveCursor('b') tokenAPI.Input.Byte.MoveCursor('b')
AssertEqual(t, "line 2, column 3", tokenAPI.Input.Cursor(), "Cursor position after moving by byte") AssertEqual(t, "line 2, column 3", tokenAPI.Input.Cursor(), "Cursor position after moving by byte")
AssertEqual(t, 7, tokenAPI.pointers.offset, "Offset after moving by byte") AssertEqual(t, 7, tokenAPI.pointers.offset, "Offset after moving by byte")

View File

@ -6,12 +6,12 @@ import (
func TestMoveCursorByRunes(t *testing.T) { func TestMoveCursorByRunes(t *testing.T) {
tokenAPI := NewAPI("") tokenAPI := NewAPI("")
tokenAPI.Rune.MoveCursor('ɹ') tokenAPI.Input.Rune.MoveCursor('ɹ')
tokenAPI.Rune.MoveCursor('n') tokenAPI.Input.Rune.MoveCursor('n')
tokenAPI.Rune.MoveCursor('u') tokenAPI.Input.Rune.MoveCursor('u')
tokenAPI.Rune.MoveCursor('\r') tokenAPI.Input.Rune.MoveCursor('\r')
tokenAPI.Rune.MoveCursor('\n') tokenAPI.Input.Rune.MoveCursor('\n')
tokenAPI.Rune.MoveCursor('ǝ') tokenAPI.Input.Rune.MoveCursor('ǝ')
AssertEqual(t, "line 2, column 2", tokenAPI.Input.Cursor(), "Cursor position after moving by rune") AssertEqual(t, "line 2, column 2", tokenAPI.Input.Cursor(), "Cursor position after moving by rune")
AssertEqual(t, 8, tokenAPI.pointers.offset, "Offset after moving by rune") AssertEqual(t, 8, tokenAPI.pointers.offset, "Offset after moving by rune")
@ -38,7 +38,7 @@ func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
tokenAPI := NewAPI("") tokenAPI := NewAPI("")
for _, s := range test.input { for _, s := range test.input {
for _, r := range s { for _, r := range s {
tokenAPI.Rune.MoveCursor(r) tokenAPI.Input.Rune.MoveCursor(r)
} }
} }
if tokenAPI.pointers.line != test.line { if tokenAPI.pointers.line != test.line {

View File

@ -45,10 +45,10 @@ func ExampleNewAPI() {
func ExampleAPI_PeekByte() { func ExampleAPI_PeekByte() {
tokenAPI := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
r1, _, err := tokenAPI.Rune.Peek(19) // 'A', r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'A',
r2, _, err := tokenAPI.Rune.Peek(20) // 'P' r2, _, err := tokenAPI.Input.Rune.Peek(20) // 'P'
r3, _, err := tokenAPI.Rune.Peek(21) // 'I' r3, _, err := tokenAPI.Input.Rune.Peek(21) // 'I'
_, _, err = tokenAPI.Rune.Peek(100) // EOF _, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -59,10 +59,10 @@ func ExampleAPI_PeekByte() {
func ExampleAPI_PeekRune() { func ExampleAPI_PeekRune() {
tokenAPI := tokenize.NewAPI("The input that the ДPI will handle") tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
r1, _, err := tokenAPI.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21 r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21
r2, _, err := tokenAPI.Rune.Peek(21) // 'P' r2, _, err := tokenAPI.Input.Rune.Peek(21) // 'P'
r3, _, err := tokenAPI.Rune.Peek(22) // 'I' r3, _, err := tokenAPI.Input.Rune.Peek(22) // 'I'
_, _, err = tokenAPI.Rune.Peek(100) // EOF _, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
@ -74,15 +74,15 @@ func ExampleAPI_AcceptRune() {
tokenAPI := tokenize.NewAPI("The input that the ДPI will handle") tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
// Reads 'T' and accepts it to the API output data. // Reads 'T' and accepts it to the API output data.
r, _, _ := tokenAPI.Rune.Peek(0) r, _, _ := tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
// Reads 'h' and accepts it to the API output data. // Reads 'h' and accepts it to the API output data.
r, _, _ = tokenAPI.Rune.Peek(0) r, _, _ = tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
// Reads 'e', but does not accept it to the API output data. // Reads 'e', but does not accept it to the API output data.
r, _, _ = tokenAPI.Rune.Peek(0) r, _, _ = tokenAPI.Input.Rune.Peek(0)
fmt.Printf("API results: %q\n", tokenAPI.Output.String()) fmt.Printf("API results: %q\n", tokenAPI.Output.String())
@ -94,14 +94,14 @@ func ExampleAPI_AcceptRunes() {
tokenAPI := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
// Peeks at the first two runes 'T' and 'h'. // Peeks at the first two runes 'T' and 'h'.
r0, _, _ := tokenAPI.Rune.Peek(0) r0, _, _ := tokenAPI.Input.Rune.Peek(0)
r1, _, _ := tokenAPI.Rune.Peek(1) r1, _, _ := tokenAPI.Input.Rune.Peek(1)
// Peeks at the third rune 'e'. // Peeks at the third rune 'e'.
tokenAPI.Rune.Peek(2) tokenAPI.Input.Rune.Peek(2)
// Accepts only 'T' and 'h' into the API results. // Accepts only 'T' and 'h' into the API results.
tokenAPI.Rune.AcceptMulti(r0, r1) tokenAPI.Input.Rune.AcceptMulti(r0, r1)
fmt.Printf("API results: %q\n", tokenAPI.Output.String()) fmt.Printf("API results: %q\n", tokenAPI.Output.String())
@ -113,7 +113,7 @@ func ExampleAPI_SkipRune() {
tokenAPI := tokenize.NewAPI("The input that the API will handle") tokenAPI := tokenize.NewAPI("The input that the API will handle")
for { for {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
// EOF reached. // EOF reached.
if err != nil { if err != nil {
@ -122,9 +122,9 @@ func ExampleAPI_SkipRune() {
// Only accept runes that are vowels. // Only accept runes that are vowels.
if strings.ContainsRune("aeiouAEIOU", r) { if strings.ContainsRune("aeiouAEIOU", r) {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
} else { } else {
tokenAPI.Rune.MoveCursor(r) tokenAPI.Input.Rune.MoveCursor(r)
} }
} }
@ -174,10 +174,10 @@ func ExampleAPI_modifyingResults() {
// func ExampleAPI_Reset() { // func ExampleAPI_Reset() {
// tokenAPI := tokenize.NewAPI("Very important input!") // tokenAPI := tokenize.NewAPI("Very important input!")
// r, _, _ := tokenAPI.Rune.Peek(0) // read 'V' // r, _, _ := tokenAPI.Input.Rune.Peek(0) // read 'V'
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'e' // r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// // Reset input and output. // // Reset input and output.
@ -186,10 +186,10 @@ func ExampleAPI_modifyingResults() {
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// // So then doing the same read operations, the same data are read. // // So then doing the same read operations, the same data are read.
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'V' // r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'V'
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'e' // r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
// // Output: // // Output:
@ -241,14 +241,14 @@ func ExampleAPI_modifyingResults() {
// func ExampleAPI_Merge() { // func ExampleAPI_Merge() {
// tokenHandler := func(t *tokenize.API) bool { // tokenHandler := func(t *tokenize.API) bool {
// child1 := t.Fork() // child1 := t.Fork()
// r0, _, _ := t.Rune.Peek(0) // reads 'H' // r0, _, _ := t.Input.Rune.Peek(0) // reads 'H'
// r1, _, _ := t.Rune.Peek(1) // reads 'i' // r1, _, _ := t.Input.Rune.Peek(1) // reads 'i'
// t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1 // t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1
// child2 := t.Fork() // child2 := t.Fork()
// r0, _, _ = t.Rune.Peek(0) // reads ' ' // r0, _, _ = t.Input.Rune.Peek(0) // reads ' '
// r1, _, _ = t.Rune.Peek(1) // reads 'm' // r1, _, _ = t.Input.Rune.Peek(1) // reads 'm'
// t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2 // t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2
// t.Dispose(child2) // but they are not merged and therefore not used by child1 // t.Dispose(child2) // but they are not merged and therefore not used by child1
// t.Merge(child1) // We merge child1, which has read 'H' and 'i' only. // t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
@ -273,15 +273,15 @@ func ExampleAPI_modifyingResults() {
// child4 := tokenAPI.Fork() // child4 := tokenAPI.Fork()
// // Read a rune 'a' from child4. // // Read a rune 'a' from child4.
// r, _, _ := tokenAPI.Rune.Peek(0) // r, _, _ := tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'a', r, "child4 rune 1") // AssertEqual(t, 'a', r, "child4 rune 1")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1") // AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")
// // Read another rune 'b' from child4. // // Read another rune 'b' from child4.
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'b', r, "child4 rune 2") // AssertEqual(t, 'b', r, "child4 rune 2")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2") // AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")
// // Merge "ab" from child4 to child3. // // Merge "ab" from child4 to child3.
@ -289,9 +289,9 @@ func ExampleAPI_modifyingResults() {
// AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge") // AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")
// // Read some more from child4. // // Read some more from child4.
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'c', r, "child4 rune 3") // AssertEqual(t, 'c', r, "child4 rune 3")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 1") // AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 1")
// AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3") // AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")
@ -306,29 +306,29 @@ func ExampleAPI_modifyingResults() {
// AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4") // AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
// // Now read some data from child3. // // Now read some data from child3.
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'd', r, "child3 rune 5") // AssertEqual(t, 'd', r, "child3 rune 5")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'e', r, "child3 rune 5") // AssertEqual(t, 'e', r, "child3 rune 5")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// AssertEqual(t, 'f', r, "child3 rune 5") // AssertEqual(t, 'f', r, "child3 rune 5")
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6") // AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")
// // Temporarily go some new forks from here, but don't use their outcome. // // Temporarily go some new forks from here, but don't use their outcome.
// child3sub1 := tokenAPI.Fork() // child3sub1 := tokenAPI.Fork()
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// child3sub2 := tokenAPI.Fork() // child3sub2 := tokenAPI.Fork()
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1 // tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1
// tokenAPI.Dispose(child3sub2) // and dispose of sub2 // tokenAPI.Dispose(child3sub2) // and dispose of sub2
// tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging // tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging
@ -355,8 +355,8 @@ func ExampleAPI_modifyingResults() {
// tokenAPI.Dispose(child1) // tokenAPI.Dispose(child1)
// // Read some data from the top level api. // // Read some data from the top level api.
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // tokenAPI.Input.Rune.Accept(r)
// AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result") // AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
// AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result") // AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
@ -364,15 +364,15 @@ func ExampleAPI_modifyingResults() {
func TestClearData(t *testing.T) { func TestClearData(t *testing.T) {
tokenAPI := tokenize.NewAPI("Laphroaig") tokenAPI := tokenize.NewAPI("Laphroaig")
r, _, _ := tokenAPI.Rune.Peek(0) // Read 'L' r, _, _ := tokenAPI.Input.Rune.Peek(0) // Read 'L'
tokenAPI.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'a' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a'
tokenAPI.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start. tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start.
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'p' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p'
tokenAPI.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'r' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r'
tokenAPI.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result") AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
} }

View File

@ -267,7 +267,6 @@ var A = struct {
// Doing so saves you a lot of typing, and it makes your code a lot cleaner. // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct { var M = struct {
Drop func(Handler) Handler Drop func(Handler) Handler
DropUntilEndOfLine Handler
Trim func(handler Handler, cutset string) Handler Trim func(handler Handler, cutset string) Handler
TrimLeft func(handler Handler, cutset string) Handler TrimLeft func(handler Handler, cutset string) Handler
TrimRight func(handler Handler, cutset string) Handler TrimRight func(handler Handler, cutset string) Handler
@ -278,7 +277,6 @@ var M = struct {
ByCallback func(Handler, func(string) string) Handler ByCallback func(Handler, func(string) string) Handler
}{ }{
Drop: ModifyDrop, Drop: ModifyDrop,
DropUntilEndOfLine: ModifyDropUntilEndOfLine(),
Trim: ModifyTrim, Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft, TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight, TrimRight: ModifyTrimRight,
@ -350,9 +348,9 @@ var T = struct {
// MatchByte creates a Handler function that matches against the provided byte. // MatchByte creates a Handler function that matches against the provided byte.
func MatchByte(expected byte) Handler { func MatchByte(expected byte) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && b == expected { if err == nil && b == expected {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -365,9 +363,9 @@ func MatchRune(expected rune) Handler {
return MatchByte(byte(expected)) return MatchByte(byte(expected))
} }
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && r == expected { if err == nil && r == expected {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -378,13 +376,13 @@ func MatchRune(expected rune) Handler {
// one of the provided bytes. The first match counts. // one of the provided bytes. The first match counts.
func MatchBytes(expected ...byte) Handler { func MatchBytes(expected ...byte) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil { if err != nil {
return false return false
} }
for _, e := range expected { for _, e := range expected {
if b == e { if b == e {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
} }
@ -408,13 +406,13 @@ func MatchRunes(expected ...rune) Handler {
return MatchBytes(expectedBytes...) return MatchBytes(expectedBytes...)
} }
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err != nil { if err != nil {
return false return false
} }
for _, e := range expected { for _, e := range expected {
if r == e { if r == e {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
} }
@ -434,9 +432,9 @@ func MatchByteRange(start byte, end byte) Handler {
callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
} }
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && b >= start && b <= end { if err == nil && b >= start && b <= end {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -458,9 +456,9 @@ func MatchRuneRange(start rune, end rune) Handler {
return MatchByteRange(byte(start), byte(end)) return MatchByteRange(byte(start), byte(end))
} }
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && r >= start && r <= end { if err == nil && r >= start && r <= end {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -471,18 +469,18 @@ func MatchRuneRange(start rune, end rune) Handler {
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
func MatchNewline() Handler { func MatchNewline() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b1, err := tokenAPI.Byte.Peek(0) b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil { if err != nil {
return false return false
} }
if b1 == '\n' { if b1 == '\n' {
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
return true return true
} }
if b1 == '\r' { if b1 == '\r' {
b2, err := tokenAPI.Byte.Peek(1) b2, err := tokenAPI.Input.Byte.Peek(1)
if err == nil && b2 == '\n' { if err == nil && b2 == '\n' {
tokenAPI.Byte.AcceptMulti(b1, b2) tokenAPI.Input.Byte.AcceptMulti(b1, b2)
return true return true
} }
} }
@ -497,9 +495,9 @@ func MatchNewline() Handler {
// newlines, then take a look at MatchWhitespace(). // newlines, then take a look at MatchWhitespace().
func MatchBlank() Handler { func MatchBlank() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && (b == ' ' || b == '\t') { if err == nil && (b == ' ' || b == '\t') {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -516,20 +514,20 @@ func MatchBlank() Handler {
func MatchBlanks() Handler { func MatchBlanks() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Match the first blank. // Match the first blank.
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') { if err != nil || (b != ' ' && b != '\t') {
return false return false
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
// Now match any number of followup blanks. We've already got // Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end. // a successful match at this point, so we'll always return true at the end.
for { for {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') { if err != nil || (b != ' ' && b != '\t') {
return true return true
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
} }
} }
@ -540,35 +538,35 @@ func MatchBlanks() Handler {
func MatchWhitespace() Handler { func MatchWhitespace() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Match the first whitespace. // Match the first whitespace.
b1, err := tokenAPI.Byte.Peek(0) b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return false return false
} }
if b1 == '\r' { if b1 == '\r' {
b2, err := tokenAPI.Byte.Peek(1) b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' { if err != nil || b2 != '\n' {
return false return false
} }
tokenAPI.Byte.AcceptMulti(b1, b2) tokenAPI.Input.Byte.AcceptMulti(b1, b2)
} else { } else {
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
} }
// Now match any number of followup whitespace. We've already got // Now match any number of followup whitespace. We've already got
// a successful match at this point, so we'll always return true at the end. // a successful match at this point, so we'll always return true at the end.
for { for {
b1, err := tokenAPI.Byte.Peek(0) b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return true return true
} }
if b1 == '\r' { if b1 == '\r' {
b2, err := tokenAPI.Byte.Peek(1) b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' { if err != nil || b2 != '\n' {
return true return true
} }
tokenAPI.Byte.AcceptMulti(b1, b2) tokenAPI.Input.Byte.AcceptMulti(b1, b2)
} else { } else {
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
} }
} }
} }
@ -588,9 +586,9 @@ func MatchUnicodeSpace() Handler {
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchByteByCallback(callback func(byte) bool) Handler { func MatchByteByCallback(callback func(byte) bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && callback(b) { if err == nil && callback(b) {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -605,9 +603,9 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) Handler { func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && callback(r) { if err == nil && callback(r) {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -617,18 +615,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
func MatchEndOfLine() Handler { func MatchEndOfLine() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b1, err := tokenAPI.Byte.Peek(0) b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil { if err != nil {
return err == io.EOF return err == io.EOF
} }
if b1 == '\n' { if b1 == '\n' {
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
return true return true
} }
if b1 == '\r' { if b1 == '\r' {
b2, _ := tokenAPI.Byte.Peek(1) b2, _ := tokenAPI.Input.Byte.Peek(1)
if b2 == '\n' { if b2 == '\n' {
tokenAPI.Byte.AcceptMulti(b1, b2) tokenAPI.Input.Byte.AcceptMulti(b1, b2)
return true return true
} }
} }
@ -642,7 +640,7 @@ func MatchStr(expected string) Handler {
expectedLength := len(expectedBytes) expectedLength := len(expectedBytes)
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.PeekMulti(0, expectedLength) b, err := tokenAPI.Input.Byte.PeekMulti(0, expectedLength)
if err != nil || len(b) < expectedLength { if err != nil || len(b) < expectedLength {
return false return false
} }
@ -651,7 +649,7 @@ func MatchStr(expected string) Handler {
return false return false
} }
} }
tokenAPI.Byte.AcceptMulti(expectedBytes...) tokenAPI.Input.Byte.AcceptMulti(expectedBytes...)
return true return true
} }
} }
@ -667,14 +665,14 @@ func MatchStrNoCase(expected string) Handler {
i := 0 i := 0
for _, e := range expected { for _, e := range expected {
if e <= '\x7F' { if e <= '\x7F' {
b, err := tokenAPI.Byte.Peek(offset) b, err := tokenAPI.Input.Byte.Peek(offset)
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) { if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
return false return false
} }
matches[i] = rune(b) matches[i] = rune(b)
offset++ offset++
} else { } else {
r, w, err := tokenAPI.Rune.Peek(offset) r, w, err := tokenAPI.Input.Rune.Peek(offset)
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) { if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
return false return false
} }
@ -683,7 +681,7 @@ func MatchStrNoCase(expected string) Handler {
} }
i++ i++
} }
tokenAPI.Rune.AcceptMulti(matches...) tokenAPI.Input.Rune.AcceptMulti(matches...)
return true return true
} }
} }
@ -746,9 +744,9 @@ func MatchNot(handler Handler) Handler {
tokenAPI.RestoreSnapshot(snap) tokenAPI.RestoreSnapshot(snap)
return false return false
} }
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil { if err == nil {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -942,13 +940,13 @@ func MakeInputFlusher(handler Handler) Handler {
// C.Signed(A.Integer) // C.Signed(A.Integer)
func MatchSigned(handler Handler) Handler { func MatchSigned(handler Handler) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil { if err != nil {
return false return false
} }
snap := tokenAPI.MakeSnapshot() snap := tokenAPI.MakeSnapshot()
if b == '-' || b == '+' { if b == '-' || b == '+' {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
if handler(tokenAPI) { if handler(tokenAPI) {
return true return true
@ -985,7 +983,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
// a successful or a failing match through its boolean return value. // a successful or a failing match through its boolean return value.
func MatchEndOfFile() Handler { func MatchEndOfFile() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
_, err := tokenAPI.Byte.Peek(0) _, err := tokenAPI.Input.Byte.Peek(0)
return err == io.EOF return err == io.EOF
} }
} }
@ -994,15 +992,47 @@ func MatchEndOfFile() Handler {
// more runes until the end of the line (or file when that's the case). // more runes until the end of the line (or file when that's the case).
// The newline itself is not included in the match. // The newline itself is not included in the match.
func MatchUntilEndOfLine() Handler { func MatchUntilEndOfLine() Handler {
return MatchOneOrMore(MatchNot(MatchEndOfLine())) return func(tokenAPI *API) bool {
f := tokenAPI.Input.Byte.AcceptMulti
if tokenAPI.Output.suspended > 0 {
f = tokenAPI.Input.Byte.MoveCursorMulti
}
for {
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
state := 0
for i, b := range bs {
if b == '\r' {
state = 1
continue
}
if b == '\n' {
if state == 1 {
f(bs[:i+1]...)
} else {
f(bs[:i]...)
}
return true
}
state = 0
}
if err != nil {
if err == io.EOF {
f(bs...)
return true
}
return false
}
f(bs...)
}
}
} }
// MatchAnyByte creates a Handler function that accepts any byte from the input. // MatchAnyByte creates a Handler function that accepts any byte from the input.
func MatchAnyByte() Handler { func MatchAnyByte() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil { if err == nil {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -1014,9 +1044,9 @@ func MatchAnyByte() Handler {
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>. // replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
func MatchAnyRune() Handler { func MatchAnyRune() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil { if err == nil {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -1027,9 +1057,9 @@ func MatchAnyRune() Handler {
// UTF8 rune can be read from the input. // UTF8 rune can be read from the input.
func MatchValidRune() Handler { func MatchValidRune() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && r != utf8.RuneError { if err == nil && r != utf8.RuneError {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -1040,9 +1070,9 @@ func MatchValidRune() Handler {
// UTF8 rune can be read from the input. // UTF8 rune can be read from the input.
func MatchInvalidRune() Handler { func MatchInvalidRune() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && r == utf8.RuneError { if err == nil && r == utf8.RuneError {
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
return true return true
} }
return false return false
@ -1060,19 +1090,19 @@ func MatchDigit() Handler {
func MatchDigits() Handler { func MatchDigits() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Check if the first character is a digit. // Check if the first character is a digit.
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return false return false
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
// Continue accepting bytes as long as they are digits. // Continue accepting bytes as long as they are digits.
for { for {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return true return true
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
} }
} }
@ -1091,7 +1121,7 @@ func MatchDigitNotZero() Handler {
func MatchInteger(normalize bool) Handler { func MatchInteger(normalize bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Check if the first character is a digit. // Check if the first character is a digit.
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return false return false
} }
@ -1099,33 +1129,33 @@ func MatchInteger(normalize bool) Handler {
// When normalization is requested, drop leading zeroes. // When normalization is requested, drop leading zeroes.
if normalize && b == '0' { if normalize && b == '0' {
for { for {
b2, err := tokenAPI.Byte.Peek(1) b2, err := tokenAPI.Input.Byte.Peek(1)
// The next character is a zero, skip the leading zero and check again. // The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b { if err == nil && b2 == b {
tokenAPI.Byte.MoveCursor('0') tokenAPI.Input.Byte.MoveCursor('0')
continue continue
} }
// The next character is not a zero, nor a digit at all. // The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here. // We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' { if err != nil || b2 < '1' || b2 > '9' {
tokenAPI.Byte.Accept('0') tokenAPI.Input.Byte.Accept('0')
return true return true
} }
// The next character is a digit. SKip the leading zero and go with the digit. // The next character is a digit. SKip the leading zero and go with the digit.
tokenAPI.Byte.MoveCursor('0') tokenAPI.Input.Byte.MoveCursor('0')
tokenAPI.Byte.Accept(b2) tokenAPI.Input.Byte.Accept(b2)
break break
} }
} }
// Continue accepting bytes as long as they are digits. // Continue accepting bytes as long as they are digits.
for { for {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return true return true
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
} }
} }
@ -1140,7 +1170,7 @@ func MatchInteger(normalize bool) Handler {
func MatchDecimal(normalize bool) Handler { func MatchDecimal(normalize bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Check if the first character is a digit. // Check if the first character is a digit.
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return false return false
} }
@ -1148,58 +1178,58 @@ func MatchDecimal(normalize bool) Handler {
// When normalization is requested, drop leading zeroes. // When normalization is requested, drop leading zeroes.
if normalize && b == '0' { if normalize && b == '0' {
for { for {
b2, err := tokenAPI.Byte.Peek(1) b2, err := tokenAPI.Input.Byte.Peek(1)
// The next character is a zero, skip the leading zero and check again. // The next character is a zero, skip the leading zero and check again.
if err == nil && b2 == b { if err == nil && b2 == b {
tokenAPI.Byte.MoveCursor('0') tokenAPI.Input.Byte.MoveCursor('0')
continue continue
} }
// The next character is a dot, go with the zero before the dot and // The next character is a dot, go with the zero before the dot and
// let the upcoming code handle the dot. // let the upcoming code handle the dot.
if err == nil && b2 == '.' { if err == nil && b2 == '.' {
tokenAPI.Byte.Accept('0') tokenAPI.Input.Byte.Accept('0')
break break
} }
// The next character is not a zero, nor a digit at all. // The next character is not a zero, nor a digit at all.
// We're looking at a zero on its own here. // We're looking at a zero on its own here.
if err != nil || b2 < '1' || b2 > '9' { if err != nil || b2 < '1' || b2 > '9' {
tokenAPI.Byte.Accept('0') tokenAPI.Input.Byte.Accept('0')
return true return true
} }
// The next character is a digit. SKip the leading zero and go with the digit. // The next character is a digit. SKip the leading zero and go with the digit.
tokenAPI.Byte.MoveCursor('0') tokenAPI.Input.Byte.MoveCursor('0')
tokenAPI.Byte.Accept(b2) tokenAPI.Input.Byte.Accept(b2)
break break
} }
} }
// Continue accepting bytes as long as they are digits. // Continue accepting bytes as long as they are digits.
for { for {
b, err = tokenAPI.Byte.Peek(0) b, err = tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
break break
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
// No dot or no digit after a dot? Then we're done. // No dot or no digit after a dot? Then we're done.
if b != '.' { if b != '.' {
return true return true
} }
b, err = tokenAPI.Byte.Peek(1) b, err = tokenAPI.Input.Byte.Peek(1)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
return true return true
} }
// Continue accepting bytes as long as they are digits. // Continue accepting bytes as long as they are digits.
tokenAPI.Byte.AcceptMulti('.', b) tokenAPI.Input.Byte.AcceptMulti('.', b)
for { for {
b, err = tokenAPI.Byte.Peek(0) b, err = tokenAPI.Input.Byte.Peek(0)
if err != nil || b < '0' || b > '9' { if err != nil || b < '0' || b > '9' {
break break
} }
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
return true return true
} }
@ -1269,9 +1299,9 @@ func MatchUnicodeLower() Handler {
// digit can be read from the input. // digit can be read from the input.
func MatchHexDigit() Handler { func MatchHexDigit() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
return true return true
} }
return false return false
@ -1289,28 +1319,28 @@ func MatchHexDigit() Handler {
func MatchOctet(normalize bool) Handler { func MatchOctet(normalize bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Digit 1 // Digit 1
b0, err := tokenAPI.Byte.Peek(0) b0, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b0 < '0' || b0 > '9' { if err != nil || b0 < '0' || b0 > '9' {
return false return false
} }
// Digit 2 // Digit 2
b1, err := tokenAPI.Byte.Peek(1) b1, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b1 < '0' || b1 > '9' { if err != nil || b1 < '0' || b1 > '9' {
// Output 1-digit octet. // Output 1-digit octet.
tokenAPI.Byte.Accept(b0) tokenAPI.Input.Byte.Accept(b0)
return true return true
} }
// Digit 3 // Digit 3
b2, err := tokenAPI.Byte.Peek(2) b2, err := tokenAPI.Input.Byte.Peek(2)
if err != nil || b2 < '0' || b2 > '9' { if err != nil || b2 < '0' || b2 > '9' {
// Output 2-digit octet. // Output 2-digit octet.
if normalize && b0 == '0' { if normalize && b0 == '0' {
tokenAPI.Byte.MoveCursor(b0) tokenAPI.Input.Byte.MoveCursor(b0)
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
} else { } else {
tokenAPI.Byte.AcceptMulti(b0, b1) tokenAPI.Input.Byte.AcceptMulti(b0, b1)
} }
return true return true
} }
@ -1322,15 +1352,15 @@ func MatchOctet(normalize bool) Handler {
// Output 3-digit octet. // Output 3-digit octet.
if normalize && b0 == '0' { if normalize && b0 == '0' {
tokenAPI.Byte.MoveCursor(b0) tokenAPI.Input.Byte.MoveCursor(b0)
if b1 == '0' { if b1 == '0' {
tokenAPI.Byte.MoveCursor(b1) tokenAPI.Input.Byte.MoveCursor(b1)
} else { } else {
tokenAPI.Byte.Accept(b1) tokenAPI.Input.Byte.Accept(b1)
} }
tokenAPI.Byte.Accept(b2) tokenAPI.Input.Byte.Accept(b2)
} else { } else {
tokenAPI.Byte.AcceptMulti(b0, b1, b2) tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
} }
return true return true
} }
@ -1523,27 +1553,6 @@ func ModifyDrop(handler Handler) Handler {
} }
} }
// ModifyDropUntilEndOfLine creates a Handler that drops all input until an end of line
// (or end of file). This handler is typically used when ignoring any input data after
// a comment start like '#' or '//' when parsing code or configuration data.
func ModifyDropUntilEndOfLine() Handler {
return func(tokenAPI *API) bool {
for {
b, err := tokenAPI.Byte.Peek(0)
if err != nil {
if err == io.EOF {
return true
}
return false
}
if b == '\n' {
return true
}
tokenAPI.Byte.MoveCursor(b)
}
}
}
// ModifyTrim creates a Handler that checks if the provided Handler applies. // ModifyTrim creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and characters from the provided // If it does, then its output is taken and characters from the provided
// cutset are trimmed from both the left and the right of the output. // cutset are trimmed from both the left and the right of the output.
@ -1654,6 +1663,7 @@ func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
}) })
} }
// TODO I think here I can win some speed by using the methods from, I think, the parse2 solution.
func interpretString(str string) (string, error) { func interpretString(str string) (string, error) {
var sb strings.Builder var sb strings.Builder
for len(str) > 0 { for len(str) > 0 {

View File

@ -55,19 +55,19 @@ func ExampleNew() {
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) { func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
tokenizeAPI := makeTokenizeAPI() tokenizeAPI := makeTokenizeAPI()
r, _, _ := tokenizeAPI.Rune.Peek(0) r, _, _ := tokenizeAPI.Input.Rune.Peek(0)
AssertEqual(t, 'T', r, "first rune") AssertEqual(t, 'T', r, "first rune")
} }
func TestInputCanAcceptRunesFromReader(t *testing.T) { func TestInputCanAcceptRunesFromReader(t *testing.T) {
tokenAPI := makeTokenizeAPI() tokenAPI := makeTokenizeAPI()
r0, _, _ := tokenAPI.Rune.Peek(0) r0, _, _ := tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r0) tokenAPI.Input.Rune.Accept(r0)
r1, _, _ := tokenAPI.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls. r1, _, _ := tokenAPI.Input.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls.
r2, _, _ := tokenAPI.Rune.Peek(1) r2, _, _ := tokenAPI.Input.Rune.Peek(1)
tokenAPI.Rune.AcceptMulti(r1, r2) tokenAPI.Input.Rune.AcceptMulti(r1, r2)
AssertEqual(t, "Tes", tokenAPI.Output.String(), "i.String()") AssertEqual(t, "Tes", tokenAPI.Output.String(), "i.String()")
} }
@ -136,27 +136,27 @@ func TestAccept_UpdatesCursor(t *testing.T) {
tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1") AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
r, _, _ := tokenAPI.Rune.Peek(0) r, _, _ := tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
} }
AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2") AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2")
r, _, _ := tokenAPI.Rune.Peek(0) // read "\n", cursor ends up at start of new line r, _, _ := tokenAPI.Input.Rune.Peek(0) // read "\n", cursor ends up at start of new line
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3") AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
b, _ := tokenAPI.Byte.Peek(0) b, _ := tokenAPI.Input.Byte.Peek(0)
tokenAPI.Byte.Accept(b) tokenAPI.Input.Byte.Accept(b)
} }
AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4") AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4")
} }
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) { func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
tokenAPI := tokenize.NewAPI(strings.NewReader("X")) tokenAPI := tokenize.NewAPI(strings.NewReader("X"))
r, _, _ := tokenAPI.Rune.Peek(0) r, _, _ := tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) tokenAPI.Input.Rune.Accept(r)
r, _, err := tokenAPI.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
@ -167,9 +167,9 @@ func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
// child := i.Fork() // child := i.Fork()
// // To to the EOF. // // To to the EOF.
// r, _, _ := i.Rune.Peek(0) // r, _, _ := i.Input.Rune.Peek(0)
// i.Rune.Accept(r) // i.Input.Rune.Accept(r)
// r, _, err := i.Rune.Peek(0) // r, _, err := i.Input.Rune.Peek(0)
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") // AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
// AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()") // AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()")
@ -177,7 +177,7 @@ func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
// i.Dispose(child) // i.Dispose(child)
// // So here we should see the same input data as before. // // So here we should see the same input data as before.
// r, _, err = i.Rune.Peek(0) // r, _, err = i.Input.Rune.Peek(0)
// AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") // AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
// AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") // AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
// } // }

View File

@ -7,8 +7,8 @@ import (
func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) { func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
// Create input, accept the first rune. // Create input, accept the first rune.
tokenAPI := NewAPI("Testing") tokenAPI := NewAPI("Testing")
r, _, _ := tokenAPI.Rune.Peek(0) r, _, _ := tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) // T tokenAPI.Input.Rune.Accept(r) // T
AssertEqual(t, "T", tokenAPI.Output.String(), "accepted rune in input") AssertEqual(t, "T", tokenAPI.Output.String(), "accepted rune in input")
// Split // Split
@ -17,10 +17,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
AssertEqual(t, 1, tokenAPI.pointers.offset, "child offset") AssertEqual(t, 1, tokenAPI.pointers.offset, "child offset")
// Accept two runes via fork. // Accept two runes via fork.
r, _, _ = tokenAPI.Rune.Peek(0) r, _, _ = tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) // e tokenAPI.Input.Rune.Accept(r) // e
r, _, _ = tokenAPI.Rune.Peek(0) r, _, _ = tokenAPI.Input.Rune.Peek(0)
tokenAPI.Rune.Accept(r) // s tokenAPI.Input.Rune.Accept(r) // s
AssertEqual(t, "es", tokenAPI.Output.String(), "result runes in split output") AssertEqual(t, "es", tokenAPI.Output.String(), "result runes in split output")
AssertEqual(t, 3, tokenAPI.pointers.offset, "offset in split output") AssertEqual(t, 3, tokenAPI.pointers.offset, "offset in split output")
@ -32,16 +32,16 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
// func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { // func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
// tokenAPI := NewAPI("Testing") // tokenAPI := NewAPI("Testing")
// r, _, _ := tokenAPI.Rune.Peek(0) // r, _, _ := tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // T // tokenAPI.Input.Rune.Accept(r) // T
// f1 := tokenAPI.Fork() // f1 := tokenAPI.Fork()
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // e // tokenAPI.Input.Rune.Accept(r) // e
// f2 := tokenAPI.Fork() // f2 := tokenAPI.Fork()
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // s // tokenAPI.Input.Rune.Accept(r) // s
// AssertEqual(t, "s", tokenAPI.Output.String(), "f2 String()") // AssertEqual(t, "s", tokenAPI.Output.String(), "f2 String()")
// AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f2.offset A") // AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f2.offset A")
@ -63,10 +63,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
// // false in this case, and nothing else happens. // // false in this case, and nothing else happens.
// AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input at start") // AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input at start")
// r, _, _ := tokenAPI.Rune.Peek(0) // r, _, _ := tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // c // tokenAPI.Input.Rune.Accept(r) // c
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // o // tokenAPI.Input.Rune.Accept(r) // o
// AssertTrue(t, tokenAPI.Input.Flush() == true, "flush input after reading some data") // AssertTrue(t, tokenAPI.Input.Flush() == true, "flush input after reading some data")
// AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after flush input") // AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after flush input")
@ -76,10 +76,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
// // Read offset is now zero, but reading should continue after "co". // // Read offset is now zero, but reading should continue after "co".
// // The output so far isn't modified, so the following accept calls // // The output so far isn't modified, so the following accept calls
// // will add their runes to the already accepted string "co". // // will add their runes to the already accepted string "co".
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // o // tokenAPI.Input.Rune.Accept(r) // o
// r, _, _ = tokenAPI.Rune.Peek(0) // r, _, _ = tokenAPI.Input.Rune.Peek(0)
// tokenAPI.Rune.Accept(r) // o // tokenAPI.Input.Rune.Accept(r) // o
// AssertEqual(t, "cool", tokenAPI.Output.String(), "end result") // AssertEqual(t, "cool", tokenAPI.Output.String(), "end result")
// } // }