Made a big jump in performance on big files with lots of comments, by reading in chunks till end of line, instead of byte-by-byte.
This commit is contained in:
parent
53ae659ef6
commit
e0b1039abd
|
@ -199,7 +199,7 @@ func (parseAPI *API) ExpectEndOfFile() {
|
|||
// • there was an error while reading the input.
|
||||
func (parseAPI *API) Expected(expected string) {
|
||||
parseAPI.panicWhenStoppedOrInError("Expected")
|
||||
_, err := parseAPI.tokenAPI.Byte.Peek(0)
|
||||
_, err := parseAPI.tokenAPI.Input.Byte.Peek(0)
|
||||
switch {
|
||||
case err == nil:
|
||||
parseAPI.SetError("unexpected input%s", fmtExpects(expected))
|
||||
|
|
|
@ -71,15 +71,13 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
|
||||
reader read.Buffer // the buffered input reader
|
||||
pointers stackFrame // various values for keeping track of input, output, cursor.
|
||||
Input Input // access to a set of general input-related methods
|
||||
Byte InputByteMode // access to a set of byte-based input methods
|
||||
Rune InputRuneMode // access to a set of UTF8 rune-based input methods
|
||||
Output Output // access to a set of output-related methods
|
||||
Result Result // access to a set of result retrieval methods
|
||||
outputTokens []Token // storage for accepted tokens
|
||||
outputBytes []byte // storage for accepted bytes
|
||||
reader read.Buffer // the buffered input reader
|
||||
pointers stackFrame // various values for keeping track of input, output, cursor.
|
||||
Input Input // access to a set of general input-related methods
|
||||
Output Output // access to a set of output-related methods
|
||||
Result Result // access to a set of result retrieval methods
|
||||
outputTokens []Token // storage for accepted tokens
|
||||
outputBytes []byte // storage for accepted bytes
|
||||
}
|
||||
|
||||
type stackFrame struct {
|
||||
|
|
|
@ -4,13 +4,13 @@ import "testing"
|
|||
|
||||
func TestMoveCursorByBytes(t *testing.T) {
|
||||
tokenAPI := NewAPI("")
|
||||
tokenAPI.Byte.MoveCursor('a')
|
||||
tokenAPI.Byte.MoveCursor('b')
|
||||
tokenAPI.Byte.MoveCursor('c')
|
||||
tokenAPI.Byte.MoveCursor('\r')
|
||||
tokenAPI.Byte.MoveCursor('\n')
|
||||
tokenAPI.Byte.MoveCursor('a')
|
||||
tokenAPI.Byte.MoveCursor('b')
|
||||
tokenAPI.Input.Byte.MoveCursor('a')
|
||||
tokenAPI.Input.Byte.MoveCursor('b')
|
||||
tokenAPI.Input.Byte.MoveCursor('c')
|
||||
tokenAPI.Input.Byte.MoveCursor('\r')
|
||||
tokenAPI.Input.Byte.MoveCursor('\n')
|
||||
tokenAPI.Input.Byte.MoveCursor('a')
|
||||
tokenAPI.Input.Byte.MoveCursor('b')
|
||||
|
||||
AssertEqual(t, "line 2, column 3", tokenAPI.Input.Cursor(), "Cursor position after moving by byte")
|
||||
AssertEqual(t, 7, tokenAPI.pointers.offset, "Offset after moving by byte")
|
||||
|
|
|
@ -6,12 +6,12 @@ import (
|
|||
|
||||
func TestMoveCursorByRunes(t *testing.T) {
|
||||
tokenAPI := NewAPI("")
|
||||
tokenAPI.Rune.MoveCursor('ɹ')
|
||||
tokenAPI.Rune.MoveCursor('n')
|
||||
tokenAPI.Rune.MoveCursor('u')
|
||||
tokenAPI.Rune.MoveCursor('\r')
|
||||
tokenAPI.Rune.MoveCursor('\n')
|
||||
tokenAPI.Rune.MoveCursor('ǝ')
|
||||
tokenAPI.Input.Rune.MoveCursor('ɹ')
|
||||
tokenAPI.Input.Rune.MoveCursor('n')
|
||||
tokenAPI.Input.Rune.MoveCursor('u')
|
||||
tokenAPI.Input.Rune.MoveCursor('\r')
|
||||
tokenAPI.Input.Rune.MoveCursor('\n')
|
||||
tokenAPI.Input.Rune.MoveCursor('ǝ')
|
||||
|
||||
AssertEqual(t, "line 2, column 2", tokenAPI.Input.Cursor(), "Cursor position after moving by rune")
|
||||
AssertEqual(t, 8, tokenAPI.pointers.offset, "Offset after moving by rune")
|
||||
|
@ -38,7 +38,7 @@ func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
|
|||
tokenAPI := NewAPI("")
|
||||
for _, s := range test.input {
|
||||
for _, r := range s {
|
||||
tokenAPI.Rune.MoveCursor(r)
|
||||
tokenAPI.Input.Rune.MoveCursor(r)
|
||||
}
|
||||
}
|
||||
if tokenAPI.pointers.line != test.line {
|
||||
|
|
|
@ -45,10 +45,10 @@ func ExampleNewAPI() {
|
|||
func ExampleAPI_PeekByte() {
|
||||
tokenAPI := tokenize.NewAPI("The input that the API will handle")
|
||||
|
||||
r1, _, err := tokenAPI.Rune.Peek(19) // 'A',
|
||||
r2, _, err := tokenAPI.Rune.Peek(20) // 'P'
|
||||
r3, _, err := tokenAPI.Rune.Peek(21) // 'I'
|
||||
_, _, err = tokenAPI.Rune.Peek(100) // EOF
|
||||
r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'A',
|
||||
r2, _, err := tokenAPI.Input.Rune.Peek(20) // 'P'
|
||||
r3, _, err := tokenAPI.Input.Rune.Peek(21) // 'I'
|
||||
_, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
|
||||
|
||||
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
|
||||
|
||||
|
@ -59,10 +59,10 @@ func ExampleAPI_PeekByte() {
|
|||
func ExampleAPI_PeekRune() {
|
||||
tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
|
||||
|
||||
r1, _, err := tokenAPI.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21
|
||||
r2, _, err := tokenAPI.Rune.Peek(21) // 'P'
|
||||
r3, _, err := tokenAPI.Rune.Peek(22) // 'I'
|
||||
_, _, err = tokenAPI.Rune.Peek(100) // EOF
|
||||
r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21
|
||||
r2, _, err := tokenAPI.Input.Rune.Peek(21) // 'P'
|
||||
r3, _, err := tokenAPI.Input.Rune.Peek(22) // 'I'
|
||||
_, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
|
||||
|
||||
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
|
||||
|
||||
|
@ -74,15 +74,15 @@ func ExampleAPI_AcceptRune() {
|
|||
tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
|
||||
|
||||
// Reads 'T' and accepts it to the API output data.
|
||||
r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r)
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// Reads 'h' and accepts it to the API output data.
|
||||
r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r)
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// Reads 'e', but does not accept it to the API output data.
|
||||
r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
|
||||
fmt.Printf("API results: %q\n", tokenAPI.Output.String())
|
||||
|
||||
|
@ -94,14 +94,14 @@ func ExampleAPI_AcceptRunes() {
|
|||
tokenAPI := tokenize.NewAPI("The input that the API will handle")
|
||||
|
||||
// Peeks at the first two runes 'T' and 'h'.
|
||||
r0, _, _ := tokenAPI.Rune.Peek(0)
|
||||
r1, _, _ := tokenAPI.Rune.Peek(1)
|
||||
r0, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
r1, _, _ := tokenAPI.Input.Rune.Peek(1)
|
||||
|
||||
// Peeks at the third rune 'e'.
|
||||
tokenAPI.Rune.Peek(2)
|
||||
tokenAPI.Input.Rune.Peek(2)
|
||||
|
||||
// Accepts only 'T' and 'h' into the API results.
|
||||
tokenAPI.Rune.AcceptMulti(r0, r1)
|
||||
tokenAPI.Input.Rune.AcceptMulti(r0, r1)
|
||||
|
||||
fmt.Printf("API results: %q\n", tokenAPI.Output.String())
|
||||
|
||||
|
@ -113,7 +113,7 @@ func ExampleAPI_SkipRune() {
|
|||
tokenAPI := tokenize.NewAPI("The input that the API will handle")
|
||||
|
||||
for {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
|
||||
// EOF reached.
|
||||
if err != nil {
|
||||
|
@ -122,9 +122,9 @@ func ExampleAPI_SkipRune() {
|
|||
|
||||
// Only accept runes that are vowels.
|
||||
if strings.ContainsRune("aeiouAEIOU", r) {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
} else {
|
||||
tokenAPI.Rune.MoveCursor(r)
|
||||
tokenAPI.Input.Rune.MoveCursor(r)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,10 +174,10 @@ func ExampleAPI_modifyingResults() {
|
|||
// func ExampleAPI_Reset() {
|
||||
// tokenAPI := tokenize.NewAPI("Very important input!")
|
||||
|
||||
// r, _, _ := tokenAPI.Rune.Peek(0) // read 'V'
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ := tokenAPI.Input.Rune.Peek(0) // read 'V'
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
|
||||
|
||||
// // Reset input and output.
|
||||
|
@ -186,10 +186,10 @@ func ExampleAPI_modifyingResults() {
|
|||
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
|
||||
|
||||
// // So then doing the same read operations, the same data are read.
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'V'
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'V'
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
|
||||
|
||||
// // Output:
|
||||
|
@ -241,14 +241,14 @@ func ExampleAPI_modifyingResults() {
|
|||
// func ExampleAPI_Merge() {
|
||||
// tokenHandler := func(t *tokenize.API) bool {
|
||||
// child1 := t.Fork()
|
||||
// r0, _, _ := t.Rune.Peek(0) // reads 'H'
|
||||
// r1, _, _ := t.Rune.Peek(1) // reads 'i'
|
||||
// t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1
|
||||
// r0, _, _ := t.Input.Rune.Peek(0) // reads 'H'
|
||||
// r1, _, _ := t.Input.Rune.Peek(1) // reads 'i'
|
||||
// t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1
|
||||
|
||||
// child2 := t.Fork()
|
||||
// r0, _, _ = t.Rune.Peek(0) // reads ' '
|
||||
// r1, _, _ = t.Rune.Peek(1) // reads 'm'
|
||||
// t.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2
|
||||
// r0, _, _ = t.Input.Rune.Peek(0) // reads ' '
|
||||
// r1, _, _ = t.Input.Rune.Peek(1) // reads 'm'
|
||||
// t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2
|
||||
// t.Dispose(child2) // but they are not merged and therefore not used by child1
|
||||
|
||||
// t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
|
||||
|
@ -273,15 +273,15 @@ func ExampleAPI_modifyingResults() {
|
|||
// child4 := tokenAPI.Fork()
|
||||
|
||||
// // Read a rune 'a' from child4.
|
||||
// r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'a', r, "child4 rune 1")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")
|
||||
|
||||
// // Read another rune 'b' from child4.
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'b', r, "child4 rune 2")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")
|
||||
|
||||
// // Merge "ab" from child4 to child3.
|
||||
|
@ -289,9 +289,9 @@ func ExampleAPI_modifyingResults() {
|
|||
// AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")
|
||||
|
||||
// // Read some more from child4.
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'c', r, "child4 rune 3")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 1")
|
||||
// AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")
|
||||
|
||||
|
@ -306,29 +306,29 @@ func ExampleAPI_modifyingResults() {
|
|||
// AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
|
||||
|
||||
// // Now read some data from child3.
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'd', r, "child3 rune 5")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'e', r, "child3 rune 5")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'f', r, "child3 rune 5")
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")
|
||||
|
||||
// // Temporarily go some new forks from here, but don't use their outcome.
|
||||
// child3sub1 := tokenAPI.Fork()
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// child3sub2 := tokenAPI.Fork()
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
// tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1
|
||||
// tokenAPI.Dispose(child3sub2) // and dispose of sub2
|
||||
// tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging
|
||||
|
@ -355,8 +355,8 @@ func ExampleAPI_modifyingResults() {
|
|||
// tokenAPI.Dispose(child1)
|
||||
|
||||
// // Read some data from the top level api.
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r)
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r)
|
||||
|
||||
// AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
|
||||
// AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
|
||||
|
@ -364,15 +364,15 @@ func ExampleAPI_modifyingResults() {
|
|||
|
||||
func TestClearData(t *testing.T) {
|
||||
tokenAPI := tokenize.NewAPI("Laphroaig")
|
||||
r, _, _ := tokenAPI.Rune.Peek(0) // Read 'L'
|
||||
tokenAPI.Rune.Accept(r) // Add to runes
|
||||
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'a'
|
||||
tokenAPI.Rune.Accept(r) // Add to runes
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0) // Read 'L'
|
||||
tokenAPI.Input.Rune.Accept(r) // Add to runes
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a'
|
||||
tokenAPI.Input.Rune.Accept(r) // Add to runes
|
||||
tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start.
|
||||
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'p'
|
||||
tokenAPI.Rune.Accept(r) // Add to runes
|
||||
r, _, _ = tokenAPI.Rune.Peek(0) // Read 'r'
|
||||
tokenAPI.Rune.Accept(r) // Add to runes
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p'
|
||||
tokenAPI.Input.Rune.Accept(r) // Add to runes
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r'
|
||||
tokenAPI.Input.Rune.Accept(r) // Add to runes
|
||||
|
||||
AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
|
||||
}
|
||||
|
|
|
@ -266,27 +266,25 @@ var A = struct {
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var M = struct {
|
||||
Drop func(Handler) Handler
|
||||
DropUntilEndOfLine Handler
|
||||
Trim func(handler Handler, cutset string) Handler
|
||||
TrimLeft func(handler Handler, cutset string) Handler
|
||||
TrimRight func(handler Handler, cutset string) Handler
|
||||
TrimSpace func(handler Handler) Handler
|
||||
ToLower func(Handler) Handler
|
||||
ToUpper func(Handler) Handler
|
||||
Replace func(handler Handler, replaceWith string) Handler
|
||||
ByCallback func(Handler, func(string) string) Handler
|
||||
Drop func(Handler) Handler
|
||||
Trim func(handler Handler, cutset string) Handler
|
||||
TrimLeft func(handler Handler, cutset string) Handler
|
||||
TrimRight func(handler Handler, cutset string) Handler
|
||||
TrimSpace func(handler Handler) Handler
|
||||
ToLower func(Handler) Handler
|
||||
ToUpper func(Handler) Handler
|
||||
Replace func(handler Handler, replaceWith string) Handler
|
||||
ByCallback func(Handler, func(string) string) Handler
|
||||
}{
|
||||
Drop: ModifyDrop,
|
||||
DropUntilEndOfLine: ModifyDropUntilEndOfLine(),
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
TrimSpace: ModifyTrimSpace,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ByCallback: ModifyByCallback,
|
||||
Drop: ModifyDrop,
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
TrimSpace: ModifyTrimSpace,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ByCallback: ModifyByCallback,
|
||||
}
|
||||
|
||||
// T provides convenient access to a range of Token producers (which in their
|
||||
|
@ -350,9 +348,9 @@ var T = struct {
|
|||
// MatchByte creates a Handler function that matches against the provided byte.
|
||||
func MatchByte(expected byte) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && b == expected {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -365,9 +363,9 @@ func MatchRune(expected rune) Handler {
|
|||
return MatchByte(byte(expected))
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && r == expected {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -378,13 +376,13 @@ func MatchRune(expected rune) Handler {
|
|||
// one of the provided bytes. The first match counts.
|
||||
func MatchBytes(expected ...byte) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range expected {
|
||||
if b == e {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -408,13 +406,13 @@ func MatchRunes(expected ...rune) Handler {
|
|||
return MatchBytes(expectedBytes...)
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range expected {
|
||||
if r == e {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -434,9 +432,9 @@ func MatchByteRange(start byte, end byte) Handler {
|
|||
callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && b >= start && b <= end {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -458,9 +456,9 @@ func MatchRuneRange(start rune, end rune) Handler {
|
|||
return MatchByteRange(byte(start), byte(end))
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && r >= start && r <= end {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -471,18 +469,18 @@ func MatchRuneRange(start rune, end rune) Handler {
|
|||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||||
func MatchNewline() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b1, err := tokenAPI.Byte.Peek(0)
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if b1 == '\n' {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err == nil && b2 == '\n' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -497,9 +495,9 @@ func MatchNewline() Handler {
|
|||
// newlines, then take a look at MatchWhitespace().
|
||||
func MatchBlank() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && (b == ' ' || b == '\t') {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -516,20 +514,20 @@ func MatchBlank() Handler {
|
|||
func MatchBlanks() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Match the first blank.
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b != ' ' && b != '\t') {
|
||||
return false
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
|
||||
// Now match any number of followup blanks. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
for {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b != ' ' && b != '\t') {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -540,35 +538,35 @@ func MatchBlanks() Handler {
|
|||
func MatchWhitespace() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Match the first whitespace.
|
||||
b1, err := tokenAPI.Byte.Peek(0)
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||
return false
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b2 != '\n' {
|
||||
return false
|
||||
}
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
} else {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
}
|
||||
|
||||
// Now match any number of followup whitespace. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
for {
|
||||
b1, err := tokenAPI.Byte.Peek(0)
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||
return true
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b2 != '\n' {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
} else {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -588,9 +586,9 @@ func MatchUnicodeSpace() Handler {
|
|||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||||
func MatchByteByCallback(callback func(byte) bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && callback(b) {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -605,9 +603,9 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
|
|||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||||
func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && callback(r) {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -617,18 +615,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
|
|||
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
||||
func MatchEndOfLine() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b1, err := tokenAPI.Byte.Peek(0)
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return err == io.EOF
|
||||
}
|
||||
if b1 == '\n' {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, _ := tokenAPI.Byte.Peek(1)
|
||||
b2, _ := tokenAPI.Input.Byte.Peek(1)
|
||||
if b2 == '\n' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -642,7 +640,7 @@ func MatchStr(expected string) Handler {
|
|||
expectedLength := len(expectedBytes)
|
||||
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.PeekMulti(0, expectedLength)
|
||||
b, err := tokenAPI.Input.Byte.PeekMulti(0, expectedLength)
|
||||
if err != nil || len(b) < expectedLength {
|
||||
return false
|
||||
}
|
||||
|
@ -651,7 +649,7 @@ func MatchStr(expected string) Handler {
|
|||
return false
|
||||
}
|
||||
}
|
||||
tokenAPI.Byte.AcceptMulti(expectedBytes...)
|
||||
tokenAPI.Input.Byte.AcceptMulti(expectedBytes...)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -667,14 +665,14 @@ func MatchStrNoCase(expected string) Handler {
|
|||
i := 0
|
||||
for _, e := range expected {
|
||||
if e <= '\x7F' {
|
||||
b, err := tokenAPI.Byte.Peek(offset)
|
||||
b, err := tokenAPI.Input.Byte.Peek(offset)
|
||||
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
|
||||
return false
|
||||
}
|
||||
matches[i] = rune(b)
|
||||
offset++
|
||||
} else {
|
||||
r, w, err := tokenAPI.Rune.Peek(offset)
|
||||
r, w, err := tokenAPI.Input.Rune.Peek(offset)
|
||||
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
|
||||
return false
|
||||
}
|
||||
|
@ -683,7 +681,7 @@ func MatchStrNoCase(expected string) Handler {
|
|||
}
|
||||
i++
|
||||
}
|
||||
tokenAPI.Rune.AcceptMulti(matches...)
|
||||
tokenAPI.Input.Rune.AcceptMulti(matches...)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -746,9 +744,9 @@ func MatchNot(handler Handler) Handler {
|
|||
tokenAPI.RestoreSnapshot(snap)
|
||||
return false
|
||||
}
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -942,13 +940,13 @@ func MakeInputFlusher(handler Handler) Handler {
|
|||
// C.Signed(A.Integer)
|
||||
func MatchSigned(handler Handler) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
snap := tokenAPI.MakeSnapshot()
|
||||
if b == '-' || b == '+' {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
if handler(tokenAPI) {
|
||||
return true
|
||||
|
@ -985,7 +983,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
|
|||
// a successful or a failing match through its boolean return value.
|
||||
func MatchEndOfFile() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
_, err := tokenAPI.Byte.Peek(0)
|
||||
_, err := tokenAPI.Input.Byte.Peek(0)
|
||||
return err == io.EOF
|
||||
}
|
||||
}
|
||||
|
@ -994,15 +992,47 @@ func MatchEndOfFile() Handler {
|
|||
// more runes until the end of the line (or file when that's the case).
|
||||
// The newline itself is not included in the match.
|
||||
func MatchUntilEndOfLine() Handler {
|
||||
return MatchOneOrMore(MatchNot(MatchEndOfLine()))
|
||||
return func(tokenAPI *API) bool {
|
||||
f := tokenAPI.Input.Byte.AcceptMulti
|
||||
if tokenAPI.Output.suspended > 0 {
|
||||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||
}
|
||||
for {
|
||||
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
state := 0
|
||||
for i, b := range bs {
|
||||
if b == '\r' {
|
||||
state = 1
|
||||
continue
|
||||
}
|
||||
if b == '\n' {
|
||||
if state == 1 {
|
||||
f(bs[:i+1]...)
|
||||
} else {
|
||||
f(bs[:i]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
state = 0
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
f(bs...)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
f(bs...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAnyByte creates a Handler function that accepts any byte from the input.
|
||||
func MatchAnyByte() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1014,9 +1044,9 @@ func MatchAnyByte() Handler {
|
|||
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
|
||||
func MatchAnyRune() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1027,9 +1057,9 @@ func MatchAnyRune() Handler {
|
|||
// UTF8 rune can be read from the input.
|
||||
func MatchValidRune() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && r != utf8.RuneError {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1040,9 +1070,9 @@ func MatchValidRune() Handler {
|
|||
// UTF8 rune can be read from the input.
|
||||
func MatchInvalidRune() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && r == utf8.RuneError {
|
||||
tokenAPI.Rune.Accept(r)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1060,19 +1090,19 @@ func MatchDigit() Handler {
|
|||
func MatchDigits() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Check if the first character is a digit.
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return false
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
|
||||
// Continue accepting bytes as long as they are digits.
|
||||
for {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1091,7 +1121,7 @@ func MatchDigitNotZero() Handler {
|
|||
func MatchInteger(normalize bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Check if the first character is a digit.
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return false
|
||||
}
|
||||
|
@ -1099,33 +1129,33 @@ func MatchInteger(normalize bool) Handler {
|
|||
// When normalization is requested, drop leading zeroes.
|
||||
if normalize && b == '0' {
|
||||
for {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
|
||||
// The next character is a zero, skip the leading zero and check again.
|
||||
if err == nil && b2 == b {
|
||||
tokenAPI.Byte.MoveCursor('0')
|
||||
tokenAPI.Input.Byte.MoveCursor('0')
|
||||
continue
|
||||
}
|
||||
// The next character is not a zero, nor a digit at all.
|
||||
// We're looking at a zero on its own here.
|
||||
if err != nil || b2 < '1' || b2 > '9' {
|
||||
tokenAPI.Byte.Accept('0')
|
||||
tokenAPI.Input.Byte.Accept('0')
|
||||
return true
|
||||
}
|
||||
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||
tokenAPI.Byte.MoveCursor('0')
|
||||
tokenAPI.Byte.Accept(b2)
|
||||
tokenAPI.Input.Byte.MoveCursor('0')
|
||||
tokenAPI.Input.Byte.Accept(b2)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Continue accepting bytes as long as they are digits.
|
||||
for {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1140,7 +1170,7 @@ func MatchInteger(normalize bool) Handler {
|
|||
func MatchDecimal(normalize bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Check if the first character is a digit.
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return false
|
||||
}
|
||||
|
@ -1148,58 +1178,58 @@ func MatchDecimal(normalize bool) Handler {
|
|||
// When normalization is requested, drop leading zeroes.
|
||||
if normalize && b == '0' {
|
||||
for {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
|
||||
// The next character is a zero, skip the leading zero and check again.
|
||||
if err == nil && b2 == b {
|
||||
tokenAPI.Byte.MoveCursor('0')
|
||||
tokenAPI.Input.Byte.MoveCursor('0')
|
||||
continue
|
||||
}
|
||||
// The next character is a dot, go with the zero before the dot and
|
||||
// let the upcoming code handle the dot.
|
||||
if err == nil && b2 == '.' {
|
||||
tokenAPI.Byte.Accept('0')
|
||||
tokenAPI.Input.Byte.Accept('0')
|
||||
break
|
||||
}
|
||||
// The next character is not a zero, nor a digit at all.
|
||||
// We're looking at a zero on its own here.
|
||||
if err != nil || b2 < '1' || b2 > '9' {
|
||||
tokenAPI.Byte.Accept('0')
|
||||
tokenAPI.Input.Byte.Accept('0')
|
||||
return true
|
||||
}
|
||||
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||
tokenAPI.Byte.MoveCursor('0')
|
||||
tokenAPI.Byte.Accept(b2)
|
||||
tokenAPI.Input.Byte.MoveCursor('0')
|
||||
tokenAPI.Input.Byte.Accept(b2)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Continue accepting bytes as long as they are digits.
|
||||
for {
|
||||
b, err = tokenAPI.Byte.Peek(0)
|
||||
b, err = tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
break
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
|
||||
// No dot or no digit after a dot? Then we're done.
|
||||
if b != '.' {
|
||||
return true
|
||||
}
|
||||
b, err = tokenAPI.Byte.Peek(1)
|
||||
b, err = tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
return true
|
||||
}
|
||||
|
||||
// Continue accepting bytes as long as they are digits.
|
||||
tokenAPI.Byte.AcceptMulti('.', b)
|
||||
tokenAPI.Input.Byte.AcceptMulti('.', b)
|
||||
for {
|
||||
b, err = tokenAPI.Byte.Peek(0)
|
||||
b, err = tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b < '0' || b > '9' {
|
||||
break
|
||||
}
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
@ -1269,9 +1299,9 @@ func MatchUnicodeLower() Handler {
|
|||
// digit can be read from the input.
|
||||
func MatchHexDigit() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
|
||||
tokenAPI.Byte.Accept(b)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1289,28 +1319,28 @@ func MatchHexDigit() Handler {
|
|||
func MatchOctet(normalize bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Digit 1
|
||||
b0, err := tokenAPI.Byte.Peek(0)
|
||||
b0, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b0 < '0' || b0 > '9' {
|
||||
return false
|
||||
}
|
||||
|
||||
// Digit 2
|
||||
b1, err := tokenAPI.Byte.Peek(1)
|
||||
b1, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b1 < '0' || b1 > '9' {
|
||||
// Output 1-digit octet.
|
||||
tokenAPI.Byte.Accept(b0)
|
||||
tokenAPI.Input.Byte.Accept(b0)
|
||||
return true
|
||||
}
|
||||
|
||||
// Digit 3
|
||||
b2, err := tokenAPI.Byte.Peek(2)
|
||||
b2, err := tokenAPI.Input.Byte.Peek(2)
|
||||
if err != nil || b2 < '0' || b2 > '9' {
|
||||
// Output 2-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
tokenAPI.Byte.MoveCursor(b0)
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
} else {
|
||||
tokenAPI.Byte.AcceptMulti(b0, b1)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
@ -1322,15 +1352,15 @@ func MatchOctet(normalize bool) Handler {
|
|||
|
||||
// Output 3-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
tokenAPI.Byte.MoveCursor(b0)
|
||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
||||
if b1 == '0' {
|
||||
tokenAPI.Byte.MoveCursor(b1)
|
||||
tokenAPI.Input.Byte.MoveCursor(b1)
|
||||
} else {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
}
|
||||
tokenAPI.Byte.Accept(b2)
|
||||
tokenAPI.Input.Byte.Accept(b2)
|
||||
} else {
|
||||
tokenAPI.Byte.AcceptMulti(b0, b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
@ -1523,27 +1553,6 @@ func ModifyDrop(handler Handler) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// ModifyDropUntilEndOfLine creates a Handler that drops all input until an end of line
|
||||
// (or end of file). This handler is typically used when ignoring any input data after
|
||||
// a comment start like '#' or '//' when parsing code or configuration data.
|
||||
func ModifyDropUntilEndOfLine() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
for {
|
||||
b, err := tokenAPI.Byte.Peek(0)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
if b == '\n' {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.MoveCursor(b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ModifyTrim creates a Handler that checks if the provided Handler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from both the left and the right of the output.
|
||||
|
@ -1654,6 +1663,7 @@ func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
|
|||
})
|
||||
}
|
||||
|
||||
// TODO: This can probably be made faster by using the methods from (I think) the parse2 solution.
|
||||
func interpretString(str string) (string, error) {
|
||||
var sb strings.Builder
|
||||
for len(str) > 0 {
|
||||
|
|
|
@ -55,19 +55,19 @@ func ExampleNew() {
|
|||
|
||||
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
|
||||
tokenizeAPI := makeTokenizeAPI()
|
||||
r, _, _ := tokenizeAPI.Rune.Peek(0)
|
||||
r, _, _ := tokenizeAPI.Input.Rune.Peek(0)
|
||||
AssertEqual(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
tokenAPI := makeTokenizeAPI()
|
||||
|
||||
r0, _, _ := tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r0)
|
||||
r0, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r0)
|
||||
|
||||
r1, _, _ := tokenAPI.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls.
|
||||
r2, _, _ := tokenAPI.Rune.Peek(1)
|
||||
tokenAPI.Rune.AcceptMulti(r1, r2)
|
||||
r1, _, _ := tokenAPI.Input.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls.
|
||||
r2, _, _ := tokenAPI.Input.Rune.Peek(1)
|
||||
tokenAPI.Input.Rune.AcceptMulti(r1, r2)
|
||||
|
||||
AssertEqual(t, "Tes", tokenAPI.Output.String(), "i.String()")
|
||||
}
|
||||
|
@ -136,27 +136,27 @@ func TestAccept_UpdatesCursor(t *testing.T) {
|
|||
tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r)
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2")
|
||||
|
||||
r, _, _ := tokenAPI.Rune.Peek(0) // read "\n", cursor ends up at start of new line
|
||||
tokenAPI.Rune.Accept(r)
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0) // read "\n", cursor ends up at start of new line
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3")
|
||||
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
b, _ := tokenAPI.Byte.Peek(0)
|
||||
tokenAPI.Byte.Accept(b)
|
||||
b, _ := tokenAPI.Input.Byte.Peek(0)
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4")
|
||||
}
|
||||
|
||||
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
tokenAPI := tokenize.NewAPI(strings.NewReader("X"))
|
||||
r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r)
|
||||
r, _, err := tokenAPI.Rune.Peek(0)
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
|
@ -167,9 +167,9 @@ func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
|||
// child := i.Fork()
|
||||
|
||||
// // Go to the EOF.
|
||||
// r, _, _ := i.Rune.Peek(0)
|
||||
// i.Rune.Accept(r)
|
||||
// r, _, err := i.Rune.Peek(0)
|
||||
// r, _, _ := i.Input.Rune.Peek(0)
|
||||
// i.Input.Rune.Accept(r)
|
||||
// r, _, err := i.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
// AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()")
|
||||
|
||||
|
@ -177,7 +177,7 @@ func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
|||
// i.Dispose(child)
|
||||
|
||||
// // So here we should see the same input data as before.
|
||||
// r, _, err = i.Rune.Peek(0)
|
||||
// r, _, err = i.Input.Rune.Peek(0)
|
||||
// AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
// AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
// }
|
||||
|
|
|
@ -7,8 +7,8 @@ import (
|
|||
func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
tokenAPI := NewAPI("Testing")
|
||||
r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r) // T
|
||||
r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r) // T
|
||||
AssertEqual(t, "T", tokenAPI.Output.String(), "accepted rune in input")
|
||||
|
||||
// Split
|
||||
|
@ -17,10 +17,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
|
|||
AssertEqual(t, 1, tokenAPI.pointers.offset, "child offset")
|
||||
|
||||
// Accept two runes via fork.
|
||||
r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r) // e
|
||||
r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
tokenAPI.Rune.Accept(r) // s
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r) // e
|
||||
r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
tokenAPI.Input.Rune.Accept(r) // s
|
||||
AssertEqual(t, "es", tokenAPI.Output.String(), "result runes in split output")
|
||||
AssertEqual(t, 3, tokenAPI.pointers.offset, "offset in split output")
|
||||
|
||||
|
@ -32,16 +32,16 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
|
|||
|
||||
// func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
// tokenAPI := NewAPI("Testing")
|
||||
// r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // T
|
||||
// r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // T
|
||||
|
||||
// f1 := tokenAPI.Fork()
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // e
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // e
|
||||
|
||||
// f2 := tokenAPI.Fork()
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // s
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // s
|
||||
// AssertEqual(t, "s", tokenAPI.Output.String(), "f2 String()")
|
||||
// AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f2.offset A")
|
||||
|
||||
|
@ -63,10 +63,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
|
|||
// // false in this case, and nothing else happens.
|
||||
// AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input at start")
|
||||
|
||||
// r, _, _ := tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // c
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // o
|
||||
// r, _, _ := tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // c
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // o
|
||||
|
||||
// AssertTrue(t, tokenAPI.Input.Flush() == true, "flush input after reading some data")
|
||||
// AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after flush input")
|
||||
|
@ -76,10 +76,10 @@ func TestMakeSplitOutput_SplitsOutputAtActiveCursorPosition(t *testing.T) {
|
|||
// // Read offset is now zero, but reading should continue after "co".
|
||||
// // The output so far isn't modified, so the following accept calls
|
||||
// // will add their runes to the already accepted string "co".
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // o
|
||||
// r, _, _ = tokenAPI.Rune.Peek(0)
|
||||
// tokenAPI.Rune.Accept(r) // o
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // o
|
||||
// r, _, _ = tokenAPI.Input.Rune.Peek(0)
|
||||
// tokenAPI.Input.Rune.Accept(r) // o
|
||||
|
||||
// AssertEqual(t, "cool", tokenAPI.Output.String(), "end result")
|
||||
// }
|
||||
|
|
Loading…
Reference in New Issue