diff --git a/parse/api.go b/parse/api.go
index ad61e0b..38219c2 100644
--- a/parse/api.go
+++ b/parse/api.go
@@ -30,13 +30,13 @@ type API struct {
 // On a successful peek, the results (data + tokens) are returned by the peek.
 // They are availablel (as with Accept()) through parse.API.Result.
 func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
-	forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
-	t := parseAPI.tokenAPI
+	child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
+	tokenAPI := parseAPI.tokenAPI
 	if ok {
-		parseAPI.Result.Tokens = t.Output.Tokens()
-		parseAPI.Result.Runes = t.Output.Runes()
+		parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
+		parseAPI.Result.Runes = tokenAPI.Output.Runes()
 	}
-	t.Dispose(forkedAPI)
+	tokenAPI.Dispose(child)
 	return ok
 }
 
@@ -48,39 +48,14 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
 // No results (data + tokens) are returned by Peek(). If want access to the data
 // through parse.API.Result, make use of PeekWithResult() instead.
 func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
-	forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
+	child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
 	t := parseAPI.tokenAPI
 	parseAPI.Result.Tokens = nil
 	parseAPI.Result.Runes = nil
-	t.Dispose(forkedAPI)
+	t.Dispose(child)
 	return ok
 }
 
-// PeekChars is a very lightweight peek command, which takes a look at one or
-// more upcoming characters on the input data.
-//
-// If you need more complex logic for checking the upcoming input data, then
-// make use of the Peek() method with a tokenize.Handler function instead.
-func (parseAPI *API) PeekChars(chars ...rune) bool {
-	offset := 0
-	for _, r := range chars {
-		if r <= 0x1F {
-			b, err := parseAPI.tokenAPI.Byte.Peek(offset)
-			if err != nil || b != byte(r) {
-				return false
-			}
-			offset++
-		} else {
-			rRead, w, err := parseAPI.tokenAPI.Rune.Peek(offset)
-			if err != nil || rRead != r {
-				return false
-			}
-			offset += w
-		}
-	}
-	return true
-}
-
 // Accept checks if the upcoming input data matches the provided tokenize.Handler.
 // If it does, then true will be returned and the read cursor will be moved
 // forward to beyond the match that was found. Otherwise false will be
@@ -89,20 +64,20 @@ func (parseAPI *API) PeekChars(chars ...rune) bool {
 // After calling this method, you can retrieve the results using the Result() method.
 func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
 	t := parseAPI.tokenAPI
-	forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler)
+	child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
 	if ok {
 		// Keep track of the results as produced by this child.
 		parseAPI.Result.Tokens = t.Output.Tokens()
 		parseAPI.Result.Runes = t.Output.Runes()
 
 		// Merge to the parent level.
-		t.Merge(forkedAPI)
-		t.Dispose(forkedAPI)
+		t.Merge(child)
+		t.Dispose(child)
 
 		// And flush the input reader buffer.
 		t.Input.Flush()
 	} else {
-		t.Dispose(forkedAPI)
+		t.Dispose(child)
 	}
 	return ok
 }
diff --git a/read/read.go b/read/read.go
index 7e2b0e7..c002808 100644
--- a/read/read.go
+++ b/read/read.go
@@ -159,6 +159,41 @@ func (buf *Buffer) ByteAt(offset int) (byte, error) {
 	return buf.buffer[buf.start+offset], nil
 }
 
+// BytesAt reads at max the provided number of bytes at the provided byte offset.
+//
+// The byte offset is relative to the current starting position of the Buffer.
+// When starting reading, offset 0 will point at the start of the input.
+// After flushing, offset 0 will point at the input up to where the flush
+// was done.
+//
+// When reading was successful, the bytes will be returned. The returned
+// error will be nil.
+//
+// When reading failed, the returned byte slice might be empty, or it might
+// contain a part of the requested bytes. The error will not be nil.
+// One special read fail is actually a normal situation: end
+// of file reached. In that case, the returned error will be io.EOF.
+// When the offset lies beyond the position at which the read error
+// occurred, a nil slice is returned together with that error.
+//
+// Once a read error is encountered, that same read error is guaranteed to
+// be returned on every subsequent read at or beyond the provided offset.
+func (buf *Buffer) BytesAt(offset int, count int) ([]byte, error) {
+	if buf.len < offset+count && buf.err == nil {
+		buf.fill(offset + count)
+	}
+
+	if buf.err != nil && offset+count > buf.errOffset {
+		// Guard against a start offset past the error position: the slice
+		// expression below would panic there, because low > high.
+		if offset > buf.errOffset {
+			return nil, buf.err
+		}
+		return buf.buffer[buf.start+offset : buf.start+buf.errOffset], buf.err
+	}
+	return buf.buffer[buf.start+offset : buf.start+offset+count], nil
+}
+
 func (buf *Buffer) fill(minBytes int) {
 	// Grow the buffer so it can contain at least the number of requested bytes.
 	if minBytes > buf.cap-buf.start {
diff --git a/read/read_test.go b/read/read_test.go
index a4b4512..b2a9405 100644
--- a/read/read_test.go
+++ b/read/read_test.go
@@ -134,6 +134,24 @@ func ExampleBuffer_ByteAt() {
 	// Err: EOF
 }
 
+func ExampleBuffer_BytesAt() {
+	reader := New(strings.NewReader("Hello, world!"))
+
+	b, err := reader.BytesAt(0, 5)
+	fmt.Printf("%s err=%v\n", b, err)
+
+	b, err = reader.BytesAt(7, 10)
+	fmt.Printf("%s err=%v\n", b, err)
+
+	b, err = reader.BytesAt(7, 5)
+	fmt.Printf("%s err=%v\n", b, err)
+
+	// Output:
+	// Hello err=<nil>
+	// world! err=EOF
+	// world err=<nil>
+}
+
 func ExampleBuffer_RuneAt() {
 	reader := New(strings.NewReader("Hello, pןɹoʍ!"))
 
diff --git a/tokenize/api_bytemode.go b/tokenize/api_bytemode.go
index 75baafc..4f50849 100644
--- a/tokenize/api_bytemode.go
+++ b/tokenize/api_bytemode.go
@@ -17,6 +17,14 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
 	return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
 }
 
+// PeekMulti returns at max the provided maximum number of bytes at the provided
+// byte offset. When fewer bytes are available on the input, then this is not an
+// error as such. The returned error can in such case be set to io.EOF to indicate
+// that the end of the input was reached though.
+func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
+	return byteMode.reader.BytesAt(byteMode.api.stackFrame.offset+offset, count)
+}
+
 func (byteMode InputByteMode) Accept(b byte) {
 	byteMode.api.Output.AddByte(b)
 	byteMode.MoveCursor(b)
diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go
index 335e416..77a5d13 100644
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -639,22 +639,17 @@ func MatchEndOfLine() Handler {
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
 	expectedRunes := []rune(expected)
+	expectedBytes := []byte(expected)
+	expectedLength := len(expectedBytes)
 
 	return func(tokenAPI *API) bool {
-		offset := 0
-		for _, e := range expectedRunes {
-			if e <= '\x7F' {
-				b, err := tokenAPI.Byte.Peek(offset)
-				if err != nil || b != byte(e) {
-					return false
-				}
-				offset++
-			} else {
-				r, w, err := tokenAPI.Rune.Peek(offset)
-				if err != nil || e != r {
-					return false
-				}
-				offset += w
+		b, err := tokenAPI.Byte.PeekMulti(0, expectedLength)
+		if err != nil || len(b) < expectedLength {
+			return false
+		}
+		for i, bExpected := range expectedBytes {
+			if b[i] != bExpected {
+				return false
 			}
 		}
 		tokenAPI.Rune.AcceptMulti(expectedRunes...)
@@ -1232,55 +1227,59 @@ func MatchDecimal(normalize bool) Handler {
 // False falues: false, FALSE, False, 0, f, F
 func MatchBoolean() Handler {
 	return func(tokenAPI *API) bool {
-		b1, err := tokenAPI.Byte.Peek(0)
-		if err != nil {
+		// 5 bytes can hold all possible boolean values.
+		b, _ := tokenAPI.Byte.PeekMulti(0, 5)
+		l := len(b)
+
+		// No bytes read at all, so a definitive mismatch.
+		if l < 1 {
 			return false
 		}
-		if b1 == '1' || b1 == '0' {
-			tokenAPI.Byte.Accept(b1)
-			return true
-		}
-		if b1 == 't' || b1 == 'T' {
-			b2, err := tokenAPI.Byte.Peek(1)
-			if err != nil || (b2 != 'R' && b2 != 'r') {
-				tokenAPI.Byte.Accept(b1)
-				return true
-			}
-			b3, _ := tokenAPI.Byte.Peek(2)
-			b4, err := tokenAPI.Byte.Peek(3)
-			if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
-				tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
-				return true
-			}
-			if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
-				tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
-				return true
-			}
-			tokenAPI.Byte.Accept(b1)
+
+		// Boolean '0' or '1'.
+		if b[0] == '1' || b[0] == '0' {
+			tokenAPI.Byte.Accept(b[0])
 			return true
 		}
 
-		if b1 == 'f' || b1 == 'F' {
-			b2, err := tokenAPI.Byte.Peek(1)
-			if err != nil || (b2 != 'A' && b2 != 'a') {
-				tokenAPI.Byte.Accept(b1)
+		// Booleans 't', 'T', 'TRUE', 'True' or 'true'.
+		if b[0] == 't' || b[0] == 'T' {
+			tokenAPI.Byte.Accept(b[0])
+			if l < 4 {
 				return true
 			}
-			// TODO Multibyte peeks (also useful for strings)
-			b3, _ := tokenAPI.Byte.Peek(2)
-			b4, _ := tokenAPI.Byte.Peek(3)
-			b5, err := tokenAPI.Byte.Peek(4)
-			if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
-				tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
+			if b[0] == 't' {
+				if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
+					tokenAPI.Byte.AcceptMulti(b[1:4]...)
+				}
 				return true
 			}
-			if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' {
-				tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
-				return true
+			if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
+				(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
+				tokenAPI.Byte.AcceptMulti(b[1:4]...)
 			}
-			tokenAPI.Byte.Accept(b1)
 			return true
 		}
+
+		// Booleans 'f', 'F', 'FALSE', 'False' or 'false'.
+		if b[0] == 'f' || b[0] == 'F' {
+			tokenAPI.Byte.Accept(b[0])
+			if l < 5 {
+				return true
+			}
+			if b[0] == 'f' {
+				if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
+					tokenAPI.Byte.AcceptMulti(b[1:5]...)
+				}
+				return true
+			}
+			if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
+				(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
+				tokenAPI.Byte.AcceptMulti(b[1:5]...)
+			}
+			return true
+		}
+
 		return false
 	}
 }
diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go
index c190899..866c902 100644
--- a/tokenize/handlers_builtin_test.go
+++ b/tokenize/handlers_builtin_test.go
@@ -353,6 +353,7 @@ func TestModifiers(t *testing.T) {
 		{"missed me!", m.Drop(a.Rune('w')), false, ""},
 		{"where are you?", m.Drop(a.Rune('w')), true, ""},
 		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
+		{"cool", a.Str("cool"), true, "cool"},
 		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
 		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
 		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@@ -508,3 +509,27 @@ func TestCombination(t *testing.T) {
 		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
 	})
 }
+
+// 46709 ns/op
+func BenchmarkBoolean(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		tokenize.A.Boolean.Match("0")
+		tokenize.A.Boolean.Match("1")
+		tokenize.A.Boolean.Match("t")
+		tokenize.A.Boolean.Match("f")
+		tokenize.A.Boolean.Match("T")
+		tokenize.A.Boolean.Match("F")
+		tokenize.A.Boolean.Match("0XX")
+		tokenize.A.Boolean.Match("1XX")
+		tokenize.A.Boolean.Match("tXX")
+		tokenize.A.Boolean.Match("fXX")
+		tokenize.A.Boolean.Match("TXX")
+		tokenize.A.Boolean.Match("FXX")
+		tokenize.A.Boolean.Match("true")
+		tokenize.A.Boolean.Match("TRUE")
+		tokenize.A.Boolean.Match("True")
+		tokenize.A.Boolean.Match("false")
+		tokenize.A.Boolean.Match("FALSE")
+		tokenize.A.Boolean.Match("False")
+	}
+}
diff --git a/tokenize/tokenizer_whitebox_test.go b/tokenize/tokenizer_whitebox_test.go
index 79ef26b..23a64c7 100644
--- a/tokenize/tokenizer_whitebox_test.go
+++ b/tokenize/tokenizer_whitebox_test.go
@@ -89,22 +89,22 @@ func TestFlushInput(t *testing.T) {
 func TestInputFlusherWrapper(t *testing.T) {
 	runeA := A.Rune('a')
 	flushB := C.FlushInput(A.Rune('b'))
-	api := NewAPI("abaab")
-	runeA(api)
-	AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
-	AssertEqual(t, "a", api.Output.String(), "runes after 1 read")
-	flushB(api)
-	AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
-	AssertEqual(t, "ab", api.Output.String(), "runes after 2 reads")
-	runeA(api)
-	AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
-	AssertEqual(t, "aba", api.Output.String(), "runes after 3 reads")
-	runeA(api)
-	AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
-	AssertEqual(t, "abaa", api.Output.String(), "runes after 4 reads")
-	flushB(api)
-	AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
-	AssertEqual(t, "abaab", api.Output.String(), "runes after 5 reads")
+	tokenAPI := NewAPI("abaab")
+	runeA(tokenAPI)
+	AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 1 read")
+	AssertEqual(t, "a", tokenAPI.Output.String(), "runes after 1 read")
+	flushB(tokenAPI)
+	AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 2 reads + input flush")
+	AssertEqual(t, "ab", tokenAPI.Output.String(), "runes after 2 reads")
+	runeA(tokenAPI)
+	AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 3 reads")
+	AssertEqual(t, "aba", tokenAPI.Output.String(), "runes after 3 reads")
+	runeA(tokenAPI)
+	AssertEqual(t, 2, tokenAPI.stackFrame.offset, "offset after 4 reads")
+	AssertEqual(t, "abaa", tokenAPI.Output.String(), "runes after 4 reads")
+	flushB(tokenAPI)
+	AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 5 reads + input flush")
+	AssertEqual(t, "abaab", tokenAPI.Output.String(), "runes after 5 reads")
 }
 
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {