Added multi-byte peeks for some performance improvements.

This commit is contained in:
Maurice Makaay 2019-07-23 23:23:40 +00:00
parent 7037c6d24a
commit 802701ade5
7 changed files with 156 additions and 103 deletions

View File

@ -30,13 +30,13 @@ type API struct {
// On a successful peek, the results (data + tokens) are returned by the peek. // On a successful peek, the results (data + tokens) are returned by the peek.
// They are available (as with Accept()) through parse.API.Result. // They are available (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool { func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler) child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI tokenAPI := parseAPI.tokenAPI
if ok { if ok {
parseAPI.Result.Tokens = t.Output.Tokens() parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes() parseAPI.Result.Runes = tokenAPI.Output.Runes()
} }
t.Dispose(forkedAPI) tokenAPI.Dispose(child)
return ok return ok
} }
@ -48,39 +48,14 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
// No results (data + tokens) are returned by Peek(). If you want access to the data // No results (data + tokens) are returned by Peek(). If you want access to the data
// through parse.API.Result, make use of PeekWithResult() instead. // through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool { func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler) child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI t := parseAPI.tokenAPI
parseAPI.Result.Tokens = nil parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil parseAPI.Result.Runes = nil
t.Dispose(forkedAPI) t.Dispose(child)
return ok return ok
} }
// PeekChars is a very lightweight peek command, which takes a look at one or
// more upcoming characters on the input data. It returns true only when every
// provided character is found on the input, in the provided order. When no
// characters are provided at all, true is returned.
//
// If you need more complex logic for checking the upcoming input data, then
// make use of the Peek() method with a tokenize.Handler function instead.
func (parseAPI *API) PeekChars(chars ...rune) bool {
	offset := 0
	for _, r := range chars {
		// ASCII characters (0x00 - 0x7F) are encoded as a single byte in
		// UTF-8, so these can be checked using a cheap byte peek. The lower
		// bound check keeps negative (invalid) rune values away from the
		// byte(r) conversion, which would truncate them into a byte value
		// that could accidentally match the input.
		if r >= 0 && r <= 0x7F {
			b, err := parseAPI.tokenAPI.Byte.Peek(offset)
			if err != nil || b != byte(r) {
				return false
			}
			offset++
			continue
		}

		// All other characters are checked using a rune peek. The second
		// return value is used to advance the offset for the next peek.
		rRead, w, err := parseAPI.tokenAPI.Rune.Peek(offset)
		if err != nil || rRead != r {
			return false
		}
		offset += w
	}
	return true
}
// Accept checks if the upcoming input data matches the provided tokenize.Handler. // Accept checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned and the read cursor will be moved // If it does, then true will be returned and the read cursor will be moved
// forward to beyond the match that was found. Otherwise false will be // forward to beyond the match that was found. Otherwise false will be
@ -89,20 +64,20 @@ func (parseAPI *API) PeekChars(chars ...rune) bool {
// After calling this method, you can retrieve the results using the Result() method. // After calling this method, you can retrieve the results using the Result() method.
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
t := parseAPI.tokenAPI t := parseAPI.tokenAPI
forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler) child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
if ok { if ok {
// Keep track of the results as produced by this child. // Keep track of the results as produced by this child.
parseAPI.Result.Tokens = t.Output.Tokens() parseAPI.Result.Tokens = t.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes() parseAPI.Result.Runes = t.Output.Runes()
// Merge to the parent level. // Merge to the parent level.
t.Merge(forkedAPI) t.Merge(child)
t.Dispose(forkedAPI) t.Dispose(child)
// And flush the input reader buffer. // And flush the input reader buffer.
t.Input.Flush() t.Input.Flush()
} else { } else {
t.Dispose(forkedAPI) t.Dispose(child)
} }
return ok return ok
} }

View File

@ -159,6 +159,34 @@ func (buf *Buffer) ByteAt(offset int) (byte, error) {
return buf.buffer[buf.start+offset], nil return buf.buffer[buf.start+offset], nil
} }
// BytesAt reads at most count bytes, starting at the provided byte offset.
//
// The byte offset is relative to the current starting position of the Buffer.
// When starting reading, offset 0 will point at the start of the input.
// After flushing, offset 0 will point at the input up to where the flush
// was done.
//
// When reading was successful, a slice holding exactly count bytes will be
// returned. The returned error will be nil.
//
// When reading failed, the returned byte slice might be empty, or it might
// contain a part of the requested bytes. The error will not be nil.
// One special read failure is actually a normal situation: end
// of file reached. In that case, the returned error will be io.EOF.
//
// Once a read error is encountered, that same read error is guaranteed to
// be returned on every subsequent read that extends beyond the offset at
// which the error occurred. This includes reads that start beyond that
// offset; those return a nil slice together with the error.
//
// The returned slice is a view on the internal storage of the Buffer; it is
// not a copy.
func (buf *Buffer) BytesAt(offset int, count int) ([]byte, error) {
	end := offset + count

	// Only try to load more data when the requested range is not yet
	// available and no earlier read error was recorded.
	if buf.len < end && buf.err == nil {
		buf.fill(end)
	}

	if buf.err != nil && end > buf.errOffset {
		// The requested range starts beyond the error position, so there is
		// no data to return. Slicing here would use a start index that is
		// larger than the end index, which results in a runtime panic.
		if offset > buf.errOffset {
			return nil, buf.err
		}
		// Return the part of the requested range that lies before the
		// error position, together with the error.
		return buf.buffer[buf.start+offset : buf.start+buf.errOffset], buf.err
	}

	return buf.buffer[buf.start+offset : buf.start+end], nil
}
func (buf *Buffer) fill(minBytes int) { func (buf *Buffer) fill(minBytes int) {
// Grow the buffer so it can contain at least the number of requested bytes. // Grow the buffer so it can contain at least the number of requested bytes.
if minBytes > buf.cap-buf.start { if minBytes > buf.cap-buf.start {

View File

@ -134,6 +134,24 @@ func ExampleBuffer_ByteAt() {
// Err: EOF // Err: EOF
} }
// ExampleBuffer_BytesAt demonstrates BytesAt for a read that is fully
// available, a read that runs past the end of the input and a read that
// follows after the end of the input was already encountered.
func ExampleBuffer_BytesAt() {
	reader := New(strings.NewReader("Hello, world!"))

	// The reads are performed in the listed order.
	reads := []struct{ offset, count int }{
		{0, 5},
		{7, 10},
		{7, 5},
	}
	for _, read := range reads {
		data, readErr := reader.BytesAt(read.offset, read.count)
		fmt.Printf("%s err=%v\n", data, readErr)
	}

	// Output:
	// Hello err=<nil>
	// world! err=EOF
	// world err=<nil>
}
func ExampleBuffer_RuneAt() { func ExampleBuffer_RuneAt() {
reader := New(strings.NewReader("Hello, pןɹoʍ!")) reader := New(strings.NewReader("Hello, pןɹoʍ!"))

View File

@ -17,6 +17,14 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset) return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
} }
// PeekMulti returns at most count bytes from the input, starting at the
// provided byte offset. The offset is added to the offset of the current
// stack frame before the bytes are requested from the reader.
//
// When fewer bytes than requested are available on the input, then this is
// not an error as such. The returned error can in such case be set to io.EOF
// to indicate that the end of the input was reached though.
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
	readerOffset := byteMode.api.stackFrame.offset + offset
	return byteMode.reader.BytesAt(readerOffset, count)
}
func (byteMode InputByteMode) Accept(b byte) { func (byteMode InputByteMode) Accept(b byte) {
byteMode.api.Output.AddByte(b) byteMode.api.Output.AddByte(b)
byteMode.MoveCursor(b) byteMode.MoveCursor(b)

View File

@ -639,22 +639,17 @@ func MatchEndOfLine() Handler {
// MatchStr creates a Handler that matches the input against the provided string. // MatchStr creates a Handler that matches the input against the provided string.
func MatchStr(expected string) Handler { func MatchStr(expected string) Handler {
expectedRunes := []rune(expected) expectedRunes := []rune(expected)
expectedBytes := []byte(expected)
expectedLength := len(expectedBytes)
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
offset := 0 b, err := tokenAPI.Byte.PeekMulti(0, expectedLength)
for _, e := range expectedRunes { if err != nil || len(b) < expectedLength {
if e <= '\x7F' { return false
b, err := tokenAPI.Byte.Peek(offset) }
if err != nil || b != byte(e) { for i, bExpected := range expectedBytes {
return false if b[i] != bExpected {
} return false
offset++
} else {
r, w, err := tokenAPI.Rune.Peek(offset)
if err != nil || e != r {
return false
}
offset += w
} }
} }
tokenAPI.Rune.AcceptMulti(expectedRunes...) tokenAPI.Rune.AcceptMulti(expectedRunes...)
@ -1232,55 +1227,59 @@ func MatchDecimal(normalize bool) Handler {
// False values: false, FALSE, False, 0, f, F // False values: false, FALSE, False, 0, f, F
func MatchBoolean() Handler { func MatchBoolean() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b1, err := tokenAPI.Byte.Peek(0) // 5 bytes can hold all possible boolean values.
if err != nil { b, _ := tokenAPI.Byte.PeekMulti(0, 5)
l := len(b)
// No bytes read at all, so a definitive mismatch.
if l < 1 {
return false return false
} }
if b1 == '1' || b1 == '0' {
tokenAPI.Byte.Accept(b1) // Boolean '0' or '1'.
return true if b[0] == '1' || b[0] == '0' {
} tokenAPI.Byte.Accept(b[0])
if b1 == 't' || b1 == 'T' {
b2, err := tokenAPI.Byte.Peek(1)
if err != nil || (b2 != 'R' && b2 != 'r') {
tokenAPI.Byte.Accept(b1)
return true
}
b3, _ := tokenAPI.Byte.Peek(2)
b4, err := tokenAPI.Byte.Peek(3)
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
tokenAPI.Byte.Accept(b1)
return true return true
} }
if b1 == 'f' || b1 == 'F' { // Booleans 't', 'T', 'TRUE', True' or 'true'.
b2, err := tokenAPI.Byte.Peek(1) if b[0] == 't' || b[0] == 'T' {
if err != nil || (b2 != 'A' && b2 != 'a') { tokenAPI.Byte.Accept(b[0])
tokenAPI.Byte.Accept(b1) if l < 4 {
return true return true
} }
// TODO Multibyte peeks (also useful for strings) if b[0] == 't' {
b3, _ := tokenAPI.Byte.Peek(2) if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
b4, _ := tokenAPI.Byte.Peek(3) tokenAPI.Byte.AcceptMulti(b[1:4]...)
b5, err := tokenAPI.Byte.Peek(4) }
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
return true return true
} }
if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' { if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5) (b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
return true tokenAPI.Byte.AcceptMulti(b[1:4]...)
} }
tokenAPI.Byte.Accept(b1)
return true return true
} }
// Booleans 'f', 'F', 'FALSE', 'False' or 'false'.
if b[0] == 'f' || b[0] == 'F' {
tokenAPI.Byte.Accept(b[0])
if l < 5 {
return true
}
if b[0] == 'f' {
if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
return false return false
} }
} }

View File

@ -353,6 +353,7 @@ func TestModifiers(t *testing.T) {
{"missed me!", m.Drop(a.Rune('w')), false, ""}, {"missed me!", m.Drop(a.Rune('w')), false, ""},
{"where are you?", m.Drop(a.Rune('w')), true, ""}, {"where are you?", m.Drop(a.Rune('w')), true, ""},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{"cool", a.Str("cool"), true, "cool"},
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"}, {"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@ -508,3 +509,27 @@ func TestCombination(t *testing.T) {
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"}, {"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
}) })
} }
// BenchmarkBoolean runs the Boolean matcher against single character
// booleans, single character booleans followed by other data, and the fully
// written out forms in lower, upper and title case.
//
// Recorded result: 46709 ns/op
func BenchmarkBoolean(b *testing.B) {
	inputs := []string{
		"0", "1", "t", "f", "T", "F",
		"0XX", "1XX", "tXX", "fXX", "TXX", "FXX",
		"true", "TRUE", "True",
		"false", "FALSE", "False",
	}
	for i := 0; i < b.N; i++ {
		for _, input := range inputs {
			tokenize.A.Boolean.Match(input)
		}
	}
}

View File

@ -89,22 +89,22 @@ func TestFlushInput(t *testing.T) {
func TestInputFlusherWrapper(t *testing.T) { func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a') runeA := A.Rune('a')
flushB := C.FlushInput(A.Rune('b')) flushB := C.FlushInput(A.Rune('b'))
api := NewAPI("abaab") tokenAPI := NewAPI("abaab")
runeA(api) runeA(tokenAPI)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read") AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", api.Output.String(), "runes after 1 read") AssertEqual(t, "a", tokenAPI.Output.String(), "runes after 1 read")
flushB(api) flushB(tokenAPI)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush") AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", api.Output.String(), "runes after 2 reads") AssertEqual(t, "ab", tokenAPI.Output.String(), "runes after 2 reads")
runeA(api) runeA(tokenAPI)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads") AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", api.Output.String(), "runes after 3 reads") AssertEqual(t, "aba", tokenAPI.Output.String(), "runes after 3 reads")
runeA(api) runeA(tokenAPI)
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads") AssertEqual(t, 2, tokenAPI.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", api.Output.String(), "runes after 4 reads") AssertEqual(t, "abaa", tokenAPI.Output.String(), "runes after 4 reads")
flushB(api) flushB(tokenAPI)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush") AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", api.Output.String(), "runes after 5 reads") AssertEqual(t, "abaab", tokenAPI.Output.String(), "runes after 5 reads")
} }
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) { func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {