Added multi-byte peeks for some performance improvements.

Maurice Makaay 2019-07-23 23:23:40 +00:00
parent 7037c6d24a
commit 802701ade5
7 changed files with 156 additions and 103 deletions
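For context, a rough before/after sketch (not part of this commit) of the pattern this change targets: checking upcoming input one byte at a time versus fetching the whole range with a single multi-byte peek. The matchABC* names are made up; the Byte.Peek, Byte.PeekMulti and Byte.AcceptMulti calls are the ones added or used in the diffs below.

// Before: one Peek call (and one error check) per byte.
func matchABCPerByte(tokenAPI *API) bool {
	for i, want := range []byte("abc") {
		b, err := tokenAPI.Byte.Peek(i)
		if err != nil || b != want {
			return false
		}
	}
	tokenAPI.Byte.AcceptMulti('a', 'b', 'c')
	return true
}

// After: a single PeekMulti call, then plain slice comparisons.
func matchABCMulti(tokenAPI *API) bool {
	b, err := tokenAPI.Byte.PeekMulti(0, 3)
	if err != nil || len(b) < 3 {
		return false
	}
	if b[0] != 'a' || b[1] != 'b' || b[2] != 'c' {
		return false
	}
	tokenAPI.Byte.AcceptMulti(b...)
	return true
}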

View File

@ -30,13 +30,13 @@ type API struct {
// On a successful peek, the results (data + tokens) are returned by the peek.
// They are available (as with Accept()) through parse.API.Result.
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
tokenAPI := parseAPI.tokenAPI
if ok {
parseAPI.Result.Tokens = t.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes()
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
parseAPI.Result.Runes = tokenAPI.Output.Runes()
}
t.Dispose(forkedAPI)
tokenAPI.Dispose(child)
return ok
}
@ -48,39 +48,14 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
// No results (data + tokens) are returned by Peek(). If you want access to the data
// through parse.API.Result, make use of PeekWithResult() instead.
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
t := parseAPI.tokenAPI
parseAPI.Result.Tokens = nil
parseAPI.Result.Runes = nil
t.Dispose(forkedAPI)
t.Dispose(child)
return ok
}
// PeekChars is a very lightweight peek command, which takes a look at one or
// more upcoming characters on the input data.
//
// If you need more complex logic for checking the upcoming input data, then
// make use of the Peek() method with a tokenize.Handler function instead.
func (parseAPI *API) PeekChars(chars ...rune) bool {
offset := 0
for _, r := range chars {
if r <= 0x1F {
b, err := parseAPI.tokenAPI.Byte.Peek(offset)
if err != nil || b != byte(r) {
return false
}
offset++
} else {
rRead, w, err := parseAPI.tokenAPI.Rune.Peek(offset)
if err != nil || rRead != r {
return false
}
offset += w
}
}
return true
}
// Accept checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned and the read cursor will be moved
// forward to beyond the match that was found. Otherwise false will be
@ -89,20 +64,20 @@ func (parseAPI *API) PeekChars(chars ...rune) bool {
// After calling this method, you can retrieve the results using the Result() method.
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
t := parseAPI.tokenAPI
forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler)
child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
if ok {
// Keep track of the results as produced by this child.
parseAPI.Result.Tokens = t.Output.Tokens()
parseAPI.Result.Runes = t.Output.Runes()
// Merge to the parent level.
t.Merge(forkedAPI)
t.Dispose(forkedAPI)
t.Merge(child)
t.Dispose(child)
// And flush the input reader buffer.
t.Input.Flush()
} else {
t.Dispose(forkedAPI)
t.Dispose(child)
}
return ok
}
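A minimal sketch (not part of this commit) of how the three methods above might be used from inside a parse handler. It assumes parse handlers receive a *API, as the method receivers above suggest; parseGreeting is a hypothetical name, and helloHandler is built with the tokenize.MatchStr handler shown further down in this commit.

func parseGreeting(p *API) {
	helloHandler := tokenize.MatchStr("hello")

	// Cheap lookahead: returns true/false, stores nothing in p.Result
	// and leaves the read cursor where it is.
	upcoming := p.Peek(helloHandler)

	// Lookahead that also keeps the matched data available through p.Result.
	if upcoming && p.PeekWithResult(helloHandler) {
		_ = p.Result.Runes // the runes that the handler matched
	}

	// Consume the match and move the read cursor forward past it.
	if p.Accept(helloHandler) {
		_ = p.Result.Tokens // tokens produced by the handler, if any
	}
}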

View File

@ -159,6 +159,34 @@ func (buf *Buffer) ByteAt(offset int) (byte, error) {
return buf.buffer[buf.start+offset], nil
}
// BytesAt reads at most the provided number of bytes, starting at the provided byte offset.
//
// The byte offset is relative to the current starting position of the Buffer.
// When starting reading, offset 0 will point at the start of the input.
// After flushing, offset 0 will point at the input up to where the flush
// was done.
//
// When reading was successful, the bytes will be returned and the returned
// error will be nil.
//
// When reading failed, the returned byte slice might be empty, or it might
// contain a part of the requested bytes. The error will not be nil.
// One special read failure is actually a normal situation: end
// of file reached. In that case, the returned error will be io.EOF.
//
// Once a read error is encountered, that same read error is guaranteed to
// be returned on every subsequent read at or beyond the provided offset.
func (buf *Buffer) BytesAt(offset int, count int) ([]byte, error) {
if buf.len < offset+count && buf.err == nil {
buf.fill(offset + count)
}
if buf.err != nil && offset+count > buf.errOffset {
return buf.buffer[buf.start+offset : buf.start+buf.errOffset], buf.err
}
return buf.buffer[buf.start+offset : buf.start+offset+count], nil
}
func (buf *Buffer) fill(minBytes int) {
// Grow the buffer so it can contain at least the number of requested bytes.
if minBytes > buf.cap-buf.start {

View File

@ -134,6 +134,24 @@ func ExampleBuffer_ByteAt() {
// Err: EOF
}
func ExampleBuffer_BytesAt() {
reader := New(strings.NewReader("Hello, world!"))
b, err := reader.BytesAt(0, 5)
fmt.Printf("%s err=%v\n", b, err)
b, err = reader.BytesAt(7, 10)
fmt.Printf("%s err=%v\n", b, err)
b, err = reader.BytesAt(7, 5)
fmt.Printf("%s err=%v\n", b, err)
// Output:
// Hello err=<nil>
// world! err=EOF
// world err=<nil>
}
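A small follow-up sketch (not part of this commit), in the same style as the example above, of the repeated-error guarantee documented for BytesAt: once a read runs into io.EOF, the same error comes back for every later read at or beyond that offset, while the bytes up to the error are still returned. The ExampleBuffer_BytesAt_repeatedError name is hypothetical.

func ExampleBuffer_BytesAt_repeatedError() {
	buf := New(strings.NewReader("abc"))
	b, err := buf.BytesAt(0, 10) // read past the end of the input
	fmt.Printf("%s err=%v\n", b, err)
	b, err = buf.BytesAt(0, 10) // same error again on a repeated read
	fmt.Printf("%s err=%v\n", b, err)
	b, err = buf.BytesAt(0, 3) // a read within the available data still succeeds
	fmt.Printf("%s err=%v\n", b, err)
	// Output:
	// abc err=EOF
	// abc err=EOF
	// abc err=<nil>
}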
func ExampleBuffer_RuneAt() {
reader := New(strings.NewReader("Hello, pןɹoʍ!"))

View File

@ -17,6 +17,14 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
}
// PeekMulti returns at most the provided number of bytes, starting at the provided
// byte offset. When fewer bytes are available on the input, this is not an error
// as such; in that case the returned error may be set to io.EOF to indicate that
// the end of the input was reached.
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
return byteMode.reader.BytesAt(byteMode.api.stackFrame.offset+offset, count)
}
func (byteMode InputByteMode) Accept(b byte) {
byteMode.api.Output.AddByte(b)
byteMode.MoveCursor(b)

View File

@ -639,22 +639,17 @@ func MatchEndOfLine() Handler {
// MatchStr creates a Handler that matches the input against the provided string.
func MatchStr(expected string) Handler {
expectedRunes := []rune(expected)
expectedBytes := []byte(expected)
expectedLength := len(expectedBytes)
return func(tokenAPI *API) bool {
offset := 0
for _, e := range expectedRunes {
if e <= '\x7F' {
b, err := tokenAPI.Byte.Peek(offset)
if err != nil || b != byte(e) {
return false
}
offset++
} else {
r, w, err := tokenAPI.Rune.Peek(offset)
if err != nil || e != r {
return false
}
offset += w
b, err := tokenAPI.Byte.PeekMulti(0, expectedLength)
if err != nil || len(b) < expectedLength {
return false
}
for i, bExpected := range expectedBytes {
if b[i] != bExpected {
return false
}
}
tokenAPI.Rune.AcceptMulti(expectedRunes...)
@ -1232,55 +1227,59 @@ func MatchDecimal(normalize bool) Handler {
// False values: false, FALSE, False, 0, f, F
func MatchBoolean() Handler {
return func(tokenAPI *API) bool {
b1, err := tokenAPI.Byte.Peek(0)
if err != nil {
// 5 bytes are enough to hold the longest possible boolean value ("false" / "FALSE").
b, _ := tokenAPI.Byte.PeekMulti(0, 5)
l := len(b)
// No bytes read at all, so a definitive mismatch.
if l < 1 {
return false
}
if b1 == '1' || b1 == '0' {
tokenAPI.Byte.Accept(b1)
return true
}
if b1 == 't' || b1 == 'T' {
b2, err := tokenAPI.Byte.Peek(1)
if err != nil || (b2 != 'R' && b2 != 'r') {
tokenAPI.Byte.Accept(b1)
return true
}
b3, _ := tokenAPI.Byte.Peek(2)
b4, err := tokenAPI.Byte.Peek(3)
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
return true
}
tokenAPI.Byte.Accept(b1)
// Boolean '0' or '1'.
if b[0] == '1' || b[0] == '0' {
tokenAPI.Byte.Accept(b[0])
return true
}
if b1 == 'f' || b1 == 'F' {
b2, err := tokenAPI.Byte.Peek(1)
if err != nil || (b2 != 'A' && b2 != 'a') {
tokenAPI.Byte.Accept(b1)
// Booleans 't', 'T', 'TRUE', 'True' or 'true'.
if b[0] == 't' || b[0] == 'T' {
tokenAPI.Byte.Accept(b[0])
if l < 4 {
return true
}
// TODO Multibyte peeks (also useful for strings)
b3, _ := tokenAPI.Byte.Peek(2)
b4, _ := tokenAPI.Byte.Peek(3)
b5, err := tokenAPI.Byte.Peek(4)
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
if b[0] == 't' {
if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
tokenAPI.Byte.AcceptMulti(b[1:4]...)
}
return true
}
if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' {
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
return true
if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
tokenAPI.Byte.AcceptMulti(b[1:4]...)
}
tokenAPI.Byte.Accept(b1)
return true
}
// Booleans 'f', 'F', 'FALSE', 'False' or 'false'.
if b[0] == 'f' || b[0] == 'F' {
tokenAPI.Byte.Accept(b[0])
if l < 5 {
return true
}
if b[0] == 'f' {
if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
return false
}
}
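A minimal sketch (not part of this commit) of exercising the rewritten MatchBoolean directly, written in the style of a package-internal example test; the ExampleMatchBoolean name is hypothetical, and it only uses NewAPI, the Handler call convention, and Output.String() as they appear elsewhere in this commit.

func ExampleMatchBoolean() {
	tokenAPI := NewAPI("True enough")
	if MatchBoolean()(tokenAPI) {
		// The handler accepted the matching bytes, so they show up in the output.
		fmt.Println(tokenAPI.Output.String())
	}
	// Output:
	// True
}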

View File

@ -353,6 +353,7 @@ func TestModifiers(t *testing.T) {
{"missed me!", m.Drop(a.Rune('w')), false, ""},
{"where are you?", m.Drop(a.Rune('w')), true, ""},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{"cool", a.Str("cool"), true, "cool"},
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@ -508,3 +509,27 @@ func TestCombination(t *testing.T) {
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
})
}
// 46709 ns/op
func BenchmarkBoolean(b *testing.B) {
for i := 0; i < b.N; i++ {
tokenize.A.Boolean.Match("0")
tokenize.A.Boolean.Match("1")
tokenize.A.Boolean.Match("t")
tokenize.A.Boolean.Match("f")
tokenize.A.Boolean.Match("T")
tokenize.A.Boolean.Match("F")
tokenize.A.Boolean.Match("0XX")
tokenize.A.Boolean.Match("1XX")
tokenize.A.Boolean.Match("tXX")
tokenize.A.Boolean.Match("fXX")
tokenize.A.Boolean.Match("TXX")
tokenize.A.Boolean.Match("FXX")
tokenize.A.Boolean.Match("true")
tokenize.A.Boolean.Match("TRUE")
tokenize.A.Boolean.Match("True")
tokenize.A.Boolean.Match("false")
tokenize.A.Boolean.Match("FALSE")
tokenize.A.Boolean.Match("False")
}
}

View File

@ -89,22 +89,22 @@ func TestFlushInput(t *testing.T) {
func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a')
flushB := C.FlushInput(A.Rune('b'))
api := NewAPI("abaab")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", api.Output.String(), "runes after 1 read")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", api.Output.String(), "runes after 2 reads")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", api.Output.String(), "runes after 3 reads")
runeA(api)
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", api.Output.String(), "runes after 4 reads")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", api.Output.String(), "runes after 5 reads")
tokenAPI := NewAPI("abaab")
runeA(tokenAPI)
AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", tokenAPI.Output.String(), "runes after 1 read")
flushB(tokenAPI)
AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", tokenAPI.Output.String(), "runes after 2 reads")
runeA(tokenAPI)
AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", tokenAPI.Output.String(), "runes after 3 reads")
runeA(tokenAPI)
AssertEqual(t, 2, tokenAPI.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", tokenAPI.Output.String(), "runes after 4 reads")
flushB(tokenAPI)
AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", tokenAPI.Output.String(), "runes after 5 reads")
}
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {