Added multi-byte peeks for some performance improvements.
This commit is contained in:
parent
7037c6d24a
commit
802701ade5
47
parse/api.go
47
parse/api.go
|
@ -30,13 +30,13 @@ type API struct {
|
|||
// On a successful peek, the results (data + tokens) are returned by the peek.
|
||||
// They are available (as with Accept()) through parse.API.Result.
|
||||
func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
|
||||
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
t := parseAPI.tokenAPI
|
||||
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
tokenAPI := parseAPI.tokenAPI
|
||||
if ok {
|
||||
parseAPI.Result.Tokens = t.Output.Tokens()
|
||||
parseAPI.Result.Runes = t.Output.Runes()
|
||||
parseAPI.Result.Tokens = tokenAPI.Output.Tokens()
|
||||
parseAPI.Result.Runes = tokenAPI.Output.Runes()
|
||||
}
|
||||
t.Dispose(forkedAPI)
|
||||
tokenAPI.Dispose(child)
|
||||
return ok
|
||||
}
|
||||
|
||||
|
@ -48,39 +48,14 @@ func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool {
|
|||
// No results (data + tokens) are returned by Peek(). If you want access to the data
|
||||
// through parse.API.Result, make use of PeekWithResult() instead.
|
||||
func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool {
|
||||
forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
child, ok := parseAPI.invokeHandler("Peek", tokenHandler)
|
||||
t := parseAPI.tokenAPI
|
||||
parseAPI.Result.Tokens = nil
|
||||
parseAPI.Result.Runes = nil
|
||||
t.Dispose(forkedAPI)
|
||||
t.Dispose(child)
|
||||
return ok
|
||||
}
|
||||
|
||||
// PeekChars is a very lightweight peek command, which takes a look at one or
|
||||
// more upcoming characters on the input data.
|
||||
//
|
||||
// If you need more complex logic for checking the upcoming input data, then
|
||||
// make use of the Peek() method with a tokenize.Handler function instead.
|
||||
func (parseAPI *API) PeekChars(chars ...rune) bool {
|
||||
offset := 0
|
||||
for _, r := range chars {
|
||||
if r <= 0x1F {
|
||||
b, err := parseAPI.tokenAPI.Byte.Peek(offset)
|
||||
if err != nil || b != byte(r) {
|
||||
return false
|
||||
}
|
||||
offset++
|
||||
} else {
|
||||
rRead, w, err := parseAPI.tokenAPI.Rune.Peek(offset)
|
||||
if err != nil || rRead != r {
|
||||
return false
|
||||
}
|
||||
offset += w
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Accept checks if the upcoming input data matches the provided tokenize.Handler.
|
||||
// If it does, then true will be returned and the read cursor will be moved
|
||||
// forward to beyond the match that was found. Otherwise false will be
|
||||
|
@ -89,20 +64,20 @@ func (parseAPI *API) PeekChars(chars ...rune) bool {
|
|||
// After calling this method, you can retrieve the results using the Result() method.
|
||||
func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool {
|
||||
t := parseAPI.tokenAPI
|
||||
forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler)
|
||||
child, ok := parseAPI.invokeHandler("Accept", tokenHandler)
|
||||
if ok {
|
||||
// Keep track of the results as produced by this child.
|
||||
parseAPI.Result.Tokens = t.Output.Tokens()
|
||||
parseAPI.Result.Runes = t.Output.Runes()
|
||||
|
||||
// Merge to the parent level.
|
||||
t.Merge(forkedAPI)
|
||||
t.Dispose(forkedAPI)
|
||||
t.Merge(child)
|
||||
t.Dispose(child)
|
||||
|
||||
// And flush the input reader buffer.
|
||||
t.Input.Flush()
|
||||
} else {
|
||||
t.Dispose(forkedAPI)
|
||||
t.Dispose(child)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
|
28
read/read.go
28
read/read.go
|
@ -159,6 +159,34 @@ func (buf *Buffer) ByteAt(offset int) (byte, error) {
|
|||
return buf.buffer[buf.start+offset], nil
|
||||
}
|
||||
|
||||
// BytesAt reads at max the provided number of bytes at the provided byte offset.
|
||||
//
|
||||
// The byte offset is relative to the current starting position of the Buffer.
|
||||
// When starting reading, offset 0 will point at the start of the input.
|
||||
// After flushing, offset 0 will point at the input up to where the flush
|
||||
// was done.
|
||||
//
|
||||
// When reading was successful, the byte will be returned. The returned
|
||||
// error will be nil.
|
||||
//
|
||||
// When reading failed, the returned byte slice might be empty, or it might
|
||||
// contain a part of the requsted bytes. The error will not be nil.
|
||||
// One special read fail is actually a normal situation: end
|
||||
// of file reached. In that case, the returned error wille be io.EOF.
|
||||
//
|
||||
// Once a read error is encountered, that same read error will guaranteed
|
||||
// be return on every subsequent read at or beyond the provided offset.
|
||||
func (buf *Buffer) BytesAt(offset int, count int) ([]byte, error) {
|
||||
if buf.len < offset+count && buf.err == nil {
|
||||
buf.fill(offset + count)
|
||||
}
|
||||
|
||||
if buf.err != nil && offset+count > buf.errOffset {
|
||||
return buf.buffer[buf.start+offset : buf.start+buf.errOffset], buf.err
|
||||
}
|
||||
return buf.buffer[buf.start+offset : buf.start+offset+count], nil
|
||||
}
|
||||
|
||||
func (buf *Buffer) fill(minBytes int) {
|
||||
// Grow the buffer so it can contain at least the number of requested bytes.
|
||||
if minBytes > buf.cap-buf.start {
|
||||
|
|
|
@ -134,6 +134,24 @@ func ExampleBuffer_ByteAt() {
|
|||
// Err: EOF
|
||||
}
|
||||
|
||||
func ExampleBuffer_BytesAt() {
|
||||
reader := New(strings.NewReader("Hello, world!"))
|
||||
|
||||
b, err := reader.BytesAt(0, 5)
|
||||
fmt.Printf("%s err=%v\n", b, err)
|
||||
|
||||
b, err = reader.BytesAt(7, 10)
|
||||
fmt.Printf("%s err=%v\n", b, err)
|
||||
|
||||
b, err = reader.BytesAt(7, 5)
|
||||
fmt.Printf("%s err=%v\n", b, err)
|
||||
|
||||
// Output:
|
||||
// Hello err=<nil>
|
||||
// world! err=EOF
|
||||
// world err=<nil>
|
||||
}
|
||||
|
||||
func ExampleBuffer_RuneAt() {
|
||||
reader := New(strings.NewReader("Hello, pןɹoʍ!"))
|
||||
|
||||
|
|
|
@ -17,6 +17,14 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
|||
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
// PeekMulti returns at max the provided maximum number of bytes at the provided
|
||||
// byte offset. When less bytes are available on the input, then this is not an
|
||||
// error as such. The returned error can in such case be set to io.EOF to indicate
|
||||
// that the end of the input was reached though.
|
||||
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
||||
return byteMode.reader.BytesAt(byteMode.api.stackFrame.offset+offset, count)
|
||||
}
|
||||
|
||||
func (byteMode InputByteMode) Accept(b byte) {
|
||||
byteMode.api.Output.AddByte(b)
|
||||
byteMode.MoveCursor(b)
|
||||
|
|
|
@ -639,22 +639,17 @@ func MatchEndOfLine() Handler {
|
|||
// MatchStr creates a Handler that matches the input against the provided string.
|
||||
func MatchStr(expected string) Handler {
|
||||
expectedRunes := []rune(expected)
|
||||
expectedBytes := []byte(expected)
|
||||
expectedLength := len(expectedBytes)
|
||||
|
||||
return func(tokenAPI *API) bool {
|
||||
offset := 0
|
||||
for _, e := range expectedRunes {
|
||||
if e <= '\x7F' {
|
||||
b, err := tokenAPI.Byte.Peek(offset)
|
||||
if err != nil || b != byte(e) {
|
||||
return false
|
||||
}
|
||||
offset++
|
||||
} else {
|
||||
r, w, err := tokenAPI.Rune.Peek(offset)
|
||||
if err != nil || e != r {
|
||||
return false
|
||||
}
|
||||
offset += w
|
||||
b, err := tokenAPI.Byte.PeekMulti(0, expectedLength)
|
||||
if err != nil || len(b) < expectedLength {
|
||||
return false
|
||||
}
|
||||
for i, bExpected := range expectedBytes {
|
||||
if b[i] != bExpected {
|
||||
return false
|
||||
}
|
||||
}
|
||||
tokenAPI.Rune.AcceptMulti(expectedRunes...)
|
||||
|
@ -1232,55 +1227,59 @@ func MatchDecimal(normalize bool) Handler {
|
|||
// False values: false, FALSE, False, 0, f, F
|
||||
func MatchBoolean() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b1, err := tokenAPI.Byte.Peek(0)
|
||||
if err != nil {
|
||||
// 5 bytes can hold all possible boolean values.
|
||||
b, _ := tokenAPI.Byte.PeekMulti(0, 5)
|
||||
l := len(b)
|
||||
|
||||
// No bytes read at all, so a definitive mismatch.
|
||||
if l < 1 {
|
||||
return false
|
||||
}
|
||||
if b1 == '1' || b1 == '0' {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
if b1 == 't' || b1 == 'T' {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
if err != nil || (b2 != 'R' && b2 != 'r') {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
b3, _ := tokenAPI.Byte.Peek(2)
|
||||
b4, err := tokenAPI.Byte.Peek(3)
|
||||
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
|
||||
return true
|
||||
}
|
||||
if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4)
|
||||
return true
|
||||
}
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
|
||||
// Boolean '0' or '1'.
|
||||
if b[0] == '1' || b[0] == '0' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
return true
|
||||
}
|
||||
|
||||
if b1 == 'f' || b1 == 'F' {
|
||||
b2, err := tokenAPI.Byte.Peek(1)
|
||||
if err != nil || (b2 != 'A' && b2 != 'a') {
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
// Booleans 't', 'T', 'TRUE', 'True' or 'true'.
|
||||
if b[0] == 't' || b[0] == 'T' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
if l < 4 {
|
||||
return true
|
||||
}
|
||||
// TODO Multibyte peeks (also useful for strings)
|
||||
b3, _ := tokenAPI.Byte.Peek(2)
|
||||
b4, _ := tokenAPI.Byte.Peek(3)
|
||||
b5, err := tokenAPI.Byte.Peek(4)
|
||||
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
|
||||
if b[0] == 't' {
|
||||
if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:4]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' {
|
||||
tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5)
|
||||
return true
|
||||
if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
|
||||
(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:4]...)
|
||||
}
|
||||
tokenAPI.Byte.Accept(b1)
|
||||
return true
|
||||
}
|
||||
|
||||
// Booleans 'f', 'F', 'FALSE', 'False' or 'false'.
|
||||
if b[0] == 'f' || b[0] == 'F' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
if l < 5 {
|
||||
return true
|
||||
}
|
||||
if b[0] == 'f' {
|
||||
if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:5]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
|
||||
(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:5]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -353,6 +353,7 @@ func TestModifiers(t *testing.T) {
|
|||
{"missed me!", m.Drop(a.Rune('w')), false, ""},
|
||||
{"where are you?", m.Drop(a.Rune('w')), true, ""},
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{"cool", a.Str("cool"), true, "cool"},
|
||||
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
|
@ -508,3 +509,27 @@ func TestCombination(t *testing.T) {
|
|||
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
|
||||
})
|
||||
}
|
||||
|
||||
// 46709 ns/op
|
||||
func BenchmarkBoolean(b *testing.B) {
|
||||
for i := 0; i < b.N; i++ {
|
||||
tokenize.A.Boolean.Match("0")
|
||||
tokenize.A.Boolean.Match("1")
|
||||
tokenize.A.Boolean.Match("t")
|
||||
tokenize.A.Boolean.Match("f")
|
||||
tokenize.A.Boolean.Match("T")
|
||||
tokenize.A.Boolean.Match("F")
|
||||
tokenize.A.Boolean.Match("0XX")
|
||||
tokenize.A.Boolean.Match("1XX")
|
||||
tokenize.A.Boolean.Match("tXX")
|
||||
tokenize.A.Boolean.Match("fXX")
|
||||
tokenize.A.Boolean.Match("TXX")
|
||||
tokenize.A.Boolean.Match("FXX")
|
||||
tokenize.A.Boolean.Match("true")
|
||||
tokenize.A.Boolean.Match("TRUE")
|
||||
tokenize.A.Boolean.Match("True")
|
||||
tokenize.A.Boolean.Match("false")
|
||||
tokenize.A.Boolean.Match("FALSE")
|
||||
tokenize.A.Boolean.Match("False")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,22 +89,22 @@ func TestFlushInput(t *testing.T) {
|
|||
func TestInputFlusherWrapper(t *testing.T) {
|
||||
runeA := A.Rune('a')
|
||||
flushB := C.FlushInput(A.Rune('b'))
|
||||
api := NewAPI("abaab")
|
||||
runeA(api)
|
||||
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
|
||||
AssertEqual(t, "a", api.Output.String(), "runes after 1 read")
|
||||
flushB(api)
|
||||
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
|
||||
AssertEqual(t, "ab", api.Output.String(), "runes after 2 reads")
|
||||
runeA(api)
|
||||
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
|
||||
AssertEqual(t, "aba", api.Output.String(), "runes after 3 reads")
|
||||
runeA(api)
|
||||
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
|
||||
AssertEqual(t, "abaa", api.Output.String(), "runes after 4 reads")
|
||||
flushB(api)
|
||||
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
|
||||
AssertEqual(t, "abaab", api.Output.String(), "runes after 5 reads")
|
||||
tokenAPI := NewAPI("abaab")
|
||||
runeA(tokenAPI)
|
||||
AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 1 read")
|
||||
AssertEqual(t, "a", tokenAPI.Output.String(), "runes after 1 read")
|
||||
flushB(tokenAPI)
|
||||
AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 2 reads + input flush")
|
||||
AssertEqual(t, "ab", tokenAPI.Output.String(), "runes after 2 reads")
|
||||
runeA(tokenAPI)
|
||||
AssertEqual(t, 1, tokenAPI.stackFrame.offset, "offset after 3 reads")
|
||||
AssertEqual(t, "aba", tokenAPI.Output.String(), "runes after 3 reads")
|
||||
runeA(tokenAPI)
|
||||
AssertEqual(t, 2, tokenAPI.stackFrame.offset, "offset after 4 reads")
|
||||
AssertEqual(t, "abaa", tokenAPI.Output.String(), "runes after 4 reads")
|
||||
flushB(tokenAPI)
|
||||
AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after 5 reads + input flush")
|
||||
AssertEqual(t, "abaab", tokenAPI.Output.String(), "runes after 5 reads")
|
||||
}
|
||||
|
||||
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
|
||||
|
|
Loading…
Reference in New Issue