Switched to byte input for built-in tokenize.Handler functions.
This commit is contained in:
parent
d4492e4f0a
commit
0362763e83
|
@ -240,22 +240,22 @@ func (buf *Buffer) grow(requiredSize int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grow the buffer store by allocating a new one and copying the data.
|
// Grow the buffer store by allocating a new one and copying the data.
|
||||||
newStore := makeSlice(2*capStore + requiredSize)
|
newStore := makeSlice(requiredSize, 2*capStore+requiredSize)
|
||||||
copy(newStore, buf.buffer)
|
copy(newStore, buf.buffer)
|
||||||
buf.store = newStore
|
buf.store = newStore[:0]
|
||||||
buf.buffer = buf.store[:requiredSize]
|
buf.buffer = buf.store[:requiredSize]
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
// makeSlice allocates a slice of length l and capacity c. If the allocation fails, it panics
|
||||||
// with ErrTooLarge.
|
// with ErrTooLarge.
|
||||||
func makeSlice(n int) []byte {
|
func makeSlice(l int, c int) []byte {
|
||||||
// If the make fails, give a known error.
|
// If the make fails, give a known error.
|
||||||
defer func() {
|
defer func() {
|
||||||
if recover() != nil {
|
if recover() != nil {
|
||||||
panic(ErrTooLarge)
|
panic(ErrTooLarge)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return make([]byte, 0, n)
|
return make([]byte, l, c)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush deletes the provided number of bytes from the start of the Buffer.
|
// Flush deletes the provided number of bytes from the start of the Buffer.
|
||||||
|
|
|
@ -327,6 +327,8 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// store |x 64 |
|
// store |x 64 |
|
||||||
// buffer |x 64 |
|
// buffer |x 64 |
|
||||||
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 4, 64)
|
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 4, 64)
|
||||||
|
rn, _, _ := r.RuneAt(0)
|
||||||
|
assertEqual(t, 'X', rn)
|
||||||
|
|
||||||
// The first 64 bytes will fit in the standard cache.
|
// The first 64 bytes will fit in the standard cache.
|
||||||
// store |xxxx64xxxxx|
|
// store |xxxx64xxxxx|
|
||||||
|
@ -353,6 +355,10 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// buffer |xxxxx65xxxxx 128 |
|
// buffer |xxxxx65xxxxx 128 |
|
||||||
assertCache(t, "read cap + 1", r, func() { r.RuneAt(61) }, 0, 65+128, 65, 65+128)
|
assertCache(t, "read cap + 1", r, func() { r.RuneAt(61) }, 0, 65+128, 65, 65+128)
|
||||||
|
|
||||||
|
// The bytes that we had before must be copied to the newly allocated store.
|
||||||
|
rn, _, _ = r.RuneAt(0)
|
||||||
|
assertEqual(t, 'X', rn)
|
||||||
|
|
||||||
// A partial flush frees the start of the store and moves
|
// A partial flush frees the start of the store and moves
|
||||||
// the buffer slice.
|
// the buffer slice.
|
||||||
// store | 50 x15x 128 |
|
// store | 50 x15x 128 |
|
||||||
|
|
|
@ -146,6 +146,13 @@ func (i *API) PeekRune(offset int) (rune, int, error) {
|
||||||
return i.reader.RuneAt(i.stackFrame.offset + offset)
|
return i.reader.RuneAt(i.stackFrame.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PeekByte returns the byte at the provided offset.
|
||||||
|
//
|
||||||
|
// The read cursor and current read offset are not updated by this operation.
|
||||||
|
func (i *API) PeekByte(offset int) (byte, error) {
|
||||||
|
return i.reader.ByteAt(i.stackFrame.offset + offset)
|
||||||
|
}
|
||||||
|
|
||||||
// Accept the last rune as read by NextRune() into the Result runes and move
|
// Accept the last rune as read by NextRune() into the Result runes and move
|
||||||
// the cursor forward.
|
// the cursor forward.
|
||||||
//
|
//
|
||||||
|
@ -163,6 +170,28 @@ func (i *API) Accept() {
|
||||||
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *API) acceptBytes(bytes ...byte) {
|
||||||
|
curRuneEnd := i.stackFrame.runeEnd
|
||||||
|
newRuneEnd := curRuneEnd + len(bytes)
|
||||||
|
|
||||||
|
// Grow the runes capacity when needed.
|
||||||
|
if cap(i.runes) < newRuneEnd {
|
||||||
|
newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
|
||||||
|
copy(newRunes, i.runes)
|
||||||
|
i.runes = newRunes
|
||||||
|
} else {
|
||||||
|
i.runes = i.runes[0:newRuneEnd]
|
||||||
|
}
|
||||||
|
|
||||||
|
for offset, b := range bytes {
|
||||||
|
i.runes[curRuneEnd+offset] = rune(b)
|
||||||
|
i.stackFrame.moveCursorByByte(b)
|
||||||
|
}
|
||||||
|
i.stackFrame.runeEnd = newRuneEnd
|
||||||
|
i.stackFrame.offset += len(bytes)
|
||||||
|
i.runeRead = false
|
||||||
|
}
|
||||||
|
|
||||||
func (i *API) acceptRunes(width int, runes ...rune) {
|
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
curRuneEnd := i.stackFrame.runeEnd
|
||||||
newRuneEnd := curRuneEnd + len(runes)
|
newRuneEnd := curRuneEnd + len(runes)
|
||||||
|
|
|
@ -11,12 +11,20 @@ func (f *stackFrame) moveCursor(input string) *stackFrame {
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *stackFrame) moveCursorByRune(r rune) *stackFrame {
|
func (f *stackFrame) moveCursorByRune(r rune) {
|
||||||
if r == '\n' {
|
if r == '\n' {
|
||||||
f.column = 0
|
f.column = 0
|
||||||
f.line++
|
f.line++
|
||||||
} else {
|
} else {
|
||||||
f.column++
|
f.column++
|
||||||
}
|
}
|
||||||
return f
|
}
|
||||||
|
|
||||||
|
func (f *stackFrame) moveCursorByByte(b byte) {
|
||||||
|
if b == '\n' {
|
||||||
|
f.column = 0
|
||||||
|
f.line++
|
||||||
|
} else {
|
||||||
|
f.column++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,32 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
func TestMoveCursorByBytes(t *testing.T) {
|
||||||
|
api := NewAPI("")
|
||||||
|
api.stackFrame.moveCursorByByte('a')
|
||||||
|
api.stackFrame.moveCursorByByte('b')
|
||||||
|
api.stackFrame.moveCursorByByte('c')
|
||||||
|
api.stackFrame.moveCursorByByte('\r')
|
||||||
|
api.stackFrame.moveCursorByByte('\n')
|
||||||
|
api.stackFrame.moveCursorByByte('a')
|
||||||
|
api.stackFrame.moveCursorByByte('b')
|
||||||
|
|
||||||
|
AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMoveCursorByRunes(t *testing.T) {
|
||||||
|
api := NewAPI("")
|
||||||
|
api.stackFrame.moveCursorByRune('ɹ')
|
||||||
|
api.stackFrame.moveCursorByRune('n')
|
||||||
|
api.stackFrame.moveCursorByRune('u')
|
||||||
|
api.stackFrame.moveCursorByRune('\r')
|
||||||
|
api.stackFrame.moveCursorByRune('\n')
|
||||||
|
api.stackFrame.moveCursorByRune('ǝ')
|
||||||
|
|
||||||
|
AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
|
||||||
for _, test := range []struct {
|
for _, test := range []struct {
|
||||||
name string
|
name string
|
||||||
input []string
|
input []string
|
||||||
|
|
|
@ -374,6 +374,18 @@ func MatchRuneRange(start rune, end rune) Handler {
|
||||||
if end < start {
|
if end < start {
|
||||||
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||||
}
|
}
|
||||||
|
if end <= 127 {
|
||||||
|
byteStart := byte(start)
|
||||||
|
byteEnd := byte(end)
|
||||||
|
return func(t *API) bool {
|
||||||
|
r, err := t.PeekByte(0)
|
||||||
|
if err == nil && r >= byteStart && r <= byteEnd {
|
||||||
|
t.acceptBytes(r)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, w, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && r >= start && r <= end {
|
if err == nil && r >= start && r <= end {
|
||||||
|
@ -388,18 +400,18 @@ func MatchRuneRange(start rune, end rune) Handler {
|
||||||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||||||
func MatchNewline() Handler {
|
func MatchNewline() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, _, err := t.PeekRune(0)
|
b1, err := t.PeekByte(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '\n' {
|
if b1 == '\n' {
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if b1 == '\r' {
|
||||||
r2, _, err := t.PeekRune(1)
|
b2, err := t.PeekByte(1)
|
||||||
if err == nil && r2 == '\n' {
|
if err == nil && b2 == '\n' {
|
||||||
t.acceptRunes(2, r1, r2)
|
t.acceptBytes(b1, b2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -414,9 +426,9 @@ func MatchNewline() Handler {
|
||||||
// newlines, then take a look at MatchWhitespace().
|
// newlines, then take a look at MatchWhitespace().
|
||||||
func MatchBlank() Handler {
|
func MatchBlank() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.NextRune()
|
b, err := t.PeekByte(0)
|
||||||
if err == nil && (r == ' ' || r == '\t') {
|
if err == nil && (b == ' ' || b == '\t') {
|
||||||
t.Accept()
|
t.acceptBytes(b)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -433,20 +445,20 @@ func MatchBlank() Handler {
|
||||||
func MatchBlanks() Handler {
|
func MatchBlanks() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
// Match the first blank.
|
// Match the first blank.
|
||||||
r, _, err := t.PeekRune(0)
|
b, err := t.PeekByte(0)
|
||||||
if err != nil || (r != ' ' && r != '\t') {
|
if err != nil || (b != ' ' && b != '\t') {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
t.acceptRunes(1, r)
|
t.acceptBytes(b)
|
||||||
|
|
||||||
// Now match any number of followup blanks. We've already got
|
// Now match any number of followup blanks. We've already got
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
// a successful match at this point, so we'll always return true at the end.
|
||||||
for {
|
for {
|
||||||
r, _, err := t.PeekRune(0)
|
b, err := t.PeekByte(0)
|
||||||
if err != nil || (r != ' ' && r != '\t') {
|
if err != nil || (b != ' ' && b != '\t') {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.acceptRunes(1, r)
|
t.acceptBytes(b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -457,35 +469,35 @@ func MatchBlanks() Handler {
|
||||||
func MatchWhitespace() Handler {
|
func MatchWhitespace() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
// Match the first whitespace.
|
// Match the first whitespace.
|
||||||
r1, _, err := t.PeekRune(0)
|
b1, err := t.PeekByte(0)
|
||||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if b1 == '\r' {
|
||||||
r2, _, err := t.PeekRune(1)
|
b2, err := t.PeekByte(1)
|
||||||
if err != nil || r2 != '\n' {
|
if err != nil || b2 != '\n' {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
t.acceptRunes(2, r1, r2)
|
t.acceptBytes(b1, b2)
|
||||||
} else {
|
} else {
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now match any number of followup whitespace. We've already got
|
// Now match any number of followup whitespace. We've already got
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
// a successful match at this point, so we'll always return true at the end.
|
||||||
for {
|
for {
|
||||||
r1, _, err := t.PeekRune(0)
|
b1, err := t.PeekByte(0)
|
||||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if b1 == '\r' {
|
||||||
r2, _, err := t.PeekRune(1)
|
b2, err := t.PeekByte(1)
|
||||||
if err != nil || r2 != '\n' {
|
if err != nil || b2 != '\n' {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.acceptRunes(2, r1, r2)
|
t.acceptBytes(b1, b2)
|
||||||
} else {
|
} else {
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -517,18 +529,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||||||
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
||||||
func MatchEndOfLine() Handler {
|
func MatchEndOfLine() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, _, err := t.PeekRune(0)
|
b1, err := t.PeekByte(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err == io.EOF
|
return err == io.EOF
|
||||||
}
|
}
|
||||||
if r1 == '\n' {
|
if b1 == '\n' {
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if b1 == '\r' {
|
||||||
r2, _, _ := t.PeekRune(1)
|
b2, _ := t.PeekByte(1)
|
||||||
if r2 == '\n' {
|
if b2 == '\n' {
|
||||||
t.acceptRunes(2, r1, r2)
|
t.acceptBytes(b1, b2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -955,45 +967,45 @@ func MatchFloat() Handler {
|
||||||
// False values: false, FALSE, False, 0, f, F
|
// False values: false, FALSE, False, 0, f, F
|
||||||
func MatchBoolean() Handler {
|
func MatchBoolean() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, _, err := t.PeekRune(0)
|
b1, err := t.PeekByte(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '1' || r1 == '0' {
|
if b1 == '1' || b1 == '0' {
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == 't' || r1 == 'T' {
|
if b1 == 't' || b1 == 'T' {
|
||||||
r2, _, _ := t.PeekRune(1)
|
b2, _ := t.PeekByte(1)
|
||||||
r3, _, _ := t.PeekRune(2)
|
b3, _ := t.PeekByte(2)
|
||||||
r4, _, err := t.PeekRune(3)
|
b4, err := t.PeekByte(3)
|
||||||
if err == nil && r2 == 'r' && r3 == 'u' && r4 == 'e' {
|
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
|
||||||
t.acceptRunes(4, r1, r2, r3, r4)
|
t.acceptBytes(b1, b2, b3, b4)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if err == nil && r1 == 'T' && r2 == 'R' && r3 == 'U' && r4 == 'E' {
|
if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' {
|
||||||
t.acceptRunes(4, r1, r2, r3, r4)
|
t.acceptBytes(b1, b2, b3, b4)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if r1 == 'f' || r1 == 'F' {
|
if b1 == 'f' || b1 == 'F' {
|
||||||
r2, _, _ := t.PeekRune(1)
|
b2, _ := t.PeekByte(1)
|
||||||
r3, _, _ := t.PeekRune(2)
|
b3, _ := t.PeekByte(2)
|
||||||
r4, _, _ := t.PeekRune(3)
|
b4, _ := t.PeekByte(3)
|
||||||
r5, _, err := t.PeekRune(4)
|
b5, err := t.PeekByte(4)
|
||||||
|
|
||||||
if err == nil && r2 == 'a' && r3 == 'l' && r4 == 's' && r5 == 'e' {
|
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
|
||||||
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
t.acceptBytes(b1, b2, b3, b4, b5)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if err == nil && r1 == 'F' && r2 == 'A' && r3 == 'L' && r4 == 'S' && r5 == 'E' {
|
if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 'E' {
|
||||||
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
t.acceptBytes(b1, b2, b3, b4, b5)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.acceptRunes(1, r1)
|
t.acceptBytes(b1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -1039,7 +1051,14 @@ func MatchUnicodeLower() Handler {
|
||||||
// MatchHexDigit creates a Handler function that checks if a single hexadecimal
|
// MatchHexDigit creates a Handler function that checks if a single hexadecimal
|
||||||
// digit can be read from the input.
|
// digit can be read from the input.
|
||||||
func MatchHexDigit() Handler {
|
func MatchHexDigit() Handler {
|
||||||
return MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F'))
|
return func(t *API) bool {
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
|
||||||
|
t.acceptBytes(b)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchOctet creates a Handler function that checks if a valid octet value
|
// MatchOctet creates a Handler function that checks if a valid octet value
|
||||||
|
|
Loading…
Reference in New Issue