Bytes reader working, now carry on switching to byte reading in the tokenizer code.
This commit is contained in:
parent
17935b7534
commit
d4492e4f0a
244
read/read.go
244
read/read.go
|
@ -4,27 +4,29 @@
|
|||
// Let's say we've got the following input coming up in the io.Reader that is
|
||||
// wrapped by the Reader:
|
||||
//
|
||||
// |H|e|l|l|o|,| |w|o|r|l|d|!| <-- runes
|
||||
// 0 6 12 <-- rune offset
|
||||
// |H|e|l|l|o|,| |w|o|r|l|d|!| <-- bytes
|
||||
// 0 6 12 <-- byte offset
|
||||
//
|
||||
// The Reader can now be used to retrieve runes from the input, based on their
|
||||
// offset, using RuneAt(offset). Normally these runes will be retrieved in
|
||||
// sequence, but that is not a requirement. Let's say we retrieve the rune with
|
||||
// offset 6 from the input (the 'w'), then the Reader buffer be filled with runes
|
||||
// from the io.Reader until there are enough runes available to return the rune
|
||||
// for offset 6:
|
||||
// The Reader can now be used to retrieve data from the input, based on their
|
||||
// byte offset, e.g. using RuneAt(offset) or ByteAt(offset). Normally these data
|
||||
// will be retrieved in sequence by the user of this code, but that is not a
|
||||
// requirement. Let's say we retrieve the byte with offset 6 from the input
|
||||
// (the 'w'), then the Reader buffer will be filled with bytes from the io.Reader
|
||||
// until there are enough bytes available to return the byte for offset 6:
|
||||
//
|
||||
// |H|e|l|l|o| |w|
|
||||
// 0 6
|
||||
//
|
||||
// Using RuneAt, you can retrieve arbitrary runes. If you request one that is
|
||||
// in the Reader buffer, then the buffered rune is returned. If you request one
|
||||
// that is not in the buffer, then the buffer will be expanded.
|
||||
// This means that you can retrieve data for arbitrary offsets. If you request
|
||||
// an offset that is already in the Reader buffer, then the buffered data are
|
||||
// returned. If you request one that is not in the buffer, then the buffer will
|
||||
// be expanded.
|
||||
//
|
||||
// To make this into a sliding window, the Reader provides the method
|
||||
// Flush(numberOfRunes). This method will drop the provided number of runes from
|
||||
// the Reader buffer. So when we'd do a Flush(3) on the example buffer from above,
|
||||
// then the Reader buffer would become:
|
||||
// To make this into a sliding window (preserving memory space while scanning
|
||||
// the input data), the Reader provides the method Flush(numberOfBytes).
|
||||
// This method will drop the provided number of bytes from the Reader buffer.
|
||||
// So when we'd do a Flush(3) on the example buffer from above, then the Reader
|
||||
// buffer would become:
|
||||
//
|
||||
// |l|o| |w|
|
||||
// 0 3
|
||||
|
@ -79,32 +81,33 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
|||
}
|
||||
|
||||
// Buffer wraps around a bufio.Reader and provides an additional layer of
|
||||
// buffering that allows us to read the same runes over and over again.
|
||||
// buffering that allows us to read the same data over and over again.
|
||||
// This is useful for implementing a parser that must be able to do lookahead
|
||||
// on the input, returning to the original input position after finishing
|
||||
// that lookahead.
|
||||
//
|
||||
// To minimze memory use, it is also possible to flush the read buffer when there is
|
||||
// no more need to go back to previously read runes.
|
||||
// To minimize memory use, it is also possible to flush the read buffer when there is
|
||||
// no more need to go back to previously read data.
|
||||
//
|
||||
// The parserkit.reader.Reader is used internally by tokenize.API.
|
||||
// This parsekit.read.Buffer is used internally by tokenize.API.
|
||||
type Buffer struct {
|
||||
bufio *bufio.Reader // used for ReadRune()
|
||||
store []rune // buffer store, the buffer field is a slice on top of this one
|
||||
buffer []rune // input buffer, holding runes that were read from input
|
||||
err error // a read error, if one occurred
|
||||
errOffset int // the offset in the buffer at which the read error was encountered
|
||||
firstReadDone bool // whether or not the first read was done
|
||||
bufio *bufio.Reader // used for ReadByte()
|
||||
store []byte // buffer store, the buffer field is a slice on top of this one
|
||||
buffer []byte // input buffer, holding bytes that were read from input
|
||||
err error // a read error, if one occurred
|
||||
errOffset int // the offset in the buffer at which the read error was encountered
|
||||
}
|
||||
|
||||
// RuneAt reads the rune at the provided rune offset.
|
||||
// RuneAt reads the rune at the provided byte offset.
|
||||
//
|
||||
// This offset is relative to the current starting position of the Buffer.
|
||||
// The offset is relative to the current starting position of the Buffer.
|
||||
// When starting reading, offset 0 will point at the start of the input.
|
||||
// After flushing, offset 0 will point at the input up to where the flush was done.
|
||||
// After flushing, offset 0 will point at the input up to where the flush
|
||||
// was done.
|
||||
//
|
||||
// The error return value will be nil when reading was successful.
|
||||
// When an invalid rune is encountered on the input, the error will be nil,
|
||||
// When reading was successful, the rune and the width of the rune in bytes
|
||||
// will be returned. The returned error will be nil.
|
||||
// When an invalid UTF8 rune is encountered on the input, the error will be nil,
|
||||
// but the rune will be utf8.RuneError
|
||||
//
|
||||
// When reading failed, the rune will be utf8.RuneError and the error will
|
||||
|
@ -113,48 +116,83 @@ type Buffer struct {
|
|||
//
|
||||
// Once a read error is encountered, that same read error is guaranteed to
|
||||
// be returned on every subsequent read at or beyond the provided offset.
|
||||
func (r *Buffer) RuneAt(offset int) (rune, error) {
|
||||
// Re-issue a previously seen read error.
|
||||
if r.err != nil && offset >= r.errOffset {
|
||||
return utf8.RuneError, r.err
|
||||
func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
|
||||
// Shortcut: re-issue a previously seen read error.
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return utf8.RuneError, 0, buf.err
|
||||
}
|
||||
|
||||
// Rune at provided offset is not yet available in the input buffer.
|
||||
// Read runes until we have enough runes to satisfy the offset.
|
||||
l := len(r.buffer)
|
||||
|
||||
// Number of runes to add to the buffer to have enough space to store
|
||||
// the rune at the offset
|
||||
n := offset - l + 1
|
||||
|
||||
if n > 0 {
|
||||
r.grow(n)
|
||||
var readRune rune
|
||||
var err error
|
||||
for writeAt := l; writeAt <= offset; writeAt++ {
|
||||
readRune, _, err = r.bufio.ReadRune()
|
||||
|
||||
// Skip BOM.
|
||||
if !r.firstReadDone {
|
||||
r.firstReadDone = true
|
||||
if readRune == '\uFEFF' {
|
||||
writeAt--
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Handle errors.
|
||||
// Compute the number of bytes that we need in the buffer to be able
|
||||
// to return the rune at the provided byte offset.
|
||||
bufferLen := len(buf.buffer)
|
||||
requiredLen := offset + utf8.UTFMax
|
||||
if requiredLen > bufferLen && buf.err == nil {
|
||||
buf.grow(requiredLen)
|
||||
for writeAt := bufferLen; writeAt < requiredLen; writeAt++ {
|
||||
b, err := buf.bufio.ReadByte()
|
||||
if err != nil {
|
||||
r.err = err
|
||||
r.errOffset = writeAt
|
||||
return utf8.RuneError, err
|
||||
buf.err = err
|
||||
buf.errOffset = writeAt
|
||||
buf.buffer = buf.buffer[:writeAt]
|
||||
break
|
||||
}
|
||||
|
||||
r.buffer[writeAt] = readRune
|
||||
buf.buffer[writeAt] = b
|
||||
}
|
||||
return readRune, nil
|
||||
}
|
||||
return r.buffer[offset], nil
|
||||
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return utf8.RuneError, 0, buf.err
|
||||
}
|
||||
|
||||
r, w := utf8.DecodeRune(buf.buffer[offset:])
|
||||
return r, w, nil
|
||||
}
|
||||
|
||||
// ByteAt reads the byte at the provided byte offset.
|
||||
//
|
||||
// The offset is relative to the current starting position of the Buffer.
|
||||
// When starting reading, offset 0 will point at the start of the input.
|
||||
// After flushing, offset 0 will point at the input up to where the flush
|
||||
// was done.
|
||||
//
|
||||
// When reading was successful, the byte will be returned. The returned
|
||||
// error will be nil.
|
||||
//
|
||||
// When reading failed, the byte will be 0x00 and the error will
|
||||
// be not nil. One special read fail is actually a normal situation: end
|
||||
// of file reached. In that case, the returned error will be io.EOF.
|
||||
//
|
||||
// Once a read error is encountered, that same read error is guaranteed to
|
||||
// be returned on every subsequent read at or beyond the provided offset.
|
||||
func (buf *Buffer) ByteAt(offset int) (byte, error) {
|
||||
// Shortcut: re-issue a previously seen read error.
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return 0, buf.err
|
||||
}
|
||||
|
||||
// Compute the number of bytes that we need in the buffer to be able
|
||||
// to return the byte at the provided byte offset.
|
||||
bufferLen := len(buf.buffer)
|
||||
requiredLen := offset + 1
|
||||
if requiredLen > bufferLen && buf.err == nil {
|
||||
buf.grow(requiredLen)
|
||||
for writeAt := bufferLen; writeAt < requiredLen; writeAt++ {
|
||||
b, err := buf.bufio.ReadByte()
|
||||
if err != nil {
|
||||
buf.err = err
|
||||
buf.errOffset = writeAt
|
||||
buf.buffer = buf.buffer[:writeAt]
|
||||
break
|
||||
}
|
||||
buf.buffer[writeAt] = b
|
||||
}
|
||||
}
|
||||
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return 0, buf.err
|
||||
}
|
||||
|
||||
return buf.buffer[offset], nil
|
||||
}
|
||||
|
||||
// The upcoming code was inspired heavily by the Go built-in 'bytes' package.
|
||||
|
@ -168,82 +206,80 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
|||
// grow grows the buffer to guarantee space for the required number of bytes.
|
||||
// It does not return a value; the buffer field is resliced in place.
|
||||
// If the buffer can't grow it will panic with ErrTooLarge.
|
||||
func (r *Buffer) grow(n int) {
|
||||
func (buf *Buffer) grow(requiredSize int) {
|
||||
// Instantiate new buffer store
|
||||
if r.store == nil {
|
||||
if buf.store == nil {
|
||||
b := smallBufferSize
|
||||
if b < n {
|
||||
b = n
|
||||
if b < requiredSize {
|
||||
b = requiredSize
|
||||
}
|
||||
r.store = make([]rune, 0, b)
|
||||
r.buffer = r.store[:n]
|
||||
buf.store = make([]byte, 0, b)
|
||||
buf.buffer = buf.store[:requiredSize]
|
||||
return
|
||||
}
|
||||
|
||||
lenBuffer := len(r.buffer)
|
||||
capBuffer := cap(r.buffer)
|
||||
freeBuffer := capBuffer - lenBuffer
|
||||
newSize := lenBuffer + n
|
||||
capBuffer := cap(buf.buffer)
|
||||
|
||||
// Grow the buffer store by reslicing within the available capacity.
|
||||
if freeBuffer >= n {
|
||||
r.buffer = r.buffer[:newSize]
|
||||
if capBuffer >= requiredSize {
|
||||
buf.buffer = buf.buffer[:requiredSize]
|
||||
return
|
||||
}
|
||||
|
||||
capStore := cap(r.store)
|
||||
capStore := cap(buf.store)
|
||||
freeAtStartOfStore := capStore - capBuffer
|
||||
|
||||
// Grow the buffer by moving the data to the start of the store.
|
||||
// Note: according to the spec, overlapping slices are allowed with copy().
|
||||
if freeAtStartOfStore > 0 && newSize <= capStore {
|
||||
r.store = r.store[0:newSize]
|
||||
copy(r.store, r.buffer)
|
||||
r.buffer = r.store[:newSize]
|
||||
r.store = r.store[:0]
|
||||
if freeAtStartOfStore > 0 && requiredSize <= capStore {
|
||||
buf.store = buf.store[0:requiredSize]
|
||||
copy(buf.store, buf.buffer)
|
||||
buf.buffer = buf.store[:requiredSize]
|
||||
buf.store = buf.store[:0]
|
||||
return
|
||||
}
|
||||
|
||||
// Grow the buffer store by allocating a new one and copying the data.
|
||||
buf := makeSlice(2*capStore + n)
|
||||
copy(buf, r.buffer)
|
||||
r.store = buf
|
||||
r.buffer = r.store[:newSize]
|
||||
newStore := makeSlice(2*capStore + requiredSize)
|
||||
copy(newStore, buf.buffer)
|
||||
buf.store = newStore
|
||||
buf.buffer = buf.store[:requiredSize]
|
||||
}
|
||||
|
||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||
// with ErrTooLarge.
|
||||
func makeSlice(n int) []rune {
|
||||
func makeSlice(n int) []byte {
|
||||
// If the make fails, give a known error.
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
panic(ErrTooLarge)
|
||||
}
|
||||
}()
|
||||
return make([]rune, 0, n)
|
||||
return make([]byte, 0, n)
|
||||
}
|
||||
|
||||
// Flush deletes the provided number of runes from the start of the Buffer.
|
||||
// Flush deletes the provided number of bytes from the start of the Buffer.
|
||||
// After flushing the Buffer, offset 0 as used by RuneAt() and ByteAt() will
|
||||
// point to the byte that comes after the bytes that were flushed.
|
||||
// So what this basically does, is turn the Buffer into a sliding window.
|
||||
func (r *Buffer) Flush(numberOfRunes int) {
|
||||
l := len(r.buffer)
|
||||
if numberOfRunes > l {
|
||||
func (buf *Buffer) Flush(numberOfBytes int) {
|
||||
if numberOfBytes == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
bufferLen := len(buf.buffer)
|
||||
if numberOfBytes > bufferLen {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
||||
"exceeds size of the buffer (%d)", numberOfRunes, l))
|
||||
"exceeds size of the buffer (%d)", numberOfBytes, bufferLen))
|
||||
}
|
||||
if numberOfRunes == 0 {
|
||||
if bufferLen == numberOfBytes {
|
||||
buf.buffer = buf.store[:0]
|
||||
buf.errOffset = 0
|
||||
return
|
||||
}
|
||||
if l == numberOfRunes {
|
||||
r.buffer = r.store[:0]
|
||||
r.errOffset = 0
|
||||
return
|
||||
}
|
||||
r.buffer = r.buffer[numberOfRunes:]
|
||||
if r.err != nil {
|
||||
r.errOffset = r.errOffset - numberOfRunes
|
||||
buf.buffer = buf.buffer[numberOfBytes:]
|
||||
if buf.err != nil {
|
||||
buf.errOffset = buf.errOffset - numberOfBytes
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,8 +12,8 @@ import (
|
|||
func ExampleNew() {
|
||||
printFirstRuneOf := func(input interface{}) {
|
||||
r := New(input)
|
||||
c, _ := r.RuneAt(0)
|
||||
fmt.Printf("%q\n", c)
|
||||
c, w, _ := r.RuneAt(0)
|
||||
fmt.Printf("rune %q, width %d\n", c, w)
|
||||
}
|
||||
|
||||
simpleString := "Hello, world!"
|
||||
|
@ -25,14 +25,14 @@ func ExampleNew() {
|
|||
bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?"))
|
||||
printFirstRuneOf(bufioReaderPointer)
|
||||
|
||||
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Where do we go, world?")))
|
||||
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Ɍead the manual!")))
|
||||
printFirstRuneOf(bufioReaderValue)
|
||||
|
||||
// Output:
|
||||
// 'H'
|
||||
// 'G'
|
||||
// 'W'
|
||||
// 'W'
|
||||
// rune 'H', width 1
|
||||
// rune 'G', width 1
|
||||
// rune 'W', width 1
|
||||
// rune 'Ɍ', width 2
|
||||
}
|
||||
|
||||
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||||
|
@ -46,13 +46,13 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
|||
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
||||
} {
|
||||
r := New(test.input)
|
||||
firstRune, _ := r.RuneAt(0)
|
||||
firstRune, _, _ := r.RuneAt(0)
|
||||
if firstRune != 'H' {
|
||||
t.Errorf("[%s] first rune not 'H'", test.name)
|
||||
}
|
||||
lastRune, _ := r.RuneAt(12)
|
||||
lastRune, _, _ := r.RuneAt(12)
|
||||
if lastRune != '!' {
|
||||
t.Errorf("[%s] last rune not '!'", test.name)
|
||||
t.Errorf("[%s] last rune not '!', but %q", test.name, lastRune)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -63,41 +63,63 @@ func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
|||
"parsekit.read.New(): no support for input of type int")
|
||||
}
|
||||
|
||||
func TestBuffer_RuneAt(t *testing.T) {
|
||||
func TestBuffer_ByteAt(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
at := func(i int) byte { b, _ := r.ByteAt(i); return b }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||
assertEqual(t, "H!wH", result)
|
||||
}
|
||||
|
||||
func TestBuffer_RuneAt(t *testing.T) {
|
||||
r := New(strings.NewReader("¡pןɹoʍ 'oןןǝH"))
|
||||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(5), at(8), at(0))
|
||||
assertEqual(t, "¡ɹʍ¡", result)
|
||||
}
|
||||
|
||||
func TestBuffer_ByteAt_endOfFile(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
|
||||
b, err := r.ByteAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||||
assertEqual(t, "'\\x00' EOF true", result)
|
||||
|
||||
b, err = r.ByteAt(20)
|
||||
result = fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||||
assertEqual(t, "'\\x00' EOF true", result)
|
||||
}
|
||||
|
||||
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
|
||||
rn, err := r.RuneAt(13)
|
||||
rn, _, err := r.RuneAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
assertEqual(t, "'<27>' EOF true", result)
|
||||
|
||||
rn, err = r.RuneAt(20)
|
||||
rn, _, err = r.RuneAt(20)
|
||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
assertEqual(t, "'<27>' EOF true", result)
|
||||
}
|
||||
|
||||
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, \xcdworld!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||
|
||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||
assertEqual(t, " <20>wo", result)
|
||||
}
|
||||
|
||||
func ExampleBuffer_RuneAt() {
|
||||
func ExampleBuffer_ByteAt() {
|
||||
reader := New(strings.NewReader("Hello, world!"))
|
||||
|
||||
fmt.Printf("Runes: ")
|
||||
for i := 0; ; i++ {
|
||||
r, err := reader.RuneAt(i)
|
||||
offset := 0
|
||||
for {
|
||||
r, err := reader.ByteAt(offset)
|
||||
offset++
|
||||
if err != nil {
|
||||
fmt.Printf("\nErr: %s\n", err)
|
||||
break
|
||||
|
@ -110,18 +132,39 @@ func ExampleBuffer_RuneAt() {
|
|||
// Err: EOF
|
||||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
r := New(strings.NewReader("\uFEFFBommetje!"))
|
||||
b, _ := r.RuneAt(0)
|
||||
o, _ := r.RuneAt(1)
|
||||
m, _ := r.RuneAt(2)
|
||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
assertEqual(t, "Bom", bom)
|
||||
func ExampleBuffer_RuneAt() {
|
||||
reader := New(strings.NewReader("Hello, pןɹoʍ!"))
|
||||
|
||||
fmt.Printf("Runes: ")
|
||||
offset := 0
|
||||
for {
|
||||
r, w, err := reader.RuneAt(offset)
|
||||
offset += w
|
||||
if err != nil {
|
||||
fmt.Printf("\nErr: %s\n", err)
|
||||
break
|
||||
}
|
||||
fmt.Printf("%c", r)
|
||||
}
|
||||
|
||||
// Output:
|
||||
// Runes: Hello, pןɹoʍ!
|
||||
// Err: EOF
|
||||
}
|
||||
|
||||
// TODO reimplement somewhere, maybe a separate call in the reader or should it be part of a parser?
|
||||
// func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
// r := New(strings.NewReader("\uFEFFBommetje!"))
|
||||
// b, _, _ := r.RuneAt(0)
|
||||
// o, _, _ := r.RuneAt(1)
|
||||
// m, _, _ := r.RuneAt(2)
|
||||
// bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
// assertEqual(t, "Bom", bom)
|
||||
// }
|
||||
|
||||
func TestBuffer_Flush(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
result := fmt.Sprintf("%c", at(7))
|
||||
|
@ -138,7 +181,7 @@ func TestBuffer_Flush(t *testing.T) {
|
|||
|
||||
func ExampleBuffer_Flush() {
|
||||
r := New(strings.NewReader("dog eat dog!"))
|
||||
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
||||
at := func(offset int) rune { c, _, _ := r.RuneAt(offset); return c }
|
||||
|
||||
// Read from the first 4 runes of the input.
|
||||
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
||||
|
@ -162,30 +205,31 @@ func ExampleBuffer_Flush() {
|
|||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.RuneAt(10)
|
||||
// Fill buffer with "Hello, world!", the first 13 runes.
|
||||
rn, _, _ := r.RuneAt(12)
|
||||
assertEqual(t, '!', rn)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
// However, we flush 14 runes, which exceeds the buffer size.
|
||||
assertPanic(t,
|
||||
func() { r.Flush(12) },
|
||||
func() { r.Flush(14) },
|
||||
"parsekit.read.Buffer.Flush(): number of runes to flush "+
|
||||
"(12) exceeds size of the buffer (11)")
|
||||
"(14) exceeds size of the buffer (13)")
|
||||
}
|
||||
|
||||
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
||||
r := New(strings.NewReader("Hello, world!"))
|
||||
_, err := r.RuneAt(13)
|
||||
_, _, err := r.RuneAt(13)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
_, err = r.RuneAt(13)
|
||||
_, _, err = r.RuneAt(13)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
_, err = r.RuneAt(14)
|
||||
_, _, err = r.RuneAt(14)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
r.Flush(13)
|
||||
_, err = r.RuneAt(0)
|
||||
_, _, err = r.RuneAt(0)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
_, err = r.RuneAt(1)
|
||||
_, _, err = r.RuneAt(1)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
_, err = r.RuneAt(2)
|
||||
_, _, err = r.RuneAt(2)
|
||||
assertEqual(t, err.Error(), "EOF")
|
||||
}
|
||||
|
||||
|
@ -203,43 +247,43 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
|
|||
r := New(input)
|
||||
|
||||
// Read the last available rune.
|
||||
readRune, _ := r.RuneAt(3)
|
||||
readRune, _, _ := r.RuneAt(3)
|
||||
assertEqual(t, 'd', readRune)
|
||||
|
||||
// Reading the next offset must result in the io.EOF error from the stub.
|
||||
readRune, err := r.RuneAt(4)
|
||||
readRune, _, err := r.RuneAt(4)
|
||||
assertEqual(t, utf8.RuneError, readRune)
|
||||
assertEqual(t, io.EOF, err)
|
||||
|
||||
// Reading even further should yield the same io.EOF error.
|
||||
readRune, err = r.RuneAt(5)
|
||||
readRune, _, err = r.RuneAt(5)
|
||||
assertEqual(t, utf8.RuneError, readRune)
|
||||
assertEqual(t, io.EOF, err)
|
||||
|
||||
// After an error, we must still be able to read the last rune.
|
||||
readRune, _ = r.RuneAt(3)
|
||||
readRune, _, _ = r.RuneAt(3)
|
||||
assertEqual(t, 'd', readRune)
|
||||
|
||||
// Flushing updates the error index too.
|
||||
r.Flush(3)
|
||||
|
||||
// The last rune is now at offset 0.
|
||||
readRune, _ = r.RuneAt(0)
|
||||
readRune, _, _ = r.RuneAt(0)
|
||||
assertEqual(t, 'd', readRune)
|
||||
|
||||
// The io.EOF is now at offset 1.
|
||||
_, err = r.RuneAt(1)
|
||||
_, _, err = r.RuneAt(1)
|
||||
assertEqual(t, io.EOF, err)
|
||||
|
||||
// Let's flush that last rune too.
|
||||
r.Flush(1)
|
||||
|
||||
// The io.EOF is now at offset 0.
|
||||
_, err = r.RuneAt(0)
|
||||
_, _, err = r.RuneAt(0)
|
||||
assertEqual(t, io.EOF, err)
|
||||
|
||||
// And reading beyond that offset also yields io.EOF.
|
||||
_, err = r.RuneAt(1)
|
||||
_, _, err = r.RuneAt(1)
|
||||
assertEqual(t, io.EOF, err)
|
||||
}
|
||||
|
||||
|
@ -247,13 +291,13 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
|||
input, size := makeLargeStubReader()
|
||||
r := New(input)
|
||||
|
||||
readRune, err := r.RuneAt(0)
|
||||
readRune, _, err := r.RuneAt(0)
|
||||
assertEqual(t, 'X', readRune)
|
||||
readRune, err = r.RuneAt(size - 1)
|
||||
readRune, _, err = r.RuneAt(size - 1)
|
||||
assertEqual(t, 'Y', readRune)
|
||||
readRune, err = r.RuneAt(size)
|
||||
readRune, _, err = r.RuneAt(size)
|
||||
assertEqual(t, io.EOF, err)
|
||||
readRune, err = r.RuneAt(10)
|
||||
readRune, _, err = r.RuneAt(10)
|
||||
assertEqual(t, 'X', readRune)
|
||||
}
|
||||
|
||||
|
@ -261,9 +305,9 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
|
|||
input, size := makeLargeStubReader()
|
||||
r := New(input)
|
||||
|
||||
readRune, _ := r.RuneAt(size - 200)
|
||||
readRune, _, _ := r.RuneAt(size - 200)
|
||||
assertEqual(t, 'X', readRune)
|
||||
readRune, _ = r.RuneAt(size - 1)
|
||||
readRune, _, _ = r.RuneAt(size - 1)
|
||||
assertEqual(t, 'Y', readRune)
|
||||
}
|
||||
|
||||
|
@ -271,7 +315,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
|||
input, size := makeLargeStubReader()
|
||||
r := New(input)
|
||||
|
||||
readRune, _ := r.RuneAt(size - 1)
|
||||
readRune, _, _ := r.RuneAt(size - 1)
|
||||
assertEqual(t, 'Y', readRune)
|
||||
}
|
||||
|
||||
|
@ -282,12 +326,17 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// The first read will create the standard cache.
|
||||
// store |x 64 |
|
||||
// buffer |x 64 |
|
||||
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
|
||||
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 4, 64)
|
||||
|
||||
// The first 64 reads will fit in the standard cache.
|
||||
// The first 64 bytes will fit in the standard cache.
|
||||
// store |xxxx64xxxxx|
|
||||
// buffer |xxxx64xxxxx|
|
||||
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
|
||||
//
|
||||
// Note: in the test offset 60 is used instead of offset 63, because
|
||||
// RuneAt() will fill the buffer with 4 bytes to accommodate for the
|
||||
// longest UTF8 character encodings. In all upcoming tests, the same
|
||||
// logic applies to the RuneAt() calls.
|
||||
assertCache(t, "read fill cache", r, func() { r.RuneAt(60) }, 0, 64, 64, 64)
|
||||
|
||||
// Flushing zero input keeps everything as-is.
|
||||
// store |xxxx64xxxxx|
|
||||
|
@ -302,7 +351,7 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// Reading 65 chars will allocate a new store of 2 * size + n.
|
||||
// store |xxxxx65xxxxx 128 |
|
||||
// buffer |xxxxx65xxxxx 128 |
|
||||
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
|
||||
assertCache(t, "read cap + 1", r, func() { r.RuneAt(61) }, 0, 65+128, 65, 65+128)
|
||||
|
||||
// A partial flush frees the start of the store and moves
|
||||
// the buffer slice.
|
||||
|
@ -315,7 +364,7 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// without a new allocation.
|
||||
// store | 50 xxxxxxxxx143xxxxxxxx|
|
||||
// buffer |xxxxxxxxx143xxxxxxxx|
|
||||
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
|
||||
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(139) }, 0, 50+143, 143, 143)
|
||||
|
||||
// Flush the full input.
|
||||
// store | 193 |
|
||||
|
@ -325,7 +374,7 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// Read a bit more than half the capacity.
|
||||
// store |xxxxxx101xxxxxxxx 92 |
|
||||
// buffer |xxxxxx101xxxxxxxx 92 |
|
||||
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
|
||||
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(97) }, 0, 193, 101, 193)
|
||||
|
||||
// Then flush almost all input.
|
||||
// store | 100 x1x 92 |
|
||||
|
@ -337,7 +386,7 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// store (where it fits), space is freed up for the read operation.
|
||||
// store |xxxxx100xxxxxx 93 |
|
||||
// buffer |xxxxx100xxxxxx 93 |
|
||||
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
|
||||
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(96) }, 0, 193, 100, 193)
|
||||
|
||||
// Now flush only one rune from the cache.
|
||||
// store |1 xxxx99xxxxx 93 |
|
||||
|
@ -349,7 +398,7 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
// the data is moved to the start and no reallocation is needed.
|
||||
// store |1 xxxx99xxxxx 93 |
|
||||
// buffer |xxxx99xxxxx 93 |
|
||||
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
|
||||
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(189) }, 0, 193, 193, 193)
|
||||
}
|
||||
|
||||
func makeLargeStubReader() (*StubReader, int) {
|
||||
|
|
|
@ -70,15 +70,16 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
|
||||
reader *read.Buffer // the input data reader
|
||||
lastRune rune // the rune as retrieved by the last NextRune() calll
|
||||
lastRuneErr error // the error for the last NextRune() call
|
||||
runeRead bool // whether or not a rune was read using NextRune()
|
||||
runes []rune // the rune stack
|
||||
tokens []Token // the token stack
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
stackFrame *stackFrame // the current stack frame
|
||||
reader *read.Buffer // the input data reader
|
||||
lastRune rune // the rune as retrieved by the last NextRune() call
|
||||
lastRuneWidth int // the width in bytes of the last read rune
|
||||
lastRuneErr error // the error for the last NextRune() call
|
||||
runeRead bool // whether or not a rune was read using NextRune()
|
||||
runes []rune // the rune stack
|
||||
tokens []Token // the token stack
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
stackFrame *stackFrame // the current stack frame
|
||||
}
|
||||
|
||||
type stackFrame struct {
|
||||
|
@ -129,8 +130,9 @@ func (i *API) NextRune() (rune, error) {
|
|||
"without a prior call to Accept()")
|
||||
}
|
||||
|
||||
readRune, err := i.reader.RuneAt(i.stackFrame.offset)
|
||||
readRune, runeWidth, err := i.reader.RuneAt(i.stackFrame.offset)
|
||||
i.lastRune = readRune
|
||||
i.lastRuneWidth = runeWidth
|
||||
i.lastRuneErr = err
|
||||
i.runeRead = true
|
||||
|
||||
|
@ -140,7 +142,7 @@ func (i *API) NextRune() (rune, error) {
|
|||
// PeekRune returns the rune at the provided offset.
|
||||
//
|
||||
// The read cursor and current read offset are not updated by this operation.
|
||||
func (i *API) PeekRune(offset int) (rune, error) {
|
||||
func (i *API) PeekRune(offset int) (rune, int, error) {
|
||||
return i.reader.RuneAt(i.stackFrame.offset + offset)
|
||||
}
|
||||
|
||||
|
@ -158,10 +160,10 @@ func (i *API) Accept() {
|
|||
"but the prior call to NextRune() failed")
|
||||
}
|
||||
|
||||
i.accept(i.lastRune)
|
||||
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
||||
}
|
||||
|
||||
func (i *API) accept(runes ...rune) {
|
||||
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||
curRuneEnd := i.stackFrame.runeEnd
|
||||
newRuneEnd := curRuneEnd + len(runes)
|
||||
|
||||
|
@ -179,7 +181,7 @@ func (i *API) accept(runes ...rune) {
|
|||
i.stackFrame.moveCursorByRune(r)
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
i.stackFrame.offset += len(runes)
|
||||
i.stackFrame.offset += width
|
||||
i.runeRead = false
|
||||
}
|
||||
|
||||
|
@ -216,6 +218,8 @@ func (i *API) Fork() int {
|
|||
i.stackLevel++
|
||||
i.runeRead = false
|
||||
|
||||
// TODO do some good benchmarking on these two options. The explicit version might be
|
||||
// the faster one, but I am not sure of that right now.
|
||||
// A
|
||||
// i.stackFrames[i.stackLevel] = *i.stackFrame
|
||||
// i.stackFrame = &i.stackFrames[i.stackLevel]
|
||||
|
|
|
@ -29,10 +29,10 @@ func ExampleAPI_NextRune() {
|
|||
func ExampleAPI_PeekRune() {
|
||||
api := tokenize.NewAPI("The input that the API will handle")
|
||||
|
||||
r1, err := api.PeekRune(19) // 'A'
|
||||
r2, err := api.PeekRune(20) // 'P'
|
||||
r3, err := api.PeekRune(21) // 'I'
|
||||
_, err = api.PeekRune(100) // EOF
|
||||
r1, _, err := api.PeekRune(19) // 'A'
|
||||
r2, _, err := api.PeekRune(20) // 'P'
|
||||
r3, _, err := api.PeekRune(21) // 'I'
|
||||
_, _, err = api.PeekRune(100) // EOF
|
||||
|
||||
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
|
||||
|
||||
|
|
|
@ -336,9 +336,9 @@ var T = struct {
|
|||
// MatchRune creates a Handler function that matches against the provided rune.
|
||||
func MatchRune(expected rune) Handler {
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err == nil && r == expected {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -349,13 +349,13 @@ func MatchRune(expected rune) Handler {
|
|||
// one of the provided runes. The first match counts.
|
||||
func MatchRunes(expected ...rune) Handler {
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range expected {
|
||||
if r == e {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -375,9 +375,9 @@ func MatchRuneRange(start rune, end rune) Handler {
|
|||
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
}
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err == nil && r >= start && r <= end {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -388,18 +388,18 @@ func MatchRuneRange(start rune, end rune) Handler {
|
|||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||||
func MatchNewline() Handler {
|
||||
return func(t *API) bool {
|
||||
r1, err := t.PeekRune(0)
|
||||
r1, _, err := t.PeekRune(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if r1 == '\n' {
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
return true
|
||||
}
|
||||
if r1 == '\r' {
|
||||
r2, err := t.PeekRune(1)
|
||||
r2, _, err := t.PeekRune(1)
|
||||
if err == nil && r2 == '\n' {
|
||||
t.accept(r1, r2)
|
||||
t.acceptRunes(2, r1, r2)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -433,19 +433,20 @@ func MatchBlank() Handler {
|
|||
func MatchBlanks() Handler {
|
||||
return func(t *API) bool {
|
||||
// Match the first blank.
|
||||
r, err := t.PeekRune(0)
|
||||
r, _, err := t.PeekRune(0)
|
||||
if err != nil || (r != ' ' && r != '\t') {
|
||||
return false
|
||||
}
|
||||
t.acceptRunes(1, r)
|
||||
|
||||
// Now match any number of followup blanks. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
for {
|
||||
r, err := t.PeekRune(0)
|
||||
r, _, err := t.PeekRune(0)
|
||||
if err != nil || (r != ' ' && r != '\t') {
|
||||
return true
|
||||
}
|
||||
t.accept(r)
|
||||
t.acceptRunes(1, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -456,35 +457,35 @@ func MatchBlanks() Handler {
|
|||
func MatchWhitespace() Handler {
|
||||
return func(t *API) bool {
|
||||
// Match the first whitespace.
|
||||
r1, err := t.PeekRune(0)
|
||||
r1, _, err := t.PeekRune(0)
|
||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
||||
return false
|
||||
}
|
||||
if r1 == '\r' {
|
||||
r2, err := t.PeekRune(1)
|
||||
r2, _, err := t.PeekRune(1)
|
||||
if err != nil || r2 != '\n' {
|
||||
return false
|
||||
}
|
||||
t.accept(r1, r2)
|
||||
t.acceptRunes(2, r1, r2)
|
||||
} else {
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
}
|
||||
|
||||
// Now match any number of followup whitespace. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
for {
|
||||
r1, err := t.PeekRune(0)
|
||||
r1, _, err := t.PeekRune(0)
|
||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
||||
return true
|
||||
}
|
||||
if r1 == '\r' {
|
||||
r2, err := t.PeekRune(1)
|
||||
r2, _, err := t.PeekRune(1)
|
||||
if err != nil || r2 != '\n' {
|
||||
return true
|
||||
}
|
||||
t.accept(r1, r2)
|
||||
t.acceptRunes(2, r1, r2)
|
||||
} else {
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -504,9 +505,9 @@ func MatchUnicodeSpace() Handler {
|
|||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||||
func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err == nil && callback(r) {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -516,18 +517,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
|
|||
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
||||
func MatchEndOfLine() Handler {
|
||||
return func(t *API) bool {
|
||||
r1, err := t.PeekRune(0)
|
||||
r1, _, err := t.PeekRune(0)
|
||||
if err != nil {
|
||||
return err == io.EOF
|
||||
}
|
||||
if r1 == '\n' {
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
return true
|
||||
}
|
||||
if r1 == '\r' {
|
||||
r2, _ := t.PeekRune(1)
|
||||
r2, _, _ := t.PeekRune(1)
|
||||
if r2 == '\n' {
|
||||
t.accept(r1, r2)
|
||||
t.acceptRunes(2, r1, r2)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -537,14 +538,17 @@ func MatchEndOfLine() Handler {
|
|||
|
||||
// MatchStr creates a Handler that matches the input against the provided string.
|
||||
func MatchStr(expected string) Handler {
|
||||
expectedRunes := []rune(expected)
|
||||
width := len(expected)
|
||||
|
||||
return func(t *API) bool {
|
||||
for i, e := range expected {
|
||||
r, err := t.PeekRune(i)
|
||||
for i, e := range expectedRunes {
|
||||
r, _, err := t.PeekRune(i)
|
||||
if err != nil || e != r {
|
||||
return false
|
||||
}
|
||||
}
|
||||
t.accept([]rune(expected)...)
|
||||
t.acceptRunes(width, expectedRunes...)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -553,16 +557,18 @@ func MatchStr(expected string) Handler {
|
|||
// provided string in a case-insensitive manner.
|
||||
func MatchStrNoCase(expected string) Handler {
|
||||
l := len([]rune(expected))
|
||||
matches := make([]rune, l)
|
||||
return func(t *API) bool {
|
||||
matches := make([]rune, l)
|
||||
width := 0
|
||||
for i, e := range expected {
|
||||
r, err := t.PeekRune(i)
|
||||
r, w, err := t.PeekRune(i)
|
||||
if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) {
|
||||
return false
|
||||
}
|
||||
matches[i] = r
|
||||
width += w
|
||||
}
|
||||
t.accept(matches...)
|
||||
t.acceptRunes(width, matches...)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -882,9 +888,9 @@ func MatchAnyRune() Handler {
|
|||
// UTF8 rune can be read from the input.
|
||||
func MatchValidRune() Handler {
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err == nil && r != utf8.RuneError {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -895,9 +901,9 @@ func MatchValidRune() Handler {
|
|||
// UTF8 rune can be read from the input.
|
||||
func MatchInvalidRune() Handler {
|
||||
return func(t *API) bool {
|
||||
r, err := t.PeekRune(0)
|
||||
r, w, err := t.PeekRune(0)
|
||||
if err == nil && r == utf8.RuneError {
|
||||
t.accept(r)
|
||||
t.acceptRunes(w, r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -949,45 +955,45 @@ func MatchFloat() Handler {
|
|||
// False values: false, FALSE, False, 0, f, F
|
||||
func MatchBoolean() Handler {
|
||||
return func(t *API) bool {
|
||||
r1, err := t.PeekRune(0)
|
||||
r1, _, err := t.PeekRune(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if r1 == '1' || r1 == '0' {
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
return true
|
||||
}
|
||||
if r1 == 't' || r1 == 'T' {
|
||||
r2, _ := t.PeekRune(1)
|
||||
r3, _ := t.PeekRune(2)
|
||||
r4, err := t.PeekRune(3)
|
||||
r2, _, _ := t.PeekRune(1)
|
||||
r3, _, _ := t.PeekRune(2)
|
||||
r4, _, err := t.PeekRune(3)
|
||||
if err == nil && r2 == 'r' && r3 == 'u' && r4 == 'e' {
|
||||
t.accept(r1, r2, r3, r4)
|
||||
t.acceptRunes(4, r1, r2, r3, r4)
|
||||
return true
|
||||
}
|
||||
if err == nil && r1 == 'T' && r2 == 'R' && r3 == 'U' && r4 == 'E' {
|
||||
t.accept(r1, r2, r3, r4)
|
||||
t.acceptRunes(4, r1, r2, r3, r4)
|
||||
return true
|
||||
}
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
return true
|
||||
}
|
||||
|
||||
if r1 == 'f' || r1 == 'F' {
|
||||
r2, _ := t.PeekRune(1)
|
||||
r3, _ := t.PeekRune(2)
|
||||
r4, _ := t.PeekRune(3)
|
||||
r5, err := t.PeekRune(4)
|
||||
r2, _, _ := t.PeekRune(1)
|
||||
r3, _, _ := t.PeekRune(2)
|
||||
r4, _, _ := t.PeekRune(3)
|
||||
r5, _, err := t.PeekRune(4)
|
||||
|
||||
if err == nil && r2 == 'a' && r3 == 'l' && r4 == 's' && r5 == 'e' {
|
||||
t.accept(r1, r2, r3, r4, r5)
|
||||
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
||||
return true
|
||||
}
|
||||
if err == nil && r1 == 'F' && r2 == 'A' && r3 == 'L' && r4 == 'S' && r5 == 'E' {
|
||||
t.accept(r1, r2, r3, r4, r5)
|
||||
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
||||
return true
|
||||
}
|
||||
t.accept(r1)
|
||||
t.acceptRunes(1, r1)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
Loading…
Reference in New Issue