Bytes reader working, now carry on switching to byte reading in the tokenizer code.
This commit is contained in:
parent
17935b7534
commit
d4492e4f0a
244
read/read.go
244
read/read.go
|
@ -4,27 +4,29 @@
|
||||||
// Let's say we've got the following input coming up in the io.Reader that is
|
// Let's say we've got the following input coming up in the io.Reader that is
|
||||||
// wrapped by the Reader:
|
// wrapped by the Reader:
|
||||||
//
|
//
|
||||||
// |H|e|l|l|o|,| |w|o|r|l|d|!| <-- runes
|
// |H|e|l|l|o|,| |w|o|r|l|d|!| <-- bytes
|
||||||
// 0 6 12 <-- rune offset
|
// 0 6 12 <-- byte offset
|
||||||
//
|
//
|
||||||
// The Reader can now be used to retrieve runes from the input, based on their
|
// The Reader can now be used to retrieve data from the input, based on their
|
||||||
// offset, using RuneAt(offset). Normally these runes will be retrieved in
|
// byte offset, e.g. using RuneAt(offset) or ByteAt(offset). Normally these data
|
||||||
// sequence, but that is not a requirement. Let's say we retrieve the rune with
|
// will be retrieved in sequence by the user of this code, but that is not a
|
||||||
// offset 6 from the input (the 'w'), then the Reader buffer be filled with runes
|
// requirement. Let's say we retrieve the byte with offset 6 from the input
|
||||||
// from the io.Reader until there are enough runes available to return the rune
|
// (the 'w'), then the Reader buffer will be filled with bytes from the io.Reader
|
||||||
// for offset 6:
|
// until there are enough bytes available to return the data for offset 6:
|
||||||
//
|
//
|
||||||
// |H|e|l|l|o| |w|
|
// |H|e|l|l|o| |w|
|
||||||
// 0 6
|
// 0 6
|
||||||
//
|
//
|
||||||
// Using RuneAt, you can retrieve arbitrary runes. If you request one that is
|
// This means that you can retrieve data for arbitrary offsets. If you request
|
||||||
// in the Reader buffer, then the buffered rune is returned. If you request one
|
// an offset that is already in the Reader buffer, then the buffered data are
|
||||||
// that is not in the buffer, then the buffer will be expanded.
|
// returned. If you request one that is not in the buffer, then the buffer will
|
||||||
|
// be expanded.
|
||||||
//
|
//
|
||||||
// To make this into a sliding window, the Reader provides the method
|
// To make this into a sliding window (preserving memory space while scanning
|
||||||
// Flush(numberOfRunes). This method will drop the provided number of runes from
|
// the input data), the Reader provides the method Flush(numberOfBytes).
|
||||||
// the Reader buffer. So when we'd do a Flush(3) on the example buffer from above,
|
// This method will drop the provided number of bytes from the Reader buffer.
|
||||||
// then the Reader buffer would become:
|
// So when we'd do a Flush(3) on the example buffer from above, then the Reader
|
||||||
|
// buffer would become:
|
||||||
//
|
//
|
||||||
// |l|o| |w|
|
// |l|o| |w|
|
||||||
// 0 3
|
// 0 3
|
||||||
|
@ -79,32 +81,33 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Buffer wraps around a bufio.Reader and provides an additional layer of
|
// Buffer wraps around a bufio.Reader and provides an additional layer of
|
||||||
// buffering that allows us to read the same runes over and over again.
|
// buffering that allows us to read the same data over and over again.
|
||||||
// This is useful for implementing a parser that must be able to do lookahead
|
// This is useful for implementing a parser that must be able to do lookahead
|
||||||
// on the input, returning to the original input position after finishing
|
// on the input, returning to the original input position after finishing
|
||||||
// that lookahead.
|
// that lookahead.
|
||||||
//
|
//
|
||||||
// To minimze memory use, it is also possible to flush the read buffer when there is
|
// To minimize memory use, it is also possible to flush the read buffer when there is
|
||||||
// no more need to go back to previously read runes.
|
// no more need to go back to previously read data.
|
||||||
//
|
//
|
||||||
// The parserkit.reader.Reader is used internally by tokenize.API.
|
// This parsekit.read.Buffer is used internally by tokenize.API.
|
||||||
type Buffer struct {
|
type Buffer struct {
|
||||||
bufio *bufio.Reader // used for ReadRune()
|
bufio *bufio.Reader // used for ReadRune()
|
||||||
store []rune // buffer store, the buffer field is a slice on top of this one
|
store []byte // buffer store, the buffer field is a slice on top of this one
|
||||||
buffer []rune // input buffer, holding runes that were read from input
|
buffer []byte // input buffer, holding runes that were read from input
|
||||||
err error // a read error, if one occurred
|
err error // a read error, if one occurred
|
||||||
errOffset int // the offset in the buffer at which the read error was encountered
|
errOffset int // the offset in the buffer at which the read error was encountered
|
||||||
firstReadDone bool // whether or not the first read was done
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// RuneAt reads the rune at the provided rune offset.
|
// RuneAt reads the rune at the provided byte offset.
|
||||||
//
|
//
|
||||||
// This offset is relative to the current starting position of the Buffer.
|
// The offset is relative to the current starting position of the Buffer.
|
||||||
// When starting reading, offset 0 will point at the start of the input.
|
// When starting reading, offset 0 will point at the start of the input.
|
||||||
// After flushing, offset 0 will point at the input up to where the flush was done.
|
// After flushing, offset 0 will point at the input up to where the flush
|
||||||
|
// was done.
|
||||||
//
|
//
|
||||||
// The error return value will be nil when reading was successful.
|
// When reading was successful, the rune and the width of the rune in bytes
|
||||||
// When an invalid rune is encountered on the input, the error will be nil,
|
// will be returned. The returned error will be nil.
|
||||||
|
// When an invalid UTF8 rune is encountered on the input, the error will be nil,
|
||||||
// but the rune will be utf8.RuneError
|
// but the rune will be utf8.RuneError
|
||||||
//
|
//
|
||||||
// When reading failed, the rune will be utf8.RuneError and the error will
|
// When reading failed, the rune will be utf8.RuneError and the error will
|
||||||
|
@ -113,48 +116,83 @@ type Buffer struct {
|
||||||
//
|
//
|
||||||
// Once a read error is encountered, that same read error is guaranteed
|
// Once a read error is encountered, that same read error is guaranteed
|
||||||
// to be returned on every subsequent read at or beyond the provided offset.
|
// to be returned on every subsequent read at or beyond the provided offset.
|
||||||
func (r *Buffer) RuneAt(offset int) (rune, error) {
|
func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
|
||||||
// Re-issue a previously seen read error.
|
// Shortcut: re-issue a previously seen read error.
|
||||||
if r.err != nil && offset >= r.errOffset {
|
if buf.err != nil && offset >= buf.errOffset {
|
||||||
return utf8.RuneError, r.err
|
return utf8.RuneError, 0, buf.err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rune at provided offset is not yet available in the input buffer.
|
// Compute the number of bytes that we need in the buffer to be able
|
||||||
// Read runes until we have enough runes to satisfy the offset.
|
// to return the rune at the provided byte offset.
|
||||||
l := len(r.buffer)
|
bufferLen := len(buf.buffer)
|
||||||
|
requiredLen := offset + utf8.UTFMax
|
||||||
// Number of runes to add to the buffer to have enough space to store
|
if requiredLen > bufferLen && buf.err == nil {
|
||||||
// the rune at the offset
|
buf.grow(requiredLen)
|
||||||
n := offset - l + 1
|
for writeAt := bufferLen; writeAt < requiredLen; writeAt++ {
|
||||||
|
b, err := buf.bufio.ReadByte()
|
||||||
if n > 0 {
|
|
||||||
r.grow(n)
|
|
||||||
var readRune rune
|
|
||||||
var err error
|
|
||||||
for writeAt := l; writeAt <= offset; writeAt++ {
|
|
||||||
readRune, _, err = r.bufio.ReadRune()
|
|
||||||
|
|
||||||
// Skip BOM.
|
|
||||||
if !r.firstReadDone {
|
|
||||||
r.firstReadDone = true
|
|
||||||
if readRune == '\uFEFF' {
|
|
||||||
writeAt--
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle errors.
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
r.err = err
|
buf.err = err
|
||||||
r.errOffset = writeAt
|
buf.errOffset = writeAt
|
||||||
return utf8.RuneError, err
|
buf.buffer = buf.buffer[:writeAt]
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
buf.buffer[writeAt] = b
|
||||||
r.buffer[writeAt] = readRune
|
|
||||||
}
|
}
|
||||||
return readRune, nil
|
|
||||||
}
|
}
|
||||||
return r.buffer[offset], nil
|
|
||||||
|
if buf.err != nil && offset >= buf.errOffset {
|
||||||
|
return utf8.RuneError, 0, buf.err
|
||||||
|
}
|
||||||
|
|
||||||
|
r, w := utf8.DecodeRune(buf.buffer[offset:])
|
||||||
|
return r, w, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ByteAt reads the byte at the provided byte offset.
|
||||||
|
//
|
||||||
|
// The offset is relative to the current starting position of the Buffer.
|
||||||
|
// When starting reading, offset 0 will point at the start of the input.
|
||||||
|
// After flushing, offset 0 will point at the input up to where the flush
|
||||||
|
// was done.
|
||||||
|
//
|
||||||
|
// When reading was successful, the byte will be returned. The returned
|
||||||
|
// error will be nil.
|
||||||
|
//
|
||||||
|
// When reading failed, the byte will be 0x00 and the error will
|
||||||
|
// be not nil. One special read fail is actually a normal situation: end
|
||||||
|
// of file reached. In that case, the returned error will be io.EOF.
|
||||||
|
//
|
||||||
|
// Once a read error is encountered, that same read error is guaranteed
|
||||||
|
// to be returned on every subsequent read at or beyond the provided offset.
|
||||||
|
func (buf *Buffer) ByteAt(offset int) (byte, error) {
|
||||||
|
// Shortcut: re-issue a previously seen read error.
|
||||||
|
if buf.err != nil && offset >= buf.errOffset {
|
||||||
|
return 0, buf.err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the number of bytes that we need in the buffer to be able
|
||||||
|
// to return the byte at the provided byte offset.
|
||||||
|
bufferLen := len(buf.buffer)
|
||||||
|
requiredLen := offset + 1
|
||||||
|
if requiredLen > bufferLen && buf.err == nil {
|
||||||
|
buf.grow(requiredLen)
|
||||||
|
for writeAt := bufferLen; writeAt < requiredLen; writeAt++ {
|
||||||
|
b, err := buf.bufio.ReadByte()
|
||||||
|
if err != nil {
|
||||||
|
buf.err = err
|
||||||
|
buf.errOffset = writeAt
|
||||||
|
buf.buffer = buf.buffer[:writeAt]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
buf.buffer[writeAt] = b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if buf.err != nil && offset >= buf.errOffset {
|
||||||
|
return 0, buf.err
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.buffer[offset], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// The upcoming code was inspired heavily by the Go built-in 'bytes' package.
|
// The upcoming code was inspired heavily by the Go built-in 'bytes' package.
|
||||||
|
@ -168,82 +206,80 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
||||||
// grow grows the buffer to guarantee space for n more bytes.
|
// grow grows the buffer to guarantee space for requiredSize bytes in total.
|
||||||
// It returns the index where bytes should be written.
|
// It resizes the buffer slice to a length of requiredSize bytes; nothing is returned.
|
||||||
// If the buffer can't grow it will panic with ErrTooLarge.
|
// If the buffer can't grow it will panic with ErrTooLarge.
|
||||||
func (r *Buffer) grow(n int) {
|
func (buf *Buffer) grow(requiredSize int) {
|
||||||
// Instantiate new buffer store
|
// Instantiate new buffer store
|
||||||
if r.store == nil {
|
if buf.store == nil {
|
||||||
b := smallBufferSize
|
b := smallBufferSize
|
||||||
if b < n {
|
if b < requiredSize {
|
||||||
b = n
|
b = requiredSize
|
||||||
}
|
}
|
||||||
r.store = make([]rune, 0, b)
|
buf.store = make([]byte, 0, b)
|
||||||
r.buffer = r.store[:n]
|
buf.buffer = buf.store[:requiredSize]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
lenBuffer := len(r.buffer)
|
capBuffer := cap(buf.buffer)
|
||||||
capBuffer := cap(r.buffer)
|
|
||||||
freeBuffer := capBuffer - lenBuffer
|
|
||||||
newSize := lenBuffer + n
|
|
||||||
|
|
||||||
// Grow the buffer store by reslicing within the available capacity.
|
// Grow the buffer store by reslicing within the available capacity.
|
||||||
if freeBuffer >= n {
|
if capBuffer >= requiredSize {
|
||||||
r.buffer = r.buffer[:newSize]
|
buf.buffer = buf.buffer[:requiredSize]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
capStore := cap(r.store)
|
capStore := cap(buf.store)
|
||||||
freeAtStartOfStore := capStore - capBuffer
|
freeAtStartOfStore := capStore - capBuffer
|
||||||
|
|
||||||
// Grow the buffer by moving the data to the start of the store.
|
// Grow the buffer by moving the data to the start of the store.
|
||||||
// Note: according to the spec, overlapping slices are allowed with copy().
|
// Note: according to the spec, overlapping slices are allowed with copy().
|
||||||
if freeAtStartOfStore > 0 && newSize <= capStore {
|
if freeAtStartOfStore > 0 && requiredSize <= capStore {
|
||||||
r.store = r.store[0:newSize]
|
buf.store = buf.store[0:requiredSize]
|
||||||
copy(r.store, r.buffer)
|
copy(buf.store, buf.buffer)
|
||||||
r.buffer = r.store[:newSize]
|
buf.buffer = buf.store[:requiredSize]
|
||||||
r.store = r.store[:0]
|
buf.store = buf.store[:0]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grow the buffer store by allocating a new one and copying the data.
|
// Grow the buffer store by allocating a new one and copying the data.
|
||||||
buf := makeSlice(2*capStore + n)
|
newStore := makeSlice(2*capStore + requiredSize)
|
||||||
copy(buf, r.buffer)
|
copy(newStore, buf.buffer)
|
||||||
r.store = buf
|
buf.store = newStore
|
||||||
r.buffer = r.store[:newSize]
|
buf.buffer = buf.store[:requiredSize]
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||||
// with ErrTooLarge.
|
// with ErrTooLarge.
|
||||||
func makeSlice(n int) []rune {
|
func makeSlice(n int) []byte {
|
||||||
// If the make fails, give a known error.
|
// If the make fails, give a known error.
|
||||||
defer func() {
|
defer func() {
|
||||||
if recover() != nil {
|
if recover() != nil {
|
||||||
panic(ErrTooLarge)
|
panic(ErrTooLarge)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return make([]rune, 0, n)
|
return make([]byte, 0, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush deletes the provided number of runes from the start of the Buffer.
|
// Flush deletes the provided number of bytes from the start of the Buffer.
|
||||||
// After flushing the Buffer, offset 0 as used by RuneAt() will point to
|
// After flushing the Buffer, offset 0 as used by RuneAt() will point to
|
||||||
// the rune that comes after the runes that were flushed.
|
// the byte that comes after the bytes that were flushed.
|
||||||
// So what this basically does, is turn the Buffer into a sliding window.
|
// So what this basically does, is turn the Buffer into a sliding window.
|
||||||
func (r *Buffer) Flush(numberOfRunes int) {
|
func (buf *Buffer) Flush(numberOfBytes int) {
|
||||||
l := len(r.buffer)
|
if numberOfBytes == 0 {
|
||||||
if numberOfRunes > l {
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
bufferLen := len(buf.buffer)
|
||||||
|
if numberOfBytes > bufferLen {
|
||||||
panic(fmt.Sprintf(
|
panic(fmt.Sprintf(
|
||||||
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
||||||
"exceeds size of the buffer (%d)", numberOfRunes, l))
|
"exceeds size of the buffer (%d)", numberOfBytes, bufferLen))
|
||||||
}
|
}
|
||||||
if numberOfRunes == 0 {
|
if bufferLen == numberOfBytes {
|
||||||
|
buf.buffer = buf.store[:0]
|
||||||
|
buf.errOffset = 0
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if l == numberOfRunes {
|
buf.buffer = buf.buffer[numberOfBytes:]
|
||||||
r.buffer = r.store[:0]
|
if buf.err != nil {
|
||||||
r.errOffset = 0
|
buf.errOffset = buf.errOffset - numberOfBytes
|
||||||
return
|
|
||||||
}
|
|
||||||
r.buffer = r.buffer[numberOfRunes:]
|
|
||||||
if r.err != nil {
|
|
||||||
r.errOffset = r.errOffset - numberOfRunes
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,8 +12,8 @@ import (
|
||||||
func ExampleNew() {
|
func ExampleNew() {
|
||||||
printFirstRuneOf := func(input interface{}) {
|
printFirstRuneOf := func(input interface{}) {
|
||||||
r := New(input)
|
r := New(input)
|
||||||
c, _ := r.RuneAt(0)
|
c, w, _ := r.RuneAt(0)
|
||||||
fmt.Printf("%q\n", c)
|
fmt.Printf("rune %q, width %d\n", c, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
simpleString := "Hello, world!"
|
simpleString := "Hello, world!"
|
||||||
|
@ -25,14 +25,14 @@ func ExampleNew() {
|
||||||
bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?"))
|
bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?"))
|
||||||
printFirstRuneOf(bufioReaderPointer)
|
printFirstRuneOf(bufioReaderPointer)
|
||||||
|
|
||||||
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Where do we go, world?")))
|
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Ɍead the manual!")))
|
||||||
printFirstRuneOf(bufioReaderValue)
|
printFirstRuneOf(bufioReaderValue)
|
||||||
|
|
||||||
// Output:
|
// Output:
|
||||||
// 'H'
|
// rune 'H', width 1
|
||||||
// 'G'
|
// rune 'G', width 1
|
||||||
// 'W'
|
// rune 'W', width 1
|
||||||
// 'W'
|
// rune 'Ɍ', width 2
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||||||
|
@ -46,13 +46,13 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||||||
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
||||||
} {
|
} {
|
||||||
r := New(test.input)
|
r := New(test.input)
|
||||||
firstRune, _ := r.RuneAt(0)
|
firstRune, _, _ := r.RuneAt(0)
|
||||||
if firstRune != 'H' {
|
if firstRune != 'H' {
|
||||||
t.Errorf("[%s] first rune not 'H'", test.name)
|
t.Errorf("[%s] first rune not 'H'", test.name)
|
||||||
}
|
}
|
||||||
lastRune, _ := r.RuneAt(12)
|
lastRune, _, _ := r.RuneAt(12)
|
||||||
if lastRune != '!' {
|
if lastRune != '!' {
|
||||||
t.Errorf("[%s] last rune not '!'", test.name)
|
t.Errorf("[%s] last rune not '!', but %q", test.name, lastRune)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,41 +63,63 @@ func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
||||||
"parsekit.read.New(): no support for input of type int")
|
"parsekit.read.New(): no support for input of type int")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt(t *testing.T) {
|
func TestBuffer_ByteAt(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) byte { b, _ := r.ByteAt(i); return b }
|
||||||
|
|
||||||
// It is possible to go back and forth while reading the input.
|
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||||
assertEqual(t, "H!wH", result)
|
assertEqual(t, "H!wH", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuffer_RuneAt(t *testing.T) {
|
||||||
|
r := New(strings.NewReader("¡pןɹoʍ 'oןןǝH"))
|
||||||
|
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
// It is possible to go back and forth while reading the input.
|
||||||
|
result := fmt.Sprintf("%c%c%c%c", at(0), at(5), at(8), at(0))
|
||||||
|
assertEqual(t, "¡ɹʍ¡", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_ByteAt_endOfFile(t *testing.T) {
|
||||||
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
|
b, err := r.ByteAt(13)
|
||||||
|
result := fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||||||
|
assertEqual(t, "'\\x00' EOF true", result)
|
||||||
|
|
||||||
|
b, err = r.ByteAt(20)
|
||||||
|
result = fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||||||
|
assertEqual(t, "'\\x00' EOF true", result)
|
||||||
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
rn, err := r.RuneAt(13)
|
rn, _, err := r.RuneAt(13)
|
||||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
assertEqual(t, "'<27>' EOF true", result)
|
assertEqual(t, "'<27>' EOF true", result)
|
||||||
|
|
||||||
rn, err = r.RuneAt(20)
|
rn, _, err = r.RuneAt(20)
|
||||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
assertEqual(t, "'<27>' EOF true", result)
|
assertEqual(t, "'<27>' EOF true", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, \xcdworld!"))
|
r := New(strings.NewReader("Hello, \xcdworld!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||||
assertEqual(t, " <20>wo", result)
|
assertEqual(t, " <20>wo", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleBuffer_RuneAt() {
|
func ExampleBuffer_ByteAt() {
|
||||||
reader := New(strings.NewReader("Hello, world!"))
|
reader := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
fmt.Printf("Runes: ")
|
fmt.Printf("Runes: ")
|
||||||
for i := 0; ; i++ {
|
offset := 0
|
||||||
r, err := reader.RuneAt(i)
|
for {
|
||||||
|
r, err := reader.ByteAt(offset)
|
||||||
|
offset++
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("\nErr: %s\n", err)
|
fmt.Printf("\nErr: %s\n", err)
|
||||||
break
|
break
|
||||||
|
@ -110,18 +132,39 @@ func ExampleBuffer_RuneAt() {
|
||||||
// Err: EOF
|
// Err: EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
func ExampleBuffer_RuneAt() {
|
||||||
r := New(strings.NewReader("\uFEFFBommetje!"))
|
reader := New(strings.NewReader("Hello, pןɹoʍ!"))
|
||||||
b, _ := r.RuneAt(0)
|
|
||||||
o, _ := r.RuneAt(1)
|
fmt.Printf("Runes: ")
|
||||||
m, _ := r.RuneAt(2)
|
offset := 0
|
||||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
for {
|
||||||
assertEqual(t, "Bom", bom)
|
r, w, err := reader.RuneAt(offset)
|
||||||
|
offset += w
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("\nErr: %s\n", err)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Printf("%c", r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// Runes: Hello, pןɹoʍ!
|
||||||
|
// Err: EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO reimplement somewhere, maybe a separate call in the reader or should it be part of a parser?
|
||||||
|
// func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||||
|
// r := New(strings.NewReader("\uFEFFBommetje!"))
|
||||||
|
// b, _, _ := r.RuneAt(0)
|
||||||
|
// o, _, _ := r.RuneAt(1)
|
||||||
|
// m, _, _ := r.RuneAt(2)
|
||||||
|
// bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||||
|
// assertEqual(t, "Bom", bom)
|
||||||
|
// }
|
||||||
|
|
||||||
func TestBuffer_Flush(t *testing.T) {
|
func TestBuffer_Flush(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||||
result := fmt.Sprintf("%c", at(7))
|
result := fmt.Sprintf("%c", at(7))
|
||||||
|
@ -138,7 +181,7 @@ func TestBuffer_Flush(t *testing.T) {
|
||||||
|
|
||||||
func ExampleBuffer_Flush() {
|
func ExampleBuffer_Flush() {
|
||||||
r := New(strings.NewReader("dog eat dog!"))
|
r := New(strings.NewReader("dog eat dog!"))
|
||||||
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
at := func(offset int) rune { c, _, _ := r.RuneAt(offset); return c }
|
||||||
|
|
||||||
// Read from the first 4 runes of the input.
|
// Read from the first 4 runes of the input.
|
||||||
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
||||||
|
@ -162,30 +205,31 @@ func ExampleBuffer_Flush() {
|
||||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
// Fill buffer with "Hello, world!", the first 13 runes.
|
||||||
r.RuneAt(10)
|
rn, _, _ := r.RuneAt(12)
|
||||||
|
assertEqual(t, '!', rn)
|
||||||
|
|
||||||
// However, we flush 12 runes, which exceeds the buffer size.
|
// However, we flush 14 runes, which exceeds the buffer size.
|
||||||
assertPanic(t,
|
assertPanic(t,
|
||||||
func() { r.Flush(12) },
|
func() { r.Flush(14) },
|
||||||
"parsekit.read.Buffer.Flush(): number of runes to flush "+
|
"parsekit.read.Buffer.Flush(): number of runes to flush "+
|
||||||
"(12) exceeds size of the buffer (11)")
|
"(14) exceeds size of the buffer (13)")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
||||||
r := New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
_, err := r.RuneAt(13)
|
_, _, err := r.RuneAt(13)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(13)
|
_, _, err = r.RuneAt(13)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(14)
|
_, _, err = r.RuneAt(14)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
r.Flush(13)
|
r.Flush(13)
|
||||||
_, err = r.RuneAt(0)
|
_, _, err = r.RuneAt(0)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(1)
|
_, _, err = r.RuneAt(1)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(2)
|
_, _, err = r.RuneAt(2)
|
||||||
assertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,43 +247,43 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
|
||||||
r := New(input)
|
r := New(input)
|
||||||
|
|
||||||
// Read the last available rune.
|
// Read the last available rune.
|
||||||
readRune, _ := r.RuneAt(3)
|
readRune, _, _ := r.RuneAt(3)
|
||||||
assertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// Reading the next offset must result in the io.EOF error from the stub.
|
// Reading the next offset must result in the io.EOF error from the stub.
|
||||||
readRune, err := r.RuneAt(4)
|
readRune, _, err := r.RuneAt(4)
|
||||||
assertEqual(t, utf8.RuneError, readRune)
|
assertEqual(t, utf8.RuneError, readRune)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// Reading even further should yield the same io.EOF error.
|
// Reading even further should yield the same io.EOF error.
|
||||||
readRune, err = r.RuneAt(5)
|
readRune, _, err = r.RuneAt(5)
|
||||||
assertEqual(t, utf8.RuneError, readRune)
|
assertEqual(t, utf8.RuneError, readRune)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// After an error, we must still be able to read the last rune.
|
// After an error, we must still be able to read the last rune.
|
||||||
readRune, _ = r.RuneAt(3)
|
readRune, _, _ = r.RuneAt(3)
|
||||||
assertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// Flushing updates the error index too.
|
// Flushing updates the error index too.
|
||||||
r.Flush(3)
|
r.Flush(3)
|
||||||
|
|
||||||
// The last rune is now at offset 0.
|
// The last rune is now at offset 0.
|
||||||
readRune, _ = r.RuneAt(0)
|
readRune, _, _ = r.RuneAt(0)
|
||||||
assertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// The io.EOF is now at offset 1.
|
// The io.EOF is now at offset 1.
|
||||||
_, err = r.RuneAt(1)
|
_, _, err = r.RuneAt(1)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// Let's flush that last rune too.
|
// Let's flush that last rune too.
|
||||||
r.Flush(1)
|
r.Flush(1)
|
||||||
|
|
||||||
// The io.EOF is now at offset 0.
|
// The io.EOF is now at offset 0.
|
||||||
_, err = r.RuneAt(0)
|
_, _, err = r.RuneAt(0)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// And reading beyond that offset also yields io.EOF.
|
// And reading beyond that offset also yields io.EOF.
|
||||||
_, err = r.RuneAt(1)
|
_, _, err = r.RuneAt(1)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -247,13 +291,13 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, err := r.RuneAt(0)
|
readRune, _, err := r.RuneAt(0)
|
||||||
assertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
readRune, err = r.RuneAt(size - 1)
|
readRune, _, err = r.RuneAt(size - 1)
|
||||||
assertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
readRune, err = r.RuneAt(size)
|
readRune, _, err = r.RuneAt(size)
|
||||||
assertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
readRune, err = r.RuneAt(10)
|
readRune, _, err = r.RuneAt(10)
|
||||||
assertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,9 +305,9 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, _ := r.RuneAt(size - 200)
|
readRune, _, _ := r.RuneAt(size - 200)
|
||||||
assertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
readRune, _ = r.RuneAt(size - 1)
|
readRune, _, _ = r.RuneAt(size - 1)
|
||||||
assertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,7 +315,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, _ := r.RuneAt(size - 1)
|
readRune, _, _ := r.RuneAt(size - 1)
|
||||||
assertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -282,12 +326,17 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// The first read will create the standard cache.
|
// The first read will create the standard cache.
|
||||||
// store |x 64 |
|
// store |x 64 |
|
||||||
// buffer |x 64 |
|
// buffer |x 64 |
|
||||||
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
|
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 4, 64)
|
||||||
|
|
||||||
// The first 64 reads will fit in the standard cache.
|
// The first 64 bytes will fit in the standard cache.
|
||||||
// store |xxxx64xxxxx|
|
// store |xxxx64xxxxx|
|
||||||
// buffer |xxxx64xxxxx|
|
// buffer |xxxx64xxxxx|
|
||||||
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
|
//
|
||||||
|
// Note: in the test offset 60 is used instead of offset 63, because
|
||||||
|
// RuneAt() will fill the buffer with 4 bytes to accommodate the
|
||||||
|
// longest UTF8 character encodings. In all upcoming tests, the same
|
||||||
|
// logic applies to the RuneAt() calls.
|
||||||
|
assertCache(t, "read fill cache", r, func() { r.RuneAt(60) }, 0, 64, 64, 64)
|
||||||
|
|
||||||
// Flushing zero input keeps everything as-is.
|
// Flushing zero input keeps everything as-is.
|
||||||
// store |xxxx64xxxxx|
|
// store |xxxx64xxxxx|
|
||||||
|
@ -302,7 +351,7 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// Reading 65 chars will allocate a new store of 2 * size + n.
|
// Reading 65 chars will allocate a new store of 2 * size + n.
|
||||||
// store |xxxxx65xxxxx 128 |
|
// store |xxxxx65xxxxx 128 |
|
||||||
// buffer |xxxxx65xxxxx 128 |
|
// buffer |xxxxx65xxxxx 128 |
|
||||||
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
|
assertCache(t, "read cap + 1", r, func() { r.RuneAt(61) }, 0, 65+128, 65, 65+128)
|
||||||
|
|
||||||
// A partial flush frees the start of the store and moves
|
// A partial flush frees the start of the store and moves
|
||||||
// the buffer slice.
|
// the buffer slice.
|
||||||
|
@ -315,7 +364,7 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// without a new allocation.
|
// without a new allocation.
|
||||||
// store | 50 xxxxxxxxx143xxxxxxxx|
|
// store | 50 xxxxxxxxx143xxxxxxxx|
|
||||||
// buffer |xxxxxxxxx143xxxxxxxx|
|
// buffer |xxxxxxxxx143xxxxxxxx|
|
||||||
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
|
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(139) }, 0, 50+143, 143, 143)
|
||||||
|
|
||||||
// Flush the full input.
|
// Flush the full input.
|
||||||
// store | 193 |
|
// store | 193 |
|
||||||
|
@ -325,7 +374,7 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// Read a bit more than half the capacity.
|
// Read a bit more than half the capacity.
|
||||||
// store |xxxxxx101xxxxxxxx 92 |
|
// store |xxxxxx101xxxxxxxx 92 |
|
||||||
// buffer |xxxxxx101xxxxxxxx 92 |
|
// buffer |xxxxxx101xxxxxxxx 92 |
|
||||||
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
|
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(97) }, 0, 193, 101, 193)
|
||||||
|
|
||||||
// Then flush almost all input.
|
// Then flush almost all input.
|
||||||
// store | 100 x1x 92 |
|
// store | 100 x1x 92 |
|
||||||
|
@ -337,7 +386,7 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// store (where it fits), space is freed up for the read operation.
|
// store (where it fits), space is freed up for the read operation.
|
||||||
// store |xxxxx100xxxxxx 93 |
|
// store |xxxxx100xxxxxx 93 |
|
||||||
// buffer |xxxxx100xxxxxx 93 |
|
// buffer |xxxxx100xxxxxx 93 |
|
||||||
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
|
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(96) }, 0, 193, 100, 193)
|
||||||
|
|
||||||
// Now flush only one rune from the cache.
|
// Now flush only one rune from the cache.
|
||||||
// store |1 xxxx99xxxxx 93 |
|
// store |1 xxxx99xxxxx 93 |
|
||||||
|
@ -349,7 +398,7 @@ func TestAllocationPatterns(t *testing.T) {
|
||||||
// the data is moved to the start and no reallocation is needed.
|
// the data is moved to the start and no reallocation is needed.
|
||||||
// store |1 xxxx99xxxxx 93 |
|
// store |1 xxxx99xxxxx 93 |
|
||||||
// buffer |xxxx99xxxxx 93 |
|
// buffer |xxxx99xxxxx 93 |
|
||||||
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
|
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(189) }, 0, 193, 193, 193)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeLargeStubReader() (*StubReader, int) {
|
func makeLargeStubReader() (*StubReader, int) {
|
||||||
|
|
|
@ -70,15 +70,16 @@ import (
|
||||||
// can lead to hard to track bugs. I much prefer this forking method, since
|
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||||
// no bookkeeping has to be implemented when implementing a parser.
|
// no bookkeeping has to be implemented when implementing a parser.
|
||||||
type API struct {
|
type API struct {
|
||||||
reader *read.Buffer // the input data reader
|
reader *read.Buffer // the input data reader
|
||||||
lastRune rune // the rune as retrieved by the last NextRune() calll
|
lastRune rune // the rune as retrieved by the last NextRune() call
|
||||||
lastRuneErr error // the error for the last NextRune() call
|
lastRuneWidth int // the width in bytes of the last read rune
|
||||||
runeRead bool // whether or not a rune was read using NextRune()
|
lastRuneErr error // the error for the last NextRune() call
|
||||||
runes []rune // the rune stack
|
runeRead bool // whether or not a rune was read using NextRune()
|
||||||
tokens []Token // the token stack
|
runes []rune // the rune stack
|
||||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
tokens []Token // the token stack
|
||||||
stackLevel int // the current stack level
|
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||||
stackFrame *stackFrame // the current stack frame
|
stackLevel int // the current stack level
|
||||||
|
stackFrame *stackFrame // the current stack frame
|
||||||
}
|
}
|
||||||
|
|
||||||
type stackFrame struct {
|
type stackFrame struct {
|
||||||
|
@ -129,8 +130,9 @@ func (i *API) NextRune() (rune, error) {
|
||||||
"without a prior call to Accept()")
|
"without a prior call to Accept()")
|
||||||
}
|
}
|
||||||
|
|
||||||
readRune, err := i.reader.RuneAt(i.stackFrame.offset)
|
readRune, runeWidth, err := i.reader.RuneAt(i.stackFrame.offset)
|
||||||
i.lastRune = readRune
|
i.lastRune = readRune
|
||||||
|
i.lastRuneWidth = runeWidth
|
||||||
i.lastRuneErr = err
|
i.lastRuneErr = err
|
||||||
i.runeRead = true
|
i.runeRead = true
|
||||||
|
|
||||||
|
@ -140,7 +142,7 @@ func (i *API) NextRune() (rune, error) {
|
||||||
// PeekRune returns the rune at the provided offset.
|
// PeekRune returns the rune at the provided offset.
|
||||||
//
|
//
|
||||||
// The read cursor and current read offset are not updated by this operation.
|
// The read cursor and current read offset are not updated by this operation.
|
||||||
func (i *API) PeekRune(offset int) (rune, error) {
|
func (i *API) PeekRune(offset int) (rune, int, error) {
|
||||||
return i.reader.RuneAt(i.stackFrame.offset + offset)
|
return i.reader.RuneAt(i.stackFrame.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,10 +160,10 @@ func (i *API) Accept() {
|
||||||
"but the prior call to NextRune() failed")
|
"but the prior call to NextRune() failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
i.accept(i.lastRune)
|
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *API) accept(runes ...rune) {
|
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
curRuneEnd := i.stackFrame.runeEnd
|
||||||
newRuneEnd := curRuneEnd + len(runes)
|
newRuneEnd := curRuneEnd + len(runes)
|
||||||
|
|
||||||
|
@ -179,7 +181,7 @@ func (i *API) accept(runes ...rune) {
|
||||||
i.stackFrame.moveCursorByRune(r)
|
i.stackFrame.moveCursorByRune(r)
|
||||||
}
|
}
|
||||||
i.stackFrame.runeEnd = newRuneEnd
|
i.stackFrame.runeEnd = newRuneEnd
|
||||||
i.stackFrame.offset += len(runes)
|
i.stackFrame.offset += width
|
||||||
i.runeRead = false
|
i.runeRead = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,6 +218,8 @@ func (i *API) Fork() int {
|
||||||
i.stackLevel++
|
i.stackLevel++
|
||||||
i.runeRead = false
|
i.runeRead = false
|
||||||
|
|
||||||
|
// TODO do some good benchmarking on these two options. The explicit version might be
|
||||||
|
// the faster one, but I am not sure of that right now.
|
||||||
// A
|
// A
|
||||||
// i.stackFrames[i.stackLevel] = *i.stackFrame
|
// i.stackFrames[i.stackLevel] = *i.stackFrame
|
||||||
// i.stackFrame = &i.stackFrames[i.stackLevel]
|
// i.stackFrame = &i.stackFrames[i.stackLevel]
|
||||||
|
|
|
@ -29,10 +29,10 @@ func ExampleAPI_NextRune() {
|
||||||
func ExampleAPI_PeekRune() {
|
func ExampleAPI_PeekRune() {
|
||||||
api := tokenize.NewAPI("The input that the API will handle")
|
api := tokenize.NewAPI("The input that the API will handle")
|
||||||
|
|
||||||
r1, err := api.PeekRune(19) // 'A'
|
r1, _, err := api.PeekRune(19) // 'A'
|
||||||
r2, err := api.PeekRune(20) // 'P'
|
r2, _, err := api.PeekRune(20) // 'P'
|
||||||
r3, err := api.PeekRune(21) // 'I'
|
r3, _, err := api.PeekRune(21) // 'I'
|
||||||
_, err = api.PeekRune(100) // EOF
|
_, _, err = api.PeekRune(100) // EOF
|
||||||
|
|
||||||
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
|
fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
|
||||||
|
|
||||||
|
|
|
@ -336,9 +336,9 @@ var T = struct {
|
||||||
// MatchRune creates a Handler function that matches against the provided rune.
|
// MatchRune creates a Handler function that matches against the provided rune.
|
||||||
func MatchRune(expected rune) Handler {
|
func MatchRune(expected rune) Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && r == expected {
|
if err == nil && r == expected {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -349,13 +349,13 @@ func MatchRune(expected rune) Handler {
|
||||||
// one of the provided runes. The first match counts.
|
// one of the provided runes. The first match counts.
|
||||||
func MatchRunes(expected ...rune) Handler {
|
func MatchRunes(expected ...rune) Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
for _, e := range expected {
|
for _, e := range expected {
|
||||||
if r == e {
|
if r == e {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -375,9 +375,9 @@ func MatchRuneRange(start rune, end rune) Handler {
|
||||||
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||||
}
|
}
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && r >= start && r <= end {
|
if err == nil && r >= start && r <= end {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -388,18 +388,18 @@ func MatchRuneRange(start rune, end rune) Handler {
|
||||||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||||||
func MatchNewline() Handler {
|
func MatchNewline() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, err := t.PeekRune(0)
|
r1, _, err := t.PeekRune(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '\n' {
|
if r1 == '\n' {
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if r1 == '\r' {
|
||||||
r2, err := t.PeekRune(1)
|
r2, _, err := t.PeekRune(1)
|
||||||
if err == nil && r2 == '\n' {
|
if err == nil && r2 == '\n' {
|
||||||
t.accept(r1, r2)
|
t.acceptRunes(2, r1, r2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -433,19 +433,20 @@ func MatchBlank() Handler {
|
||||||
func MatchBlanks() Handler {
|
func MatchBlanks() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
// Match the first blank.
|
// Match the first blank.
|
||||||
r, err := t.PeekRune(0)
|
r, _, err := t.PeekRune(0)
|
||||||
if err != nil || (r != ' ' && r != '\t') {
|
if err != nil || (r != ' ' && r != '\t') {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
t.acceptRunes(1, r)
|
||||||
|
|
||||||
// Now match any number of followup blanks. We've already got
|
// Now match any number of followup blanks. We've already got
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
// a successful match at this point, so we'll always return true at the end.
|
||||||
for {
|
for {
|
||||||
r, err := t.PeekRune(0)
|
r, _, err := t.PeekRune(0)
|
||||||
if err != nil || (r != ' ' && r != '\t') {
|
if err != nil || (r != ' ' && r != '\t') {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.accept(r)
|
t.acceptRunes(1, r)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -456,35 +457,35 @@ func MatchBlanks() Handler {
|
||||||
func MatchWhitespace() Handler {
|
func MatchWhitespace() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
// Match the first whitespace.
|
// Match the first whitespace.
|
||||||
r1, err := t.PeekRune(0)
|
r1, _, err := t.PeekRune(0)
|
||||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if r1 == '\r' {
|
||||||
r2, err := t.PeekRune(1)
|
r2, _, err := t.PeekRune(1)
|
||||||
if err != nil || r2 != '\n' {
|
if err != nil || r2 != '\n' {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
t.accept(r1, r2)
|
t.acceptRunes(2, r1, r2)
|
||||||
} else {
|
} else {
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now match any number of followup whitespace. We've already got
|
// Now match any number of followup whitespace. We've already got
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
// a successful match at this point, so we'll always return true at the end.
|
||||||
for {
|
for {
|
||||||
r1, err := t.PeekRune(0)
|
r1, _, err := t.PeekRune(0)
|
||||||
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if r1 == '\r' {
|
||||||
r2, err := t.PeekRune(1)
|
r2, _, err := t.PeekRune(1)
|
||||||
if err != nil || r2 != '\n' {
|
if err != nil || r2 != '\n' {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.accept(r1, r2)
|
t.acceptRunes(2, r1, r2)
|
||||||
} else {
|
} else {
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -504,9 +505,9 @@ func MatchUnicodeSpace() Handler {
|
||||||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||||||
func MatchRuneByCallback(callback func(rune) bool) Handler {
|
func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && callback(r) {
|
if err == nil && callback(r) {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -516,18 +517,18 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||||||
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
||||||
func MatchEndOfLine() Handler {
|
func MatchEndOfLine() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, err := t.PeekRune(0)
|
r1, _, err := t.PeekRune(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err == io.EOF
|
return err == io.EOF
|
||||||
}
|
}
|
||||||
if r1 == '\n' {
|
if r1 == '\n' {
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == '\r' {
|
if r1 == '\r' {
|
||||||
r2, _ := t.PeekRune(1)
|
r2, _, _ := t.PeekRune(1)
|
||||||
if r2 == '\n' {
|
if r2 == '\n' {
|
||||||
t.accept(r1, r2)
|
t.acceptRunes(2, r1, r2)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -537,14 +538,17 @@ func MatchEndOfLine() Handler {
|
||||||
|
|
||||||
// MatchStr creates a Handler that matches the input against the provided string.
|
// MatchStr creates a Handler that matches the input against the provided string.
|
||||||
func MatchStr(expected string) Handler {
|
func MatchStr(expected string) Handler {
|
||||||
|
expectedRunes := []rune(expected)
|
||||||
|
width := len(expected)
|
||||||
|
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
for i, e := range expected {
|
for i, e := range expectedRunes {
|
||||||
r, err := t.PeekRune(i)
|
r, _, err := t.PeekRune(i)
|
||||||
if err != nil || e != r {
|
if err != nil || e != r {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
t.accept([]rune(expected)...)
|
t.acceptRunes(width, expectedRunes...)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -553,16 +557,18 @@ func MatchStr(expected string) Handler {
|
||||||
// provided string in a case-insensitive manner.
|
// provided string in a case-insensitive manner.
|
||||||
func MatchStrNoCase(expected string) Handler {
|
func MatchStrNoCase(expected string) Handler {
|
||||||
l := len([]rune(expected))
|
l := len([]rune(expected))
|
||||||
matches := make([]rune, l)
|
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
|
matches := make([]rune, l)
|
||||||
|
width := 0
|
||||||
for i, e := range expected {
|
for i, e := range expected {
|
||||||
r, err := t.PeekRune(i)
|
r, w, err := t.PeekRune(i)
|
||||||
if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) {
|
if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
matches[i] = r
|
matches[i] = r
|
||||||
|
width += w
|
||||||
}
|
}
|
||||||
t.accept(matches...)
|
t.acceptRunes(width, matches...)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -882,9 +888,9 @@ func MatchAnyRune() Handler {
|
||||||
// UTF8 rune can be read from the input.
|
// UTF8 rune can be read from the input.
|
||||||
func MatchValidRune() Handler {
|
func MatchValidRune() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && r != utf8.RuneError {
|
if err == nil && r != utf8.RuneError {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -895,9 +901,9 @@ func MatchValidRune() Handler {
|
||||||
// UTF8 rune can be read from the input.
|
// UTF8 rune can be read from the input.
|
||||||
func MatchInvalidRune() Handler {
|
func MatchInvalidRune() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r, err := t.PeekRune(0)
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil && r == utf8.RuneError {
|
if err == nil && r == utf8.RuneError {
|
||||||
t.accept(r)
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -949,45 +955,45 @@ func MatchFloat() Handler {
|
||||||
// False values: false, FALSE, False, 0, f, F
|
// False values: false, FALSE, False, 0, f, F
|
||||||
func MatchBoolean() Handler {
|
func MatchBoolean() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
r1, err := t.PeekRune(0)
|
r1, _, err := t.PeekRune(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if r1 == '1' || r1 == '0' {
|
if r1 == '1' || r1 == '0' {
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if r1 == 't' || r1 == 'T' {
|
if r1 == 't' || r1 == 'T' {
|
||||||
r2, _ := t.PeekRune(1)
|
r2, _, _ := t.PeekRune(1)
|
||||||
r3, _ := t.PeekRune(2)
|
r3, _, _ := t.PeekRune(2)
|
||||||
r4, err := t.PeekRune(3)
|
r4, _, err := t.PeekRune(3)
|
||||||
if err == nil && r2 == 'r' && r3 == 'u' && r4 == 'e' {
|
if err == nil && r2 == 'r' && r3 == 'u' && r4 == 'e' {
|
||||||
t.accept(r1, r2, r3, r4)
|
t.acceptRunes(4, r1, r2, r3, r4)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if err == nil && r1 == 'T' && r2 == 'R' && r3 == 'U' && r4 == 'E' {
|
if err == nil && r1 == 'T' && r2 == 'R' && r3 == 'U' && r4 == 'E' {
|
||||||
t.accept(r1, r2, r3, r4)
|
t.acceptRunes(4, r1, r2, r3, r4)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if r1 == 'f' || r1 == 'F' {
|
if r1 == 'f' || r1 == 'F' {
|
||||||
r2, _ := t.PeekRune(1)
|
r2, _, _ := t.PeekRune(1)
|
||||||
r3, _ := t.PeekRune(2)
|
r3, _, _ := t.PeekRune(2)
|
||||||
r4, _ := t.PeekRune(3)
|
r4, _, _ := t.PeekRune(3)
|
||||||
r5, err := t.PeekRune(4)
|
r5, _, err := t.PeekRune(4)
|
||||||
|
|
||||||
if err == nil && r2 == 'a' && r3 == 'l' && r4 == 's' && r5 == 'e' {
|
if err == nil && r2 == 'a' && r3 == 'l' && r4 == 's' && r5 == 'e' {
|
||||||
t.accept(r1, r2, r3, r4, r5)
|
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if err == nil && r1 == 'F' && r2 == 'A' && r3 == 'L' && r4 == 'S' && r5 == 'E' {
|
if err == nil && r1 == 'F' && r2 == 'A' && r3 == 'L' && r4 == 'S' && r5 == 'E' {
|
||||||
t.accept(r1, r2, r3, r4, r5)
|
t.acceptRunes(5, r1, r2, r3, r4, r5)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.accept(r1)
|
t.acceptRunes(1, r1)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
Loading…
Reference in New Issue