Moved Reader into its own package.
This commit is contained in:
parent
6d92e1dc68
commit
98d2db0374
2
go.mod
2
go.mod
|
@ -1,3 +1,5 @@
|
||||||
module git.makaay.nl/mauricem/go-parsekit
|
module git.makaay.nl/mauricem/go-parsekit
|
||||||
|
|
||||||
go 1.12
|
go 1.12
|
||||||
|
|
||||||
|
require github.com/stretchr/testify v1.3.0
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
|
@ -172,7 +172,7 @@ func (a *ParseAPIOnAction) Stay() bool {
|
||||||
|
|
||||||
func (a *ParseAPIOnAction) flushReader() {
|
func (a *ParseAPIOnAction) flushReader() {
|
||||||
if a.tokenAPI.result.offset > 0 {
|
if a.tokenAPI.result.offset > 0 {
|
||||||
a.tokenAPI.root.reader.flush(a.tokenAPI.root.result.offset)
|
a.tokenAPI.root.reader.Flush(a.tokenAPI.root.result.offset)
|
||||||
a.tokenAPI.root.result.offset = 0
|
a.tokenAPI.root.result.offset = 0
|
||||||
a.parseAPI.initLoopCheck()
|
a.parseAPI.initLoopCheck()
|
||||||
}
|
}
|
||||||
|
|
84
reader.go
84
reader.go
|
@ -1,84 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// reader wraps around an io.Reader and provides buffering to allows us to read
|
|
||||||
// the same runes over and over again. This is useful for implementing a parser
|
|
||||||
// that must be able to do lookahead on the input, returning to the original
|
|
||||||
// input position after finishing that lookahead).
|
|
||||||
//
|
|
||||||
// To minimze memory use, it is also possible to flush the buffer when there is
|
|
||||||
// no more need to go back to previously read runes.
|
|
||||||
//
|
|
||||||
// The reader is used internally by parsekit.TokenAPI.
|
|
||||||
type reader struct {
|
|
||||||
bufio *bufio.Reader // Used for ReadRune()
|
|
||||||
buffer []rune // Input buffer, holding runes that were read from input
|
|
||||||
bufferOffset int // The offset of the buffer, relative to the start of the input
|
|
||||||
bufferLen int // Input size, the number of runes in the buffer
|
|
||||||
}
|
|
||||||
|
|
||||||
// newwReader initializes a new reader struct, wrapped around the provided io.Reader.
|
|
||||||
func newReader(r io.Reader) *reader {
|
|
||||||
return &reader{
|
|
||||||
bufio: bufio.NewReader(r),
|
|
||||||
buffer: []rune{},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// runeAt reads the rune at the provided rune offset.
|
|
||||||
//
|
|
||||||
// This offset is relative to the current starting position of the buffer in
|
|
||||||
// the reader. When starting reading, offset 0 will point at the start of the
|
|
||||||
// input. After flushing, offset 0 will point at the input up to where
|
|
||||||
// the flush was done.
|
|
||||||
//
|
|
||||||
// The error return value will be nil when reading was successful.
|
|
||||||
// When an invalid rune is encountered on the input, the error will be nil,
|
|
||||||
// but the rune will be utf8.RuneError
|
|
||||||
//
|
|
||||||
// When reading failed, the rune will be utf8.RuneError. One special read
|
|
||||||
// fail is actually a normal situation: end of file reached. In that case,
|
|
||||||
// the returned error wille be io.EOF.
|
|
||||||
func (r *reader) runeAt(offset int) (rune, error) {
|
|
||||||
// Rune at provided offset is not yet available in the input buffer.
|
|
||||||
// Read runes until we have enough runes to satisfy the offset.
|
|
||||||
for r.bufferLen <= offset {
|
|
||||||
readRune, _, err := r.bufio.ReadRune()
|
|
||||||
|
|
||||||
// Handle errors.
|
|
||||||
if err != nil {
|
|
||||||
return utf8.RuneError, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip BOM.
|
|
||||||
if readRune == '\uFEFF' && r.bufferOffset == 0 {
|
|
||||||
r.bufferOffset++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
r.buffer = append(r.buffer, readRune)
|
|
||||||
r.bufferLen++
|
|
||||||
}
|
|
||||||
return r.buffer[offset], nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush deletes the provided number of runes from the start of the
|
|
||||||
// reader buffer. After flushing the buffer, offset 0 as used by runeAt()
|
|
||||||
// will point to the rune that comes after the flushed runes.
|
|
||||||
// So what this basically does is turn the Reader into a sliding window.
|
|
||||||
func (r *reader) flush(numberOfRunes int) {
|
|
||||||
if numberOfRunes > r.bufferLen {
|
|
||||||
panic(fmt.Sprintf(
|
|
||||||
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
|
||||||
"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
|
|
||||||
}
|
|
||||||
r.bufferOffset += numberOfRunes
|
|
||||||
r.bufferLen -= numberOfRunes
|
|
||||||
r.buffer = r.buffer[numberOfRunes:]
|
|
||||||
}
|
|
|
@ -0,0 +1,123 @@
|
||||||
|
// Package reader provides a buffered Reader that wraps around an io.Reader.
|
||||||
|
//
|
||||||
|
// Functionally, it provides an input buffer in the form of a sliding window.
|
||||||
|
// Let's say we've got the following input coming up in the io.Reader that is
|
||||||
|
// wrapped by the Reader:
|
||||||
|
//
|
||||||
|
//    |H|e|l|l|o|,| |w|o|r|l|d|!|      <-- runes
|
||||||
|
//     0           6           12      <-- rune offset
|
||||||
|
//
|
||||||
|
// The Reader can now be used to retrieve runes from the input, based on their
|
||||||
|
// offset, using RuneAt(offset). Normally these runes will be retrieved in
|
||||||
|
// sequence, but that is not a requirement. Let's say we retrieve the rune with
|
||||||
|
// offset 7 from the input (the 'w'), then the Reader buffer will be filled with
|
||||||
|
// runes from the io.Reader until there are enough runes available to return the
|
||||||
|
// rune for offset 7:
|
||||||
|
//
|
||||||
|
//    |H|e|l|l|o|,| |w|
|
||||||
|
//     0             7
|
||||||
|
//
|
||||||
|
// Using RuneAt, you can retrieve arbitrary runes. If you request one that is
|
||||||
|
// in the Reader buffer, then the buffered rune is returned. If you request one
|
||||||
|
// that is not in the buffer, then the buffer will be expanded.
|
||||||
|
//
|
||||||
|
// To make this into a sliding window, the Reader provides the method
|
||||||
|
// Flush(numberOfRunes). This method will drop the provided number of runes from
|
||||||
|
// the Reader buffer. So when we'd do a Flush(3) on the example buffer from above,
|
||||||
|
// then the Reader buffer would become:
|
||||||
|
//
|
||||||
|
//    |l|o|,| |w|
|
||||||
|
//     0       4
|
||||||
|
//
|
||||||
|
// Note that the offset for the first rune 'l' in the buffer is now 0.
|
||||||
|
// You can consider the input to be changed in a similar way:
|
||||||
|
//
|
||||||
|
//    |l|o|,| |w|o|r|l|d|!|
|
||||||
|
//     0           6     9
|
||||||
|
//
|
||||||
|
// So after a flush, the first upcoming rune after the flushed runes
|
||||||
|
// will always have index 0.
|
||||||
|
package reader
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reader wraps around an io.Reader and provides buffering that allows us to
// read the same runes over and over again. This is useful for implementing a
// parser that must be able to do lookahead on the input, returning to the
// original input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the read buffer when
// there is no more need to go back to previously read runes.
//
// The reader.Reader is used internally by parsekit.TokenAPI.
type Reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// New initializes a new Reader struct, wrapped around the provided io.Reader.
func New(r io.Reader) *Reader {
	return &Reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done. The offset must not be negative; a negative offset
// makes the buffer lookup panic.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError and the error will
// be not nil. One special read fail is actually a normal situation: end
// of file reached. In that case, the returned error will be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) {
	// Rune at provided offset is not yet available in the input buffer.
	// Read runes until we have enough runes to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()

		// Handle errors.
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip BOM, but only when it is the very first rune of the input.
		// Checking bufferOffset alone is not enough: as long as no flush has
		// been done it stays 0, so a U+FEFF further down the input would be
		// dropped as well. Requiring an empty buffer too pins the check to
		// the actual start of the input.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
//
// Flush panics when numberOfRunes exceeds the number of runes that are
// currently in the buffer.
func (r *Reader) Flush(numberOfRunes int) {
	if numberOfRunes > r.bufferLen {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
	}
	r.bufferOffset += numberOfRunes
	r.bufferLen -= numberOfRunes
	r.buffer = r.buffer[numberOfRunes:]
}
|
|
@ -0,0 +1,131 @@
|
||||||
|
package reader_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExampleNew() {
|
||||||
|
r := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
fmt.Printf("%c", at(0))
|
||||||
|
fmt.Printf("%c", at(12))
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// H!
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReader_RuneAt(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
// It is possible to go back and forth while reading the input.
|
||||||
|
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||||
|
assert.Equal(t, "H!wH", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReader_RuneAt_endOfFile(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
|
rn, err := r.RuneAt(13)
|
||||||
|
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
|
assert.Equal(t, "'<27>' EOF true", result)
|
||||||
|
|
||||||
|
rn, err = r.RuneAt(20)
|
||||||
|
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
|
assert.Equal(t, "'<27>' EOF true", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReader_RuneAt_invalidRune(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("Hello, \xcdworld!"))
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||||
|
assert.Equal(t, " <20>wo", result, "result")
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_RuneAt() {
|
||||||
|
reader := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
|
fmt.Printf("Runes: ")
|
||||||
|
for i := 0; ; i++ {
|
||||||
|
r, err := reader.RuneAt(i)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("\nErr: %s\n", err)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
fmt.Printf("%c", r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// Runes: Hello, world!
|
||||||
|
// Err: EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("\uFEFFBommetje!"))
|
||||||
|
b, _ := r.RuneAt(0)
|
||||||
|
o, _ := r.RuneAt(1)
|
||||||
|
m, _ := r.RuneAt(2)
|
||||||
|
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||||
|
assert.Equal(t, "Bom", bom, "first three runes")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReader_Flush(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||||
|
result := fmt.Sprintf("%c", at(7))
|
||||||
|
assert.Equal(t, "w", result, "first read")
|
||||||
|
|
||||||
|
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||||
|
r.Flush(4)
|
||||||
|
|
||||||
|
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||||
|
// We can continue reading from there.
|
||||||
|
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||||||
|
assert.Equal(t, "o, wor", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_Flush() {
|
||||||
|
r := reader.New(strings.NewReader("dog eat dog!"))
|
||||||
|
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
||||||
|
|
||||||
|
// Read from the first 4 runes of the input.
|
||||||
|
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
||||||
|
|
||||||
|
// Flush those 4 runes, bringing offset 0 to the start of "eat dog".
|
||||||
|
r.Flush(4)
|
||||||
|
|
||||||
|
// Read another 4 runes, because of the flushing, we start at offset 0.
|
||||||
|
fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3))
|
||||||
|
|
||||||
|
// Again, flush 4 runes, bringing offset 0 to the start of "dog!".
|
||||||
|
r.Flush(4)
|
||||||
|
|
||||||
|
// Read from the remainder runes.
|
||||||
|
fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3))
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// dog ate good!
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||||
|
r := reader.New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
|
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||||
|
r.RuneAt(10)
|
||||||
|
|
||||||
|
// However, we flush 12 runes, which exceeds the buffer size.
|
||||||
|
assert.PanicsWithValue(t,
|
||||||
|
"parsekit.Input.Reader.Flush(): number of runes to flush "+
|
||||||
|
"(12) exceeds size of the buffer (11)",
|
||||||
|
func() { r.Flush(12) })
|
||||||
|
}
|
|
@ -1,94 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func ExamplenewReader() {
|
|
||||||
in := strings.NewReader("Hello, world!")
|
|
||||||
r := newReader(in)
|
|
||||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
|
||||||
|
|
||||||
fmt.Printf("%c", at(0))
|
|
||||||
fmt.Printf("%c", at(12))
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// H!
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReader_runeAt(t *testing.T) {
|
|
||||||
in := strings.NewReader("Hello, world!")
|
|
||||||
r := newReader(in)
|
|
||||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
|
||||||
|
|
||||||
// It is possible to go back and forth while reading the input.
|
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
|
||||||
AssertEqual(t, "H!wH", result, "result")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReader_runeAt_endOfFile(t *testing.T) {
|
|
||||||
in := strings.NewReader("Hello, world!")
|
|
||||||
r := newReader(in)
|
|
||||||
|
|
||||||
rn, err := r.runeAt(13)
|
|
||||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
|
||||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
|
||||||
|
|
||||||
rn, err = r.runeAt(20)
|
|
||||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
|
||||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReader_runeAt_invalidRune(t *testing.T) {
|
|
||||||
in := strings.NewReader("Hello, \xcdworld!")
|
|
||||||
r := newReader(in)
|
|
||||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
|
||||||
|
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
|
||||||
AssertEqual(t, " <20>wo", result, "result")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
|
||||||
in := strings.NewReader("\uFEFFBommetje!")
|
|
||||||
r := newReader(in)
|
|
||||||
b, _ := r.runeAt(0)
|
|
||||||
o, _ := r.runeAt(1)
|
|
||||||
m, _ := r.runeAt(2)
|
|
||||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
|
||||||
AssertEqual(t, "Bom", bom, "first three runes")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReader_Flush(t *testing.T) {
|
|
||||||
in := strings.NewReader("Hello, world!")
|
|
||||||
r := newReader(in)
|
|
||||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
|
||||||
|
|
||||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
|
||||||
result := fmt.Sprintf("%c", at(7))
|
|
||||||
AssertEqual(t, "w", result, "first read")
|
|
||||||
|
|
||||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
|
||||||
r.flush(4)
|
|
||||||
|
|
||||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
|
||||||
// We can continue reading from there.
|
|
||||||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
|
||||||
AssertEqual(t, "o, wor", result, "second read")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
|
||||||
in := strings.NewReader("Hello, world!")
|
|
||||||
r := newReader(in)
|
|
||||||
|
|
||||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
|
||||||
r.runeAt(10)
|
|
||||||
|
|
||||||
// However, we flush 12 runes, which exceeds the buffer size.
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() { r.flush(12) },
|
|
||||||
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
|
|
||||||
})
|
|
||||||
}
|
|
50
tokenapi.go
50
tokenapi.go
|
@ -3,11 +3,13 @@ package parsekit
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and
|
// TokenAPI wraps a reader.Reader and its purpose is to retrieve data from
|
||||||
// to report back results. For easy lookahead support, a forking strategy is
|
// the reader and to report back tokenizing results. For easy lookahead support,
|
||||||
// provided.
|
// a forking strategy is provided.
|
||||||
//
|
//
|
||||||
// BASIC OPERATION:
|
// BASIC OPERATION:
|
||||||
//
|
//
|
||||||
|
@ -15,19 +17,19 @@ import (
|
||||||
//
|
//
|
||||||
// When the rune is to be accepted as input, call the method Accept(). The rune
|
// When the rune is to be accepted as input, call the method Accept(). The rune
|
||||||
// is then added to the results of the TokenAPI and the read cursor is moved
|
// is then added to the results of the TokenAPI and the read cursor is moved
|
||||||
// forward. Runes collected this way can later on be retrieved using for
|
// forward.
|
||||||
// example the method Result().Runes().
|
//
|
||||||
|
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
||||||
|
// with as many runes as needed. Runes collected this way can later on be
|
||||||
|
// retrieved using, for example, the method Result().Runes().
|
||||||
//
|
//
|
||||||
// It is mandatory to call Accept() after retrieving a rune, before calling
|
// It is mandatory to call Accept() after retrieving a rune, before calling
|
||||||
// NextRune() again. Failing to do so will result in a panic.
|
// NextRune() again. Failing to do so will result in a panic.
|
||||||
//
|
//
|
||||||
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
// Next to adding runes to the result, it is also possible to modify the
|
||||||
// with as many runes as needed.
|
// stored runes or to add lexical Tokens to the result. For all things
|
||||||
//
|
// concerning results, take a look at the TokenHandlerResult struct, which
|
||||||
// Next to adding runes to the output, it is also possible to modify the
|
// can be accessed through the method Result().
|
||||||
// already collected runes or to produce lexical Tokens. For all things
|
|
||||||
// concerning results, take a look at the TokenHandlerResult struct, which can be
|
|
||||||
// accessed though the method Result().
|
|
||||||
//
|
//
|
||||||
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
||||||
//
|
//
|
||||||
|
@ -38,7 +40,7 @@ import (
|
||||||
//
|
//
|
||||||
// The way in which this is supported, is by forking a TokenAPI struct by
|
// The way in which this is supported, is by forking a TokenAPI struct by
|
||||||
// calling method Fork(). This will return a forked child TokenAPI, with
|
// calling method Fork(). This will return a forked child TokenAPI, with
|
||||||
// an empty result buffer, but using the same read cursor position as the
|
// empty result data, but using the same read cursor position as the
|
||||||
// forked parent.
|
// forked parent.
|
||||||
//
|
//
|
||||||
// After forking, the same interface as described for BASIC OPERATION can be
|
// After forking, the same interface as described for BASIC OPERATION can be
|
||||||
|
@ -47,28 +49,30 @@ import (
|
||||||
// to the parent's results, and to move the read cursor position to that
|
// to the parent's results, and to move the read cursor position to that
|
||||||
// of the child.
|
// of the child.
|
||||||
//
|
//
|
||||||
// When the lookahead was unsuccessful or when the results of the forked child
|
// When the lookahead was unsuccessful, then the forked child TokenAPI can
|
||||||
// are not to be used, then the forked child TokenAPI can simply be discarded.
|
// simply be discarded. The parent TokenAPI was never modified, so it can
|
||||||
// The parent TokenAPI was never modified, so it can safely be used as if the
|
// safely be used as if the lookahead never happened.
|
||||||
// lookahead never happened.
|
|
||||||
//
|
//
|
||||||
// Note:
|
// Opinionated note:
|
||||||
// Many tokenizers/parsers take a different approach on lookaheads by using
|
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||||
// peeks and by moving the read cursor position back and forth, or by putting
|
// peeks and by moving the read cursor position back and forth, or by putting
|
||||||
// read input back on the input stream. That often leads to code that is
|
// read input back on the input stream. That often leads to code that is
|
||||||
// efficient, however, in my opinion, not very intuitive to read.
|
// efficient, however, in my opinion, not very intuitive to read. It can also
|
||||||
|
// be tedious to get the cursor position back at the correct position, which
|
||||||
|
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||||
|
// no bookkeeping has to be implemented when implementing a parser.
|
||||||
type TokenAPI struct {
|
type TokenAPI struct {
|
||||||
reader *reader
|
reader *reader.Reader
|
||||||
root *TokenAPI // the root TokenAPI
|
root *TokenAPI // the root TokenAPI
|
||||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||||
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens)
|
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||||
func NewTokenAPI(r io.Reader) *TokenAPI {
|
func NewTokenAPI(r io.Reader) *TokenAPI {
|
||||||
input := &TokenAPI{
|
input := &TokenAPI{
|
||||||
reader: newReader(r),
|
reader: reader.New(r),
|
||||||
result: newTokenHandlerResult(),
|
result: newTokenHandlerResult(),
|
||||||
}
|
}
|
||||||
input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
|
input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
|
||||||
|
@ -91,7 +95,7 @@ func (i *TokenAPI) NextRune() (rune, error) {
|
||||||
}
|
}
|
||||||
i.detachChilds()
|
i.detachChilds()
|
||||||
|
|
||||||
readRune, err := i.reader.runeAt(i.result.offset)
|
readRune, err := i.reader.RuneAt(i.result.offset)
|
||||||
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||||
return readRune, err
|
return readRune, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,7 +177,6 @@ func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *test
|
||||||
i.Accept()
|
i.Accept()
|
||||||
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
|
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
|
||||||
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
|
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
|
||||||
AssertEqual(t, 'T', i.reader.buffer[0], "TokenAPI.reader.buffer[0]")
|
|
||||||
r, _ = i.NextRune()
|
r, _ = i.NextRune()
|
||||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||||
}
|
}
|
||||||
|
@ -188,7 +187,6 @@ func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||||
i.NextRune()
|
i.NextRune()
|
||||||
i.Accept()
|
i.Accept()
|
||||||
}
|
}
|
||||||
AssertEqual(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
|
||||||
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
|
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue