Moved Reader into its own package.
This commit is contained in:
parent
6d92e1dc68
commit
98d2db0374
2
go.mod
2
go.mod
|
@ -1,3 +1,5 @@
|
|||
module git.makaay.nl/mauricem/go-parsekit
|
||||
|
||||
go 1.12
|
||||
|
||||
require github.com/stretchr/testify v1.3.0
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
|
@ -172,7 +172,7 @@ func (a *ParseAPIOnAction) Stay() bool {
|
|||
|
||||
func (a *ParseAPIOnAction) flushReader() {
|
||||
if a.tokenAPI.result.offset > 0 {
|
||||
a.tokenAPI.root.reader.flush(a.tokenAPI.root.result.offset)
|
||||
a.tokenAPI.root.reader.Flush(a.tokenAPI.root.result.offset)
|
||||
a.tokenAPI.root.result.offset = 0
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
|
|
84
reader.go
84
reader.go
|
@ -1,84 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// reader wraps around an io.Reader and provides buffering to allows us to read
|
||||
// the same runes over and over again. This is useful for implementing a parser
|
||||
// that must be able to do lookahead on the input, returning to the original
|
||||
// input position after finishing that lookahead).
|
||||
//
|
||||
// To minimize memory use, it is also possible to flush the buffer when there is
|
||||
// no more need to go back to previously read runes.
|
||||
//
|
||||
// The reader is used internally by parsekit.TokenAPI.
|
||||
type reader struct {
|
||||
bufio *bufio.Reader // Used for ReadRune()
|
||||
buffer []rune // Input buffer, holding runes that were read from input
|
||||
bufferOffset int // The offset of the buffer, relative to the start of the input
|
||||
bufferLen int // Input size, the number of runes in the buffer
|
||||
}
|
||||
|
||||
// newReader initializes a new reader struct, wrapped around the provided io.Reader.
|
||||
func newReader(r io.Reader) *reader {
|
||||
return &reader{
|
||||
bufio: bufio.NewReader(r),
|
||||
buffer: []rune{},
|
||||
}
|
||||
}
|
||||
|
||||
// runeAt reads the rune at the provided rune offset.
|
||||
//
|
||||
// This offset is relative to the current starting position of the buffer in
|
||||
// the reader. When starting reading, offset 0 will point at the start of the
|
||||
// input. After flushing, offset 0 will point at the input up to where
|
||||
// the flush was done.
|
||||
//
|
||||
// The error return value will be nil when reading was successful.
|
||||
// When an invalid rune is encountered on the input, the error will be nil,
|
||||
// but the rune will be utf8.RuneError
|
||||
//
|
||||
// When reading failed, the rune will be utf8.RuneError. One special read
|
||||
// fail is actually a normal situation: end of file reached. In that case,
|
||||
// the returned error will be io.EOF.
|
||||
func (r *reader) runeAt(offset int) (rune, error) {
|
||||
// Rune at provided offset is not yet available in the input buffer.
|
||||
// Read runes until we have enough runes to satisfy the offset.
|
||||
for r.bufferLen <= offset {
|
||||
readRune, _, err := r.bufio.ReadRune()
|
||||
|
||||
// Handle errors.
|
||||
if err != nil {
|
||||
return utf8.RuneError, err
|
||||
}
|
||||
|
||||
// Skip BOM.
|
||||
if readRune == '\uFEFF' && r.bufferOffset == 0 {
|
||||
r.bufferOffset++
|
||||
continue
|
||||
}
|
||||
|
||||
r.buffer = append(r.buffer, readRune)
|
||||
r.bufferLen++
|
||||
}
|
||||
return r.buffer[offset], nil
|
||||
}
|
||||
|
||||
// Flush deletes the provided number of runes from the start of the
|
||||
// reader buffer. After flushing the buffer, offset 0 as used by runeAt()
|
||||
// will point to the rune that comes after the flushed runes.
|
||||
// So what this basically does is turn the Reader into a sliding window.
|
||||
func (r *reader) flush(numberOfRunes int) {
|
||||
if numberOfRunes > r.bufferLen {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
||||
"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
|
||||
}
|
||||
r.bufferOffset += numberOfRunes
|
||||
r.bufferLen -= numberOfRunes
|
||||
r.buffer = r.buffer[numberOfRunes:]
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
// Package reader provides a buffered Reader that wraps around an io.Reader.
|
||||
//
|
||||
// Functionally, it provides an input buffer in the form of a sliding window.
|
||||
// Let's say we've got the following input coming up in the io.Reader that is
|
||||
// wrapped by the Reader:
|
||||
//
|
||||
// |H|e|l|l|o|,| |w|o|r|l|d|!| <-- runes
|
||||
// 0 6 12 <-- rune offset
|
||||
//
|
||||
// The Reader can now be used to retrieve runes from the input, based on their
// offset, using RuneAt(offset). Normally these runes will be retrieved in
// sequence, but that is not a requirement. Let's say we retrieve the rune with
// offset 7 from the input (the 'w'), then the Reader buffer will be filled with
// runes from the io.Reader until there are enough runes available to return the
// rune for offset 7:
//
//   |H|e|l|l|o|,| |w|
//    0             7
//
// Using RuneAt, you can retrieve arbitrary runes. If you request one that is
// in the Reader buffer, then the buffered rune is returned. If you request one
// that is not in the buffer, then the buffer will be expanded.
//
// To make this into a sliding window, the Reader provides the method
// Flush(numberOfRunes). This method will drop the provided number of runes from
// the Reader buffer. So when we'd do a Flush(3) on the example buffer from above,
// then the Reader buffer would become:
//
//   |l|o|,| |w|
//    0       4
//
// Note that the offset for the first rune 'l' in the buffer is now 0.
// You can consider the input to be changed in a similar way:
//
//   |l|o|,| |w|o|r|l|d|!|
//    0       4         9
|
||||
//
|
||||
// So after a flush, the first upcoming rune after the flushed runes
|
||||
// will always have index 0.
|
||||
package reader
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Reader wraps around an io.Reader and provides buffering that allows us to read
// the same runes over and over again. This is useful for implementing a parser
// that must be able to do lookahead on the input, returning to the original
// input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the read buffer when
// there is no more need to go back to previously read runes.
//
// The parsekit.reader.Reader is used internally by parsekit.TokenAPI.
type Reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// New initializes a new Reader struct, wrapped around the provided io.Reader.
func New(r io.Reader) *Reader {
	return &Reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError and the error will
// be not nil. One special read fail is actually a normal situation: end
// of file reached. In that case, the returned error will be io.EOF.
//
// A byte order mark (U+FEFF) is skipped, but only when it is the very first
// rune of the input. A U+FEFF at any other position is returned like any
// other rune.
//
// A negative offset is a programming error and results in a panic.
func (r *Reader) RuneAt(offset int) (rune, error) {
	if offset < 0 {
		panic(fmt.Sprintf(
			"parsekit.reader.Reader.RuneAt(): negative offset (%d)", offset))
	}

	// Rune at provided offset is not yet available in the input buffer.
	// Read runes until we have enough runes to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()

		// Handle errors.
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip BOM. Both counters being zero means that nothing was buffered
		// or flushed before, so this is the first rune of the input. Checking
		// bufferOffset alone is not enough: it stays zero until the first
		// flush, which would swallow a U+FEFF in the middle of the input.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
//
// Flushing a negative number of runes, or more runes than are available in
// the buffer, is a programming error and results in a panic. The reader
// state is left untouched in that case.
func (r *Reader) Flush(numberOfRunes int) {
	// Validate before touching any state, so a failed flush cannot leave
	// the offset and length counters out of sync with the buffer.
	if numberOfRunes < 0 {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"must not be negative", numberOfRunes))
	}
	if numberOfRunes > r.bufferLen {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
	}
	r.bufferOffset += numberOfRunes
	r.bufferLen -= numberOfRunes
	r.buffer = r.buffer[numberOfRunes:]
}
|
|
@ -0,0 +1,131 @@
|
|||
package reader_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func ExampleNew() {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
|
||||
// Output:
|
||||
// H!
|
||||
}
|
||||
|
||||
func TestReader_RuneAt(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||
assert.Equal(t, "H!wH", result)
|
||||
}
|
||||
|
||||
func TestReader_RuneAt_endOfFile(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
rn, err := r.RuneAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
assert.Equal(t, "'<27>' EOF true", result)
|
||||
|
||||
rn, err = r.RuneAt(20)
|
||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
assert.Equal(t, "'<27>' EOF true", result)
|
||||
}
|
||||
|
||||
func TestReader_RuneAt_invalidRune(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, \xcdworld!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||
assert.Equal(t, " <20>wo", result, "result")
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt() {
|
||||
reader := reader.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
fmt.Printf("Runes: ")
|
||||
for i := 0; ; i++ {
|
||||
r, err := reader.RuneAt(i)
|
||||
if err != nil {
|
||||
fmt.Printf("\nErr: %s\n", err)
|
||||
break
|
||||
}
|
||||
fmt.Printf("%c", r)
|
||||
}
|
||||
|
||||
// Output:
|
||||
// Runes: Hello, world!
|
||||
// Err: EOF
|
||||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("\uFEFFBommetje!"))
|
||||
b, _ := r.RuneAt(0)
|
||||
o, _ := r.RuneAt(1)
|
||||
m, _ := r.RuneAt(2)
|
||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
assert.Equal(t, "Bom", bom, "first three runes")
|
||||
}
|
||||
|
||||
func TestReader_Flush(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
result := fmt.Sprintf("%c", at(7))
|
||||
assert.Equal(t, "w", result, "first read")
|
||||
|
||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||
r.Flush(4)
|
||||
|
||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||
// We can continue reading from there.
|
||||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||||
assert.Equal(t, "o, wor", result)
|
||||
}
|
||||
|
||||
func ExampleReader_Flush() {
|
||||
r := reader.New(strings.NewReader("dog eat dog!"))
|
||||
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
||||
|
||||
// Read from the first 4 runes of the input.
|
||||
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
||||
|
||||
// Flush those 4 runes, bringing offset 0 to the start of "eat dog".
|
||||
r.Flush(4)
|
||||
|
||||
// Read another 4 runes, because of the flushing, we start at offset 0.
|
||||
fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3))
|
||||
|
||||
// Again, flush 4 runes, bringing offset 0 to the start of "dog!".
|
||||
r.Flush(4)
|
||||
|
||||
// Read from the remainder runes.
|
||||
fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3))
|
||||
|
||||
// Output:
|
||||
// dog ate good!
|
||||
}
|
||||
|
||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.RuneAt(10)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
assert.PanicsWithValue(t,
|
||||
"parsekit.Input.Reader.Flush(): number of runes to flush "+
|
||||
"(12) exceeds size of the buffer (11)",
|
||||
func() { r.Flush(12) })
|
||||
}
|
|
@ -1,94 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func ExamplenewReader() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
|
||||
// Output:
|
||||
// H!
|
||||
}
|
||||
|
||||
func TestReader_runeAt(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||
AssertEqual(t, "H!wH", result, "result")
|
||||
}
|
||||
|
||||
func TestReader_runeAt_endOfFile(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := newReader(in)
|
||||
|
||||
rn, err := r.runeAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
||||
|
||||
rn, err = r.runeAt(20)
|
||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
||||
}
|
||||
|
||||
func TestReader_runeAt_invalidRune(t *testing.T) {
|
||||
in := strings.NewReader("Hello, \xcdworld!")
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||
AssertEqual(t, " <20>wo", result, "result")
|
||||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
in := strings.NewReader("\uFEFFBommetje!")
|
||||
r := newReader(in)
|
||||
b, _ := r.runeAt(0)
|
||||
o, _ := r.runeAt(1)
|
||||
m, _ := r.runeAt(2)
|
||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
AssertEqual(t, "Bom", bom, "first three runes")
|
||||
}
|
||||
|
||||
func TestReader_Flush(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
result := fmt.Sprintf("%c", at(7))
|
||||
AssertEqual(t, "w", result, "first read")
|
||||
|
||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||
r.flush(4)
|
||||
|
||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||
// We can continue reading from there.
|
||||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||||
AssertEqual(t, "o, wor", result, "second read")
|
||||
}
|
||||
|
||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := newReader(in)
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.runeAt(10)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() { r.flush(12) },
|
||||
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
|
||||
})
|
||||
}
|
50
tokenapi.go
50
tokenapi.go
|
@ -3,11 +3,13 @@ package parsekit
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||
)
|
||||
|
||||
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and
|
||||
// to report back results. For easy lookahead support, a forking strategy is
|
||||
// provided.
|
||||
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
|
||||
// the reader and to report back tokenizing results. For easy lookahead support,
|
||||
// a forking strategy is provided.
|
||||
//
|
||||
// BASIC OPERATION:
|
||||
//
|
||||
|
@ -15,19 +17,19 @@ import (
|
|||
//
|
||||
// When the rune is to be accepted as input, call the method Accept(). The rune
|
||||
// is then added to the results of the TokenAPI and the read cursor is moved
|
||||
// forward. Runes collected this way can later on be retrieved using for
|
||||
// example the method Result().Runes().
|
||||
// forward.
|
||||
//
|
||||
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
||||
// with as many runes as needed. Runes collected this way can later on be
|
||||
// retrieved using the method Result().Runes().
|
||||
//
|
||||
// It is mandatory to call Accept() after retrieving a rune, before calling
|
||||
// NextRune() again. Failing to do so will result in a panic.
|
||||
//
|
||||
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
||||
// with as many runes as needed.
|
||||
//
|
||||
// Next to adding runes to the output, it is also possible to modify the
|
||||
// already collected runes or to produce lexical Tokens. For all things
|
||||
// concerning results, take a look at the TokenHandlerResult struct, which can be
|
||||
// accessed though the method Result().
|
||||
// Next to adding runes to the result, it is also possible to modify the
|
||||
// stored runes or to add lexical Tokens to the result. For all things
|
||||
// concerning results, take a look at the TokenHandlerResult struct, which
|
||||
// can be accessed through the method Result().
|
||||
//
|
||||
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
||||
//
|
||||
|
@ -38,7 +40,7 @@ import (
|
|||
//
|
||||
// The way in which this is supported, is by forking a TokenAPI struct by
|
||||
// calling method Fork(). This will return a forked child TokenAPI, with
|
||||
// an empty result buffer, but using the same read cursor position as the
|
||||
// empty result data, but using the same read cursor position as the
|
||||
// forked parent.
|
||||
//
|
||||
// After forking, the same interface as described for BASIC OPERATION can be
|
||||
|
@ -47,28 +49,30 @@ import (
|
|||
// to the parent's results, and to move the read cursor position to that
|
||||
// of the child.
|
||||
//
|
||||
// When the lookahead was unsuccessful or when the results of the forked child
|
||||
// are not to be used, then the forked child TokenAPI can simply be discarded.
|
||||
// The parent TokenAPI was never modified, so it can safely be used as if the
|
||||
// lookahead never happened.
|
||||
// When the lookahead was unsuccessful, then the forked child TokenAPI can
|
||||
// simply be discarded. The parent TokenAPI was never modified, so it can
|
||||
// safely be used as if the lookahead never happened.
|
||||
//
|
||||
// Note:
|
||||
// Opinionated note:
|
||||
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||
// peeks and by moving the read cursor position back and forth, or by putting
|
||||
// read input back on the input stream. That often leads to code that is
|
||||
// efficient, however, in my opinion, not very intuitive to read.
|
||||
// efficient, however, in my opinion, not very intuitive to read. It can also
|
||||
// be tedious to get the cursor position back at the correct position, which
|
||||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type TokenAPI struct {
|
||||
reader *reader
|
||||
reader *reader.Reader
|
||||
root *TokenAPI // the root TokenAPI
|
||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens)
|
||||
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
|
||||
}
|
||||
|
||||
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||
func NewTokenAPI(r io.Reader) *TokenAPI {
|
||||
input := &TokenAPI{
|
||||
reader: newReader(r),
|
||||
reader: reader.New(r),
|
||||
result: newTokenHandlerResult(),
|
||||
}
|
||||
input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
|
||||
|
@ -91,7 +95,7 @@ func (i *TokenAPI) NextRune() (rune, error) {
|
|||
}
|
||||
i.detachChilds()
|
||||
|
||||
readRune, err := i.reader.runeAt(i.result.offset)
|
||||
readRune, err := i.reader.RuneAt(i.result.offset)
|
||||
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||
return readRune, err
|
||||
}
|
||||
|
|
|
@ -177,7 +177,6 @@ func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *test
|
|||
i.Accept()
|
||||
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
|
||||
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
|
||||
AssertEqual(t, 'T', i.reader.buffer[0], "TokenAPI.reader.buffer[0]")
|
||||
r, _ = i.NextRune()
|
||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
@ -188,7 +187,6 @@ func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
|||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
||||
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue