85 lines
2.8 KiB
Go
85 lines
2.8 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// reader wraps around an io.Reader and provides buffering that allows us to
// read the same runes over and over again. This is useful for implementing a
// parser that must be able to do lookahead on the input, returning to the
// original input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the buffer when there
// is no more need to go back to previously read runes.
//
// The reader is used internally by parsekit.TokenAPI.
type reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// newReader initializes a new reader struct, wrapped around the provided io.Reader.
func newReader(r io.Reader) *reader {
	return &reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// runeAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// A byte order mark (U+FEFF) is skipped, but only when it is the very first
// rune of the input. Any later U+FEFF is returned like every other rune.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError and the error as
// reported by the underlying reader is returned unchanged. One special read
// failure is actually a normal situation: end of file reached. In that case,
// the returned error will be io.EOF.
//
// A negative offset is a programming error and makes runeAt panic.
func (r *reader) runeAt(offset int) (rune, error) {
	if offset < 0 {
		panic(fmt.Sprintf(
			"parsekit.reader.runeAt(): offset (%d) must not be negative", offset))
	}

	// The rune at the provided offset might not yet be available in the input
	// buffer. Read runes until we have enough of them to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip the BOM. Both counters are zero only before the first rune of
		// the input has been consumed: a buffered rune raises bufferLen and
		// a skipped BOM or a flush of buffered runes raises bufferOffset.
		// Checking bufferOffset alone would also drop a U+FEFF that shows up
		// in the middle of unflushed input.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by runeAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the reader into a sliding window.
//
// Flushing a negative number of runes, or more runes than the buffer holds,
// is a programming error and makes flush panic. The reader state is left
// untouched in that case.
func (r *reader) flush(numberOfRunes int) {
	// Validate before touching any state, so a recovered panic does not
	// leave bufferOffset and bufferLen out of sync with the buffer.
	if numberOfRunes < 0 {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"must not be negative", numberOfRunes))
	}
	if numberOfRunes > r.bufferLen {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
	}

	r.bufferOffset += numberOfRunes
	r.bufferLen -= numberOfRunes
	if r.bufferLen == 0 {
		// Nothing is left to keep: restart at the front of the backing
		// array, so its capacity is reused by the next reads.
		r.buffer = r.buffer[:0]
		return
	}
	r.buffer = r.buffer[numberOfRunes:]
}
|