Making parsekit.reader both simpler and more complex (more complex by adopting some buffer allocation logic from the built-in bytes package, to avoid copying memory all the time during read operations).
This commit is contained in:
parent
9656cd4449
commit
65895ac502
109
reader/reader.go
109
reader/reader.go
|
@ -41,6 +41,7 @@ package reader
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -58,12 +59,11 @@ import (
|
||||||
//
|
//
|
||||||
// The parsekit.reader.Reader is used internally by parsekit.TokenAPI.
|
// The parsekit.reader.Reader is used internally by parsekit.TokenAPI.
|
||||||
type Reader struct {
|
type Reader struct {
|
||||||
bufio *bufio.Reader // Used for ReadRune()
|
bufio *bufio.Reader // Used for ReadRune()
|
||||||
buffer []rune // Input buffer, holding runes that were read from input
|
buffer []rune // Input buffer, holding runes that were read from input
|
||||||
bufferOffset int // The offset of the buffer, relative to the start of the input
|
err error // A read error, if one occurred
|
||||||
bufferLen int // Input size, the number of runes in the buffer
|
errOffset int // The offset in the buffer at which the read error was encountered
|
||||||
err error // A read error, if one occurred
|
firstReadDone bool // Whether or not the first read was done
|
||||||
errOffset int // The offset in the buffer at which the read error was encountered
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// New initializes a new reader struct, wrapped around the provided input.
|
// New initializes a new reader struct, wrapped around the provided input.
|
||||||
|
@ -74,8 +74,7 @@ type Reader struct {
|
||||||
// - bufio.Reader
|
// - bufio.Reader
|
||||||
func New(input interface{}) *Reader {
|
func New(input interface{}) *Reader {
|
||||||
return &Reader{
|
return &Reader{
|
||||||
bufio: makeBufioReader(input),
|
bufio: makeBufioReader(input),
|
||||||
buffer: []rune{},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,6 +107,8 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
||||||
// When reading failed, the rune will be utf8.RuneError and the error will
|
// When reading failed, the rune will be utf8.RuneError and the error will
|
||||||
// be not nil. One special read fail is actually a normal situation: end
|
// be not nil. One special read fail is actually a normal situation: end
|
||||||
// of file reached. In that case, the returned error will be io.EOF.
|
// of file reached. In that case, the returned error will be io.EOF.
|
||||||
|
// Once a read error is encountered, that same read error is guaranteed to
|
||||||
|
// be returned on every subsequent read at or beyond the offset where the error occurred.
|
||||||
func (r *Reader) RuneAt(offset int) (rune, error) {
|
func (r *Reader) RuneAt(offset int) (rune, error) {
|
||||||
// Re-issue a previously seen read error.
|
// Re-issue a previously seen read error.
|
||||||
if r.err != nil && offset >= r.errOffset {
|
if r.err != nil && offset >= r.errOffset {
|
||||||
|
@ -116,40 +117,92 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
|
||||||
|
|
||||||
// Rune at provided offset is not yet available in the input buffer.
|
// Rune at provided offset is not yet available in the input buffer.
|
||||||
// Read runes until we have enough runes to satisfy the offset.
|
// Read runes until we have enough runes to satisfy the offset.
|
||||||
for r.bufferLen <= offset {
|
l := len(r.buffer)
|
||||||
readRune, _, err := r.bufio.ReadRune()
|
n := offset - l + 1 // nr of runes to add to the buffer to get to offset
|
||||||
|
if n > 0 {
|
||||||
|
r.grow(n)
|
||||||
|
for writeAt := l; writeAt <= offset; writeAt++ {
|
||||||
|
readRune, _, err := r.bufio.ReadRune()
|
||||||
|
|
||||||
// Handle errors.
|
// Skip BOM.
|
||||||
if err != nil {
|
if !r.firstReadDone {
|
||||||
r.err = err
|
r.firstReadDone = true
|
||||||
r.errOffset = r.bufferLen
|
if readRune == '\uFEFF' {
|
||||||
return utf8.RuneError, err
|
writeAt--
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle errors.
|
||||||
|
if err != nil {
|
||||||
|
r.err = err
|
||||||
|
r.errOffset = writeAt
|
||||||
|
return utf8.RuneError, err
|
||||||
|
}
|
||||||
|
|
||||||
|
r.buffer[writeAt] = readRune
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip BOM.
|
|
||||||
if readRune == '\uFEFF' && r.bufferOffset == 0 {
|
|
||||||
r.bufferOffset++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
r.buffer = append(r.buffer, readRune)
|
|
||||||
r.bufferLen++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return r.buffer[offset], nil
|
return r.buffer[offset], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The upcoming code was inspired heavily by the Go built-in 'bytes' package.
|
||||||
|
|
||||||
|
// smallBufferSize is the minimal capacity used for the initial buffer allocation.
|
||||||
|
const smallBufferSize = 64
|
||||||
|
|
||||||
|
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
|
||||||
|
var ErrTooLarge = errors.New("parsekit.reader: too large")
|
||||||
|
|
||||||
|
// grow grows the buffer to guarantee space for n more runes.
|
||||||
|
// It extends the buffer length by n, so new runes can be written from the old length onward.
|
||||||
|
// If the buffer can't grow it will panic with ErrTooLarge.
|
||||||
|
func (r *Reader) grow(n int) {
|
||||||
|
// Instantiate new buffer.
|
||||||
|
if r.buffer == nil {
|
||||||
|
b := smallBufferSize
|
||||||
|
if b < n {
|
||||||
|
b = n
|
||||||
|
}
|
||||||
|
r.buffer = make([]rune, n, b)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
l := len(r.buffer)
|
||||||
|
c := cap(r.buffer)
|
||||||
|
// Grow the buffer by reslicing within the available capacity.
|
||||||
|
if n <= c-l {
|
||||||
|
r.buffer = r.buffer[:l+n]
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Grow the buffer by allocating a new one and copying the data.
|
||||||
|
buf := makeSlice(2*c + n)
|
||||||
|
copy(buf, r.buffer)
|
||||||
|
r.buffer = buf[:l+n]
|
||||||
|
}
|
||||||
|
|
||||||
|
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||||
|
// with ErrTooLarge.
|
||||||
|
func makeSlice(n int) []rune {
|
||||||
|
// If the make fails, give a known error.
|
||||||
|
defer func() {
|
||||||
|
if recover() != nil {
|
||||||
|
panic(ErrTooLarge)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return make([]rune, n)
|
||||||
|
}
|
||||||
|
|
||||||
// Flush deletes the provided number of runes from the start of the
|
// Flush deletes the provided number of runes from the start of the
|
||||||
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
|
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
|
||||||
// will point to the rune that comes after the flushed runes.
|
// will point to the rune that comes after the flushed runes.
|
||||||
// So what this basically does is turn the Reader into a sliding window.
|
// So what this basically does is turn the Reader into a sliding window.
|
||||||
func (r *Reader) Flush(numberOfRunes int) {
|
func (r *Reader) Flush(numberOfRunes int) {
|
||||||
if numberOfRunes > r.bufferLen {
|
if numberOfRunes > len(r.buffer) {
|
||||||
panic(fmt.Sprintf(
|
panic(fmt.Sprintf(
|
||||||
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
||||||
"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
|
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
|
||||||
}
|
}
|
||||||
r.bufferOffset += numberOfRunes
|
|
||||||
r.bufferLen -= numberOfRunes
|
|
||||||
r.buffer = r.buffer[numberOfRunes:]
|
r.buffer = r.buffer[numberOfRunes:]
|
||||||
if r.err != nil {
|
if r.err != nil {
|
||||||
r.errOffset -= numberOfRunes
|
r.errOffset -= numberOfRunes
|
||||||
|
|
|
@ -231,6 +231,48 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
|
||||||
assert.Equal(t, io.EOF, err)
|
assert.Equal(t, io.EOF, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
||||||
|
input, size := makeLargeStubReader()
|
||||||
|
r := reader.New(input)
|
||||||
|
|
||||||
|
readRune, err := r.RuneAt(0)
|
||||||
|
assert.Equal(t, 'X', readRune)
|
||||||
|
readRune, err = r.RuneAt(size - 1)
|
||||||
|
assert.Equal(t, 'Y', readRune)
|
||||||
|
readRune, err = r.RuneAt(size)
|
||||||
|
assert.Equal(t, io.EOF, err)
|
||||||
|
readRune, err = r.RuneAt(10)
|
||||||
|
assert.Equal(t, 'X', readRune)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
|
||||||
|
input, size := makeLargeStubReader()
|
||||||
|
r := reader.New(input)
|
||||||
|
|
||||||
|
readRune, _ := r.RuneAt(size - 200)
|
||||||
|
assert.Equal(t, 'X', readRune)
|
||||||
|
readRune, _ = r.RuneAt(size - 1)
|
||||||
|
assert.Equal(t, 'Y', readRune)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
||||||
|
input, size := makeLargeStubReader()
|
||||||
|
r := reader.New(input)
|
||||||
|
|
||||||
|
readRune, _ := r.RuneAt(size - 1)
|
||||||
|
assert.Equal(t, 'Y', readRune)
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeLargeStubReader() (*StubReader, int) {
|
||||||
|
size := utf8.UTFMax * 64 * 5
|
||||||
|
bytes := make([]byte, size)
|
||||||
|
for i := range bytes {
|
||||||
|
bytes[i] = 'X'
|
||||||
|
}
|
||||||
|
bytes[size-1] = 'Y'
|
||||||
|
return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size
|
||||||
|
}
|
||||||
|
|
||||||
type StubReader struct {
|
type StubReader struct {
|
||||||
bytes []byte
|
bytes []byte
|
||||||
errors []error
|
errors []error
|
||||||
|
|
Loading…
Reference in New Issue