The parsekit.reader.Reader now caches error messages that are returned from

the embedded io.Reader. When an error is returned, the read offset and the
error are stored. When, later on, the same or a higher offset is requested,
the error is returned again. This way the code will work for Readers that do
not repeatedly return the correct error when calling the Read() method
multiple times after a first error has occurred.

Note: I am not sure if there are any Reader implementations that wouldn't
return the same error message over and over again, but hardening the
parsekit Reader to support this is not hard, so let's just go for it.
This commit is contained in:
Maurice Makaay 2019-06-09 19:42:20 +00:00
parent 76336e883e
commit 9656cd4449
2 changed files with 87 additions and 0 deletions

View File

@ -62,6 +62,8 @@ type Reader struct {
buffer []rune // Input buffer, holding runes that were read from input buffer []rune // Input buffer, holding runes that were read from input
bufferOffset int // The offset of the buffer, relative to the start of the input bufferOffset int // The offset of the buffer, relative to the start of the input
bufferLen int // Input size, the number of runes in the buffer bufferLen int // Input size, the number of runes in the buffer
err error // A read error, if one occurred
errOffset int // The offset in the buffer at which the read error was encountered
} }
// New initializes a new reader struct, wrapped around the provided input. // New initializes a new reader struct, wrapped around the provided input.
@ -107,6 +109,11 @@ func makeBufioReader(input interface{}) *bufio.Reader {
// be not nil. One special read fail is actually a normal situation: end // be not nil. One special read fail is actually a normal situation: end
// of file reached. In that case, the returned error wille be io.EOF. // of file reached. In that case, the returned error wille be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) { func (r *Reader) RuneAt(offset int) (rune, error) {
// Re-issue a previously seen read error.
if r.err != nil && offset >= r.errOffset {
return utf8.RuneError, r.err
}
// Rune at provided offset is not yet available in the input buffer. // Rune at provided offset is not yet available in the input buffer.
// Read runes until we have enough runes to satisfy the offset. // Read runes until we have enough runes to satisfy the offset.
for r.bufferLen <= offset { for r.bufferLen <= offset {
@ -114,6 +121,8 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
// Handle errors. // Handle errors.
if err != nil { if err != nil {
r.err = err
r.errOffset = r.bufferLen
return utf8.RuneError, err return utf8.RuneError, err
} }
@ -142,4 +151,7 @@ func (r *Reader) Flush(numberOfRunes int) {
r.bufferOffset += numberOfRunes r.bufferOffset += numberOfRunes
r.bufferLen -= numberOfRunes r.bufferLen -= numberOfRunes
r.buffer = r.buffer[numberOfRunes:] r.buffer = r.buffer[numberOfRunes:]
if r.err != nil {
r.errOffset -= numberOfRunes
}
} }

View File

@ -6,6 +6,7 @@ import (
"io" "io"
"strings" "strings"
"testing" "testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/reader" "git.makaay.nl/mauricem/go-parsekit/reader"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -175,3 +176,77 @@ func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
_, err = r.RuneAt(2) _, err = r.RuneAt(2)
assert.Equal(t, err.Error(), "EOF") assert.Equal(t, err.Error(), "EOF")
} }
// TestGivenErrorFromReader_ErrorIsCached makes sure that once the wrapped
// io.Reader returns an error, that error is cached by the Reader and is
// returned again whenever the offset at which it occurred (or a higher one)
// is requested at a later time. Offsets before the error must remain
// readable, and flushing must shift the cached error offset along with the
// buffer.
func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
	input := &StubReader{
		bytes: []byte{'a', 'b', 'c', 'd'},
		errors: []error{
			io.EOF,
			io.ErrUnexpectedEOF, // This error must never pop up in the tests below.
		},
	}
	r := reader.New(input)

	// Read the last available rune. This must succeed without an error.
	readRune, err := r.RuneAt(3)
	assert.NoError(t, err)
	assert.Equal(t, 'd', readRune)

	// Reading the next offset must result in the io.EOF error from the stub.
	readRune, err = r.RuneAt(4)
	assert.Equal(t, utf8.RuneError, readRune)
	assert.Equal(t, io.EOF, err)

	// Reading even further should yield the same io.EOF error. If the error
	// were not cached, the stub would hand out io.ErrUnexpectedEOF here.
	readRune, err = r.RuneAt(5)
	assert.Equal(t, utf8.RuneError, readRune)
	assert.Equal(t, io.EOF, err)

	// After an error, we must still be able to read the last rune.
	readRune, err = r.RuneAt(3)
	assert.NoError(t, err)
	assert.Equal(t, 'd', readRune)

	// Flushing updates the error index too.
	r.Flush(3)

	// The last rune is now at offset 0.
	readRune, err = r.RuneAt(0)
	assert.NoError(t, err)
	assert.Equal(t, 'd', readRune)

	// The io.EOF is now at offset 1.
	_, err = r.RuneAt(1)
	assert.Equal(t, io.EOF, err)

	// Let's flush that last rune too.
	r.Flush(1)

	// The io.EOF is now at offset 0.
	_, err = r.RuneAt(0)
	assert.Equal(t, io.EOF, err)

	// And reading beyond that offset also yields io.EOF.
	_, err = r.RuneAt(1)
	assert.Equal(t, io.EOF, err)
}
// StubReader is a minimal io.Reader test double. It hands out the configured
// bytes one at a time and, once those are used up, the configured errors, one
// per Read call.
type StubReader struct {
	bytes  []byte  // Bytes that are still to be delivered, one per Read call
	errors []error // Errors to return, in order, once all bytes are delivered
}

// Read implements io.Reader.
//
// While bytes remain, each call copies exactly one byte into p and returns
// (1, nil). When p has no room (len(p) == 0), nothing is consumed and (0, nil)
// is returned, as the io.Reader contract permits, instead of indexing past
// the end of p. Once the bytes are used up, each call returns (0, err) for
// the next configured error. When both bytes and errors are exhausted, Read
// panics: a test that reads that far requested more than the stub was
// configured to provide.
func (r *StubReader) Read(p []byte) (n int, err error) {
	if len(r.bytes) > 0 {
		// Without room in p the byte cannot be delivered; keep it for the
		// next call rather than panicking on p[0].
		if len(p) == 0 {
			return 0, nil
		}
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		return 1, nil
	}
	if len(r.errors) > 0 {
		head := r.errors[0]
		r.errors = r.errors[1:]
		return 0, head
	}
	panic("StubReader is all out of bytes and errors")
}