From 9656cd4449b235a04637df9c6d0591a128f8e3b1 Mon Sep 17 00:00:00 2001
From: Maurice Makaay <maurice@makaay.nl>
Date: Sun, 9 Jun 2019 19:42:20 +0000
Subject: [PATCH] The parsekit.reader.Reader now caches error messages that are
 returned from the embedded io.Reader. When an error is returned, the read
 offset and the error are stored. When, later on, the same or a higher offset
 is requested, the error is returned again. This way the code will work for
 Readers that do not repeatedly return the correct error when calling the
 Read() method multiple times after a first error has occurred.

Note: I am not sure if there are any Reader implementations that wouldn't
return the same error message over and over again, but hardening the
parsekit Reader to support this is not hard, so let's just go for it.
---
 reader/reader.go      | 12 +++++++
 reader/reader_test.go | 75 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)

diff --git a/reader/reader.go b/reader/reader.go
index 95b30b0..ea3af91 100644
--- a/reader/reader.go
+++ b/reader/reader.go
@@ -62,6 +62,8 @@ type Reader struct {
 	buffer       []rune        // Input buffer, holding runes that were read from input
 	bufferOffset int           // The offset of the buffer, relative to the start of the input
 	bufferLen    int           // Input size, the number of runes in the buffer
+	err          error         // A read error, if one occurred
+	errOffset    int           // The offset in the buffer at which the read error was encountered
 }
 
 // New initializes a new reader struct, wrapped around the provided input.
@@ -107,6 +109,11 @@ func makeBufioReader(input interface{}) *bufio.Reader {
 // be not nil. One special read fail is actually a normal situation: end
 // of file reached. In that case, the returned error wille be io.EOF.
 func (r *Reader) RuneAt(offset int) (rune, error) {
+	// Re-issue a previously seen read error.
+	if r.err != nil && offset >= r.errOffset {
+		return utf8.RuneError, r.err
+	}
+
 	// Rune at provided offset is not yet available in the input buffer.
 	// Read runes until we have enough runes to satisfy the offset.
 	for r.bufferLen <= offset {
@@ -114,6 +121,8 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
 
 		// Handle errors.
 		if err != nil {
+			r.err = err
+			r.errOffset = r.bufferLen
 			return utf8.RuneError, err
 		}
 
@@ -142,4 +151,7 @@ func (r *Reader) Flush(numberOfRunes int) {
 	r.bufferOffset += numberOfRunes
 	r.bufferLen -= numberOfRunes
 	r.buffer = r.buffer[numberOfRunes:]
+	if r.err != nil {
+		r.errOffset -= numberOfRunes
+	}
 }
diff --git a/reader/reader_test.go b/reader/reader_test.go
index e325520..234dbfe 100644
--- a/reader/reader_test.go
+++ b/reader/reader_test.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"strings"
 	"testing"
+	"unicode/utf8"
 
 	"git.makaay.nl/mauricem/go-parsekit/reader"
 	"github.com/stretchr/testify/assert"
@@ -175,3 +176,77 @@ func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
 	_, err = r.RuneAt(2)
 	assert.Equal(t, err.Error(), "EOF")
 }
+
+// In this test, I want to make sure that once a Reader returns an error,
+// that error is cached and will be returned when data for the offset where
+// the error occurred is read at a later time.
+func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
+	input := &StubReader{
+		bytes: []byte{'a', 'b', 'c', 'd'},
+		errors: []error{
+			io.EOF,
+			io.ErrUnexpectedEOF, // This error must never pop up in the tests below.
+		},
+	}
+	r := reader.New(input)
+
+	// Read the last available rune.
+	readRune, _ := r.RuneAt(3)
+	assert.Equal(t, 'd', readRune)
+
+	// Reading the next offset must result in the io.EOF error from the stub.
+	readRune, err := r.RuneAt(4)
+	assert.Equal(t, utf8.RuneError, readRune)
+	assert.Equal(t, io.EOF, err)
+
+	// Reading even further should yield the same io.EOF error.
+	readRune, err = r.RuneAt(5)
+	assert.Equal(t, utf8.RuneError, readRune)
+	assert.Equal(t, io.EOF, err)
+
+	// After an error, we must still be able to read the last rune.
+	readRune, _ = r.RuneAt(3)
+	assert.Equal(t, 'd', readRune)
+
+	// Flushing updates the error index too.
+	r.Flush(3)
+
+	// The last rune is now at offset 0.
+	readRune, _ = r.RuneAt(0)
+	assert.Equal(t, 'd', readRune)
+
+	// The io.EOF is now at offset 1.
+	_, err = r.RuneAt(1)
+	assert.Equal(t, io.EOF, err)
+
+	// Let's flush that last rune too.
+	r.Flush(1)
+
+	// The io.EOF is now at offset 0.
+	_, err = r.RuneAt(0)
+	assert.Equal(t, io.EOF, err)
+
+	// And reading beyond that offset also yields io.EOF.
+	_, err = r.RuneAt(1)
+	assert.Equal(t, io.EOF, err)
+}
+
+type StubReader struct {
+	bytes  []byte  // Remaining input; Read hands these out one byte per call.
+	errors []error // Errors returned by successive Read calls once bytes is empty.
+}
+
+func (r *StubReader) Read(p []byte) (n int, err error) {
+	if len(r.bytes) > 0 { // Input left: hand out exactly one byte per call.
+		head, tail := r.bytes[0], r.bytes[1:]
+		r.bytes = tail
+		p[0] = head // NOTE(review): assumes len(p) > 0; an empty p would panic here.
+		return 1, nil
+	}
+	if len(r.errors) > 0 { // Input exhausted: report the queued errors, one per call.
+		head, tail := r.errors[0], r.errors[1:]
+		r.errors = tail
+		return 0, head
+	}
+	panic("StubReader is all out of bytes and errors") // Both queues are empty.
+}