// Source: go-parsekit/read/read_test.go (384 lines, 11 KiB, Go)

package read
import (
"bufio"
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
)
// ExampleNew shows the kinds of input that can be handed to New: a plain
// string, a strings.Reader, a *bufio.Reader and a bufio.Reader value.
// For every input, the rune at offset 0 is printed in quoted form.
func ExampleNew() {
	inputs := []interface{}{
		"Hello, world!",
		strings.NewReader("Good bye, world!"),
		bufio.NewReader(strings.NewReader("Where do we go, world?")),
		*(bufio.NewReader(strings.NewReader("Where do we go, world?"))),
	}

	for _, input := range inputs {
		r := New(input)
		// Only the rune matters for this example; the error is ignored.
		first, _ := r.RuneAt(0)
		fmt.Printf("%q\n", first)
	}

	// Output:
	// 'H'
	// 'G'
	// 'W'
	// 'W'
}
// TestNew_VariousInputTypesCanBeUsed checks that New accepts each of the
// listed input types and that the first and the last rune of the text
// "Hello, world!" can be read back through the resulting buffer.
//
// Errors returned by RuneAt are reported instead of being discarded, and
// the failure messages include the rune that was actually read, so that a
// failing case can be diagnosed from the test output alone.
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
	const text = "Hello, world!"

	tests := []struct {
		name  string
		input interface{}
	}{
		{"string", text},
		{"io.Reader", strings.NewReader(text)},
		{"*bufio.Reader", bufio.NewReader(strings.NewReader(text))},
		{"bufio.Reader", *(bufio.NewReader(strings.NewReader(text)))},
	}

	for _, test := range tests {
		r := New(test.input)

		firstRune, err := r.RuneAt(0)
		if err != nil {
			t.Errorf("[%s] unexpected error reading first rune: %v", test.name, err)
		}
		if firstRune != 'H' {
			t.Errorf("[%s] first rune not 'H' (got %q)", test.name, firstRune)
		}

		// Offset 12 is the last rune of the 13-rune input.
		lastRune, err := r.RuneAt(12)
		if err != nil {
			t.Errorf("[%s] unexpected error reading last rune: %v", test.name, err)
		}
		if lastRune != '!' {
			t.Errorf("[%s] last rune not '!' (got %q)", test.name, lastRune)
		}
	}
}
func TestNew_UnhandledInputType_Panics(t *testing.T) {
assertPanic(t,
func() { New(12345) },
"parsekit.read.New(): no support for input of type int")
}
// TestBuffer_RuneAt verifies that runes can be requested by offset in an
// arbitrary order, including going back to an offset that was read before.
func TestBuffer_RuneAt(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))
	runeAt := func(offset int) rune {
		c, _ := buf.RuneAt(offset)
		return c
	}

	// Jump forward, backward and back to the start again.
	first, last, middle, again := runeAt(0), runeAt(12), runeAt(7), runeAt(0)
	assertEqual(t, "H!wH", fmt.Sprintf("%c%c%c%c", first, last, middle, again))
}
// TestBuffer_RuneAt_endOfFile verifies that reading at or beyond the end of
// the 13-rune input yields the Unicode replacement character together with
// io.EOF.
//
// The expected string spells the replacement character as the escape
// \uFFFD rather than as a literal character, so that the expectation
// survives tools that mangle non-ASCII bytes.
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
	r := New(strings.NewReader("Hello, world!"))

	// %q renders utf8.RuneError (U+FFFD) as a quoted literal character.
	const expected = "'\uFFFD' EOF true"

	// Offset 13 is the first offset past the input, offset 20 is well beyond.
	for _, offset := range []int{13, 20} {
		rn, err := r.RuneAt(offset)
		result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
		assertEqual(t, expected, result)
	}
}
// TestBuffer_RuneAt_invalidRune verifies that an invalid UTF-8 byte in the
// input (\xcd at offset 7) is delivered as the Unicode replacement
// character, while the runes around it are read normally.
//
// The expected string spells the replacement character as the escape
// \uFFFD rather than as a literal character, so that the expectation
// survives tools that mangle non-ASCII bytes.
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
	r := New(strings.NewReader("Hello, \xcdworld!"))
	// Only the rune values are compared in this test; the error is ignored.
	at := func(i int) rune {
		c, _ := r.RuneAt(i)
		return c
	}

	result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
	assertEqual(t, " \uFFFDwo", result)
}
// ExampleBuffer_RuneAt reads the input rune by rune, using increasing
// offsets, until RuneAt reports an error. The runes are printed as they are
// read, followed by the error that ended the loop.
func ExampleBuffer_RuneAt() {
	reader := New(strings.NewReader("Hello, world!"))

	fmt.Printf("Runes: ")

	var err error
	for offset := 0; err == nil; offset++ {
		var c rune
		if c, err = reader.RuneAt(offset); err == nil {
			fmt.Printf("%c", c)
		}
	}
	fmt.Printf("\nErr: %s\n", err)

	// Output:
	// Runes: Hello, world!
	// Err: EOF
}
// TestRuneAt_SkipsBOMAtStartOfFile verifies that a byte order mark (U+FEFF)
// at the very start of the input is not exposed: offset 0 must refer to the
// first rune following the mark.
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
	buf := New(strings.NewReader("\uFEFFBommetje!"))

	head := make([]rune, 3)
	for offset := range head {
		head[offset], _ = buf.RuneAt(offset)
	}

	assertEqual(t, "Bom", fmt.Sprintf("%c%c%c", head[0], head[1], head[2]))
}
// TestBuffer_Flush verifies that flushing runes shifts the offsets: after
// flushing 4 runes, offset 0 refers to what used to be offset 4.
func TestBuffer_Flush(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))
	runeAt := func(offset int) rune {
		c, _ := buf.RuneAt(offset)
		return c
	}

	// Asking for offset 7 loads the first 8 runes ("Hello, w").
	assertEqual(t, "w", fmt.Sprintf("%c", runeAt(7)))

	// Drop "Hell" from the front of the buffer.
	buf.Flush(4)

	// Offsets now count from the original offset 4 onward, and reading
	// can simply continue from there.
	tail := make([]rune, 0, 6)
	for offset := 0; offset < 6; offset++ {
		tail = append(tail, runeAt(offset))
	}
	assertEqual(t, "o, wor", string(tail))
}
// ExampleBuffer_Flush shows how Flush moves offset 0 forward through the
// input: after each flush of 4 runes, offsets are relative to the runes
// that remain.
func ExampleBuffer_Flush() {
	r := New(strings.NewReader("dog eat dog!"))

	// show prints the runes found at the given offsets, in the given order.
	show := func(offsets ...int) {
		for _, offset := range offsets {
			c, _ := r.RuneAt(offset)
			fmt.Printf("%c", c)
		}
	}

	// The first 4 runes of the input, in order.
	show(0, 1, 2, 3)

	// After this flush, offset 0 is the start of "eat dog!".
	r.Flush(4)

	// The next 4 runes, deliberately read out of order.
	show(1, 2, 0, 3)

	// After this flush, offset 0 is the start of "dog!".
	r.Flush(4)

	// The remaining runes, again out of order and with a repeat.
	show(2, 1, 1, 0, 3)

	// Output:
	// dog ate good!
}
// TestGivenNumberOfRunesTooHigh_Flush_Panics verifies that Flush panics
// when asked to flush more runes than the buffer currently holds.
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))

	// Reading offset 10 loads the first 11 runes ("Hello, worl"); the
	// returned rune and error are not needed here.
	_, _ = buf.RuneAt(10)

	// Flushing 12 runes is one more than the 11 that are buffered.
	flushTooMany := func() { buf.Flush(12) }
	assertPanic(t, flushTooMany,
		"parsekit.read.Buffer.Flush(): number of runes to flush (12) exceeds size of the buffer (11)")
}
// TestGivenEOFFollowedByFlush_EOFCanStillBeRead verifies that io.EOF keeps
// being reported at and beyond the end of the input, both before and after
// all 13 runes of the input have been flushed from the buffer.
//
// The error value itself is compared against io.EOF (expected value first,
// like the other assertions in this file). Comparing err.Error() instead
// would crash the test with a nil dereference when no error is returned.
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
	r := New(strings.NewReader("Hello, world!"))

	// Before the flush: the end of input sits at offset 13. Offset 13 is
	// read twice to check that the EOF is repeatable.
	for _, offset := range []int{13, 13, 14} {
		_, err := r.RuneAt(offset)
		assertEqual(t, io.EOF, err)
	}

	r.Flush(13)

	// After the flush: the end of input has moved to offset 0.
	for _, offset := range []int{0, 1, 2} {
		_, err := r.RuneAt(offset)
		assertEqual(t, io.EOF, err)
	}
}
// TestGivenErrorFromBuffer_ErrorIsCached makes sure that once a Buffer has
// returned an error, that error is cached and is returned again whenever
// the offset at which it occurred (or a later one) is read afterwards.
func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
	stub := &StubReader{
		bytes: []byte("abcd"),
		errors: []error{
			io.EOF,
			io.ErrUnexpectedEOF, // This error must never pop up in the checks below.
		},
	}
	buf := New(stub)

	// expectRune reads the given offset and checks the rune only.
	expectRune := func(offset int, want rune) {
		got, _ := buf.RuneAt(offset)
		assertEqual(t, want, got)
	}
	// expectEOF reads the given offset and checks the error only.
	expectEOF := func(offset int) {
		_, err := buf.RuneAt(offset)
		assertEqual(t, io.EOF, err)
	}
	// expectRuneErrorAndEOF reads the given offset and checks both results.
	expectRuneErrorAndEOF := func(offset int) {
		got, err := buf.RuneAt(offset)
		assertEqual(t, utf8.RuneError, got)
		assertEqual(t, io.EOF, err)
	}

	// Read the last available rune.
	expectRune(3, 'd')

	// The next offset must produce the io.EOF error from the stub.
	expectRuneErrorAndEOF(4)

	// Reading even further must produce that same io.EOF error.
	expectRuneErrorAndEOF(5)

	// After an error, the last rune must still be readable.
	expectRune(3, 'd')

	// Flushing shifts the offset of the cached error as well.
	buf.Flush(3)

	// The last rune now lives at offset 0 and the io.EOF at offset 1.
	expectRune(0, 'd')
	expectEOF(1)

	// Flush the last rune too.
	buf.Flush(1)

	// The io.EOF is now at offset 0, and reading past it yields io.EOF too.
	expectEOF(0)
	expectEOF(1)
}
// TestInputLargerThanDefaultBufSize64 reads the first rune, the last rune
// and the end-of-input position of a large input, and then goes back to an
// earlier offset.
//
// Every value returned by RuneAt is either checked or explicitly discarded.
// Reads that are expected to succeed also report an unexpected error, so
// that no assignment to err is silently overwritten.
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
	input, size := makeLargeStubReader()
	r := New(input)

	// expectRune reads the given offset and requires a specific rune and
	// no error.
	expectRune := func(offset int, want rune) {
		readRune, err := r.RuneAt(offset)
		if err != nil {
			t.Errorf("unexpected error at offset %d: %v", offset, err)
		}
		assertEqual(t, want, readRune)
	}

	expectRune(0, 'X')
	expectRune(size-1, 'Y')

	// One past the last rune: only the error is of interest here.
	_, err := r.RuneAt(size)
	assertEqual(t, io.EOF, err)

	// After hitting the end of the input, earlier offsets remain readable.
	expectRune(10, 'X')
}
// TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64
// starts with a read far into a large input (200 runes before its end) and
// then reads the last rune of that input.
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
	stub, total := makeLargeStubReader()
	buf := New(stub)

	checks := []struct {
		offset int
		want   rune
	}{
		{total - 200, 'X'},
		{total - 1, 'Y'},
	}
	for _, check := range checks {
		got, _ := buf.RuneAt(check.offset)
		assertEqual(t, check.want, got)
	}
}
// TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte makes the
// very first read on a large input a read of its last rune.
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
	stub, total := makeLargeStubReader()
	buf := New(stub)

	lastOffset := total - 1
	got, _ := buf.RuneAt(lastOffset)
	assertEqual(t, 'Y', got)
}
// TestAllocationPatterns walks a single Buffer through a fixed sequence of
// reads and flushes and checks the cache dimensions after every step. The
// steps depend on each other: each one starts from the state left behind by
// the previous one, so they must stay in this order.
//
// NOTE(review): the four trailing integers passed to assertCache look like
// the expected store length, store capacity, buffer length and buffer
// capacity, in that order. assertCache is defined elsewhere; confirm this
// against its definition.
func TestAllocationPatterns(t *testing.T) {
input, _ := makeLargeStubReader()
r := New(input)
// The first read will create the standard cache.
// store |x 64 |
// buffer |x 64 |
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
// The first 64 reads will fit in the standard cache.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
// Flushing zero input keeps everything as-is.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64)
// Flushing all cached input truncates the cache.
// store | 64 |
// buffer | 64 |
assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64)
// Reading 65 chars will allocate a new store of 2 * size + n.
// store |xxxxx65xxxxx 128 |
// buffer |xxxxx65xxxxx 128 |
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
// A partial flush frees the start of the store and moves
// the buffer slice.
// store | 50 x15x 128 |
// buffer |x15x 128 |
assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128)
// The capacity for the buffer is now 2*64 + 15
// This number of runes can be read, filling up the store
// without a new allocation.
// store | 50 xxxxxxxxx143xxxxxxxx|
// buffer |xxxxxxxxx143xxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
// Flush the full input.
// store | 193 |
// buffer | |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193)
// Read a bit more than half the capacity.
// store |xxxxxx101xxxxxxxx 92 |
// buffer |xxxxxx101xxxxxxxx 92 |
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
// Then flush almost all input.
// store | 100 x1x 92 |
// buffer |x1x 92 |
assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93)
// Again read a bit more than half the capacity. This does not fit at the
// end of the store, but by moving the current buffer to the start of the
// store (where it fits), space is freed up for the read operation.
// store |xxxxx100xxxxxx 93 |
// buffer |xxxxx100xxxxxx 93 |
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
// Now flush only one rune from the cache.
// store |1 xxxx99xxxxx 93 |
// buffer |xxxx99xxxxx 93 |
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192)
// Now read one more than the capacity. This will not fit, so space has
// to be made. Since there's 1 free space at the start of the store,
// the data is moved to the start and no reallocation is needed.
// NOTE(review): the diagram below was changed to show the state after the
// read, matching the 193/193 values asserted on the next line. The previous
// diagram repeated the layout from before the read.
// store |xxxxxxxxx193xxxxxxxx|
// buffer |xxxxxxxxx193xxxxxxxx|
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
}
// makeLargeStubReader builds a StubReader holding utf8.UTFMax * 64 * 5
// bytes of input: every byte is 'X', except for the very last one, which
// is 'Y'. Once the bytes are used up, the stub returns io.EOF. The second
// return value is the number of bytes in the input.
func makeLargeStubReader() (*StubReader, int) {
	const total = utf8.UTFMax * 64 * 5

	payload := make([]byte, total)
	last := len(payload) - 1
	for i := 0; i < last; i++ {
		payload[i] = 'X'
	}
	// Mark the end of the input, so tests can recognize the last byte.
	payload[last] = 'Y'

	stub := &StubReader{
		bytes:  payload,
		errors: []error{io.EOF},
	}
	return stub, total
}
// StubReader is a scripted reader for use in tests. It first hands out its
// bytes one at a time, and after that its errors one at a time.
type StubReader struct {
	bytes  []byte  // Input still to be delivered, one byte per Read call.
	errors []error // Errors to return, one per Read call, once bytes is empty.
}

// Read delivers at most one byte per call. As long as bytes remain, the
// next byte is stored in p[0] and (1, nil) is returned. When the bytes are
// used up, each call returns (0, err) for the next scripted error.
//
// A call with an empty p returns (0, nil) without consuming anything, as
// the io.Reader contract permits, instead of failing on the write to p[0].
//
// Read panics when it is called with a non-empty p after both the bytes and
// the errors have been used up, since that means the test made more reads
// than were scripted.
func (r *StubReader) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}
	if len(r.bytes) > 0 {
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		return 1, nil
	}
	if len(r.errors) > 0 {
		err = r.errors[0]
		r.errors = r.errors[1:]
		return 0, err
	}
	panic("StubReader is all out of bytes and errors")
}