go-parsekit/read/read_test.go

435 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package read
import (
"bufio"
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
)
// ExampleNew feeds New a plain string, a *strings.Reader, a *bufio.Reader and
// a bufio.Reader value, and prints the first rune (with its byte width) that
// each resulting reader yields at offset 0.
func ExampleNew() {
	show := func(src interface{}) {
		buf := New(src)
		first, width, _ := buf.RuneAt(0)
		fmt.Printf("rune %q, width %d\n", first, width)
	}

	inputs := []interface{}{
		"Hello, world!",
		strings.NewReader("Good bye, world!"),
		bufio.NewReader(strings.NewReader("Where do we go, world?")),
		*(bufio.NewReader(strings.NewReader("Ɍead the manual!"))),
	}
	for _, src := range inputs {
		show(src)
	}

	// Output:
	// rune 'H', width 1
	// rune 'G', width 1
	// rune 'W', width 1
	// rune 'Ɍ', width 2
}
// TestNew_VariousInputTypesCanBeUsed builds a reader from each listed input
// type and checks that the rune at offset 0 is 'H' and the rune at offset 12
// is '!' for the text "Hello, world!".
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
	type inputCase struct {
		name  string
		input interface{}
	}
	cases := []inputCase{
		{name: "string", input: "Hello, world!"},
		{name: "io.Reader", input: strings.NewReader("Hello, world!")},
		{name: "*bufio.Reader", input: bufio.NewReader(strings.NewReader("Hello, world!"))},
		{name: "bufio.Reader", input: *(bufio.NewReader(strings.NewReader("Hello, world!")))},
	}

	for _, tc := range cases {
		buf := New(tc.input)
		if first, _, _ := buf.RuneAt(0); first != 'H' {
			t.Errorf("[%s] first rune not 'H'", tc.name)
		}
		if last, _, _ := buf.RuneAt(12); last != '!' {
			t.Errorf("[%s] last rune not '!', but %q", tc.name, last)
		}
	}
}
func TestNew_UnhandledInputType_Panics(t *testing.T) {
assertPanic(t,
func() { New(12345) },
"parsekit.read.New(): no support for input of type int")
}
// TestBuffer_ByteAt reads single bytes at offsets 0, 12, 7 and 0 again and
// checks that they spell "H!wH".
func TestBuffer_ByteAt(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))
	byteAt := func(offset int) byte {
		b, _ := buf.ByteAt(offset)
		return b
	}

	got := fmt.Sprintf("%c%c%c%c", byteAt(0), byteAt(12), byteAt(7), byteAt(0))
	assertEqual(t, "H!wH", got)
}
// TestBuffer_RuneAt reads runes at offsets 0, 5, 8 and 0 again from input
// that contains multi-byte characters and checks the runes that come back.
func TestBuffer_RuneAt(t *testing.T) {
	buf := New(strings.NewReader("¡pןɹoʍ 'oןןǝH"))
	runeAt := func(offset int) rune {
		rn, _, _ := buf.RuneAt(offset)
		return rn
	}

	// Offsets can be visited in any order, including jumping back to the start.
	got := fmt.Sprintf("%c%c%c%c", runeAt(0), runeAt(5), runeAt(8), runeAt(0))
	assertEqual(t, "¡ɹʍ¡", got)
}
// TestBuffer_ByteAt_endOfFile checks that ByteAt yields a zero byte together
// with io.EOF both directly past the 13-byte input and further beyond it.
func TestBuffer_ByteAt_endOfFile(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))

	for _, offset := range []int{13, 20} {
		b, err := buf.ByteAt(offset)
		got := fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
		assertEqual(t, "'\\x00' EOF true", got)
	}
}
// TestBuffer_RuneAt_endOfFile checks that RuneAt yields the Unicode
// replacement character (U+FFFD) together with io.EOF both directly past the
// 13-byte input and further beyond it.
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))

	for _, offset := range []int{13, 20} {
		rn, _, err := buf.RuneAt(offset)
		got := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
		// The replacement character is written as an escape so the expected
		// value cannot be damaged by tools that mishandle the raw character.
		assertEqual(t, "'\uFFFD' EOF true", got)
	}
}
// TestBuffer_RuneAt_invalidRune checks that an invalid UTF-8 byte (\xcd at
// offset 7) is reported as the Unicode replacement character (U+FFFD), while
// the valid runes around it are returned unchanged.
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
	buf := New(strings.NewReader("Hello, \xcdworld!"))
	runeAt := func(offset int) rune {
		rn, _, _ := buf.RuneAt(offset)
		return rn
	}

	got := fmt.Sprintf("%c%c%c%c", runeAt(6), runeAt(7), runeAt(8), runeAt(9))
	// The replacement character is written as an escape so the expected
	// value cannot be damaged by tools that mishandle the raw character.
	assertEqual(t, " \uFFFDwo", got)
}
// ExampleBuffer_ByteAt walks the input one byte at a time with ByteAt,
// printing every byte until an error (here: EOF) is returned.
func ExampleBuffer_ByteAt() {
	buf := New(strings.NewReader("Hello, world!"))

	fmt.Printf("Runes: ")
	for offset := 0; ; offset++ {
		b, err := buf.ByteAt(offset)
		if err != nil {
			fmt.Printf("\nErr: %s\n", err)
			break
		}
		fmt.Printf("%c", b)
	}

	// Output:
	// Runes: Hello, world!
	// Err: EOF
}
// ExampleBuffer_RuneAt walks the input rune by rune with RuneAt, advancing
// the offset by the width of each rune read, until an error (here: EOF) is
// returned.
func ExampleBuffer_RuneAt() {
	buf := New(strings.NewReader("Hello, pןɹoʍ!"))

	fmt.Printf("Runes: ")
	for offset := 0; ; {
		rn, width, err := buf.RuneAt(offset)
		if err != nil {
			fmt.Printf("\nErr: %s\n", err)
			break
		}
		fmt.Printf("%c", rn)
		offset += width
	}

	// Output:
	// Runes: Hello, pןɹoʍ!
	// Err: EOF
}
// TODO reimplement somewhere, maybe a separate call in the reader or should it be part of a parser?
// func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
// r := New(strings.NewReader("\uFEFFBommetje!"))
// b, _, _ := r.RuneAt(0)
// o, _, _ := r.RuneAt(1)
// m, _, _ := r.RuneAt(2)
// bom := fmt.Sprintf("%c%c%c", b, o, m)
// assertEqual(t, "Bom", bom)
// }
// TestBuffer_Flush checks that after Flush(4) offset 0 refers to what was
// offset 4 before the flush, and that reading continues from there.
func TestBuffer_Flush(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))
	runeAt := func(offset int) rune {
		rn, _, _ := buf.RuneAt(offset)
		return rn
	}

	// Read up to offset 7 first, which is the 'w' of "world".
	assertEqual(t, "w", fmt.Sprintf("%c", runeAt(7)))

	// Drop the leading "Hell" from the front.
	buf.Flush(4)

	// Offset 0 now lines up with the original offset 4, so the next six
	// reads spell out "o, wor".
	got := fmt.Sprintf("%c%c%c%c%c%c",
		runeAt(0), runeAt(1), runeAt(2), runeAt(3), runeAt(4), runeAt(5))
	assertEqual(t, "o, wor", got)
}
// ExampleBuffer_Flush reads "dog eat dog!" in three chunks of four offsets,
// flushing after each chunk so that every chunk is addressed from offset 0,
// and picks the runes in an order that prints a different sentence.
func ExampleBuffer_Flush() {
	buf := New(strings.NewReader("dog eat dog!"))
	runeAt := func(offset int) rune {
		rn, _, _ := buf.RuneAt(offset)
		return rn
	}

	// First chunk: "dog ", read in order.
	fmt.Printf("%c%c%c%c", runeAt(0), runeAt(1), runeAt(2), runeAt(3))

	// After this flush, offset 0 is the start of "eat dog!".
	buf.Flush(4)

	// Second chunk: "eat ", read out of order.
	fmt.Printf("%c%c%c%c", runeAt(1), runeAt(2), runeAt(0), runeAt(3))

	// After this flush, offset 0 is the start of "dog!".
	buf.Flush(4)

	// Remainder: "dog!", with one offset read twice.
	fmt.Printf("%c%c%c%c%c", runeAt(2), runeAt(1), runeAt(1), runeAt(0), runeAt(3))

	// Output:
	// dog ate good!
}
// TestGivenNumberOfRunesTooHigh_Flush_Panics checks that flushing 14 after
// only 13 have been read panics with a message reporting both numbers.
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
	buf := New(strings.NewReader("Hello, world!"))

	// Reading the final '!' at offset 12 brings all of "Hello, world!" in.
	last, _, _ := buf.RuneAt(12)
	assertEqual(t, '!', last)

	// Flushing one more than was read must panic.
	const want = "parsekit.read.Buffer.Flush(): number of runes to flush (14) exceeds size of the buffer (13)"
	flushTooMany := func() { buf.Flush(14) }
	assertPanic(t, flushTooMany, want)
}
// TestGivenEOFFollowedByFlush_EOFCanStillBeRead checks that io.EOF keeps
// being reported at and beyond the end of the input, both before and after
// the whole input has been flushed.
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
	r := New(strings.NewReader("Hello, world!"))

	// The error value itself is compared (expected first, as elsewhere in
	// this file) instead of calling err.Error(), which would panic with a
	// nil pointer dereference if no error were returned.
	expectEOF := func(offset int) {
		_, _, err := r.RuneAt(offset)
		assertEqual(t, io.EOF, err)
	}

	// At and past the end of the 13-byte input; offset 13 is read twice to
	// check that the result is repeatable.
	for _, offset := range []int{13, 13, 14} {
		expectEOF(offset)
	}

	r.Flush(13)

	// After flushing everything, the end of input is at offset 0.
	for _, offset := range []int{0, 1, 2} {
		expectEOF(offset)
	}
}
// TestGivenErrorFromBuffer_ErrorIsCached is meant to make sure that once a
// Buffer returns an error, that error is cached and will be returned when
// data for the offset where the error occurred is read at a later time.
//
// Only the first read is verified at the moment. The remaining assertions are
// skipped explicitly, so the test shows up as SKIP instead of silently
// passing without exercising the error cache.
func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
	input := &StubReader{
		bytes: []byte{'a', 'b', 'c', 'd'},
		errors: []error{
			io.EOF,
			io.ErrUnexpectedEOF, // This error must never pop up in the tests below.
		},
	}
	r := New(input)

	// Read the last available rune.
	readRune, _, _ := r.RuneAt(3)
	assertEqual(t, 'd', readRune)

	// TODO: remove this skip once the assertions below are expected to hold.
	t.Skip("error caching assertions are currently disabled")

	// Reading the next offset must result in the io.EOF error from the stub.
	readRune, _, err := r.RuneAt(4)
	assertEqual(t, utf8.RuneError, readRune)
	assertEqual(t, io.EOF, err)

	// Reading even further should yield the same io.EOF error.
	readRune, _, err = r.RuneAt(5)
	assertEqual(t, utf8.RuneError, readRune)
	assertEqual(t, io.EOF, err)

	// After an error, we must still be able to read the last rune.
	readRune, _, _ = r.RuneAt(3)
	assertEqual(t, 'd', readRune)

	// Flushing updates the error index too.
	r.Flush(3)

	// The last rune is now at offset 0.
	readRune, _, _ = r.RuneAt(0)
	assertEqual(t, 'd', readRune)

	// // The io.EOF is now at offset 1.
	// _, _, err = r.RuneAt(1)
	// assertEqual(t, io.EOF, err)
	// // Let's flush that last rune too.
	// r.Flush(1)
	// // The io.EOF is now at offset 0.
	// _, _, err = r.RuneAt(0)
	// assertEqual(t, io.EOF, err)
	// // And reading beyond that offset also yields io.EOF.
	// _, _, err = r.RuneAt(1)
	// assertEqual(t, io.EOF, err)
}
// TestInputLargerThanDefaultBufSize64 reads the first rune, the last rune,
// the end-of-input position and then an earlier offset again from the large
// stub input.
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
	input, size := makeLargeStubReader()
	buf := New(input)

	first, _, _ := buf.RuneAt(0)
	assertEqual(t, 'X', first)

	last, _, _ := buf.RuneAt(size - 1)
	assertEqual(t, 'Y', last)

	// One past the last byte reports the stub's io.EOF.
	_, _, err := buf.RuneAt(size)
	assertEqual(t, io.EOF, err)

	// Earlier data remains readable after the end of input has been seen.
	earlier, _, _ := buf.RuneAt(10)
	assertEqual(t, 'X', earlier)
}
// TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64 makes
// its very first read far into the large stub input (200 before the end) and
// then reads the final rune.
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
	input, size := makeLargeStubReader()
	buf := New(input)

	nearEnd, _, _ := buf.RuneAt(size - 200)
	assertEqual(t, 'X', nearEnd)

	last, _, _ := buf.RuneAt(size - 1)
	assertEqual(t, 'Y', last)
}
// TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte makes its very
// first read at the final offset of the large stub input.
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
	input, size := makeLargeStubReader()
	buf := New(input)

	last, _, _ := buf.RuneAt(size - 1)
	assertEqual(t, 'Y', last)
}
// TestAllocationPatterns runs a fixed sequence of reads and flushes against a
// reader built on the large stub input and, after every step, hands the step
// name, the reader, the operation and four expected numbers to assertCache.
// The steps build on each other, so their order matters.
//
// NOTE(review): the four trailing integers presumably are the expected store
// length/capacity followed by the buffer length/capacity — confirm against
// the definition of assertCache.
func TestAllocationPatterns(t *testing.T) {
input, _ := makeLargeStubReader()
r := New(input)
// The first read will create the standard cache.
// store |x 1024 |
// buffer |x 1024 |
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 1024, 4, 1024)
// The stub input starts with 'X', so that is the rune at offset 0.
rn, _, _ := r.RuneAt(0)
assertEqual(t, 'X', rn)
// The first 1024 bytes will fit in the standard cache.
// store |xxxx1024xxxxx|
// buffer |xxxx1024xxxxx|
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 0, 1024, 1024, 1024)
// Flushing zero input keeps everything as-is.
// store |xxxx1024xxxxx|
// buffer |xxxx1024xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 1024, 1024, 1024)
// Flushing all cached input truncates the cache.
// store | 1024 |
// buffer | 1024 |
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024, 0, 1024)
// Reading 1025 chars will allocate a new store of 2 * 1024.
// store |xxxxx1025xxxxx 1023 |
// buffer |xxxxx1025xxxxx 1023 |
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// The bytes that we had before must be copied to the newly allocated store.
rn, _, _ = r.RuneAt(0)
assertEqual(t, 'X', rn)
// A partial flush frees the start of the store and moves
// the buffer slice.
// store | 25 xxx1000xxx 1023 |
// buffer |xxx1000xxx 1023 |
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 0, 2048, 1000, 2048-25)
// The capacity for the buffer is now 2023
// This number of runes can be read, filling up the store
// without a new allocation.
// store | 25 xxxxxxxxxxx2023xxxxxxxxxx|
// buffer |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 0, 2048, 2023, 2048)
// Flush the full input.
// store | 2048 |
// buffer | 2048 |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048, 0, 2048)
// Read a bit more than half the capacity.
// store |xxxx1025xxxxxx 1023 |
// buffer |xxxx1025xxxxxx 1023 |
assertCache(t, "read more than half the cap", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// Then flush almost all input.
// store | 1024 x1x 1023 |
// buffer 1024 |x1x 1023 |
assertCache(t, "flush almost all input", r, func() { r.Flush(1024) }, 0, 2048, 1, 1024)
// Again read a bit more than half the capacity. This does not fit at the
// end of the store, but by moving the current buffer to the start of the
// store (where it fits), space is freed up for the read operation.
// store |xxxxx1025xxxxxx 1023 |
// buffer |xxxxx1025xxxxxx 1023 |
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// Now flush only one rune from the cache.
// store |1 xxx1024xxxxxx 1023 |
// buffer |xxx1024xxxxxx 1023 |
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 2048, 1024, 2047)
// Now read the full available capacity. This will not fit, so
// space has to be made. Since there's 1 free space at the start of the store,
// the data are moved to the start and no reallocation is needed.
// store |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 0, 2048, 2048, 2048)
}
// makeLargeStubReader returns a StubReader holding 8192 bytes (all 'X' except
// for a final 'Y') followed by a single io.EOF error, together with the
// number of bytes it holds.
func makeLargeStubReader() (*StubReader, int) {
	const size = 8192

	data := make([]byte, size)
	last := size - 1
	for i := 0; i < last; i++ {
		data[i] = 'X'
	}
	// The distinct final byte lets tests recognise the end of the input.
	data[last] = 'Y'

	stub := &StubReader{
		bytes:  data,
		errors: []error{io.EOF},
	}
	return stub, size
}
// StubReader is a scripted reader for tests: it first hands out its bytes one
// at a time and then returns its queued errors one at a time.
type StubReader struct {
	bytes  []byte  // bytes still to be served, one per Read call
	errors []error // errors to return, in order, once bytes is empty
}

// Read serves the next scripted result.
//
// While bytes remain, exactly one byte is copied into p[0] and (1, nil) is
// returned, regardless of how large p is. Once the bytes are used up, each
// call returns (0, err) for the next queued error. When both queues are
// empty, Read panics, since the test consumed more than was scripted.
//
// A zero-length p returns (0, nil) without consuming anything, as permitted
// for io.Reader implementations, instead of panicking on the p[0] access.
func (r *StubReader) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}
	if len(r.bytes) > 0 {
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		return 1, nil
	}
	if len(r.errors) > 0 {
		next := r.errors[0]
		r.errors = r.errors[1:]
		return 0, next
	}
	panic("StubReader is all out of bytes and errors")
}