522 lines
15 KiB
Go
522 lines
15 KiB
Go
package read
|
||
|
||
import (
|
||
"bufio"
|
||
"fmt"
|
||
"io"
|
||
"os"
|
||
"runtime"
|
||
"strings"
|
||
"testing"
|
||
"unicode/utf8"
|
||
)
|
||
|
||
func ExampleNew() {
|
||
printFirstRuneOf := func(input interface{}) {
|
||
r := New(input)
|
||
c, w, _ := r.RuneAt(0)
|
||
fmt.Printf("rune %q, width in bytes = %d\n", c, w)
|
||
}
|
||
|
||
simpleString := "Ƕello, world!"
|
||
printFirstRuneOf(simpleString)
|
||
|
||
ioReaderImplementation := strings.NewReader("Good bye, world!")
|
||
printFirstRuneOf(ioReaderImplementation)
|
||
|
||
bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?"))
|
||
printFirstRuneOf(bufioReaderPointer)
|
||
|
||
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Ɍead the manual!")))
|
||
printFirstRuneOf(bufioReaderValue)
|
||
|
||
// Output:
|
||
// rune 'Ƕ', width in bytes = 2
|
||
// rune 'G', width in bytes = 1
|
||
// rune 'W', width in bytes = 1
|
||
// rune 'Ɍ', width in bytes = 2
|
||
}
|
||
|
||
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||
for _, test := range []struct {
|
||
name string
|
||
input interface{}
|
||
}{
|
||
{"string", "Hello, world!"},
|
||
{"io.Reader", strings.NewReader("Hello, world!")},
|
||
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
|
||
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
||
} {
|
||
r := New(test.input)
|
||
firstRune, _, _ := r.RuneAt(0)
|
||
if firstRune != 'H' {
|
||
t.Errorf("[%s] first rune not 'H'", test.name)
|
||
}
|
||
lastRune, _, _ := r.RuneAt(12)
|
||
if lastRune != '!' {
|
||
t.Errorf("[%s] last rune not '!', but %q", test.name, lastRune)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
||
assertPanic(t,
|
||
func() { New(12345) },
|
||
"parsekit.read.New(): no support for input of type int")
|
||
}
|
||
|
||
func TestBuffer_ByteAt(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
at := func(i int) byte { b, _ := r.ByteAt(i); return b }
|
||
|
||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||
assertEqual(t, "H!wH", result)
|
||
}
|
||
|
||
func TestBuffer_RuneAt(t *testing.T) {
|
||
r := New(strings.NewReader("¡pןɹoʍ 'oןןǝH"))
|
||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||
|
||
// It is possible to go back and forth while reading the input.
|
||
result := fmt.Sprintf("%c%c%c%c", at(0), at(5), at(8), at(0))
|
||
assertEqual(t, "¡ɹʍ¡", result)
|
||
}
|
||
|
||
func TestBuffer_ByteAt_endOfFile(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
|
||
b, err := r.ByteAt(13)
|
||
result := fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||
assertEqual(t, "'\\x00' EOF true", result)
|
||
|
||
b, err = r.ByteAt(20)
|
||
result = fmt.Sprintf("%q %s %t", b, err, err == io.EOF)
|
||
assertEqual(t, "'\\x00' EOF true", result)
|
||
}
|
||
|
||
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
|
||
rn, _, err := r.RuneAt(13)
|
||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||
assertEqual(t, "'<27>' EOF true", result)
|
||
|
||
rn, _, err = r.RuneAt(20)
|
||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||
assertEqual(t, "'<27>' EOF true", result)
|
||
}
|
||
|
||
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, \xcdworld!"))
|
||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||
|
||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||
assertEqual(t, " <20>wo", result)
|
||
}
|
||
|
||
func ExampleBuffer_ByteAt() {
|
||
reader := New(strings.NewReader("Hello, world!"))
|
||
|
||
fmt.Printf("Runes: ")
|
||
offset := 0
|
||
for {
|
||
r, err := reader.ByteAt(offset)
|
||
offset++
|
||
if err != nil {
|
||
fmt.Printf("\nErr: %s\n", err)
|
||
break
|
||
}
|
||
fmt.Printf("%c", r)
|
||
}
|
||
|
||
// Output:
|
||
// Runes: Hello, world!
|
||
// Err: EOF
|
||
}
|
||
|
||
func ExampleBuffer_BytesAt() {
|
||
reader := New(strings.NewReader("Hello, world!"))
|
||
|
||
b, err := reader.BytesAt(0, 5)
|
||
fmt.Printf("%s err=%v\n", b, err)
|
||
|
||
b, err = reader.BytesAt(7, 10)
|
||
fmt.Printf("%s err=%v\n", b, err)
|
||
|
||
b, err = reader.BytesAt(7, 5)
|
||
fmt.Printf("%s err=%v\n", b, err)
|
||
|
||
// Output:
|
||
// Hello err=<nil>
|
||
// world! err=EOF
|
||
// world err=<nil>
|
||
}
|
||
|
||
func ExampleBuffer_RuneAt() {
|
||
reader := New(strings.NewReader("Hello, pןɹoʍ!"))
|
||
|
||
fmt.Printf("Runes: ")
|
||
offset := 0
|
||
for {
|
||
r, _, err := reader.RuneAt(offset)
|
||
offset += utf8.RuneLen(r)
|
||
if err != nil {
|
||
fmt.Printf("\nErr: %s\n", err)
|
||
break
|
||
}
|
||
fmt.Printf("%c", r)
|
||
}
|
||
|
||
// Output:
|
||
// Runes: Hello, pןɹoʍ!
|
||
// Err: EOF
|
||
}
|
||
|
||
// TODO: reimplement BOM skipping somewhere: maybe as a separate call in the reader, or should it be part of a parser?
|
||
// func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||
// r := New(strings.NewReader("\uFEFFBommetje!"))
|
||
// b, _, _ := r.RuneAt(0)
|
||
// o, _, _ := r.RuneAt(1)
|
||
// m, _, _ := r.RuneAt(2)
|
||
// bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||
// assertEqual(t, "Bom", bom)
|
||
// }
|
||
|
||
func TestBuffer_Flush(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
at := func(i int) rune { r, _, _ := r.RuneAt(i); return r }
|
||
|
||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||
result := fmt.Sprintf("%c", at(7))
|
||
assertEqual(t, "w", result)
|
||
|
||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||
r.Flush(4)
|
||
|
||
// Rune 0 is now pointing at what originally was rune offset 4.
|
||
// We can continue reading from there.
|
||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||
assertEqual(t, "o, wor", result)
|
||
}
|
||
|
||
func ExampleBuffer_Flush() {
|
||
r := New(strings.NewReader("dog eat dog!"))
|
||
at := func(offset int) rune { c, _, _ := r.RuneAt(offset); return c }
|
||
|
||
// Read from the first 4 runes of the input.
|
||
fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3))
|
||
|
||
// Flush those 4 runes, bringing offset 0 to the start of "eat dog".
|
||
r.Flush(4)
|
||
|
||
// Read another 4 runes, because of the flushing, we start at offset 0.
|
||
fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3))
|
||
|
||
// We might even read some more runes. That is no problem.
|
||
at(4)
|
||
at(5)
|
||
|
||
// Again, flush 4 runes, bringing offset 0 to the start of "dog!".
|
||
r.Flush(4)
|
||
|
||
// Read from the remainder runes.
|
||
fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3))
|
||
|
||
// Output:
|
||
// dog ate good!
|
||
}
|
||
|
||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
|
||
// Fill buffer with "Hello, world!", the first 13 runes.
|
||
rn, _, _ := r.RuneAt(12)
|
||
assertEqual(t, '!', rn)
|
||
|
||
// However, we flush 14 runes, which exceeds the buffer size.
|
||
assertPanic(t,
|
||
func() { r.Flush(14) },
|
||
"parsekit.read.Buffer.Flush(): number of bytes to flush (14) exceeds size of the buffered data (13)")
|
||
}
|
||
|
||
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
||
r := New(strings.NewReader("Hello, world!"))
|
||
_, _, err := r.RuneAt(13)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
_, _, err = r.RuneAt(13)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
_, _, err = r.RuneAt(14)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
r.Flush(13)
|
||
_, _, err = r.RuneAt(0)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
_, _, err = r.RuneAt(1)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
_, _, err = r.RuneAt(2)
|
||
assertEqual(t, err.Error(), "EOF")
|
||
}
|
||
|
||
// In this test, I want to make sure that once a Buffer returns an error,
|
||
// that error is cached and will be returned when data for the offset where
|
||
// the error occurred is read at a later time.
|
||
func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
|
||
input := &StubReader{
|
||
bytes: []byte{'a', 'b', 'c', 'd'},
|
||
errors: []error{
|
||
io.EOF,
|
||
io.ErrUnexpectedEOF, // This error must never popup in the tests below.
|
||
},
|
||
}
|
||
r := New(input)
|
||
|
||
// Read the last availble rune.
|
||
readRune, _, _ := r.RuneAt(3)
|
||
assertEqual(t, 'd', readRune)
|
||
|
||
// Reading the next offset must result in the io.EOF error from the stub.
|
||
readRune, _, err := r.RuneAt(4)
|
||
assertEqual(t, utf8.RuneError, readRune)
|
||
assertEqual(t, io.EOF, err)
|
||
|
||
// Reading even further should yield the same io.EOF error.
|
||
readRune, _, err = r.RuneAt(5)
|
||
assertEqual(t, utf8.RuneError, readRune)
|
||
assertEqual(t, io.EOF, err)
|
||
|
||
// After an error, we must still be able to read the last rune.
|
||
readRune, _, _ = r.RuneAt(3)
|
||
assertEqual(t, 'd', readRune)
|
||
|
||
// Flushing updates the error index too.
|
||
r.Flush(3)
|
||
|
||
// The last rune is now at offset 0.
|
||
readRune, _, _ = r.RuneAt(0)
|
||
assertEqual(t, 'd', readRune)
|
||
|
||
// The io.EOF is now at offset 1.
|
||
_, _, err = r.RuneAt(1)
|
||
assertEqual(t, io.EOF, err)
|
||
|
||
// Let's flush that last rune too.
|
||
r.Flush(1)
|
||
|
||
// The io.EOF is now at offset 0.
|
||
_, _, err = r.RuneAt(0)
|
||
assertEqual(t, io.EOF, err)
|
||
|
||
// And reading beyond that offset also yields io.EOF.
|
||
_, _, err = r.RuneAt(1)
|
||
assertEqual(t, io.EOF, err)
|
||
}
|
||
|
||
func TestInputLargerThanDefaultBufSize(t *testing.T) {
|
||
input, size := makeLargeStubReader()
|
||
r := New(input)
|
||
|
||
readRune, _, err := r.RuneAt(0)
|
||
assertEqual(t, 'A', readRune)
|
||
readRune, _, err = r.RuneAt(size - 1)
|
||
assertEqual(t, 'B', readRune)
|
||
readRune, _, err = r.RuneAt(size)
|
||
assertEqual(t, io.EOF, err)
|
||
readRune, _, err = r.RuneAt(10)
|
||
assertEqual(t, 'K', readRune)
|
||
}
|
||
|
||
func TestInputLargerThanDefaultBufSize_WithFirstReadLargerThanBufSize(t *testing.T) {
|
||
input, size := makeLargeStubReader()
|
||
r := New(input)
|
||
|
||
readRune, _, _ := r.RuneAt(size - 200)
|
||
assertEqual(t, 'K', readRune)
|
||
readRune, _, _ = r.RuneAt(size - 1)
|
||
assertEqual(t, 'B', readRune)
|
||
}
|
||
|
||
func TestInputLargerThanDefaultBufSize_WithFirstReadToLastByte(t *testing.T) {
|
||
input, size := makeLargeStubReader()
|
||
r := New(input)
|
||
|
||
readRune, _, _ := r.RuneAt(size - 1)
|
||
assertEqual(t, 'B', readRune)
|
||
}
|
||
|
||
func TestAllocationPatterns(t *testing.T) {
|
||
input, _ := makeLargeStubReader()
|
||
buf := New(input)
|
||
r := &buf
|
||
|
||
// The first read will create the standard buffer and fill it with data.
|
||
// The first rune is requested, but there's more input data availble,
|
||
// so the cache is filled up completely.
|
||
// buffer |xxxx1024xxxxx|
|
||
assertBuffer(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 0, 1024)
|
||
rn, _, _ := r.RuneAt(0)
|
||
assertEqual(t, 'A', rn)
|
||
|
||
// The first 1024 bytes will fit in the standard buffer.
|
||
// buffer |xxxx1024xxxxx|
|
||
assertBuffer(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 0, 1024)
|
||
|
||
// Flushing zero input keeps everything as-is.
|
||
// buffer |xxxx1024xxxxx|
|
||
assertBuffer(t, "flush zero", r, func() { r.Flush(0) }, 1024, 0, 1024)
|
||
|
||
// Flushing all cached input truncates the buffer.
|
||
// buffer | 1024 |
|
||
assertBuffer(t, "flush full cache", r, func() { r.Flush(1024) }, 1024, 0, 0)
|
||
|
||
// Reading 1025 chars will allocate a new store of 2 * 1024 and fill it with data.
|
||
// Offset 1024 is requested, but there's more input data availble,
|
||
// so the cache is filled up completely.
|
||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||
runeBefore, _, _ := r.RuneAt(0)
|
||
assertBuffer(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 0, 2048)
|
||
runeAfter, _, _ := r.RuneAt(0)
|
||
|
||
// The bytes that we had before must be copied to the newly allocated store.
|
||
assertEqual(t, runeBefore, runeAfter)
|
||
|
||
// A partial flush moves the buffer offset, but the stored data stay the same.
|
||
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
|
||
assertBuffer(t, "flush partial", r, func() { r.Flush(25) }, 2048, 25, 2023)
|
||
|
||
// The capacity for the usable part of the buffer is now 2023
|
||
// This number of runes can be read, without triggering a re-allocation.
|
||
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
|
||
assertBuffer(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 25, 2023)
|
||
|
||
// Flush the full input.
|
||
// store | 2048 |
|
||
// buffer | 2048 |
|
||
assertBuffer(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 2048, 0, 0)
|
||
|
||
// Fill up the store again.
|
||
// Offset 1234 is requested, but there's more input data availble,
|
||
// so the cache is filled up completely.
|
||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||
assertBuffer(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
|
||
|
||
// Then flush almost all input.
|
||
// buffer 2047 |x1x|
|
||
assertBuffer(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2047, 1)
|
||
|
||
// Read some data beyond the single byte. This moves the single byte at the end to
|
||
// the start and fills up the rest of the buffer, without a reallocation.
|
||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||
assertBuffer(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
|
||
|
||
// Now flush only one rune from the cache.
|
||
// buffer 1 |xxxxxxxxx2047xxxxxxxxxxxxxx|
|
||
assertBuffer(t, "flush 1", r, func() { r.Flush(1) }, 2048, 1, 2047)
|
||
|
||
// Now read the full available capacity. This will not fit, so
|
||
// space has to be made. Since there's 1 free space at the start of the store,
|
||
// the data are moved to the start and no reallocation is needed.
|
||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
|
||
assertBuffer(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 0, 2048)
|
||
|
||
// Now read in the whole rest of the buffer, asking for an offset that is way out of range.
|
||
// It does allocate enough memory to store 10000 bytes (bringing us to 10240), but while reading it is
|
||
// detected that there are not enough bytes to fill it. That puts a limit on the amount of data in
|
||
// the buffer, so the buffer is not completely filled.
|
||
// buffer |xxxxxxxxxxxxxxx5120xxxxxxxxxxxxxxxxxxxx 10240-5120 |
|
||
remaining := input.remaining
|
||
assertBuffer(t, "over-ask", r, func() { r.ByteAt(10000) }, 10240, 0, 2048+remaining)
|
||
}
|
||
|
||
func makeLargeStubReader() (*StubReader, int) {
|
||
size := 8192
|
||
bytes := make([]byte, size)
|
||
for i := range bytes {
|
||
bytes[i] = 'A' + byte(i%26)
|
||
}
|
||
return &StubReader{bytes: bytes, errors: []error{io.EOF}, remaining: size}, size
|
||
}
|
||
|
||
// StubReader is an io.Reader implementation for use in tests. It first serves
// a predefined sequence of bytes, one byte per Read call, and after that a
// predefined sequence of errors, one error per Read call.
type StubReader struct {
	// bytes holds the data that have not been served yet.
	bytes []byte
	// errors holds the errors that have not been served yet. These are
	// returned, in order, once bytes has run empty.
	errors []error
	// remaining is decremented for every byte that is served. When it is
	// initialized to len(bytes), it tracks the number of unserved bytes.
	remaining int
}

// Read implements io.Reader. Every call serves at most one byte into p[0],
// also when p has room for more.
//
// When p is empty, Read returns 0, nil without consuming anything, as the
// io.Reader contract permits for len(p) == 0. When all bytes have been served,
// each call returns 0 and the next predefined error. Read panics when it is
// called after all bytes and all errors have been served, since that means
// that the code under test reads more often than the test has planned for.
func (r *StubReader) Read(p []byte) (n int, err error) {
	// There is nowhere to store a byte, so do not touch the stubbed input.
	if len(p) == 0 {
		return 0, nil
	}

	if len(r.bytes) > 0 {
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		r.remaining--
		return 1, nil
	}

	if len(r.errors) > 0 {
		next := r.errors[0]
		r.errors = r.errors[1:]
		return 0, next
	}

	panic("StubReader is all out of bytes and errors")
}
|
||
|
||
func Benchmark0BytesInputFile(b *testing.B) {
|
||
processInputFile(b, 0)
|
||
}
|
||
|
||
func Benchmark100BytesInputFile(b *testing.B) {
|
||
processInputFile(b, 100)
|
||
}
|
||
|
||
func Benchmark1024BytesInputFile(b *testing.B) {
|
||
processInputFile(b, 1024)
|
||
}
|
||
|
||
func Benchmark2048BytesInputFile(b *testing.B) {
|
||
processInputFile(b, 2048)
|
||
}
|
||
|
||
func Benchmark2000000BytesInputFile(b *testing.B) {
|
||
processInputFile(b, 2000000)
|
||
}
|
||
|
||
func processInputFile(b *testing.B, testSize int) {
|
||
for x := 0; x < b.N; x++ {
|
||
_, filename, _, _ := runtime.Caller(0)
|
||
path := strings.Replace(filename, "read_test.go", fmt.Sprintf("testfiles/%dbytes.txt", testSize), 1)
|
||
input, err := os.Open(path)
|
||
if err != nil {
|
||
panic(fmt.Sprintf("Cannot open file for test (%v): %s", path, err))
|
||
}
|
||
|
||
i := New(input)
|
||
|
||
offset := 0
|
||
readSize := 0
|
||
flushAt := 1024
|
||
for {
|
||
_, err := i.ByteAt(offset)
|
||
if err != nil {
|
||
break
|
||
}
|
||
offset++
|
||
readSize++
|
||
if offset == flushAt {
|
||
i.Flush(offset)
|
||
offset = 0
|
||
|
||
// So we flush full buffer sizes and partial buffer sizes to
|
||
// get more test coverage.
|
||
if flushAt == 1000 {
|
||
flushAt = 1024
|
||
} else {
|
||
flushAt = 1000
|
||
}
|
||
}
|
||
if readSize > testSize {
|
||
b.Fatalf("Test input is %d bytes, but read %d bytes so far!", testSize, readSize)
|
||
}
|
||
}
|
||
if readSize != testSize {
|
||
b.Fatalf("Expected to read %d bytes, but read %d bytes instead", testSize, readSize)
|
||
}
|
||
}
|
||
}
|