package read import ( "bufio" "fmt" "io" "strings" "testing" "unicode/utf8" ) func ExampleNew() { printFirstRuneOf := func(input interface{}) { r := New(input) c, _ := r.RuneAt(0) fmt.Printf("%q\n", c) } simpleString := "Hello, world!" printFirstRuneOf(simpleString) ioReaderImplementation := strings.NewReader("Good bye, world!") printFirstRuneOf(ioReaderImplementation) bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?")) printFirstRuneOf(bufioReaderPointer) bufioReaderValue := *(bufio.NewReader(strings.NewReader("Where do we go, world?"))) printFirstRuneOf(bufioReaderValue) // Output: // 'H' // 'G' // 'W' // 'W' } func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { for _, test := range []struct { name string input interface{} }{ {"string", "Hello, world!"}, {"io.Reader", strings.NewReader("Hello, world!")}, {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, } { r := New(test.input) firstRune, _ := r.RuneAt(0) if firstRune != 'H' { t.Errorf("[%s] first rune not 'H'", test.name) } lastRune, _ := r.RuneAt(12) if lastRune != '!' { t.Errorf("[%s] last rune not '!'", test.name) } } } func TestNew_UnhandledInputType_Panics(t *testing.T) { assertPanic(t, func() { New(12345) }, "parsekit.read.New(): no support for input of type int") } func TestBuffer_RuneAt(t *testing.T) { r := New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // It is possible to go back and forth while reading the input. result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) assertEqual(t, "H!wH", result) } func TestBuffer_RuneAt_endOfFile(t *testing.T) { r := New(strings.NewReader("Hello, world!")) rn, err := r.RuneAt(13) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assertEqual(t, "'�' EOF true", result) rn, err = r.RuneAt(20) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assertEqual(t, "'�' EOF true", result) } func TestBuffer_RuneAt_invalidRune(t *testing.T) { r := New(strings.NewReader("Hello, \xcdworld!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) assertEqual(t, " �wo", result) } func ExampleBuffer_RuneAt() { reader := New(strings.NewReader("Hello, world!")) fmt.Printf("Runes: ") for i := 0; ; i++ { r, err := reader.RuneAt(i) if err != nil { fmt.Printf("\nErr: %s\n", err) break } fmt.Printf("%c", r) } // Output: // Runes: Hello, world! // Err: EOF } func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { r := New(strings.NewReader("\uFEFFBommetje!")) b, _ := r.RuneAt(0) o, _ := r.RuneAt(1) m, _ := r.RuneAt(2) bom := fmt.Sprintf("%c%c%c", b, o, m) assertEqual(t, "Bom", bom) } func TestBuffer_Flush(t *testing.T) { r := New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // Fills the buffer with the first 8 runes on the input: "Hello, w" result := fmt.Sprintf("%c", at(7)) assertEqual(t, "w", result) // Now flush the first 4 runes from the buffer (dropping "Hell" from it) r.Flush(4) // Rune 0 is now pointing at what originally was rune offset 4. // We can continue reading from there. result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) assertEqual(t, "o, wor", result) } func ExampleBuffer_Flush() { r := New(strings.NewReader("dog eat dog!")) at := func(offset int) rune { c, _ := r.RuneAt(offset); return c } // Read from the first 4 runes of the input. fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3)) // Flush those 4 runes, bringing offset 0 to the start of "eat dog". r.Flush(4) // Read another 4 runes, because of the flushing, we start at offset 0. fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3)) // Again, flush 4 runes, bringing offset 0 to the start of "dog!". r.Flush(4) // Read from the remainder runes. fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3)) // Output: // dog ate good! } func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { r := New(strings.NewReader("Hello, world!")) // Fill buffer with "Hello, worl", the first 11 runes. r.RuneAt(10) // However, we flush 12 runes, which exceeds the buffer size. assertPanic(t, func() { r.Flush(12) }, "parsekit.read.Buffer.Flush(): number of runes to flush "+ "(12) exceeds size of the buffer (11)") } func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) { r := New(strings.NewReader("Hello, world!")) _, err := r.RuneAt(13) assertEqual(t, err.Error(), "EOF") _, err = r.RuneAt(13) assertEqual(t, err.Error(), "EOF") _, err = r.RuneAt(14) assertEqual(t, err.Error(), "EOF") r.Flush(13) _, err = r.RuneAt(0) assertEqual(t, err.Error(), "EOF") _, err = r.RuneAt(1) assertEqual(t, err.Error(), "EOF") _, err = r.RuneAt(2) assertEqual(t, err.Error(), "EOF") } // In this test, I want to make sure that once a Buffer returns an error, // that error is cached and will be returned when data for the offset where // the error occurred is read at a later time. func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) { input := &StubReader{ bytes: []byte{'a', 'b', 'c', 'd'}, errors: []error{ io.EOF, io.ErrUnexpectedEOF, // This error must never popup in the tests below. }, } r := New(input) // Read the last availble rune. readRune, _ := r.RuneAt(3) assertEqual(t, 'd', readRune) // Reading the next offset must result in the io.EOF error from the stub. readRune, err := r.RuneAt(4) assertEqual(t, utf8.RuneError, readRune) assertEqual(t, io.EOF, err) // Reading even further should yield the same io.EOF error. readRune, err = r.RuneAt(5) assertEqual(t, utf8.RuneError, readRune) assertEqual(t, io.EOF, err) // After an error, we must still be able to read the last rune. readRune, _ = r.RuneAt(3) assertEqual(t, 'd', readRune) // Flushing updates the error index too. r.Flush(3) // The last rune is now at offset 0. readRune, _ = r.RuneAt(0) assertEqual(t, 'd', readRune) // The io.EOF is now at offset 1. _, err = r.RuneAt(1) assertEqual(t, io.EOF, err) // Let's flush that last rune too. r.Flush(1) // The io.EOF is now at offset 0. _, err = r.RuneAt(0) assertEqual(t, io.EOF, err) // And reading beyond that offset also yields io.EOF. _, err = r.RuneAt(1) assertEqual(t, io.EOF, err) } func TestInputLargerThanDefaultBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, err := r.RuneAt(0) assertEqual(t, 'X', readRune) readRune, err = r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) readRune, err = r.RuneAt(size) assertEqual(t, io.EOF, err) readRune, err = r.RuneAt(10) assertEqual(t, 'X', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, _ := r.RuneAt(size - 200) assertEqual(t, 'X', readRune) readRune, _ = r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, _ := r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) } func TestAllocationPatterns(t *testing.T) { input, _ := makeLargeStubReader() r := New(input) // The first read will create the standard cache. // store |x 64 | // buffer |x 64 | assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64) // The first 64 reads will fit in the standard cache. // store |xxxx64xxxxx| // buffer |xxxx64xxxxx| assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64) // Flushing zero input keeps everything as-is. // store |xxxx64xxxxx| // buffer |xxxx64xxxxx| assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64) // Flushing all cached input truncates the cache. // store | 64 | // buffer | 64 | assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64) // Reading 65 chars will allocate a new store of 2 * size + n. // store |xxxxx65xxxxx 128 | // buffer |xxxxx65xxxxx 128 | assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128) // A partial flush frees the start of the store and moves // the buffer slice. // store | 50 x15x 128 | // buffer |x15x 128 | assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128) // The capacity for the buffer is now 2*64 + 15 // This number of runes can be read, filling up the store // without a new allocation. // store | 50 xxxxxxxxx143xxxxxxxx| // buffer |xxxxxxxxx143xxxxxxxx| assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143) // Flush the full input. // store | 193 | // buffer | | assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193) // Read a bit more than half the capacity. // store |xxxxxx101xxxxxxxx 92 | // buffer |xxxxxx101xxxxxxxx 92 | assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193) // Then flush almost all input. // store | 100 x1x 92 | // buffer |x1x 92 | assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93) // Again read a bit more than half the capacity. This does not fit at the // end of the store, but by moving the current buffer to the start of the // store (where it fits), space is freed up for the read operation. // store |xxxxx100xxxxxx 93 | // buffer |xxxxx100xxxxxx 93 | assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193) // Now flush only one rune from the cache. // store |1 xxxx99xxxxx 93 | // buffer |xxxx99xxxxx 93 | assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192) // Now read one more than the capacity. This will not fit, so space has // to be made. Since there's 1 free space at the start of the store, // the data is moved to the start and no reallocation is needed. // store |1 xxxx99xxxxx 93 | // buffer |xxxx99xxxxx 93 | assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193) } func makeLargeStubReader() (*StubReader, int) { size := utf8.UTFMax * 64 * 5 bytes := make([]byte, size) for i := range bytes { bytes[i] = 'X' } bytes[size-1] = 'Y' return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size } type StubReader struct { bytes []byte errors []error } func (r *StubReader) Read(p []byte) (n int, err error) { if len(r.bytes) > 0 { head, tail := r.bytes[0], r.bytes[1:] r.bytes = tail p[0] = head return 1, nil } if len(r.errors) > 0 { head, tail := r.errors[0], r.errors[1:] r.errors = tail return 0, head } panic("StubReader is all out of bytes and errors") }