package read import ( "bufio" "fmt" "io" "strings" "testing" "unicode/utf8" ) func ExampleNew() { printFirstRuneOf := func(input interface{}) { r := New(input) c, w, _ := r.RuneAt(0) fmt.Printf("rune %q, width %d\n", c, w) } simpleString := "Hello, world!" printFirstRuneOf(simpleString) ioReaderImplementation := strings.NewReader("Good bye, world!") printFirstRuneOf(ioReaderImplementation) bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?")) printFirstRuneOf(bufioReaderPointer) bufioReaderValue := *(bufio.NewReader(strings.NewReader("Ɍead the manual!"))) printFirstRuneOf(bufioReaderValue) // Output: // rune 'H', width 1 // rune 'G', width 1 // rune 'W', width 1 // rune 'Ɍ', width 2 } func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { for _, test := range []struct { name string input interface{} }{ {"string", "Hello, world!"}, {"io.Reader", strings.NewReader("Hello, world!")}, {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, } { r := New(test.input) firstRune, _, _ := r.RuneAt(0) if firstRune != 'H' { t.Errorf("[%s] first rune not 'H'", test.name) } lastRune, _, _ := r.RuneAt(12) if lastRune != '!' { t.Errorf("[%s] last rune not '!', but %q", test.name, lastRune) } } } func TestNew_UnhandledInputType_Panics(t *testing.T) { assertPanic(t, func() { New(12345) }, "parsekit.read.New(): no support for input of type int") } func TestBuffer_ByteAt(t *testing.T) { r := New(strings.NewReader("Hello, world!")) at := func(i int) byte { b, _ := r.ByteAt(i); return b } result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) assertEqual(t, "H!wH", result) } func TestBuffer_RuneAt(t *testing.T) { r := New(strings.NewReader("¡pןɹoʍ 'oןןǝH")) at := func(i int) rune { r, _, _ := r.RuneAt(i); return r } // It is possible to go back and forth while reading the input. result := fmt.Sprintf("%c%c%c%c", at(0), at(5), at(8), at(0)) assertEqual(t, "¡ɹʍ¡", result) } func TestBuffer_ByteAt_endOfFile(t *testing.T) { r := New(strings.NewReader("Hello, world!")) b, err := r.ByteAt(13) result := fmt.Sprintf("%q %s %t", b, err, err == io.EOF) assertEqual(t, "'\\x00' EOF true", result) b, err = r.ByteAt(20) result = fmt.Sprintf("%q %s %t", b, err, err == io.EOF) assertEqual(t, "'\\x00' EOF true", result) } func TestBuffer_RuneAt_endOfFile(t *testing.T) { r := New(strings.NewReader("Hello, world!")) rn, _, err := r.RuneAt(13) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assertEqual(t, "'�' EOF true", result) rn, _, err = r.RuneAt(20) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assertEqual(t, "'�' EOF true", result) } func TestBuffer_RuneAt_invalidRune(t *testing.T) { r := New(strings.NewReader("Hello, \xcdworld!")) at := func(i int) rune { r, _, _ := r.RuneAt(i); return r } result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) assertEqual(t, " �wo", result) } func ExampleBuffer_ByteAt() { reader := New(strings.NewReader("Hello, world!")) fmt.Printf("Runes: ") offset := 0 for { r, err := reader.ByteAt(offset) offset++ if err != nil { fmt.Printf("\nErr: %s\n", err) break } fmt.Printf("%c", r) } // Output: // Runes: Hello, world! // Err: EOF } func ExampleBuffer_RuneAt() { reader := New(strings.NewReader("Hello, pןɹoʍ!")) fmt.Printf("Runes: ") offset := 0 for { r, w, err := reader.RuneAt(offset) offset += w if err != nil { fmt.Printf("\nErr: %s\n", err) break } fmt.Printf("%c", r) } // Output: // Runes: Hello, pןɹoʍ! // Err: EOF } // TODO reimplement somewhere, maybe a separate call in the reader or should it be part of a parser? // func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { // r := New(strings.NewReader("\uFEFFBommetje!")) // b, _, _ := r.RuneAt(0) // o, _, _ := r.RuneAt(1) // m, _, _ := r.RuneAt(2) // bom := fmt.Sprintf("%c%c%c", b, o, m) // assertEqual(t, "Bom", bom) // } func TestBuffer_Flush(t *testing.T) { r := New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _, _ := r.RuneAt(i); return r } // Fills the buffer with the first 8 runes on the input: "Hello, w" result := fmt.Sprintf("%c", at(7)) assertEqual(t, "w", result) // Now flush the first 4 runes from the buffer (dropping "Hell" from it) r.Flush(4) // Rune 0 is now pointing at what originally was rune offset 4. // We can continue reading from there. result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) assertEqual(t, "o, wor", result) } func ExampleBuffer_Flush() { r := New(strings.NewReader("dog eat dog!")) at := func(offset int) rune { c, _, _ := r.RuneAt(offset); return c } // Read from the first 4 runes of the input. fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3)) // Flush those 4 runes, bringing offset 0 to the start of "eat dog". r.Flush(4) // Read another 4 runes, because of the flushing, we start at offset 0. fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3)) // Again, flush 4 runes, bringing offset 0 to the start of "dog!". r.Flush(4) // Read from the remainder runes. fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3)) // Output: // dog ate good! } func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { r := New(strings.NewReader("Hello, world!")) // Fill buffer with "Hello, world!", the first 13 runes. rn, _, _ := r.RuneAt(12) assertEqual(t, '!', rn) // However, we flush 14 runes, which exceeds the buffer size. assertPanic(t, func() { r.Flush(14) }, "parsekit.read.Buffer.Flush(): number of runes to flush "+ "(14) exceeds size of the buffer (13)") } func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) { r := New(strings.NewReader("Hello, world!")) _, _, err := r.RuneAt(13) assertEqual(t, err.Error(), "EOF") _, _, err = r.RuneAt(13) assertEqual(t, err.Error(), "EOF") _, _, err = r.RuneAt(14) assertEqual(t, err.Error(), "EOF") r.Flush(13) _, _, err = r.RuneAt(0) assertEqual(t, err.Error(), "EOF") _, _, err = r.RuneAt(1) assertEqual(t, err.Error(), "EOF") _, _, err = r.RuneAt(2) assertEqual(t, err.Error(), "EOF") } // In this test, I want to make sure that once a Buffer returns an error, // that error is cached and will be returned when data for the offset where // the error occurred is read at a later time. func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) { input := &StubReader{ bytes: []byte{'a', 'b', 'c', 'd'}, errors: []error{ io.EOF, io.ErrUnexpectedEOF, // This error must never popup in the tests below. }, } r := New(input) // Read the last availble rune. readRune, _, _ := r.RuneAt(3) assertEqual(t, 'd', readRune) // Reading the next offset must result in the io.EOF error from the stub. readRune, _, err := r.RuneAt(4) assertEqual(t, utf8.RuneError, readRune) assertEqual(t, io.EOF, err) // Reading even further should yield the same io.EOF error. readRune, _, err = r.RuneAt(5) assertEqual(t, utf8.RuneError, readRune) assertEqual(t, io.EOF, err) // After an error, we must still be able to read the last rune. readRune, _, _ = r.RuneAt(3) assertEqual(t, 'd', readRune) // Flushing updates the error index too. r.Flush(3) // The last rune is now at offset 0. readRune, _, _ = r.RuneAt(0) assertEqual(t, 'd', readRune) // The io.EOF is now at offset 1. _, _, err = r.RuneAt(1) assertEqual(t, io.EOF, err) // Let's flush that last rune too. r.Flush(1) // The io.EOF is now at offset 0. _, _, err = r.RuneAt(0) assertEqual(t, io.EOF, err) // And reading beyond that offset also yields io.EOF. _, _, err = r.RuneAt(1) assertEqual(t, io.EOF, err) } func TestInputLargerThanDefaultBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, _, err := r.RuneAt(0) assertEqual(t, 'X', readRune) readRune, _, err = r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) readRune, _, err = r.RuneAt(size) assertEqual(t, io.EOF, err) readRune, _, err = r.RuneAt(10) assertEqual(t, 'X', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, _, _ := r.RuneAt(size - 200) assertEqual(t, 'X', readRune) readRune, _, _ = r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) { input, size := makeLargeStubReader() r := New(input) readRune, _, _ := r.RuneAt(size - 1) assertEqual(t, 'Y', readRune) } func TestAllocationPatterns(t *testing.T) { input, _ := makeLargeStubReader() r := New(input) // The first read will create the standard cache. // store |x 64 | // buffer |x 64 | assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 4, 64) // The first 64 bytes will fit in the standard cache. // store |xxxx64xxxxx| // buffer |xxxx64xxxxx| // // Note: in the test offset 60 is used instead of offset 63, because // RuneAt() will fill the buffer with 4 bytes to accomodate for the // longest UTF8 character encodings. In all upcoming tests, the same // logic applies to the RuneAt() calls. assertCache(t, "read fill cache", r, func() { r.RuneAt(60) }, 0, 64, 64, 64) // Flushing zero input keeps everything as-is. // store |xxxx64xxxxx| // buffer |xxxx64xxxxx| assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64) // Flushing all cached input truncates the cache. // store | 64 | // buffer | 64 | assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64) // Reading 65 chars will allocate a new store of 2 * size + n. // store |xxxxx65xxxxx 128 | // buffer |xxxxx65xxxxx 128 | assertCache(t, "read cap + 1", r, func() { r.RuneAt(61) }, 0, 65+128, 65, 65+128) // A partial flush frees the start of the store and moves // the buffer slice. // store | 50 x15x 128 | // buffer |x15x 128 | assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128) // The capacity for the buffer is now 2*64 + 15 // This number of runes can be read, filling up the store // without a new allocation. // store | 50 xxxxxxxxx143xxxxxxxx| // buffer |xxxxxxxxx143xxxxxxxx| assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(139) }, 0, 50+143, 143, 143) // Flush the full input. // store | 193 | // buffer | | assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193) // Read a bit more than half the capacity. // store |xxxxxx101xxxxxxxx 92 | // buffer |xxxxxx101xxxxxxxx 92 | assertCache(t, "read more than half the cap", r, func() { r.RuneAt(97) }, 0, 193, 101, 193) // Then flush almost all input. // store | 100 x1x 92 | // buffer |x1x 92 | assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93) // Again read a bit more than half the capacity. This does not fit at the // end of the store, but by moving the current buffer to the start of the // store (where it fits), space is freed up for the read operation. // store |xxxxx100xxxxxx 93 | // buffer |xxxxx100xxxxxx 93 | assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(96) }, 0, 193, 100, 193) // Now flush only one rune from the cache. // store |1 xxxx99xxxxx 93 | // buffer |xxxx99xxxxx 93 | assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192) // Now read one more than the capacity. This will not fit, so space has // to be made. Since there's 1 free space at the start of the store, // the data is moved to the start and no reallocation is needed. // store |1 xxxx99xxxxx 93 | // buffer |xxxx99xxxxx 93 | assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(189) }, 0, 193, 193, 193) } func makeLargeStubReader() (*StubReader, int) { size := utf8.UTFMax * 64 * 5 bytes := make([]byte, size) for i := range bytes { bytes[i] = 'X' } bytes[size-1] = 'Y' return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size } type StubReader struct { bytes []byte errors []error } func (r *StubReader) Read(p []byte) (n int, err error) { if len(r.bytes) > 0 { head, tail := r.bytes[0], r.bytes[1:] r.bytes = tail p[0] = head return 1, nil } if len(r.errors) > 0 { head, tail := r.errors[0], r.errors[1:] r.errors = tail return 0, head } panic("StubReader is all out of bytes and errors") }