package read_test import ( "bufio" "fmt" "io" "strings" "testing" "unicode/utf8" "git.makaay.nl/mauricem/go-parsekit/read" "github.com/stretchr/testify/assert" ) func ExampleNew() { r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } fmt.Printf("%c", at(0)) fmt.Printf("%c", at(12)) // Output: // H! } func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { for _, test := range []struct { name string input interface{} }{ {"string", "Hello, world!"}, {"io.Reader", strings.NewReader("Hello, world!")}, {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, } { r := read.New(test.input) firstRune, _ := r.RuneAt(0) if firstRune != 'H' { t.Errorf("[%s] first rune not 'H'", test.name) } lastRune, _ := r.RuneAt(12) if lastRune != '!' { t.Errorf("[%s] last rune not '!'", test.name) } } } func TestNew_UnhandledInputType_Panics(t *testing.T) { assert.PanicsWithValue(t, "parsekit.read.New(): no support for input of type int", func() { read.New(12345) }) } func TestReader_RuneAt(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // It is possible to go back and forth while reading the input. result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) assert.Equal(t, "H!wH", result) } func TestReader_RuneAt_endOfFile(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) rn, err := r.RuneAt(13) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assert.Equal(t, "'�' EOF true", result) rn, err = r.RuneAt(20) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) assert.Equal(t, "'�' EOF true", result) } func TestReader_RuneAt_invalidRune(t *testing.T) { r := read.New(strings.NewReader("Hello, \xcdworld!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) assert.Equal(t, " �wo", result, "result") } func ExampleReader_RuneAt() { reader := read.New(strings.NewReader("Hello, world!")) fmt.Printf("Runes: ") for i := 0; ; i++ { r, err := reader.RuneAt(i) if err != nil { fmt.Printf("\nErr: %s\n", err) break } fmt.Printf("%c", r) } // Output: // Runes: Hello, world! // Err: EOF } func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { r := read.New(strings.NewReader("\uFEFFBommetje!")) b, _ := r.RuneAt(0) o, _ := r.RuneAt(1) m, _ := r.RuneAt(2) bom := fmt.Sprintf("%c%c%c", b, o, m) assert.Equal(t, "Bom", bom, "first three runes") } func TestReader_Flush(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // Fills the buffer with the first 8 runes on the input: "Hello, w" result := fmt.Sprintf("%c", at(7)) assert.Equal(t, "w", result, "first read") // Now flush the first 4 runes from the buffer (dropping "Hell" from it) r.Flush(4) // Rune 0 is now pointing at what originally was rune offset 4. // We can continue reading from there. result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) assert.Equal(t, "o, wor", result) } func ExampleReader_Flush() { r := read.New(strings.NewReader("dog eat dog!")) at := func(offset int) rune { c, _ := r.RuneAt(offset); return c } // Read from the first 4 runes of the input. fmt.Printf("%c%c%c%c", at(0), at(1), at(2), at(3)) // Flush those 4 runes, bringing offset 0 to the start of "eat dog". r.Flush(4) // Read another 4 runes, because of the flushing, we start at offset 0. fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3)) // Again, flush 4 runes, bringing offset 0 to the start of "dog!". r.Flush(4) // Read from the remainder runes. fmt.Printf("%c%c%c%c%c", at(2), at(1), at(1), at(0), at(3)) // Output: // dog ate good! } func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) // Fill buffer with "Hello, worl", the first 11 runes. r.RuneAt(10) // However, we flush 12 runes, which exceeds the buffer size. assert.PanicsWithValue(t, "parsekit.read.Reader.Flush(): number of runes to flush "+ "(12) exceeds size of the buffer (11)", func() { r.Flush(12) }) } func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) _, err := r.RuneAt(13) assert.Equal(t, err.Error(), "EOF") _, err = r.RuneAt(13) assert.Equal(t, err.Error(), "EOF") _, err = r.RuneAt(14) assert.Equal(t, err.Error(), "EOF") r.Flush(13) _, err = r.RuneAt(0) assert.Equal(t, err.Error(), "EOF") _, err = r.RuneAt(1) assert.Equal(t, err.Error(), "EOF") _, err = r.RuneAt(2) assert.Equal(t, err.Error(), "EOF") } // In this test, I want to make sure that once a Reader returns an error, // that error is cached and will be returned when data for the offset where // the error occurred is read at a later time. func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) { input := &StubReader{ bytes: []byte{'a', 'b', 'c', 'd'}, errors: []error{ io.EOF, io.ErrUnexpectedEOF, // This error must never popup in the tests below. }, } r := read.New(input) // Read the last availble rune. readRune, _ := r.RuneAt(3) assert.Equal(t, 'd', readRune) // Reading the next offset must result in the io.EOF error from the stub. readRune, err := r.RuneAt(4) assert.Equal(t, utf8.RuneError, readRune) assert.Equal(t, io.EOF, err) // Reading even further should yield the same io.EOF error. readRune, err = r.RuneAt(5) assert.Equal(t, utf8.RuneError, readRune) assert.Equal(t, io.EOF, err) // After an error, we must still be able to read the last rune. readRune, _ = r.RuneAt(3) assert.Equal(t, 'd', readRune) // Flushing updates the error index too. r.Flush(3) // The last rune is now at offset 0. readRune, _ = r.RuneAt(0) assert.Equal(t, 'd', readRune) // The io.EOF is now at offset 1. _, err = r.RuneAt(1) assert.Equal(t, io.EOF, err) // Let's flush that last rune too. r.Flush(1) // The io.EOF is now at offset 0. _, err = r.RuneAt(0) assert.Equal(t, io.EOF, err) // And reading beyond that offset also yields io.EOF. _, err = r.RuneAt(1) assert.Equal(t, io.EOF, err) } func TestInputLargerThanDefaultBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := read.New(input) readRune, err := r.RuneAt(0) assert.Equal(t, 'X', readRune) readRune, err = r.RuneAt(size - 1) assert.Equal(t, 'Y', readRune) readRune, err = r.RuneAt(size) assert.Equal(t, io.EOF, err) readRune, err = r.RuneAt(10) assert.Equal(t, 'X', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) { input, size := makeLargeStubReader() r := read.New(input) readRune, _ := r.RuneAt(size - 200) assert.Equal(t, 'X', readRune) readRune, _ = r.RuneAt(size - 1) assert.Equal(t, 'Y', readRune) } func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) { input, size := makeLargeStubReader() r := read.New(input) readRune, _ := r.RuneAt(size - 1) assert.Equal(t, 'Y', readRune) } func makeLargeStubReader() (*StubReader, int) { size := utf8.UTFMax * 64 * 5 bytes := make([]byte, size) for i := range bytes { bytes[i] = 'X' } bytes[size-1] = 'Y' return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size } type StubReader struct { bytes []byte errors []error } func (r *StubReader) Read(p []byte) (n int, err error) { if len(r.bytes) > 0 { head, tail := r.bytes[0], r.bytes[1:] r.bytes = tail p[0] = head return 1, nil } if len(r.errors) > 0 { head, tail := r.errors[0], r.errors[1:] r.errors = tail return 0, head } panic("StubReader is all out of bytes and errors") }