Unit tests improved for the parsekit.read package.

This commit is contained in:
Maurice Makaay 2019-07-19 09:50:42 +00:00
parent 22bcf4677e
commit 3f9c745ac4
3 changed files with 100 additions and 105 deletions

View File

@ -44,12 +44,15 @@ func assertPanic(t *testing.T, code func(), expected string) {
code()
}
func assertCache(t *testing.T, name string, r *Buffer, code func(), bufLen, bufCap int) {
func assertBuffer(t *testing.T, name string, r *Buffer, code func(), capacity, start, length int) {
code()
if bufLen != len(r.buffer) {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
if capacity != r.cap {
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, capacity, r.cap)
}
if bufCap != cap(r.buffer) {
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, bufCap, cap(r.buffer))
if start != r.start {
t.Errorf("[%s] Unexpected data starting point (expected %d, got %d)", name, start, r.start)
}
if length != r.len {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, length, r.len)
}
}

View File

@ -93,7 +93,9 @@ func makeBufioReader(input interface{}) *bufio.Reader {
type Buffer struct {
bufio *bufio.Reader // used for ReadRune()
buffer []byte // input buffer, holding runes that were read from input
bufOffset int // the offset in the buffer at which the sliding data window starts
cap int // the full buffer capacity
start int // the offset from where to read buffered data in the buffer
len int // the length of the buffered data
err error // a read error, if one occurred
errOffset int // the offset in the buffer at which the read error was encountered
}
@ -117,11 +119,13 @@ type Buffer struct {
Once a read error is encountered, that same read error is guaranteed
to be returned on every subsequent read at or beyond the provided offset.
func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
buf.fill(offset + utf8.UTFMax)
if buf.len < offset+utf8.MaxRune && buf.err == nil {
buf.fill(offset + utf8.UTFMax)
}
if buf.err != nil && offset >= buf.errOffset {
return utf8.RuneError, 0, buf.err
}
r, w := utf8.DecodeRune(buf.buffer[buf.bufOffset+offset:])
r, w := utf8.DecodeRune(buf.buffer[buf.start+offset:])
return r, w, nil
}
@ -143,50 +147,37 @@ func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
Once a read error is encountered, that same read error is guaranteed
to be returned on every subsequent read at or beyond the provided offset.
func (buf *Buffer) ByteAt(offset int) (byte, error) {
buf.fill(offset + 1)
if buf.len < offset+1 && buf.err == nil {
buf.fill(offset + 1)
}
if buf.err != nil && offset >= buf.errOffset {
return 0, buf.err
}
return buf.buffer[buf.bufOffset+offset], nil
return buf.buffer[buf.start+offset], nil
}
func (buf *Buffer) fill(minBytes int) {
// Check the current length of the buffer data.
bufLen := len(buf.buffer[buf.bufOffset:])
// If the required amount of bytes fits in the available data, or when
// an error was encountered previously, then no action is needed.
if minBytes <= bufLen || buf.err != nil {
return
}
// Grow the buffer so it can contain at least the number of requested bytes.
// The return value is the actual capacity of the buffer after growing it.
//
// Note:
// The grow() method will always arrange the data to be at the start of the
// buffer, getting rid of the leading unused space that might exist due to
// calls to Flush(). This means that buf.bufOffset will be 0 from here on,
// so there's no need to accommodate for this offset in the following code.
bufLen, bufCap := buf.grow(minBytes)
if minBytes > buf.cap-buf.start {
buf.grow(minBytes)
}
// Now we try to fill the buffer completely with data from our source.
// This is more efficient than only filling the data up to the point where
// we can read the data at the 'minBytes' position. Ideally, the buffer is
// filled completely with data to work with.
for bufLen < bufCap {
for buf.len < buf.cap {
// Read bytes from our source, and append them to the end of the
// current buffer data.
n, err := buf.bufio.Read(buf.buffer[bufLen:bufCap])
bufLen += n
n, err := buf.bufio.Read(buf.buffer[buf.len:buf.cap])
buf.len += n
if err != nil {
buf.err = err
buf.errOffset = bufLen
buf.errOffset = buf.len
break
}
}
buf.buffer = buf.buffer[:bufLen] // TODO work with a separate bufLen field in the buffer stuct, that might be simpler to work with and maybe faster.
}
const bufferBlockSize = 1024
@ -196,20 +187,12 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
// grow grows the buffer to guarantee space for at least the requested amount
// of bytes, either shifting data around or reallocating the buffer.
func (buf *Buffer) grow(minBytes int) (int, int) {
if buf.err != nil {
panic("Cannot grow buffer, there was an error earlier on!")
}
func (buf *Buffer) grow(minBytes int) {
// When possible, grow the buffer by moving the data to the start of
// the buffer, freeing up extra capacity at the end.
bufLen := len(buf.buffer) - buf.bufOffset
bufCap := cap(buf.buffer)
if buf.bufOffset > 0 && minBytes <= bufCap {
copy(buf.buffer, buf.buffer[buf.bufOffset:])
buf.buffer = buf.buffer[:bufLen]
buf.bufOffset = 0
return bufLen, bufCap
if buf.start > 0 && minBytes <= buf.cap {
copy(buf.buffer, buf.buffer[buf.start:buf.start+buf.len])
buf.start = 0
}
// Grow the buffer store by allocating a new one and copying the data.
@ -217,23 +200,23 @@ func (buf *Buffer) grow(minBytes int) (int, int) {
if minBytes%bufferBlockSize > 0 {
newbufCap += bufferBlockSize
}
newStore := makeSlice(minBytes, newbufCap)
copy(newStore, buf.buffer[buf.bufOffset:])
newStore := makeSlice(newbufCap)
copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
buf.buffer = newStore
buf.bufOffset = 0
return bufLen, newbufCap
buf.start = 0
buf.cap = newbufCap
}
// makeSlice allocates a slice of size n. If the allocation fails, it panics
// with ErrTooLarge.
func makeSlice(l int, c int) []byte {
func makeSlice(c int) []byte {
// If the make fails, give a known error.
defer func() {
if recover() != nil {
panic(ErrTooLarge)
}
}()
return make([]byte, l, c)
return make([]byte, c)
}
// Flush deletes the provided number of bytes from the start of the Buffer.
@ -245,17 +228,15 @@ func (buf *Buffer) Flush(numberOfBytes int) {
return
}
bufLen := len(buf.buffer)
dataLen := bufLen - buf.bufOffset
if numberOfBytes > dataLen {
if numberOfBytes > buf.len {
panic(fmt.Sprintf(
"parsekit.read.Buffer.Flush(): number of bytes to flush (%d) "+
"exceeds size of the buffered data (%d)", numberOfBytes, dataLen))
"exceeds size of the buffered data (%d)", numberOfBytes, buf.len))
}
if dataLen == numberOfBytes {
buf.buffer = buf.buffer[:0]
buf.bufOffset = 0
if buf.len == numberOfBytes {
buf.len = 0
buf.start = 0
buf.errOffset = 0
return
}
@ -263,5 +244,6 @@ func (buf *Buffer) Flush(numberOfBytes int) {
if buf.err != nil {
buf.errOffset -= numberOfBytes
}
buf.bufOffset += numberOfBytes
buf.start += numberOfBytes
buf.len -= numberOfBytes
}

View File

@ -329,74 +329,113 @@ func TestAllocationPatterns(t *testing.T) {
r := New(input)
// The first read will create the standard buffer and fill it with data.
// The first rune is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxx1024xxxxx|
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 1024)
assertBuffer(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 0, 1024)
rn, _, _ := r.RuneAt(0)
assertEqual(t, 'A', rn)
// The first 1024 bytes will fit in the standard buffer.
// buffer |xxxx1024xxxxx|
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 1024)
assertBuffer(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 0, 1024)
// Flushing zero input keeps everything as-is.
// buffer |xxxx1024xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 1024, 1024)
assertBuffer(t, "flush zero", r, func() { r.Flush(0) }, 1024, 0, 1024)
// Flushing all cached input truncates the cache.
// Flushing all cached input truncates the buffer.
// buffer | 1024 |
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024)
assertBuffer(t, "flush full cache", r, func() { r.Flush(1024) }, 1024, 0, 0)
// Reading 1025 chars will allocate a new store of 2 * 1024 and fill it with data.
// Offset 1024 is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 2048)
runeBefore, _, _ := r.RuneAt(0)
assertBuffer(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 0, 2048)
runeAfter, _, _ := r.RuneAt(0)
// The bytes that we had before must be copied to the newly allocated store.
rn, _, _ = r.RuneAt(0)
assertEqual(t, 'K', rn)
assertEqual(t, runeBefore, runeAfter)
// A partial flush moves the buffer offset, but the stored data stay the same.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 2048, 2048)
assertBuffer(t, "flush partial", r, func() { r.Flush(25) }, 2048, 25, 2023)
// The capacity for the usable part of the buffer is now 2023
// This number of runes can be read, without triggering a re-allocation.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 2048)
assertBuffer(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 25, 2023)
// Flush the full input.
// store | 2048 |
// buffer | 2048 |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048)
assertBuffer(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 2048, 0, 0)
// Fill up the store again.
// Offset 1234 is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 2048)
assertBuffer(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
// Then flush almost all input.
// buffer 2047 |x1x|
assertCache(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2048)
assertBuffer(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2047, 1)
// Read some data beyond the single byte. This moves the single byte at the end to
// the start and fills up the rest of the buffer, without a reallocation.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 2048)
assertBuffer(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
// Now flush only one rune from the cache.
// buffer 1 |xxxxxxxxx2047xxxxxxxxxxxxxx|
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 2048, 2048)
assertBuffer(t, "flush 1", r, func() { r.Flush(1) }, 2048, 1, 2047)
// Now read the full available capacity. This will not fit, so
// space has to be made. Since there's 1 free space at the start of the store,
// the data are moved to the start and no reallocation is needed.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 2048)
assertBuffer(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 0, 2048)
// Now read in the whole rest of the buffer, asking for an offset that is way out of range.
// It does allocate enough memory to store 10000 bytes (bringing us to 10240), but while reading it is
// detected that there are not enough bytes to fill it. That puts a limit on the amount of data in
// the buffer (5120 instead of the full 10240 buffer size).
// the buffer, so the buffer is not completely filled.
// buffer |xxxxxxxxxxxxxxx5120xxxxxxxxxxxxxxxxxxxx 10240-5120 |
assertCache(t, "over-ask", r, func() { r.ByteAt(10000) }, 5120, 10240)
remaining := input.remaining
assertBuffer(t, "over-ask", r, func() { r.ByteAt(10000) }, 10240, 0, 2048+remaining)
}
// makeLargeStubReader builds a StubReader that yields 8192 bytes of
// repeating alphabet data ('A'..'Z', cycling) followed by a single queued
// io.EOF. It returns the reader along with the total number of bytes it
// will produce.
func makeLargeStubReader() (*StubReader, int) {
	const size = 8192
	data := make([]byte, size)
	for i := 0; i < size; i++ {
		data[i] = byte('A' + i%26)
	}
	reader := &StubReader{
		bytes:     data,
		errors:    []error{io.EOF},
		remaining: size,
	}
	return reader, size
}

// StubReader is a test double for io.Reader. It hands out its bytes one
// at a time, then replays its queued errors in order, and finally panics
// when both supplies are exhausted. The remaining counter tracks how many
// data bytes are still to be delivered.
type StubReader struct {
	bytes     []byte
	errors    []error
	remaining int
}

// Read implements io.Reader. It delivers exactly one byte per call while
// bytes remain (decrementing remaining), then pops and returns the queued
// errors one per call.
func (r *StubReader) Read(p []byte) (n int, err error) {
	if len(r.bytes) == 0 {
		if len(r.errors) == 0 {
			panic("StubReader is all out of bytes and errors")
		}
		nextErr := r.errors[0]
		r.errors = r.errors[1:]
		return 0, nextErr
	}
	b := r.bytes[0]
	r.bytes = r.bytes[1:]
	p[0] = b
	r.remaining--
	return 1, nil
}
func Benchmark0BytesInputFile(b *testing.B) {
@ -461,32 +500,3 @@ func processInputFile(b *testing.B, testSize int) {
}
}
}
// makeLargeStubReader returns a StubReader preloaded with 8192 bytes of
// cyclic alphabet data ('A'-'Z' repeated) and a single queued io.EOF,
// plus the size of that data.
func makeLargeStubReader() (*StubReader, int) {
	const size = 8192
	data := make([]byte, size)
	for i := range data {
		data[i] = byte('A' + i%26)
	}
	return &StubReader{bytes: data, errors: []error{io.EOF}}, size
}

// StubReader is an io.Reader test double: it yields one byte per Read
// call, then its queued errors in order, and panics once both run out.
type StubReader struct {
	bytes  []byte
	errors []error
}

// Read implements io.Reader, returning a single byte per call. When the
// byte supply is exhausted it dequeues and returns the next error.
func (r *StubReader) Read(p []byte) (n int, err error) {
	switch {
	case len(r.bytes) > 0:
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		return 1, nil
	case len(r.errors) > 0:
		nextErr := r.errors[0]
		r.errors = r.errors[1:]
		return 0, nextErr
	default:
		panic("StubReader is all out of bytes and errors")
	}
}