Unit tests improved for the parsekit.read package.
This commit is contained in:
parent
22bcf4677e
commit
3f9c745ac4
|
@ -44,12 +44,15 @@ func assertPanic(t *testing.T, code func(), expected string) {
|
|||
code()
|
||||
}
|
||||
|
||||
func assertCache(t *testing.T, name string, r *Buffer, code func(), bufLen, bufCap int) {
|
||||
func assertBuffer(t *testing.T, name string, r *Buffer, code func(), capacity, start, length int) {
|
||||
code()
|
||||
if bufLen != len(r.buffer) {
|
||||
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
|
||||
if capacity != r.cap {
|
||||
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, capacity, r.cap)
|
||||
}
|
||||
if bufCap != cap(r.buffer) {
|
||||
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, bufCap, cap(r.buffer))
|
||||
if start != r.start {
|
||||
t.Errorf("[%s] Unexpected data starting point (expected %d, got %d)", name, start, r.start)
|
||||
}
|
||||
if length != r.len {
|
||||
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, length, r.len)
|
||||
}
|
||||
}
|
||||
|
|
88
read/read.go
88
read/read.go
|
@ -93,7 +93,9 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
|||
type Buffer struct {
|
||||
bufio *bufio.Reader // used for ReadRune()
|
||||
buffer []byte // input buffer, holding runes that were read from input
|
||||
bufOffset int // the offset in the buffer at which the sliding data window starts
|
||||
cap int // the full buffer capacity
|
||||
start int // the offset from where to read buffered data in the buffer
|
||||
len int // the length of the buffered data
|
||||
err error // a read error, if one occurred
|
||||
errOffset int // the offset in the buffer at which the read error was encountered
|
||||
}
|
||||
|
@ -117,11 +119,13 @@ type Buffer struct {
|
|||
// Once a read error is encountered, that same read error will guaranteed
|
||||
// be return on every subsequent read at or beyond the provided offset.
|
||||
func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
|
||||
buf.fill(offset + utf8.UTFMax)
|
||||
if buf.len < offset+utf8.MaxRune && buf.err == nil {
|
||||
buf.fill(offset + utf8.UTFMax)
|
||||
}
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return utf8.RuneError, 0, buf.err
|
||||
}
|
||||
r, w := utf8.DecodeRune(buf.buffer[buf.bufOffset+offset:])
|
||||
r, w := utf8.DecodeRune(buf.buffer[buf.start+offset:])
|
||||
|
||||
return r, w, nil
|
||||
}
|
||||
|
@ -143,50 +147,37 @@ func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
|
|||
// Once a read error is encountered, that same read error will guaranteed
|
||||
// be return on every subsequent read at or beyond the provided offset.
|
||||
func (buf *Buffer) ByteAt(offset int) (byte, error) {
|
||||
buf.fill(offset + 1)
|
||||
if buf.len < offset+1 && buf.err == nil {
|
||||
buf.fill(offset + 1)
|
||||
}
|
||||
if buf.err != nil && offset >= buf.errOffset {
|
||||
return 0, buf.err
|
||||
}
|
||||
return buf.buffer[buf.bufOffset+offset], nil
|
||||
return buf.buffer[buf.start+offset], nil
|
||||
}
|
||||
|
||||
func (buf *Buffer) fill(minBytes int) {
|
||||
// Check the current length of the buffer data.
|
||||
bufLen := len(buf.buffer[buf.bufOffset:])
|
||||
|
||||
// If the required amount of bytes fits in the available data, or when
|
||||
// an error was encountered previously, then no action is needed.
|
||||
if minBytes <= bufLen || buf.err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Grow the buffer so it can contain at least the number of requested bytes.
|
||||
// The return value is the actual capacity of the buffer after growing it.
|
||||
//
|
||||
// Note:
|
||||
// The grow() method will always arrange the data to be at the start of the
|
||||
// buffer, getting rid of the leading unused space that might exist due to
|
||||
// calls to Flush(). This means that buf.bufOffset will be 0 from here on,
|
||||
// so there's no need to accomodate for this offset in the following code.
|
||||
bufLen, bufCap := buf.grow(minBytes)
|
||||
if minBytes > buf.cap-buf.start {
|
||||
buf.grow(minBytes)
|
||||
}
|
||||
|
||||
// Now we try to fill the buffer completely with data from our source.
|
||||
// This is more efficient than only filling the data up to the point where
|
||||
// we can read the data at the 'minBytes' position. Ideally, the buffer is
|
||||
// filled completely with data to work with.
|
||||
for bufLen < bufCap {
|
||||
for buf.len < buf.cap {
|
||||
// Read bytes from our source, and append them to the end of the
|
||||
// current buffer data.
|
||||
n, err := buf.bufio.Read(buf.buffer[bufLen:bufCap])
|
||||
bufLen += n
|
||||
n, err := buf.bufio.Read(buf.buffer[buf.len:buf.cap])
|
||||
buf.len += n
|
||||
|
||||
if err != nil {
|
||||
buf.err = err
|
||||
buf.errOffset = bufLen
|
||||
buf.errOffset = buf.len
|
||||
break
|
||||
}
|
||||
}
|
||||
buf.buffer = buf.buffer[:bufLen] // TODO work with a separate bufLen field in the buffer stuct, that might be simpler to work with and maybe faster.
|
||||
}
|
||||
|
||||
const bufferBlockSize = 1024
|
||||
|
@ -196,20 +187,12 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
|||
|
||||
// grow grows the buffer to guarantee space for at least the requested amount
|
||||
// of bytes, either shifting data around or reallocating the buffer.
|
||||
func (buf *Buffer) grow(minBytes int) (int, int) {
|
||||
if buf.err != nil {
|
||||
panic("Cannot grow buffer, there was an error earlier on!")
|
||||
}
|
||||
|
||||
func (buf *Buffer) grow(minBytes int) {
|
||||
// When possible, grow the buffer by moving the data to the start of
|
||||
// the buffer, freeing up extra capacity at the end.
|
||||
bufLen := len(buf.buffer) - buf.bufOffset
|
||||
bufCap := cap(buf.buffer)
|
||||
if buf.bufOffset > 0 && minBytes <= bufCap {
|
||||
copy(buf.buffer, buf.buffer[buf.bufOffset:])
|
||||
buf.buffer = buf.buffer[:bufLen]
|
||||
buf.bufOffset = 0
|
||||
return bufLen, bufCap
|
||||
if buf.start > 0 && minBytes <= buf.cap {
|
||||
copy(buf.buffer, buf.buffer[buf.start:buf.start+buf.len])
|
||||
buf.start = 0
|
||||
}
|
||||
|
||||
// Grow the buffer store by allocating a new one and copying the data.
|
||||
|
@ -217,23 +200,23 @@ func (buf *Buffer) grow(minBytes int) (int, int) {
|
|||
if minBytes%bufferBlockSize > 0 {
|
||||
newbufCap += bufferBlockSize
|
||||
}
|
||||
newStore := makeSlice(minBytes, newbufCap)
|
||||
copy(newStore, buf.buffer[buf.bufOffset:])
|
||||
newStore := makeSlice(newbufCap)
|
||||
copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
|
||||
buf.buffer = newStore
|
||||
buf.bufOffset = 0
|
||||
return bufLen, newbufCap
|
||||
buf.start = 0
|
||||
buf.cap = newbufCap
|
||||
}
|
||||
|
||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||
// with ErrTooLarge.
|
||||
func makeSlice(l int, c int) []byte {
|
||||
func makeSlice(c int) []byte {
|
||||
// If the make fails, give a known error.
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
panic(ErrTooLarge)
|
||||
}
|
||||
}()
|
||||
return make([]byte, l, c)
|
||||
return make([]byte, c)
|
||||
}
|
||||
|
||||
// Flush deletes the provided number of bytes from the start of the Buffer.
|
||||
|
@ -245,17 +228,15 @@ func (buf *Buffer) Flush(numberOfBytes int) {
|
|||
return
|
||||
}
|
||||
|
||||
bufLen := len(buf.buffer)
|
||||
dataLen := bufLen - buf.bufOffset
|
||||
if numberOfBytes > dataLen {
|
||||
if numberOfBytes > buf.len {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.read.Buffer.Flush(): number of bytes to flush (%d) "+
|
||||
"exceeds size of the buffered data (%d)", numberOfBytes, dataLen))
|
||||
"exceeds size of the buffered data (%d)", numberOfBytes, buf.len))
|
||||
}
|
||||
|
||||
if dataLen == numberOfBytes {
|
||||
buf.buffer = buf.buffer[:0]
|
||||
buf.bufOffset = 0
|
||||
if buf.len == numberOfBytes {
|
||||
buf.len = 0
|
||||
buf.start = 0
|
||||
buf.errOffset = 0
|
||||
return
|
||||
}
|
||||
|
@ -263,5 +244,6 @@ func (buf *Buffer) Flush(numberOfBytes int) {
|
|||
if buf.err != nil {
|
||||
buf.errOffset -= numberOfBytes
|
||||
}
|
||||
buf.bufOffset += numberOfBytes
|
||||
buf.start += numberOfBytes
|
||||
buf.len -= numberOfBytes
|
||||
}
|
||||
|
|
|
@ -329,74 +329,113 @@ func TestAllocationPatterns(t *testing.T) {
|
|||
r := New(input)
|
||||
|
||||
// The first read will create the standard buffer and fill it with data.
|
||||
// The first rune is requested, but there's more input data availble,
|
||||
// so the cache is filled up completely.
|
||||
// buffer |xxxx1024xxxxx|
|
||||
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 1024)
|
||||
assertBuffer(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 0, 1024)
|
||||
rn, _, _ := r.RuneAt(0)
|
||||
assertEqual(t, 'A', rn)
|
||||
|
||||
// The first 1024 bytes will fit in the standard buffer.
|
||||
// buffer |xxxx1024xxxxx|
|
||||
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 1024)
|
||||
assertBuffer(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 0, 1024)
|
||||
|
||||
// Flushing zero input keeps everything as-is.
|
||||
// buffer |xxxx1024xxxxx|
|
||||
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 1024, 1024)
|
||||
assertBuffer(t, "flush zero", r, func() { r.Flush(0) }, 1024, 0, 1024)
|
||||
|
||||
// Flushing all cached input truncates the cache.
|
||||
// Flushing all cached input truncates the buffer.
|
||||
// buffer | 1024 |
|
||||
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024)
|
||||
assertBuffer(t, "flush full cache", r, func() { r.Flush(1024) }, 1024, 0, 0)
|
||||
|
||||
// Reading 1025 chars will allocate a new store of 2 * 1024 and fill it with data.
|
||||
// Offset 1024 is requested, but there's more input data availble,
|
||||
// so the cache is filled up completely.
|
||||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||||
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 2048)
|
||||
runeBefore, _, _ := r.RuneAt(0)
|
||||
assertBuffer(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 0, 2048)
|
||||
runeAfter, _, _ := r.RuneAt(0)
|
||||
|
||||
// The bytes that we had before must be copied to the newly allocated store.
|
||||
rn, _, _ = r.RuneAt(0)
|
||||
assertEqual(t, 'K', rn)
|
||||
assertEqual(t, runeBefore, runeAfter)
|
||||
|
||||
// A partial flush moves the buffer offset, but the stored data stay the same.
|
||||
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
|
||||
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 2048, 2048)
|
||||
assertBuffer(t, "flush partial", r, func() { r.Flush(25) }, 2048, 25, 2023)
|
||||
|
||||
// The capacity for the usable part of the buffer is now 2023
|
||||
// This number of runes can be read, without triggering a re-allocation.
|
||||
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
|
||||
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 2048)
|
||||
assertBuffer(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 25, 2023)
|
||||
|
||||
// Flush the full input.
|
||||
// store | 2048 |
|
||||
// buffer | 2048 |
|
||||
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048)
|
||||
assertBuffer(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 2048, 0, 0)
|
||||
|
||||
// Fill up the store again.
|
||||
// Offset 1234 is requested, but there's more input data availble,
|
||||
// so the cache is filled up completely.
|
||||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||||
assertCache(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 2048)
|
||||
assertBuffer(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
|
||||
|
||||
// Then flush almost all input.
|
||||
// buffer 2047 |x1x|
|
||||
assertCache(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2048)
|
||||
assertBuffer(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2047, 1)
|
||||
|
||||
// Read some data beyond the single byte. This moves the single byte at the end to
|
||||
// the start and fills up the rest of the buffer, without a reallocation.
|
||||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
|
||||
assertCache(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 2048)
|
||||
assertBuffer(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
|
||||
|
||||
// Now flush only one rune from the cache.
|
||||
// buffer 1 |xxxxxxxxx2047xxxxxxxxxxxxxx|
|
||||
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 2048, 2048)
|
||||
assertBuffer(t, "flush 1", r, func() { r.Flush(1) }, 2048, 1, 2047)
|
||||
|
||||
// Now read the full available capacity. This will not fit, so
|
||||
// space has to be made. Since there's 1 free space at the start of the store,
|
||||
// the data are moved to the start and no reallocation is needed.
|
||||
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
|
||||
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 2048)
|
||||
assertBuffer(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 0, 2048)
|
||||
|
||||
// Now read in the whole rest of the buffer, asking for an offset that is way out of range.
|
||||
// It does allocate enough memory to store 10000 bytes (bringing us to 10240), but while reading it is
|
||||
// detected that there are not enough bytes to fill it. That puts a limit on the amount of data in
|
||||
// the buffer (5120 instead of the full 10240 buffer size).
|
||||
// the buffer, so the buffer is not completely filled.
|
||||
// buffer |xxxxxxxxxxxxxxx5120xxxxxxxxxxxxxxxxxxxx 10240-5120 |
|
||||
assertCache(t, "over-ask", r, func() { r.ByteAt(10000) }, 5120, 10240)
|
||||
remaining := input.remaining
|
||||
assertBuffer(t, "over-ask", r, func() { r.ByteAt(10000) }, 10240, 0, 2048+remaining)
|
||||
}
|
||||
|
||||
func makeLargeStubReader() (*StubReader, int) {
|
||||
size := 8192
|
||||
bytes := make([]byte, size)
|
||||
for i := range bytes {
|
||||
bytes[i] = 'A' + byte(i%26)
|
||||
}
|
||||
return &StubReader{bytes: bytes, errors: []error{io.EOF}, remaining: size}, size
|
||||
}
|
||||
|
||||
type StubReader struct {
|
||||
bytes []byte
|
||||
errors []error
|
||||
remaining int
|
||||
}
|
||||
|
||||
func (r *StubReader) Read(p []byte) (n int, err error) {
|
||||
if len(r.bytes) > 0 {
|
||||
head, tail := r.bytes[0], r.bytes[1:]
|
||||
r.bytes = tail
|
||||
p[0] = head
|
||||
r.remaining--
|
||||
return 1, nil
|
||||
}
|
||||
if len(r.errors) > 0 {
|
||||
head, tail := r.errors[0], r.errors[1:]
|
||||
r.errors = tail
|
||||
return 0, head
|
||||
}
|
||||
panic("StubReader is all out of bytes and errors")
|
||||
}
|
||||
|
||||
func Benchmark0BytesInputFile(b *testing.B) {
|
||||
|
@ -461,32 +500,3 @@ func processInputFile(b *testing.B, testSize int) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func makeLargeStubReader() (*StubReader, int) {
|
||||
size := 8192
|
||||
bytes := make([]byte, size)
|
||||
for i := range bytes {
|
||||
bytes[i] = 'A' + byte(i%26)
|
||||
}
|
||||
return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size
|
||||
}
|
||||
|
||||
type StubReader struct {
|
||||
bytes []byte
|
||||
errors []error
|
||||
}
|
||||
|
||||
func (r *StubReader) Read(p []byte) (n int, err error) {
|
||||
if len(r.bytes) > 0 {
|
||||
head, tail := r.bytes[0], r.bytes[1:]
|
||||
r.bytes = tail
|
||||
p[0] = head
|
||||
return 1, nil
|
||||
}
|
||||
if len(r.errors) > 0 {
|
||||
head, tail := r.errors[0], r.errors[1:]
|
||||
r.errors = tail
|
||||
return 0, head
|
||||
}
|
||||
panic("StubReader is all out of bytes and errors")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue