Unit tests improved for the parsekit.read package.

This commit is contained in:
Maurice Makaay 2019-07-19 09:50:42 +00:00
parent 22bcf4677e
commit 3f9c745ac4
3 changed files with 100 additions and 105 deletions

View File

@ -44,12 +44,15 @@ func assertPanic(t *testing.T, code func(), expected string) {
code()
}
func assertCache(t *testing.T, name string, r *Buffer, code func(), bufLen, bufCap int) {
func assertBuffer(t *testing.T, name string, r *Buffer, code func(), capacity, start, length int) {
code()
if bufLen != len(r.buffer) {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
if capacity != r.cap {
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, capacity, r.cap)
}
if bufCap != cap(r.buffer) {
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, bufCap, cap(r.buffer))
if start != r.start {
t.Errorf("[%s] Unexpected data starting point (expected %d, got %d)", name, start, r.start)
}
if length != r.len {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, length, r.len)
}
}

View File

@ -93,7 +93,9 @@ func makeBufioReader(input interface{}) *bufio.Reader {
type Buffer struct {
bufio *bufio.Reader // used for ReadRune()
buffer []byte // input buffer, holding runes that were read from input
bufOffset int // the offset in the buffer at which the sliding data window starts
cap int // the full buffer capacity
start int // the offset from where to read buffered data in the buffer
len int // the length of the buffered data
err error // a read error, if one occurred
errOffset int // the offset in the buffer at which the read error was encountered
}
@ -117,11 +119,13 @@ type Buffer struct {
Once a read error is encountered, that same read error is guaranteed
to be returned on every subsequent read at or beyond the provided offset.
func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
buf.fill(offset + utf8.UTFMax)
if buf.len < offset+utf8.MaxRune && buf.err == nil {
buf.fill(offset + utf8.UTFMax)
}
if buf.err != nil && offset >= buf.errOffset {
return utf8.RuneError, 0, buf.err
}
r, w := utf8.DecodeRune(buf.buffer[buf.bufOffset+offset:])
r, w := utf8.DecodeRune(buf.buffer[buf.start+offset:])
return r, w, nil
}
@ -143,50 +147,37 @@ func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
Once a read error is encountered, that same read error is guaranteed
to be returned on every subsequent read at or beyond the provided offset.
func (buf *Buffer) ByteAt(offset int) (byte, error) {
buf.fill(offset + 1)
if buf.len < offset+1 && buf.err == nil {
buf.fill(offset + 1)
}
if buf.err != nil && offset >= buf.errOffset {
return 0, buf.err
}
return buf.buffer[buf.bufOffset+offset], nil
return buf.buffer[buf.start+offset], nil
}
func (buf *Buffer) fill(minBytes int) {
// Check the current length of the buffer data.
bufLen := len(buf.buffer[buf.bufOffset:])
// If the required amount of bytes fits in the available data, or when
// an error was encountered previously, then no action is needed.
if minBytes <= bufLen || buf.err != nil {
return
}
// Grow the buffer so it can contain at least the number of requested bytes.
// The return value is the actual capacity of the buffer after growing it.
//
// Note:
// The grow() method will always arrange the data to be at the start of the
// buffer, getting rid of the leading unused space that might exist due to
// calls to Flush(). This means that buf.bufOffset will be 0 from here on,
// so there's no need to accommodate for this offset in the following code.
bufLen, bufCap := buf.grow(minBytes)
if minBytes > buf.cap-buf.start {
buf.grow(minBytes)
}
// Now we try to fill the buffer completely with data from our source.
// This is more efficient than only filling the data up to the point where
// we can read the data at the 'minBytes' position. Ideally, the buffer is
// filled completely with data to work with.
for bufLen < bufCap {
for buf.len < buf.cap {
// Read bytes from our source, and append them to the end of the
// current buffer data.
n, err := buf.bufio.Read(buf.buffer[bufLen:bufCap])
bufLen += n
n, err := buf.bufio.Read(buf.buffer[buf.len:buf.cap])
buf.len += n
if err != nil {
buf.err = err
buf.errOffset = bufLen
buf.errOffset = buf.len
break
}
}
buf.buffer = buf.buffer[:bufLen] // TODO work with a separate bufLen field in the buffer stuct, that might be simpler to work with and maybe faster.
}
const bufferBlockSize = 1024
@ -196,20 +187,12 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
// grow grows the buffer to guarantee space for at least the requested amount
// of bytes, either shifting data around or reallocating the buffer.
func (buf *Buffer) grow(minBytes int) (int, int) {
if buf.err != nil {
panic("Cannot grow buffer, there was an error earlier on!")
}
func (buf *Buffer) grow(minBytes int) {
// When possible, grow the buffer by moving the data to the start of
// the buffer, freeing up extra capacity at the end.
bufLen := len(buf.buffer) - buf.bufOffset
bufCap := cap(buf.buffer)
if buf.bufOffset > 0 && minBytes <= bufCap {
copy(buf.buffer, buf.buffer[buf.bufOffset:])
buf.buffer = buf.buffer[:bufLen]
buf.bufOffset = 0
return bufLen, bufCap
if buf.start > 0 && minBytes <= buf.cap {
copy(buf.buffer, buf.buffer[buf.start:buf.start+buf.len])
buf.start = 0
}
// Grow the buffer store by allocating a new one and copying the data.
@ -217,23 +200,23 @@ func (buf *Buffer) grow(minBytes int) (int, int) {
if minBytes%bufferBlockSize > 0 {
newbufCap += bufferBlockSize
}
newStore := makeSlice(minBytes, newbufCap)
copy(newStore, buf.buffer[buf.bufOffset:])
newStore := makeSlice(newbufCap)
copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
buf.buffer = newStore
buf.bufOffset = 0
return bufLen, newbufCap
buf.start = 0
buf.cap = newbufCap
}
// makeSlice allocates a slice of size n. If the allocation fails, it panics
// with ErrTooLarge.
func makeSlice(l int, c int) []byte {
func makeSlice(c int) []byte {
// If the make fails, give a known error.
defer func() {
if recover() != nil {
panic(ErrTooLarge)
}
}()
return make([]byte, l, c)
return make([]byte, c)
}
// Flush deletes the provided number of bytes from the start of the Buffer.
@ -245,17 +228,15 @@ func (buf *Buffer) Flush(numberOfBytes int) {
return
}
bufLen := len(buf.buffer)
dataLen := bufLen - buf.bufOffset
if numberOfBytes > dataLen {
if numberOfBytes > buf.len {
panic(fmt.Sprintf(
"parsekit.read.Buffer.Flush(): number of bytes to flush (%d) "+
"exceeds size of the buffered data (%d)", numberOfBytes, dataLen))
"exceeds size of the buffered data (%d)", numberOfBytes, buf.len))
}
if dataLen == numberOfBytes {
buf.buffer = buf.buffer[:0]
buf.bufOffset = 0
if buf.len == numberOfBytes {
buf.len = 0
buf.start = 0
buf.errOffset = 0
return
}
@ -263,5 +244,6 @@ func (buf *Buffer) Flush(numberOfBytes int) {
if buf.err != nil {
buf.errOffset -= numberOfBytes
}
buf.bufOffset += numberOfBytes
buf.start += numberOfBytes
buf.len -= numberOfBytes
}

View File

@ -329,74 +329,113 @@ func TestAllocationPatterns(t *testing.T) {
r := New(input)
// The first read will create the standard buffer and fill it with data.
// The first rune is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxx1024xxxxx|
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 1024)
assertBuffer(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 0, 1024)
rn, _, _ := r.RuneAt(0)
assertEqual(t, 'A', rn)
// The first 1024 bytes will fit in the standard buffer.
// buffer |xxxx1024xxxxx|
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 1024)
assertBuffer(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 0, 1024)
// Flushing zero input keeps everything as-is.
// buffer |xxxx1024xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 1024, 1024)
assertBuffer(t, "flush zero", r, func() { r.Flush(0) }, 1024, 0, 1024)
// Flushing all cached input truncates the cache.
// Flushing all cached input truncates the buffer.
// buffer | 1024 |
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024)
assertBuffer(t, "flush full cache", r, func() { r.Flush(1024) }, 1024, 0, 0)
// Reading 1025 chars will allocate a new store of 2 * 1024 and fill it with data.
// Offset 1024 is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 2048)
runeBefore, _, _ := r.RuneAt(0)
assertBuffer(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 0, 2048)
runeAfter, _, _ := r.RuneAt(0)
// The bytes that we had before must be copied to the newly allocated store.
rn, _, _ = r.RuneAt(0)
assertEqual(t, 'K', rn)
assertEqual(t, runeBefore, runeAfter)
// A partial flush moves the buffer offset, but the stored data stay the same.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 2048, 2048)
assertBuffer(t, "flush partial", r, func() { r.Flush(25) }, 2048, 25, 2023)
// The capacity for the usable part of the buffer is now 2023
// This number of runes can be read, without triggering a re-allocation.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 2048)
assertBuffer(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 25, 2023)
// Flush the full input.
// store | 2048 |
// buffer | 2048 |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048)
assertBuffer(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 2048, 0, 0)
// Fill up the store again.
// Offset 1234 is requested, but there's more input data available,
// so the cache is filled up completely.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 2048)
assertBuffer(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
// Then flush almost all input.
// buffer 2047 |x1x|
assertCache(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2048)
assertBuffer(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2047, 1)
// Read some data beyond the single byte. This moves the single byte at the end to
// the start and fills up the rest of the buffer, without a reallocation.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 2048)
assertBuffer(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 0, 2048)
// Now flush only one rune from the cache.
// buffer 1 |xxxxxxxxx2047xxxxxxxxxxxxxx|
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 2048, 2048)
assertBuffer(t, "flush 1", r, func() { r.Flush(1) }, 2048, 1, 2047)
// Now read the full available capacity. This will not fit, so
// space has to be made. Since there's 1 free space at the start of the store,
// the data are moved to the start and no reallocation is needed.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 2048)
assertBuffer(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 0, 2048)
// Now read in the whole rest of the buffer, asking for an offset that is way out of range.
// It does allocate enough memory to store 10000 bytes (bringing us to 10240), but while reading it is
// detected that there are not enough bytes to fill it. That puts a limit on the amount of data in
// the buffer (5120 instead of the full 10240 buffer size).
// the buffer, so the buffer is not completely filled.
// buffer |xxxxxxxxxxxxxxx5120xxxxxxxxxxxxxxxxxxxx 10240-5120 |
assertCache(t, "over-ask", r, func() { r.ByteAt(10000) }, 5120, 10240)
remaining := input.remaining
assertBuffer(t, "over-ask", r, func() { r.ByteAt(10000) }, 10240, 0, 2048+remaining)
}
// makeLargeStubReader builds a StubReader that yields 8192 bytes of
// repeating alphabet data ('A'..'Z', cycling) followed by a single queued
// io.EOF. It returns the reader along with the total number of bytes it
// will produce.
func makeLargeStubReader() (*StubReader, int) {
	const size = 8192
	data := make([]byte, size)
	for i := 0; i < size; i++ {
		data[i] = byte('A' + i%26)
	}
	reader := &StubReader{
		bytes:     data,
		errors:    []error{io.EOF},
		remaining: size,
	}
	return reader, size
}

// StubReader is a test double for io.Reader. It hands out its bytes one
// at a time, then replays its queued errors in order, and finally panics
// when both supplies are exhausted. The remaining counter tracks how many
// data bytes are still to be delivered.
type StubReader struct {
	bytes     []byte
	errors    []error
	remaining int
}

// Read implements io.Reader. It delivers exactly one byte per call while
// bytes remain (decrementing remaining), then pops and returns the queued
// errors one per call.
func (r *StubReader) Read(p []byte) (n int, err error) {
	if len(r.bytes) == 0 {
		if len(r.errors) == 0 {
			panic("StubReader is all out of bytes and errors")
		}
		nextErr := r.errors[0]
		r.errors = r.errors[1:]
		return 0, nextErr
	}
	b := r.bytes[0]
	r.bytes = r.bytes[1:]
	p[0] = b
	r.remaining--
	return 1, nil
}
func Benchmark0BytesInputFile(b *testing.B) {
@ -461,32 +500,3 @@ func processInputFile(b *testing.B, testSize int) {
}
}
}
// makeLargeStubReader returns a StubReader preloaded with 8192 bytes of
// cyclic alphabet data ('A'-'Z' repeated) and a single queued io.EOF,
// plus the size of that data.
func makeLargeStubReader() (*StubReader, int) {
	const size = 8192
	data := make([]byte, size)
	for i := range data {
		data[i] = byte('A' + i%26)
	}
	return &StubReader{bytes: data, errors: []error{io.EOF}}, size
}

// StubReader is an io.Reader test double: it yields one byte per Read
// call, then its queued errors in order, and panics once both run out.
type StubReader struct {
	bytes  []byte
	errors []error
}

// Read implements io.Reader, returning a single byte per call. When the
// byte supply is exhausted it dequeues and returns the next error.
func (r *StubReader) Read(p []byte) (n int, err error) {
	switch {
	case len(r.bytes) > 0:
		p[0] = r.bytes[0]
		r.bytes = r.bytes[1:]
		return 1, nil
	case len(r.errors) > 0:
		nextErr := r.errors[0]
		r.errors = r.errors[1:]
		return 0, nextErr
	default:
		panic("StubReader is all out of bytes and errors")
	}
}