Some work on simplifying the reader code, to see if I can squeeze some more performance out of that part.

Maurice Makaay 2019-07-19 08:47:13 +00:00
parent 1771e237c0
commit 22bcf4677e
10 changed files with 1950 additions and 166 deletions


@ -44,14 +44,8 @@ func assertPanic(t *testing.T, code func(), expected string) {
code()
}
func assertCache(t *testing.T, name string, r *Buffer, code func(), storeLen, storeCap, bufLen, bufCap int) {
func assertCache(t *testing.T, name string, r *Buffer, code func(), bufLen, bufCap int) {
code()
if storeLen != len(r.store) {
t.Errorf("[%s] Unexpected store len (expected %d, got %d)", name, storeLen, len(r.store))
}
if storeCap != cap(r.store) {
t.Errorf("[%s] Unexpected store cap (expected %d, got %d)", name, storeCap, cap(r.store))
}
if bufLen != len(r.buffer) {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
}


@ -92,8 +92,8 @@ func makeBufioReader(input interface{}) *bufio.Reader {
// This parsekit.read.Buffer is used internally by tokenize.API.
type Buffer struct {
bufio *bufio.Reader // used for ReadRune()
store []byte // buffer store, the buffer field is a slice on top of this one
buffer []byte // input buffer, holding runes that were read from input
bufOffset int // the offset in the buffer at which the sliding data window starts
err error // a read error, if one occurred
errOffset int // the offset in the buffer at which the read error was encountered
}
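For intuition, here is a minimal, self-contained sketch (not the project's code; the names are made up) of the sliding-window idea behind the new bufOffset field: reads index relative to bufOffset, and flushing only moves the offset forward instead of copying data away.

package main

import "fmt"

// slidingWindow is a stripped-down illustration of the Buffer layout above:
// a single backing slice plus an offset marking where the unflushed data start.
type slidingWindow struct {
	buffer    []byte
	bufOffset int
}

// byteAt reads relative to the start of the window, like Buffer.ByteAt().
func (w *slidingWindow) byteAt(offset int) byte {
	return w.buffer[w.bufOffset+offset]
}

// flush discards n bytes by moving the window start, like Buffer.Flush().
func (w *slidingWindow) flush(n int) {
	w.bufOffset += n
}

func main() {
	w := &slidingWindow{buffer: []byte("Hello, world!")}
	fmt.Printf("%c", w.byteAt(0)) // H
	w.flush(7)
	fmt.Printf("%c\n", w.byteAt(0)) // w
}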
@ -121,7 +121,7 @@ func (buf *Buffer) RuneAt(offset int) (rune, int, error) {
if buf.err != nil && offset >= buf.errOffset {
return utf8.RuneError, 0, buf.err
}
r, w := utf8.DecodeRune(buf.buffer[offset:])
r, w := utf8.DecodeRune(buf.buffer[buf.bufOffset+offset:])
return r, w, nil
}
@ -147,20 +147,38 @@ func (buf *Buffer) ByteAt(offset int) (byte, error) {
if buf.err != nil && offset >= buf.errOffset {
return 0, buf.err
}
return buf.buffer[offset], nil
return buf.buffer[buf.bufOffset+offset], nil
}
func (buf *Buffer) fill(minBytes int) {
bufLen := len(buf.buffer)
// Check the current length of the buffer data.
bufLen := len(buf.buffer[buf.bufOffset:])
// If the required number of bytes fits in the available data, or if
// an error was encountered previously, then no action is needed.
if minBytes <= bufLen || buf.err != nil {
return
}
buf.grow(minBytes)
for bufLen < minBytes {
n, err := buf.bufio.Read(buf.buffer[bufLen:cap(buf.buffer)])
// Grow the buffer so it can contain at least the number of requested bytes.
// The return value is the actual capacity of the buffer after growing it.
//
// Note:
// The grow() method will always arrange the data to be at the start of the
// buffer, getting rid of the leading unused space that might exist due to
// calls to Flush(). This means that buf.bufOffset will be 0 from here on,
// so there's no need to account for this offset in the following code.
bufLen, bufCap := buf.grow(minBytes)
// Now we try to fill the buffer completely with data from our source.
// This is more efficient than only filling the data up to the point where
// we can read the data at the 'minBytes' position. Ideally, the buffer is
// filled completely with data to work with.
for bufLen < bufCap {
// Read bytes from our source, and append them to the end of the
// current buffer data.
n, err := buf.bufio.Read(buf.buffer[bufLen:bufCap])
bufLen += n
buf.buffer = buf.buffer[:bufLen]
if err != nil {
buf.err = err
@ -168,6 +186,7 @@ func (buf *Buffer) fill(minBytes int) {
break
}
}
buf.buffer = buf.buffer[:bufLen] // TODO work with a separate bufLen field in the buffer struct, which might be simpler to work with and maybe faster.
}
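As a standalone sketch of the same read loop (generic names, not this repo's API): keep reading into the free tail of the buffer until it reaches capacity, and record the error only after accounting for the bytes that arrived along with it.

package main

import (
	"fmt"
	"io"
	"strings"
)

// fillFromReader reads from r into buf until buf reaches its capacity or an
// error occurs, mirroring the fill() loop above.
func fillFromReader(r io.Reader, buf []byte) ([]byte, error) {
	bufLen := len(buf)
	for bufLen < cap(buf) {
		n, err := r.Read(buf[bufLen:cap(buf)])
		bufLen += n
		if err != nil {
			return buf[:bufLen], err
		}
	}
	return buf[:bufLen], nil
}

func main() {
	buf, err := fillFromReader(strings.NewReader("Hello"), make([]byte, 0, 8))
	fmt.Printf("%q %v\n", buf, err) // "Hello" EOF
}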
const bufferBlockSize = 1024
@ -177,26 +196,32 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
// grow grows the buffer to guarantee space for at least the requested number
// of bytes, either shifting the data around or reallocating the buffer.
func (buf *Buffer) grow(atLeast int) {
capStore := cap(buf.store)
freeAtStartOfStore := capStore - cap(buf.buffer)
if freeAtStartOfStore > 0 && atLeast <= capStore {
buf.store = buf.store[0:atLeast]
copy(buf.store, buf.buffer)
buf.buffer = buf.store[:atLeast]
buf.store = buf.store[:0]
return
func (buf *Buffer) grow(minBytes int) (int, int) {
if buf.err != nil {
panic("Cannot grow buffer, there was an error earlier on!")
}
// When possible, grow the buffer by moving the data to the start of
// the buffer, freeing up extra capacity at the end.
bufLen := len(buf.buffer) - buf.bufOffset
bufCap := cap(buf.buffer)
if buf.bufOffset > 0 && minBytes <= bufCap {
copy(buf.buffer, buf.buffer[buf.bufOffset:])
buf.buffer = buf.buffer[:bufLen]
buf.bufOffset = 0
return bufLen, bufCap
}
// Grow the buffer store by allocating a new one and copying the data.
size := (atLeast / bufferBlockSize) * bufferBlockSize
if atLeast%bufferBlockSize > 0 {
size += bufferBlockSize
newbufCap := (minBytes / bufferBlockSize) * bufferBlockSize
if minBytes%bufferBlockSize > 0 {
newbufCap += bufferBlockSize
}
newStore := makeSlice(atLeast, size)
copy(newStore, buf.buffer)
buf.store = newStore[:0]
buf.buffer = buf.store[:atLeast]
newStore := makeSlice(minBytes, newbufCap)
copy(newStore, buf.buffer[buf.bufOffset:])
buf.buffer = newStore
buf.bufOffset = 0
return bufLen, newbufCap
}
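A worked example of the block rounding above, isolated into a tiny runnable snippet (the helper name is made up): a request for 1025 bytes rounds up to a 2048-byte capacity, while 1024 stays at 1024.

package main

import "fmt"

const bufferBlockSize = 1024

// roundUpToBlock mirrors the newbufCap computation in grow() above.
func roundUpToBlock(minBytes int) int {
	size := (minBytes / bufferBlockSize) * bufferBlockSize
	if minBytes%bufferBlockSize > 0 {
		size += bufferBlockSize
	}
	return size
}

func main() {
	fmt.Println(roundUpToBlock(1))    // 1024
	fmt.Println(roundUpToBlock(1024)) // 1024
	fmt.Println(roundUpToBlock(1025)) // 2048
}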
// makeSlice allocates a slice of size n. If the allocation fails, it panics
@ -220,19 +245,23 @@ func (buf *Buffer) Flush(numberOfBytes int) {
return
}
bufferLen := len(buf.buffer)
if numberOfBytes > bufferLen {
bufLen := len(buf.buffer)
dataLen := bufLen - buf.bufOffset
if numberOfBytes > dataLen {
panic(fmt.Sprintf(
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
"exceeds size of the buffer (%d)", numberOfBytes, bufferLen))
"parsekit.read.Buffer.Flush(): number of bytes to flush (%d) "+
"exceeds size of the buffered data (%d)", numberOfBytes, dataLen))
}
if bufferLen == numberOfBytes {
buf.buffer = buf.store[:0]
if dataLen == numberOfBytes {
buf.buffer = buf.buffer[:0]
buf.bufOffset = 0
buf.errOffset = 0
return
}
buf.buffer = buf.buffer[numberOfBytes:]
if buf.err != nil {
buf.errOffset = buf.errOffset - numberOfBytes
buf.errOffset -= numberOfBytes
}
buf.bufOffset += numberOfBytes
}
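To show the resulting semantics from the caller's side, here is a sketch written in the style of the Example functions in read_test.go further down (it would have to live in that package to compile): after Flush(n), offset 0 refers to the first byte that was not flushed, even though internally only bufOffset moved.

func ExampleBuffer_Flush_offsets() {
	r := New("Hello, world!")

	c, _, _ := r.RuneAt(0)
	fmt.Printf("%c", c)

	// Flush the first 7 bytes ("Hello, "); offset 0 now points at 'w'.
	r.Flush(7)
	c, _, _ = r.RuneAt(0)
	fmt.Printf("%c\n", c)

	// Output: Hw
}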


@ -4,6 +4,8 @@ import (
"bufio"
"fmt"
"io"
"os"
"runtime"
"strings"
"testing"
"unicode/utf8"
@ -13,10 +15,10 @@ func ExampleNew() {
printFirstRuneOf := func(input interface{}) {
r := New(input)
c, w, _ := r.RuneAt(0)
fmt.Printf("rune %q, width %d\n", c, w)
fmt.Printf("rune %q, width in bytes = %d\n", c, w)
}
simpleString := "Hello, world!"
simpleString := "Ƕello, world!"
printFirstRuneOf(simpleString)
ioReaderImplementation := strings.NewReader("Good bye, world!")
@ -29,10 +31,10 @@ func ExampleNew() {
printFirstRuneOf(bufioReaderValue)
// Output:
// rune 'H', width 1
// rune 'G', width 1
// rune 'W', width 1
// rune 'Ɍ', width 2
// rune 'Ƕ', width in bytes = 2
// rune 'G', width in bytes = 1
// rune 'W', width in bytes = 1
// rune 'Ɍ', width in bytes = 2
}
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
@ -138,8 +140,8 @@ func ExampleBuffer_RuneAt() {
fmt.Printf("Runes: ")
offset := 0
for {
r, w, err := reader.RuneAt(offset)
offset += w
r, _, err := reader.RuneAt(offset)
offset += utf8.RuneLen(r)
if err != nil {
fmt.Printf("\nErr: %s\n", err)
break
@ -192,6 +194,10 @@ func ExampleBuffer_Flush() {
// Read another 4 runes; because of the flushing, we start at offset 0.
fmt.Printf("%c%c%c%c", at(1), at(2), at(0), at(3))
// We might even read some more runes. That is no problem.
at(4)
at(5)
// Again, flush 4 runes, bringing offset 0 to the start of "dog!".
r.Flush(4)
@ -212,8 +218,7 @@ func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
// However, we flush 14 runes, which exceeds the buffer size.
assertPanic(t,
func() { r.Flush(14) },
"parsekit.read.Buffer.Flush(): number of runes to flush "+
"(14) exceeds size of the buffer (13)")
"parsekit.read.Buffer.Flush(): number of bytes to flush (14) exceeds size of the buffered data (13)")
}
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
@ -249,7 +254,6 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
// Read the last available rune.
readRune, _, _ := r.RuneAt(3)
assertEqual(t, 'd', readRune)
return
// Reading the next offset must result in the io.EOF error from the stub.
readRune, _, err := r.RuneAt(4)
@ -272,144 +276,198 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
readRune, _, _ = r.RuneAt(0)
assertEqual(t, 'd', readRune)
// // The io.EOF is now at offset 1.
// _, _, err = r.RuneAt(1)
// assertEqual(t, io.EOF, err)
// The io.EOF is now at offset 1.
_, _, err = r.RuneAt(1)
assertEqual(t, io.EOF, err)
// // Let's flush that last rune too.
// r.Flush(1)
// Let's flush that last rune too.
r.Flush(1)
// // The io.EOF is now at offset 0.
// _, _, err = r.RuneAt(0)
// assertEqual(t, io.EOF, err)
// The io.EOF is now at offset 0.
_, _, err = r.RuneAt(0)
assertEqual(t, io.EOF, err)
// // And reading beyond that offset also yields io.EOF.
// _, _, err = r.RuneAt(1)
// assertEqual(t, io.EOF, err)
// And reading beyond that offset also yields io.EOF.
_, _, err = r.RuneAt(1)
assertEqual(t, io.EOF, err)
}
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
func TestInputLargerThanDefaultBufSize(t *testing.T) {
input, size := makeLargeStubReader()
r := New(input)
readRune, _, err := r.RuneAt(0)
assertEqual(t, 'X', readRune)
assertEqual(t, 'A', readRune)
readRune, _, err = r.RuneAt(size - 1)
assertEqual(t, 'Y', readRune)
assertEqual(t, 'B', readRune)
readRune, _, err = r.RuneAt(size)
assertEqual(t, io.EOF, err)
readRune, _, err = r.RuneAt(10)
assertEqual(t, 'X', readRune)
assertEqual(t, 'K', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
func TestInputLargerThanDefaultBufSize_WithFirstReadLargerThanBufSize(t *testing.T) {
input, size := makeLargeStubReader()
r := New(input)
readRune, _, _ := r.RuneAt(size - 200)
assertEqual(t, 'X', readRune)
assertEqual(t, 'K', readRune)
readRune, _, _ = r.RuneAt(size - 1)
assertEqual(t, 'Y', readRune)
assertEqual(t, 'B', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
func TestInputLargerThanDefaultBufSize_WithFirstReadToLastByte(t *testing.T) {
input, size := makeLargeStubReader()
r := New(input)
readRune, _, _ := r.RuneAt(size - 1)
assertEqual(t, 'Y', readRune)
assertEqual(t, 'B', readRune)
}
func TestAllocationPatterns(t *testing.T) {
input, _ := makeLargeStubReader()
r := New(input)
// The first read will create the standard cache.
// store |x 1024 |
// buffer |x 1024 |
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 1024, 4, 1024)
rn, _, _ := r.RuneAt(0)
assertEqual(t, 'X', rn)
// The first 1024 bytes will fit in the standard cache.
// store |xxxx1024xxxxx|
// The first read will create the standard buffer and fill it with data.
// buffer |xxxx1024xxxxx|
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 0, 1024, 1024, 1024)
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 1024, 1024)
rn, _, _ := r.RuneAt(0)
assertEqual(t, 'A', rn)
// The first 1024 bytes will fit in the standard buffer.
// buffer |xxxx1024xxxxx|
assertCache(t, "read fill cache", r, func() { r.ByteAt(1023) }, 1024, 1024)
// Flushing zero input keeps everything as-is.
// store |xxxx1024xxxxx|
// buffer |xxxx1024xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 1024, 1024, 1024)
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 1024, 1024)
// Flushing all cached input truncates the cache.
// store | 1024 |
// buffer | 1024 |
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024, 0, 1024)
assertCache(t, "flush full cache", r, func() { r.Flush(1024) }, 0, 1024)
// Reading 1025 chars will allocate a new store of 2 * 1024.
// store |xxxxx1025xxxxx 1023 |
// buffer |xxxxx1025xxxxx 1023 |
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// Reading 1025 chars will allocate a new store of 2 * 1024 and fill it with data.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read cap + 1", r, func() { r.ByteAt(1024) }, 2048, 2048)
// The bytes that we had before must be copied to the newly allocated store.
rn, _, _ = r.RuneAt(0)
assertEqual(t, 'X', rn)
assertEqual(t, 'K', rn)
// A partial flush frees the start of the store and moves
// the buffer slice.
// store | 25 xxx1000xxx 1023 |
// buffer |xxx1000xxx 1023 |
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 0, 2048, 1000, 2048-25)
// A partial flush moves the buffer offset, but the stored data stay the same.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "flush partial", r, func() { r.Flush(25) }, 2048, 2048)
// The capacity for the buffer is now 2023
// This number of runes can be read, filling up the store
// without a new allocation.
// store | 25 xxxxxxxxxxx2023xxxxxxxxxx|
// buffer |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 0, 2048, 2023, 2048)
// The capacity for the usable part of the buffer is now 2023
// This number of runes can be read, without triggering a re-allocation.
// buffer 25 |xxxxxxxxxxx2023xxxxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.ByteAt(2022) }, 2048, 2048)
// Flush the full input.
// store | 2048 |
// buffer | 2048 |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048, 0, 2048)
// store | 2048 |
// buffer | 2048 |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(2023) }, 0, 2048)
// Read a bit more than half the capacity.
// store |xxxx1025xxxxxx 1023 |
// buffer |xxxx1025xxxxxx 1023 |
assertCache(t, "read more than half the cap", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// Fill up the store again.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "fill up the store again", r, func() { r.ByteAt(1234) }, 2048, 2048)
// Then flush almost all input.
// store | 1024 x1x 1023 |
// buffer 1024 |x1x 1023 |
assertCache(t, "flush almost all input", r, func() { r.Flush(1024) }, 0, 2048, 1, 1024)
// buffer 2047 |x1x|
assertCache(t, "flush almost all input", r, func() { r.Flush(2047) }, 2048, 2048)
// Again read a bit more than half the capacity. This does not fit at the
// end of the store, but by moving the current buffer to the start of the
// store (where it fits), space is freed up for the read operation.
// store |xxxxx1025xxxxxx 1023 |
// buffer |xxxxx1025xxxxxx 1023 |
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.ByteAt(1024) }, 0, 2048, 1025, 2048)
// Read some data beyond the single byte. This moves the single byte at the end to
// the start and fills up the rest of the buffer, without a reallocation.
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxxx|
assertCache(t, "read the remaining size, triggering a move", r, func() { r.ByteAt(1234) }, 2048, 2048)
// Now flush only one rune from the cache.
// store |1 xxx1024xxxxxx 1023 |
// buffer |xxx1024xxxxxx 1023 |
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 2048, 1024, 2047)
// buffer 1 |xxxxxxxxx2047xxxxxxxxxxxxxx|
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 2048, 2048)
// Now read the full available capacity. This will not fit, so
// space has to be made. Since there's 1 free space at the start of the store,
// the data are moved to the start and no reallocation is needed.
// store |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
// buffer |xxxxxxxxxxxx2048xxxxxxxxxxxxx|
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 0, 2048, 2048, 2048)
assertCache(t, "read full capacity with 1 free byte at start", r, func() { r.ByteAt(2047) }, 2048, 2048)
// Now read the whole rest of the input, asking for an offset that is way out of range.
// This allocates enough memory to store 10000 bytes (bringing the capacity to 10240), but while
// reading it turns out that there is not enough input left to fill it. That limits the amount of
// data in the buffer (5120 instead of the full 10240 buffer size).
// buffer |xxxxxxxxxxxxxxx5120xxxxxxxxxxxxxxxxxxxx 10240-5120 |
assertCache(t, "over-ask", r, func() { r.ByteAt(10000) }, 5120, 10240)
}
func Benchmark0BytesInputFile(b *testing.B) {
processInputFile(b, 0)
}
func Benchmark100BytesInputFile(b *testing.B) {
processInputFile(b, 100)
}
func Benchmark1024BytesInputFile(b *testing.B) {
processInputFile(b, 1024)
}
func Benchmark2048BytesInputFile(b *testing.B) {
processInputFile(b, 2048)
}
func Benchmark2000000BytesInputFile(b *testing.B) {
processInputFile(b, 2000000)
}
func processInputFile(b *testing.B, testSize int) {
for x := 0; x < b.N; x++ {
_, filename, _, _ := runtime.Caller(0)
path := strings.Replace(filename, "read_test.go", fmt.Sprintf("testfiles/%dbytes.txt", testSize), 1)
input, err := os.Open(path)
if err != nil {
panic(fmt.Sprintf("Cannot open file for test (%v): %s", path, err))
}
i := New(input)
offset := 0
readSize := 0
flushAt := 1024
for {
_, err := i.ByteAt(offset)
if err != nil {
break
}
offset++
readSize++
if offset == flushAt {
i.Flush(offset)
offset = 0
// So we flush full buffer sizes and partial buffer sizes to
// get more test coverage.
if flushAt == 1000 {
flushAt = 1024
} else {
flushAt = 1000
}
}
if readSize > testSize {
b.Fatalf("Test input is %d bytes, but read %d bytes so far!", testSize, readSize)
}
}
if readSize != testSize {
b.Fatalf("Expected to read %d bytes, but read %d bytes instead", testSize, readSize)
}
}
}
func makeLargeStubReader() (*StubReader, int) {
size := 8192
bytes := make([]byte, size)
for i := range bytes {
bytes[i] = 'X'
bytes[i] = 'A' + byte(i%26)
}
bytes[size-1] = 'Y'
return &StubReader{bytes: bytes, errors: []error{io.EOF}}, size
}


@ -0,0 +1,3 @@
ACEGIKMOQSUWY
Z⅄XMΛ∩┴SɹQԀOW˥ſIHפℲƎpƆq∀
Z⅄XMΛ∩┴SɹQԀOW˥ſIHפℲƎpƆq∀


@ -0,0 +1,16 @@
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

File diff suppressed because it is too large


@ -0,0 +1,32 @@
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX


@ -71,16 +71,15 @@ import (
// can lead to hard-to-track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type API struct {
reader *read.Buffer // the input data reader
lastRune rune // the rune as retrieved by the last NextRune() call
lastRuneWidth int // the width in bytes of the last read rune
lastRuneErr error // the error for the last NextRune() call
runeRead bool // whether or not a rune was read using NextRune()
bytes []byte // accepted bytes
tokens []Token // accepted tokens
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
reader *read.Buffer // the input data reader
lastRune rune // the rune as retrieved by the last NextRune() call
lastRuneErr error // the error for the last NextRune() call
runeRead bool // whether or not a rune was read using NextRune()
bytes []byte // accepted bytes
tokens []Token // accepted tokens
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
}
type stackFrame struct {
@ -131,9 +130,8 @@ func (i *API) NextRune() (rune, error) {
"without a prior call to Accept()")
}
readRune, runeWidth, err := i.reader.RuneAt(i.stackFrame.offset)
readRune, _, err := i.reader.RuneAt(i.stackFrame.offset)
i.lastRune = readRune
i.lastRuneWidth = runeWidth
i.lastRuneErr = err
i.runeRead = true
@ -168,7 +166,7 @@ func (i *API) Accept() {
"but the prior call to NextRune() failed")
}
i.acceptRunes(i.lastRuneWidth, i.lastRune)
i.acceptRunes(i.lastRune)
}
func (i *API) skipBytes(bytes ...byte) {
@ -207,7 +205,7 @@ func (i *API) skipRunes(width int, runes ...rune) {
i.runeRead = false
}
func (i *API) acceptRunes(width int, runes ...rune) {
func (i *API) acceptRunes(runes ...rune) {
runesAsString := string(runes)
curBytesEnd := i.stackFrame.bytesEnd
newBytesEnd := curBytesEnd + len(runesAsString)
@ -346,7 +344,6 @@ func (i *API) Reset() {
i.stackFrame.line = 0
i.stackFrame.offset = 0
} else {
// TODO simplify! Store line/column/offset using a 0-based index in a fork. On merge add them to the parent's offsets?
parent := i.stackFrames[i.stackLevel-1]
i.stackFrame.column = parent.column
i.stackFrame.line = parent.line
@ -357,13 +354,12 @@ func (i *API) Reset() {
i.stackFrame.err = nil
}
// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
// FlushInput flushes input data from the read.Buffer up to the current
// read offset of the parser.
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
// method yourself. It is automatically called by parsekit when possible.
func (i *API) FlushInput() bool {
if i.stackFrame.offset > 0 {
i.reader.Flush(i.stackFrame.offset)
@ -374,11 +370,13 @@ func (i *API) FlushInput() bool {
}
func (i *API) String() string {
return string(i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd])
bytes := i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd]
return string(bytes)
}
func (i *API) Runes() []rune {
return []rune(string(i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd]))
bytes := i.bytes[i.stackFrame.bytesStart:i.stackFrame.bytesEnd]
return []rune(string(bytes))
}
func (i *API) Rune(offset int) rune {
@ -386,6 +384,28 @@ func (i *API) Rune(offset int) rune {
return r
}
func (i *API) ClearBytes() {
i.stackFrame.bytesEnd = i.stackFrame.bytesStart
}
func (i *API) SetBytes(bytes ...byte) {
i.ClearBytes()
i.AddBytes(bytes...)
}
func (i *API) AddBytes(bytes ...byte) {
// Grow the bytes capacity when needed.
newBytesEnd := i.stackFrame.bytesEnd + len(bytes)
if cap(i.bytes) < newBytesEnd {
newBytes := make([]byte, newBytesEnd*2)
copy(newBytes, i.bytes)
i.bytes = newBytes
}
copy(i.bytes[i.stackFrame.bytesEnd:], bytes)
i.stackFrame.bytesEnd = newBytesEnd
}
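The growth strategy in AddBytes is the usual grow-by-doubling append; a standalone sketch of that pattern (names made up, not this repo's code):

package main

import "fmt"

// appendDoubling appends src to dst, doubling the backing array when the
// capacity runs out, similar to how AddBytes() above grows the accepted bytes.
func appendDoubling(dst, src []byte) []byte {
	need := len(dst) + len(src)
	if cap(dst) < need {
		grown := make([]byte, len(dst), need*2)
		copy(grown, dst)
		dst = grown
	}
	return append(dst, src...)
}

func main() {
	out := appendDoubling([]byte("Hello"), []byte(", world!"))
	fmt.Println(string(out)) // Hello, world!
}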
func (i *API) ClearRunes() {
i.stackFrame.bytesEnd = i.stackFrame.bytesStart
}
@ -410,11 +430,12 @@ func (i *API) AddRunes(runes ...rune) {
}
func (i *API) AddString(s string) {
i.AddRunes([]rune(s)...)
i.AddBytes([]byte(s)...)
}
func (i *API) SetString(s string) {
i.SetRunes([]rune(s)...)
i.ClearBytes()
i.SetBytes([]byte(s)...)
}
func (i *API) Cursor() string {


@ -365,9 +365,9 @@ func MatchRune(expected rune) Handler {
return MatchByte(byte(expected))
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil && r == expected {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -408,13 +408,13 @@ func MatchRunes(expected ...rune) Handler {
return MatchBytes(expectedBytes...)
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err != nil {
return false
}
for _, e := range expected {
if r == e {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
}
@ -458,9 +458,9 @@ func MatchRuneRange(start rune, end rune) Handler {
return MatchByteRange(byte(start), byte(end))
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil && r >= start && r <= end {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -605,9 +605,9 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil && callback(r) {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -639,7 +639,6 @@ func MatchEndOfLine() Handler {
// MatchStr creates a Handler that matches the input against the provided string.
func MatchStr(expected string) Handler {
expectedRunes := []rune(expected)
width := len(expected)
return func(t *API) bool {
offset := 0
@ -658,7 +657,7 @@ func MatchStr(expected string) Handler {
offset += w
}
}
t.acceptRunes(width, expectedRunes...)
t.acceptRunes(expectedRunes...)
return true
}
}
@ -690,7 +689,7 @@ func MatchStrNoCase(expected string) Handler {
}
i++
}
t.acceptRunes(width, matches...)
t.acceptRunes(matches...)
return true
}
}
@ -762,9 +761,9 @@ func MatchNot(handler Handler) Handler {
return false
}
t.Dispose(child)
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -1032,9 +1031,9 @@ func MatchAnyByte() Handler {
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
func MatchAnyRune() Handler {
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -1045,9 +1044,9 @@ func MatchAnyRune() Handler {
// UTF8 rune can be read from the input.
func MatchValidRune() Handler {
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil && r != utf8.RuneError {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -1058,9 +1057,9 @@ func MatchValidRune() Handler {
// UTF8 rune can be read from the input.
func MatchInvalidRune() Handler {
return func(t *API) bool {
r, w, err := t.PeekRune(0)
r, _, err := t.PeekRune(0)
if err == nil && r == utf8.RuneError {
t.acceptRunes(w, r)
t.acceptRunes(r)
return true
}
return false
@ -1551,7 +1550,7 @@ func ModifyDrop(handler Handler) Handler {
}
}
// ModifyDropUntilEndOfLine creates a Handler that drops all input until an end of line
// ModifyDropUntilEndOfLine creates a Handler that drops all input until an end of line
// (or end of file). This handler is typically used when ignoring any input data after
// a comment start like '#' or '//' when parsing code or configuration data.
func ModifyDropUntilEndOfLine() Handler {