Backup changes for performance fixes.

This commit is contained in:
Maurice Makaay 2019-07-05 15:07:07 +00:00
parent 5e9879326a
commit 7bc7fda593
10 changed files with 204 additions and 93 deletions

View File

@ -19,7 +19,7 @@ type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek() result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
loopCheck map[filepos]bool // used for parser loop detection loopCheck map[uintptr]bool // used for parser loop detection
err error // parse error, retrieved by Error(), using API methods is denied when set err error // parse error, retrieved by Error(), using API methods is denied when set
stopped bool // a boolean set to true by Stop() stopped bool // a boolean set to true by Stop()
} }
@ -125,14 +125,14 @@ func (p *API) IsStoppedOrInError() bool {
// When Accept() is called, and the parser moved forward in the input data, // When Accept() is called, and the parser moved forward in the input data,
// this method is called to reset the map for the new read cursor position. // this method is called to reset the map for the new read cursor position.
func (p *API) initLoopCheck() { func (p *API) initLoopCheck() {
p.loopCheck = make(map[filepos]bool) p.loopCheck = make(map[uintptr]bool)
} }
// checkForLoops checks if the line of code from which Accept() or Peek() // checkForLoops checks if the line of code from which Accept() or Peek()
// was called has been seen before for the current read cursor position. // was called has been seen before for the current read cursor position.
// If yes, then the parser is in a loop and the method will panic. // If yes, then the parser is in a loop and the method will panic.
func (p *API) checkForLoops(name string) { func (p *API) checkForLoops(name string) {
filepos := callerFilepos(3) filepos := callerPointer(3)
if _, ok := p.loopCheck[filepos]; ok { if _, ok := p.loopCheck[filepos]; ok {
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}") callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
} }

View File

@ -39,6 +39,12 @@ func callerFilepos(depth int) filepos {
return filepos{file, line} return filepos{file, line}
} }
// callerPointer returns the program counter for the caller at the
// requested stack depth (relative to the caller of this function).
func callerPointer(depth int) uintptr {
	// The error result is deliberately ignored: this helper is only
	// invoked internally with depth values that are known to be safe.
	pc, _, _, _ := runtime.Caller(depth + 1)
	return pc
}
func callerPanic(name, f string, data ...interface{}) { func callerPanic(name, f string, data ...interface{}) {
filepos := callerBefore(name) filepos := callerBefore(name)
m := fmt.Sprintf(f, data...) m := fmt.Sprintf(f, data...)

View File

@ -49,7 +49,7 @@ func new(startHandler Handler, sanityChecksEnabled bool) Func {
return func(input interface{}) error { return func(input interface{}) error {
api := &API{ api := &API{
tokenAPI: tokenize.NewAPI(input), tokenAPI: tokenize.NewAPI(input),
loopCheck: make(map[filepos]bool), loopCheck: make(map[uintptr]bool),
sanityChecksEnabled: sanityChecksEnabled, sanityChecksEnabled: sanityChecksEnabled,
} }
if api.Handle(startHandler) { if api.Handle(startHandler) {

View File

@ -1,4 +1,4 @@
package read_test package read
// This file contains some tools that are used for writing tests. // This file contains some tools that are used for writing tests.
@ -8,11 +8,18 @@ import (
"testing" "testing"
) )
func AssertEqual(t *testing.T, expected interface{}, actual interface{}) { func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
if expected != actual { if expected != actual {
t.Errorf( switch expected.(type) {
"Unexpected value at %s:\nexpected: %q\nactual: %q", case rune:
callerFilepos(1), expected, actual) t.Errorf(
"Unexpected value at %s:\nexpected: %q\nactual: %q",
callerFilepos(1), expected, actual)
default:
t.Errorf(
"Unexpected value at %s:\nexpected: %v\nactual: %v",
callerFilepos(1), expected, actual)
}
} }
} }
@ -22,7 +29,7 @@ func callerFilepos(depth int) string {
return fmt.Sprintf("%s:%d", file, line) return fmt.Sprintf("%s:%d", file, line)
} }
func AssertPanic(t *testing.T, code func(), expected string) { func assertPanic(t *testing.T, code func(), expected string) {
defer func() { defer func() {
if r := recover(); r != nil { if r := recover(); r != nil {
if expected != r.(string) { if expected != r.(string) {
@ -36,3 +43,19 @@ func AssertPanic(t *testing.T, code func(), expected string) {
}() }()
code() code()
} }
// assertCache executes the provided code and then verifies that the length
// and capacity of the Buffer's store and buffer slices match the expected
// values, reporting any mismatch through the testing.T with the given name.
func assertCache(t *testing.T, name string, r *Buffer, code func(), storeLen, storeCap, bufLen, bufCap int) {
	code()
	checks := []struct {
		what     string
		expected int
		actual   int
	}{
		{"store len", storeLen, len(r.store)},
		{"store cap", storeCap, cap(r.store)},
		{"buffer len", bufLen, len(r.buffer)},
		{"buffer cap", bufCap, cap(r.buffer)},
	}
	for _, check := range checks {
		if check.expected != check.actual {
			t.Errorf("[%s] Unexpected %s (expected %d, got %d)", name, check.what, check.expected, check.actual)
		}
	}
}

View File

@ -151,7 +151,6 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
r.buffer[writeAt] = readRune r.buffer[writeAt] = readRune
} }
} }
return r.buffer[offset], nil return r.buffer[offset], nil
} }
@ -173,29 +172,40 @@ func (r *Buffer) grow(n int) {
if b < n { if b < n {
b = n b = n
} }
r.store = make([]rune, n, b) r.store = make([]rune, 0, b)
r.buffer = r.store r.buffer = r.store[:n]
return return
} }
l := len(r.buffer)
c := cap(r.buffer) lenBuffer := len(r.buffer)
capBuffer := cap(r.buffer)
freeBuffer := capBuffer - lenBuffer
newSize := lenBuffer + n
// Grow the buffer store by reslicing within the available capacity. // Grow the buffer store by reslicing within the available capacity.
if n <= c-l { if freeBuffer >= n {
r.buffer = r.buffer[:l+n] r.buffer = r.buffer[:newSize]
return return
} }
capStore := cap(r.store)
freeAtStartOfStore := capStore - capBuffer
// Grow the buffer by moving the data to the start of the store. // Grow the buffer by moving the data to the start of the store.
if cap(r.store)-l-n > 0 { // Note: according to the spec, overlapping slices are allowed with copy().
if freeAtStartOfStore > 0 && newSize <= capStore {
r.store = r.store[0:newSize]
copy(r.store, r.buffer) copy(r.store, r.buffer)
r.buffer = r.store[:l+n] r.buffer = r.store[:newSize]
r.store = r.store[:0]
return return
} }
// Grow the buffer store by allocating a new one and copying the data. // Grow the buffer store by allocating a new one and copying the data.
buf := makeSlice(2*cap(r.store) + n) buf := makeSlice(2*capStore + n)
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
copy(buf, r.buffer) copy(buf, r.buffer)
r.store = buf r.store = buf
r.buffer = r.store[:l+n] r.buffer = r.store[:newSize]
} }
// makeSlice allocates a slice of size n. If the allocation fails, it panics // makeSlice allocates a slice of size n. If the allocation fails, it panics
@ -207,7 +217,7 @@ func makeSlice(n int) []rune {
panic(ErrTooLarge) panic(ErrTooLarge)
} }
}() }()
return make([]rune, n) return make([]rune, 0, n)
} }
// Flush deletes the provided number of runes from the start of the Buffer. // Flush deletes the provided number of runes from the start of the Buffer.

View File

@ -1,4 +1,4 @@
package read_test package read
import ( import (
"bufio" "bufio"
@ -7,13 +7,11 @@ import (
"strings" "strings"
"testing" "testing"
"unicode/utf8" "unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/read"
) )
func ExampleNew() { func ExampleNew() {
printFirstRuneOf := func(input interface{}) { printFirstRuneOf := func(input interface{}) {
r := read.New(input) r := New(input)
c, _ := r.RuneAt(0) c, _ := r.RuneAt(0)
fmt.Printf("%q\n", c) fmt.Printf("%q\n", c)
} }
@ -47,7 +45,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
} { } {
r := read.New(test.input) r := New(test.input)
firstRune, _ := r.RuneAt(0) firstRune, _ := r.RuneAt(0)
if firstRune != 'H' { if firstRune != 'H' {
t.Errorf("[%s] first rune not 'H'", test.name) t.Errorf("[%s] first rune not 'H'", test.name)
@ -60,42 +58,42 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
} }
func TestNew_UnhandledInputType_Panics(t *testing.T) { func TestNew_UnhandledInputType_Panics(t *testing.T) {
AssertPanic(t, assertPanic(t,
func() { read.New(12345) }, func() { New(12345) },
"parsekit.read.New(): no support for input of type int") "parsekit.read.New(): no support for input of type int")
} }
func TestBuffer_RuneAt(t *testing.T) { func TestBuffer_RuneAt(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!")) r := New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// It is possible to go back and forth while reading the input. // It is possible to go back and forth while reading the input.
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
AssertEqual(t, "H!wH", result) assertEqual(t, "H!wH", result)
} }
func TestBuffer_RuneAt_endOfFile(t *testing.T) { func TestBuffer_RuneAt_endOfFile(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!")) r := New(strings.NewReader("Hello, world!"))
rn, err := r.RuneAt(13) rn, err := r.RuneAt(13)
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result) assertEqual(t, "'<27>' EOF true", result)
rn, err = r.RuneAt(20) rn, err = r.RuneAt(20)
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result) assertEqual(t, "'<27>' EOF true", result)
} }
func TestBuffer_RuneAt_invalidRune(t *testing.T) { func TestBuffer_RuneAt_invalidRune(t *testing.T) {
r := read.New(strings.NewReader("Hello, \xcdworld!")) r := New(strings.NewReader("Hello, \xcdworld!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.RuneAt(i); return r }
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
AssertEqual(t, " <20>wo", result) assertEqual(t, " <20>wo", result)
} }
func ExampleBuffer_RuneAt() { func ExampleBuffer_RuneAt() {
reader := read.New(strings.NewReader("Hello, world!")) reader := New(strings.NewReader("Hello, world!"))
fmt.Printf("Runes: ") fmt.Printf("Runes: ")
for i := 0; ; i++ { for i := 0; ; i++ {
@ -113,21 +111,21 @@ func ExampleBuffer_RuneAt() {
} }
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
r := read.New(strings.NewReader("\uFEFFBommetje!")) r := New(strings.NewReader("\uFEFFBommetje!"))
b, _ := r.RuneAt(0) b, _ := r.RuneAt(0)
o, _ := r.RuneAt(1) o, _ := r.RuneAt(1)
m, _ := r.RuneAt(2) m, _ := r.RuneAt(2)
bom := fmt.Sprintf("%c%c%c", b, o, m) bom := fmt.Sprintf("%c%c%c", b, o, m)
AssertEqual(t, "Bom", bom) assertEqual(t, "Bom", bom)
} }
func TestBuffer_Flush(t *testing.T) { func TestBuffer_Flush(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!")) r := New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w" // Fills the buffer with the first 8 runes on the input: "Hello, w"
result := fmt.Sprintf("%c", at(7)) result := fmt.Sprintf("%c", at(7))
AssertEqual(t, "w", result) assertEqual(t, "w", result)
// Now flush the first 4 runes from the buffer (dropping "Hell" from it) // Now flush the first 4 runes from the buffer (dropping "Hell" from it)
r.Flush(4) r.Flush(4)
@ -135,11 +133,11 @@ func TestBuffer_Flush(t *testing.T) {
// Rune 0 is now pointing at what originally was rune offset 4. // Rune 0 is now pointing at what originally was rune offset 4.
// We can continue reading from there. // We can continue reading from there.
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
AssertEqual(t, "o, wor", result) assertEqual(t, "o, wor", result)
} }
func ExampleBuffer_Flush() { func ExampleBuffer_Flush() {
r := read.New(strings.NewReader("dog eat dog!")) r := New(strings.NewReader("dog eat dog!"))
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c } at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
// Read from the first 4 runes of the input. // Read from the first 4 runes of the input.
@ -162,33 +160,33 @@ func ExampleBuffer_Flush() {
} }
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!")) r := New(strings.NewReader("Hello, world!"))
// Fill buffer with "Hello, worl", the first 11 runes. // Fill buffer with "Hello, worl", the first 11 runes.
r.RuneAt(10) r.RuneAt(10)
// However, we flush 12 runes, which exceeds the buffer size. // However, we flush 12 runes, which exceeds the buffer size.
AssertPanic(t, assertPanic(t,
func() { r.Flush(12) }, func() { r.Flush(12) },
"parsekit.read.Buffer.Flush(): number of runes to flush "+ "parsekit.read.Buffer.Flush(): number of runes to flush "+
"(12) exceeds size of the buffer (11)") "(12) exceeds size of the buffer (11)")
} }
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) { func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!")) r := New(strings.NewReader("Hello, world!"))
_, err := r.RuneAt(13) _, err := r.RuneAt(13)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(13) _, err = r.RuneAt(13)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(14) _, err = r.RuneAt(14)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
r.Flush(13) r.Flush(13)
_, err = r.RuneAt(0) _, err = r.RuneAt(0)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(1) _, err = r.RuneAt(1)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(2) _, err = r.RuneAt(2)
AssertEqual(t, err.Error(), "EOF") assertEqual(t, err.Error(), "EOF")
} }
// In this test, I want to make sure that once a Buffer returns an error, // In this test, I want to make sure that once a Buffer returns an error,
@ -202,79 +200,156 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
io.ErrUnexpectedEOF, // This error must never popup in the tests below. io.ErrUnexpectedEOF, // This error must never popup in the tests below.
}, },
} }
r := read.New(input) r := New(input)
// Read the last available rune. // Read the last available rune.
readRune, _ := r.RuneAt(3) readRune, _ := r.RuneAt(3)
AssertEqual(t, 'd', readRune) assertEqual(t, 'd', readRune)
// Reading the next offset must result in the io.EOF error from the stub. // Reading the next offset must result in the io.EOF error from the stub.
readRune, err := r.RuneAt(4) readRune, err := r.RuneAt(4)
AssertEqual(t, utf8.RuneError, readRune) assertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
// Reading even further should yield the same io.EOF error. // Reading even further should yield the same io.EOF error.
readRune, err = r.RuneAt(5) readRune, err = r.RuneAt(5)
AssertEqual(t, utf8.RuneError, readRune) assertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
// After an error, we must still be able to read the last rune. // After an error, we must still be able to read the last rune.
readRune, _ = r.RuneAt(3) readRune, _ = r.RuneAt(3)
AssertEqual(t, 'd', readRune) assertEqual(t, 'd', readRune)
// Flushing updates the error index too. // Flushing updates the error index too.
r.Flush(3) r.Flush(3)
// The last rune is now at offset 0. // The last rune is now at offset 0.
readRune, _ = r.RuneAt(0) readRune, _ = r.RuneAt(0)
AssertEqual(t, 'd', readRune) assertEqual(t, 'd', readRune)
// The io.EOF is now at offset 1. // The io.EOF is now at offset 1.
_, err = r.RuneAt(1) _, err = r.RuneAt(1)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
// Let's flush that last rune too. // Let's flush that last rune too.
r.Flush(1) r.Flush(1)
// The io.EOF is now at offset 0. // The io.EOF is now at offset 0.
_, err = r.RuneAt(0) _, err = r.RuneAt(0)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
// And reading beyond that offset also yields io.EOF. // And reading beyond that offset also yields io.EOF.
_, err = r.RuneAt(1) _, err = r.RuneAt(1)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
} }
func TestInputLargerThanDefaultBufSize64(t *testing.T) { func TestInputLargerThanDefaultBufSize64(t *testing.T) {
input, size := makeLargeStubReader() input, size := makeLargeStubReader()
r := read.New(input) r := New(input)
readRune, err := r.RuneAt(0) readRune, err := r.RuneAt(0)
AssertEqual(t, 'X', readRune) assertEqual(t, 'X', readRune)
readRune, err = r.RuneAt(size - 1) readRune, err = r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune) assertEqual(t, 'Y', readRune)
readRune, err = r.RuneAt(size) readRune, err = r.RuneAt(size)
AssertEqual(t, io.EOF, err) assertEqual(t, io.EOF, err)
readRune, err = r.RuneAt(10) readRune, err = r.RuneAt(10)
AssertEqual(t, 'X', readRune) assertEqual(t, 'X', readRune)
} }
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) { func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
input, size := makeLargeStubReader() input, size := makeLargeStubReader()
r := read.New(input) r := New(input)
readRune, _ := r.RuneAt(size - 200) readRune, _ := r.RuneAt(size - 200)
AssertEqual(t, 'X', readRune) assertEqual(t, 'X', readRune)
readRune, _ = r.RuneAt(size - 1) readRune, _ = r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune) assertEqual(t, 'Y', readRune)
} }
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) { func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
input, size := makeLargeStubReader() input, size := makeLargeStubReader()
r := read.New(input) r := New(input)
readRune, _ := r.RuneAt(size - 1) readRune, _ := r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune) assertEqual(t, 'Y', readRune)
}
func TestAllocationPatterns(t *testing.T) {
input, _ := makeLargeStubReader()
r := New(input)
// The first read will create the standard cache.
// store |x 64 |
// buffer |x 64 |
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
// The first 64 reads will fit in the standard cache.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
// Flushing zero input keeps everything as-is.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64)
// Flushing all cached input truncates the cache.
// store | 64 |
// buffer | 64 |
assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64)
// Reading 65 chars will allocate a new store of 2 * size + n.
// store |xxxxx65xxxxx 128 |
// buffer |xxxxx65xxxxx 128 |
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
// A partial flush frees the start of the store and moves
// the buffer slice.
// store | 50 x15x 128 |
// buffer |x15x 128 |
assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128)
// The capacity for the buffer is now 2*64 + 15
// This number of runes can be read, filling up the store
// without a new allocation.
// store | 50 xxxxxxxxx143xxxxxxxx|
// buffer |xxxxxxxxx143xxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
// Flush the full input.
// store | 193 |
// buffer | |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193)
// Read a bit more than half the capacity.
// store |xxxxxx101xxxxxxxx 92 |
// buffer |xxxxxx101xxxxxxxx 92 |
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
// Then flush almost all input.
// store | 100 x1x 92 |
// buffer |x1x 92 |
assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93)
// Again read a bit more than half the capacity. This does not fit at the
// end of the store, but by moving the current buffer to the start of the
// store (where it fits), space is freed up for the read operation.
// store |xxxxx100xxxxxx 93 |
// buffer |xxxxx100xxxxxx 93 |
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
// Now flush only one rune from the cache.
// store |1 xxxx99xxxxx 93 |
// buffer |xxxx99xxxxx 93 |
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192)
// Now read one more than the capacity. This will not fit, so space has
// to be made. Since there's 1 free space at the start of the store,
// the data is moved to the start and no reallocation is needed.
// store |1 xxxx99xxxxx 93 |
// buffer |xxxx99xxxxx 93 |
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
} }
func makeLargeStubReader() (*StubReader, int) { func makeLargeStubReader() (*StubReader, int) {

View File

@ -201,7 +201,7 @@ func (i *API) Dispose() {
func (i *API) clearResults() { func (i *API) clearResults() {
i.result.lastRune = nil i.result.lastRune = nil
i.result.runes = []rune{} i.result.runes = []rune{}
i.result.tokens = []*Token{} i.result.tokens = []Token{}
i.result.err = nil i.result.err = nil
} }

View File

@ -55,11 +55,11 @@ func ExampleAPI_Result() {
fmt.Printf("API result runes: %q\n", api.Result().Runes()) fmt.Printf("API result runes: %q\n", api.Result().Runes())
fmt.Printf("API third rune: %q\n", api.Result().Rune(2)) fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
result.AddTokens(&tokenize.Token{ result.AddTokens(tokenize.Token{
Runes: []rune("demo 1"), Runes: []rune("demo 1"),
Type: 42, Type: 42,
Value: "towel"}) Value: "towel"})
result.AddTokens(&tokenize.Token{ result.AddTokens(tokenize.Token{
Runes: []rune("demo 2"), Runes: []rune("demo 2"),
Type: 73, Type: 73,
Value: "Zaphod"}) Value: "Zaphod"})

View File

@ -712,11 +712,11 @@ func MatchEndOfFile() Handler {
} }
} }
// MatchUntilEndOfLine creates a Handler function that accepts any rune // MatchUntilEndOfLine creates a Handler function that accepts one or
// until the end of the line (or file when that's the case). // more runes until the end of the line (or file when that's the case).
// The newline itself is not included in the match. // The newline itself is not included in the match.
func MatchUntilEndOfLine() Handler { func MatchUntilEndOfLine() Handler {
return MatchZeroOrMore(MatchNot(MatchEndOfLine())) return MatchOneOrMore(MatchNot(MatchEndOfLine()))
} }
// MatchAnyRune creates a Handler function that checks if a rune can be // MatchAnyRune creates a Handler function that checks if a rune can be
@ -1437,7 +1437,7 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the // e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
// tokens will end up in the order "date", "year", "month", "day". When we'd add the // tokens will end up in the order "date", "year", "month", "day". When we'd add the
// token to the child here, the order would have been "year", "month", "day", "date". // token to the child here, the order would have been "year", "month", "day", "date".
token := &Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)} token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
t.Result().AddTokens(token) t.Result().AddTokens(token)
child.Merge() child.Merge()
@ -1454,7 +1454,7 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
child := t.Fork() child := t.Fork()
if handler(child) { if handler(child) {
result := child.Result() result := child.Result()
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()} token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
result.SetTokens(token) result.SetTokens(token)
child.Merge() child.Merge()
return true return true

View File

@ -10,7 +10,7 @@ import (
type Result struct { type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune() lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions runes []rune // runes as added to the result by tokenize.Handler functions
tokens []*Token // Tokens as added to the result by tokenize.Handler functions tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor *Cursor // current read cursor position, relative to the start of the file cursor *Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input err error // can be used by a Handler to report a specific issue with the input
@ -69,7 +69,7 @@ func (t Token) String() string {
func newResult() *Result { func newResult() *Result {
return &Result{ return &Result{
runes: []rune{}, runes: []rune{},
tokens: []*Token{}, tokens: []Token{},
cursor: &Cursor{}, cursor: &Cursor{},
} }
} }
@ -122,29 +122,26 @@ func (r *Result) String() string {
// ClearTokens clears the tokens in the Result. // ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() { func (r *Result) ClearTokens() {
r.tokens = []*Token{} r.tokens = []Token{}
} }
// SetTokens replaces the Tokens from the Result with the provided tokens. // SetTokens replaces the Tokens from the Result with the provided tokens.
func (r *Result) SetTokens(tokens ...*Token) { func (r *Result) SetTokens(tokens ...Token) {
r.ClearTokens() r.tokens = tokens
for _, t := range tokens {
r.AddTokens(t)
}
} }
// AddTokens is used to add Tokens to the Result. // AddTokens is used to add Tokens to the Result.
func (r *Result) AddTokens(tokens ...*Token) { func (r *Result) AddTokens(tokens ...Token) {
r.tokens = append(r.tokens, tokens...) r.tokens = append(r.tokens, tokens...)
} }
// Tokens retrieves the Tokens from the Result. // Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token { func (r *Result) Tokens() []Token {
return r.tokens return r.tokens
} }
// Token retrieves a single Token from the Result at the specified index. // Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token { func (r *Result) Token(idx int) Token {
return r.tokens[idx] return r.tokens[idx]
} }