Backup changes for performance fixes.

Maurice Makaay 2019-07-05 15:07:07 +00:00
parent 5e9879326a
commit 7bc7fda593
10 changed files with 204 additions and 93 deletions

View File

@@ -19,7 +19,7 @@ type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
loopCheck map[filepos]bool // used for parser loop detection
loopCheck map[uintptr]bool // used for parser loop detection
err error // parse error, retrieved by Error(); when set, further use of API methods is denied
stopped bool // a boolean set to true by Stop()
}
@@ -125,14 +125,14 @@ func (p *API) IsStoppedOrInError() bool {
// When Accept() is called and the parser has moved forward in the input data,
// this method is called to reset the map for the new read cursor position.
func (p *API) initLoopCheck() {
p.loopCheck = make(map[filepos]bool)
p.loopCheck = make(map[uintptr]bool)
}
// checkForLoops checks if the line of code from which Accept() or Peek()
// was called has been seen before for the current read cursor position.
// If yes, then the parser is in a loop and the method will panic.
func (p *API) checkForLoops(name string) {
filepos := callerFilepos(3)
filepos := callerPointer(3)
if _, ok := p.loopCheck[filepos]; ok {
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
}
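
This change keys the loop-detection map on the raw program counter returned by runtime.Caller (see the callerPointer helper below) instead of on a filepos struct; a single uintptr hashes and compares faster as a map key than a struct holding a file name string and a line number. A minimal standalone sketch of the technique, with hypothetical names, not parsekit code:

    package main

    import (
        "fmt"
        "runtime"
    )

    // seen maps a call site's program counter to "already visited at the
    // current input position"; the parser resets such a map whenever the
    // read cursor moves forward.
    var seen = map[uintptr]bool{}

    // checkpoint records the code location of its caller, similar to what
    // checkForLoops does with callerPointer(3).
    func checkpoint() bool {
        pc, _, _, _ := runtime.Caller(1) // the pc alone identifies the call site
        if seen[pc] {
            return false // same line seen twice without progress: a loop
        }
        seen[pc] = true
        return true
    }

    func main() {
        for i := 0; i < 2; i++ {
            fmt.Println(checkpoint()) // true, then false: same pc both times
        }
    }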

View File

@@ -39,6 +39,12 @@ func callerFilepos(depth int) filepos {
return filepos{file, line}
}
func callerPointer(depth int) uintptr {
// No error handling, because we call this method ourselves with safe depth values.
p, _, _, _ := runtime.Caller(depth + 1)
return p
}
func callerPanic(name, f string, data ...interface{}) {
filepos := callerBefore(name)
m := fmt.Sprintf(f, data...)

View File

@@ -49,7 +49,7 @@ func new(startHandler Handler, sanityChecksEnabled bool) Func {
return func(input interface{}) error {
api := &API{
tokenAPI: tokenize.NewAPI(input),
loopCheck: make(map[filepos]bool),
loopCheck: make(map[uintptr]bool),
sanityChecksEnabled: sanityChecksEnabled,
}
if api.Handle(startHandler) {

View File

@@ -1,4 +1,4 @@
package read_test
package read
// This file contains some tools that are used for writing tests.
@@ -8,11 +8,18 @@ import (
"testing"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}) {
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
if expected != actual {
t.Errorf(
"Unexpected value at %s:\nexpected: %q\nactual: %q",
callerFilepos(1), expected, actual)
switch expected.(type) {
case rune:
t.Errorf(
"Unexpected value at %s:\nexpected: %q\nactual: %q",
callerFilepos(1), expected, actual)
default:
t.Errorf(
"Unexpected value at %s:\nexpected: %v\nactual: %v",
callerFilepos(1), expected, actual)
}
}
}
@@ -22,7 +29,7 @@ func callerFilepos(depth int) string {
return fmt.Sprintf("%s:%d", file, line)
}
func AssertPanic(t *testing.T, code func(), expected string) {
func assertPanic(t *testing.T, code func(), expected string) {
defer func() {
if r := recover(); r != nil {
if expected != r.(string) {
@@ -36,3 +43,19 @@ func AssertPanic(t *testing.T, code func(), expected string) {
}()
code()
}
func assertCache(t *testing.T, name string, r *Buffer, code func(), storeLen, storeCap, bufLen, bufCap int) {
code()
if storeLen != len(r.store) {
t.Errorf("[%s] Unexpected store len (expected %d, got %d)", name, storeLen, len(r.store))
}
if storeCap != cap(r.store) {
t.Errorf("[%s] Unexpected store cap (expected %d, got %d)", name, storeCap, cap(r.store))
}
if bufLen != len(r.buffer) {
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
}
if bufCap != cap(r.buffer) {
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, bufCap, cap(r.buffer))
}
}
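
The type switch in assertEqual above exists because %q renders runes as readable quoted characters, while the default %v prints a rune as its numeric code point. A quick illustration using plain fmt, independent of these helpers:

    package main

    import (
        "fmt"
        "io"
    )

    func main() {
        fmt.Printf("%q\n", 'H')    // 'H' — readable rune in a failure message
        fmt.Printf("%v\n", 'H')    // 72  — %v prints a rune as its code point
        fmt.Printf("%v\n", io.EOF) // EOF — %v handles non-rune values cleanly
    }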

View File

@@ -151,7 +151,6 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
r.buffer[writeAt] = readRune
}
}
return r.buffer[offset], nil
}
@@ -173,29 +172,40 @@ func (r *Buffer) grow(n int) {
if b < n {
b = n
}
r.store = make([]rune, n, b)
r.buffer = r.store
r.store = make([]rune, 0, b)
r.buffer = r.store[:n]
return
}
l := len(r.buffer)
c := cap(r.buffer)
lenBuffer := len(r.buffer)
capBuffer := cap(r.buffer)
freeBuffer := capBuffer - lenBuffer
newSize := lenBuffer + n
// Grow the buffer by reslicing within the available capacity.
if n <= c-l {
r.buffer = r.buffer[:l+n]
if freeBuffer >= n {
r.buffer = r.buffer[:newSize]
return
}
capStore := cap(r.store)
freeAtStartOfStore := capStore - capBuffer
// Grow the buffer by moving the data to the start of the store.
if cap(r.store)-l-n > 0 {
// Note: according to the spec, overlapping slices are allowed with copy().
if freeAtStartOfStore > 0 && newSize <= capStore {
r.store = r.store[0:newSize]
copy(r.store, r.buffer)
r.buffer = r.store[:l+n]
r.buffer = r.store[:newSize]
r.store = r.store[:0]
return
}
// Grow the buffer store by allocating a new one and copying the data.
buf := makeSlice(2*cap(r.store) + n)
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
buf := makeSlice(2*capStore + n)
copy(buf, r.buffer)
r.store = buf
r.buffer = r.store[:l+n]
r.buffer = r.store[:newSize]
}
// makeSlice allocates a slice with a capacity of n runes. If the allocation fails, it panics
@@ -207,7 +217,7 @@ func makeSlice(n int) []rune {
panic(ErrTooLarge)
}
}()
return make([]rune, n)
return make([]rune, 0, n)
}
// Flush deletes the provided number of runes from the start of the Buffer.
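
Taken together, the rewritten grow() tries three strategies in order: reslice the buffer within its spare capacity, slide the buffered data back to the start of the store, and only as a last resort allocate a new store of 2*cap(store) + n runes. A simplified standalone sketch of that cascade (illustrative names and signatures, not the actual Buffer code):

    package main

    import "fmt"

    // grow ensures buf can hold n more runes, mirroring the three strategies
    // in Buffer.grow: reslice, move to the start of the store, reallocate.
    func grow(store, buf []rune, n int) ([]rune, []rune) {
        need := len(buf) + n
        switch {
        case cap(buf)-len(buf) >= n:
            // 1. Cheapest: spare capacity at the end of the buffer.
            return store, buf[:need]
        case cap(store) >= need:
            // 2. Slide the data back to the start of the store; per the
            //    spec, copy() handles overlapping source and destination.
            moved := store[:need]
            copy(moved, buf)
            return store[:0], moved
        default:
            // 3. Last resort: allocate a new store of 2*cap(store)+n runes.
            bigger := make([]rune, need, 2*cap(store)+n)
            copy(bigger, buf)
            return bigger[:0], bigger
        }
    }

    func main() {
        store := make([]rune, 0, 4)
        buf := store[:2]
        store, buf = grow(store, buf, 4)  // needs 6 > cap 4: strategy 3 fires
        fmt.Println(len(buf), cap(store)) // 6 12 (12 = 2*4 + 4)
    }

The makeSlice change fits the same picture: the store is now created with length zero and only capacity n, so the buffer slice, not the store, tracks how much data is actually in use.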

View File

@@ -1,4 +1,4 @@
package read_test
package read
import (
"bufio"
@@ -7,13 +7,11 @@ import (
"strings"
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/read"
)
func ExampleNew() {
printFirstRuneOf := func(input interface{}) {
r := read.New(input)
r := New(input)
c, _ := r.RuneAt(0)
fmt.Printf("%q\n", c)
}
@@ -47,7 +45,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
} {
r := read.New(test.input)
r := New(test.input)
firstRune, _ := r.RuneAt(0)
if firstRune != 'H' {
t.Errorf("[%s] first rune not 'H'", test.name)
@@ -60,42 +58,42 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
}
func TestNew_UnhandledInputType_Panics(t *testing.T) {
AssertPanic(t,
func() { read.New(12345) },
assertPanic(t,
func() { New(12345) },
"parsekit.read.New(): no support for input of type int")
}
func TestBuffer_RuneAt(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
r := New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// It is possible to go back and forth while reading the input.
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
AssertEqual(t, "H!wH", result)
assertEqual(t, "H!wH", result)
}
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
r := New(strings.NewReader("Hello, world!"))
rn, err := r.RuneAt(13)
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result)
assertEqual(t, "'<27>' EOF true", result)
rn, err = r.RuneAt(20)
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result)
assertEqual(t, "'<27>' EOF true", result)
}
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
r := read.New(strings.NewReader("Hello, \xcdworld!"))
r := New(strings.NewReader("Hello, \xcdworld!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
AssertEqual(t, " <20>wo", result)
assertEqual(t, " <20>wo", result)
}
func ExampleBuffer_RuneAt() {
reader := read.New(strings.NewReader("Hello, world!"))
reader := New(strings.NewReader("Hello, world!"))
fmt.Printf("Runes: ")
for i := 0; ; i++ {
@@ -113,21 +111,21 @@ func ExampleBuffer_RuneAt() {
}
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
r := read.New(strings.NewReader("\uFEFFBommetje!"))
r := New(strings.NewReader("\uFEFFBommetje!"))
b, _ := r.RuneAt(0)
o, _ := r.RuneAt(1)
m, _ := r.RuneAt(2)
bom := fmt.Sprintf("%c%c%c", b, o, m)
AssertEqual(t, "Bom", bom)
assertEqual(t, "Bom", bom)
}
func TestBuffer_Flush(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
r := New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w"
result := fmt.Sprintf("%c", at(7))
AssertEqual(t, "w", result)
assertEqual(t, "w", result)
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
r.Flush(4)
@@ -135,11 +133,11 @@ func TestBuffer_Flush(t *testing.T) {
// Rune 0 is now pointing at what originally was rune offset 4.
// We can continue reading from there.
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
AssertEqual(t, "o, wor", result)
assertEqual(t, "o, wor", result)
}
func ExampleBuffer_Flush() {
r := read.New(strings.NewReader("dog eat dog!"))
r := New(strings.NewReader("dog eat dog!"))
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
// Read from the first 4 runes of the input.
@@ -162,33 +160,33 @@ func ExampleBuffer_Flush() {
}
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
r := New(strings.NewReader("Hello, world!"))
// Fill buffer with "Hello, worl", the first 11 runes.
r.RuneAt(10)
// However, we flush 12 runes, which exceeds the buffer size.
AssertPanic(t,
assertPanic(t,
func() { r.Flush(12) },
"parsekit.read.Buffer.Flush(): number of runes to flush "+
"(12) exceeds size of the buffer (11)")
}
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
r := New(strings.NewReader("Hello, world!"))
_, err := r.RuneAt(13)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(13)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(14)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
r.Flush(13)
_, err = r.RuneAt(0)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(1)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(2)
AssertEqual(t, err.Error(), "EOF")
assertEqual(t, err.Error(), "EOF")
}
// In this test, I want to make sure that once a Buffer returns an error,
@@ -202,79 +200,156 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
io.ErrUnexpectedEOF, // This error must never popup in the tests below.
},
}
r := read.New(input)
r := New(input)
// Read the last available rune.
readRune, _ := r.RuneAt(3)
AssertEqual(t, 'd', readRune)
assertEqual(t, 'd', readRune)
// Reading the next offset must result in the io.EOF error from the stub.
readRune, err := r.RuneAt(4)
AssertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err)
assertEqual(t, utf8.RuneError, readRune)
assertEqual(t, io.EOF, err)
// Reading even further should yield the same io.EOF error.
readRune, err = r.RuneAt(5)
AssertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err)
assertEqual(t, utf8.RuneError, readRune)
assertEqual(t, io.EOF, err)
// After an error, we must still be able to read the last rune.
readRune, _ = r.RuneAt(3)
AssertEqual(t, 'd', readRune)
assertEqual(t, 'd', readRune)
// Flushing updates the error index too.
r.Flush(3)
// The last rune is now at offset 0.
readRune, _ = r.RuneAt(0)
AssertEqual(t, 'd', readRune)
assertEqual(t, 'd', readRune)
// The io.EOF is now at offset 1.
_, err = r.RuneAt(1)
AssertEqual(t, io.EOF, err)
assertEqual(t, io.EOF, err)
// Let's flush that last rune too.
r.Flush(1)
// The io.EOF is now at offset 0.
_, err = r.RuneAt(0)
AssertEqual(t, io.EOF, err)
assertEqual(t, io.EOF, err)
// And reading beyond that offset also yields io.EOF.
_, err = r.RuneAt(1)
AssertEqual(t, io.EOF, err)
assertEqual(t, io.EOF, err)
}
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
input, size := makeLargeStubReader()
r := read.New(input)
r := New(input)
readRune, err := r.RuneAt(0)
AssertEqual(t, 'X', readRune)
assertEqual(t, 'X', readRune)
readRune, err = r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune)
assertEqual(t, 'Y', readRune)
readRune, err = r.RuneAt(size)
AssertEqual(t, io.EOF, err)
assertEqual(t, io.EOF, err)
readRune, err = r.RuneAt(10)
AssertEqual(t, 'X', readRune)
assertEqual(t, 'X', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
input, size := makeLargeStubReader()
r := read.New(input)
r := New(input)
readRune, _ := r.RuneAt(size - 200)
AssertEqual(t, 'X', readRune)
assertEqual(t, 'X', readRune)
readRune, _ = r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune)
assertEqual(t, 'Y', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
input, size := makeLargeStubReader()
r := read.New(input)
r := New(input)
readRune, _ := r.RuneAt(size - 1)
AssertEqual(t, 'Y', readRune)
assertEqual(t, 'Y', readRune)
}
func TestAllocationPatterns(t *testing.T) {
input, _ := makeLargeStubReader()
r := New(input)
// The first read will create the standard cache.
// store |x 64 |
// buffer |x 64 |
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
// The first 64 reads will fit in the standard cache.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
// Flushing zero input keeps everything as-is.
// store |xxxx64xxxxx|
// buffer |xxxx64xxxxx|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64)
// Flushing all cached input truncates the cache.
// store | 64 |
// buffer | 64 |
assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64)
// Reading 65 runes will allocate a new store of 2*cap(store) + n runes.
// store |xxxxx65xxxxx 128 |
// buffer |xxxxx65xxxxx 128 |
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
// A partial flush frees the start of the store and moves
// the buffer slice.
// store | 50 x15x 128 |
// buffer |x15x 128 |
assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128)
// The capacity for the buffer is now 2*64 + 15
// This number of runes can be read, filling up the store
// without a new allocation.
// store | 50 xxxxxxxxx143xxxxxxxx|
// buffer |xxxxxxxxx143xxxxxxxx|
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
// Flush the full input.
// store | 193 |
// buffer | |
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193)
// Read a bit more than half the capacity.
// store |xxxxxx101xxxxxxxx 92 |
// buffer |xxxxxx101xxxxxxxx 92 |
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
// Then flush almost all input.
// store | 100 x1x 92 |
// buffer |x1x 92 |
assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93)
// Again read a bit more than half the capacity. This does not fit at the
// end of the store, but by moving the current buffer to the start of the
// store (where it fits), space is freed up for the read operation.
// store |xxxxx100xxxxxx 93 |
// buffer |xxxxx100xxxxxx 93 |
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
// Now flush only one rune from the cache.
// store |1 xxxx99xxxxx 93 |
// buffer |xxxx99xxxxx 93 |
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192)
// Now read one more than the capacity. This will not fit, so space has
// to be made. Since there's 1 free space at the start of the store,
// the data is moved to the start and no reallocation is needed.
// store |xxxxxxxx193xxxxxxxx|
// buffer |xxxxxxxx193xxxxxxxx|
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
}
func makeLargeStubReader() (*StubReader, int) {
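
The capacities asserted in TestAllocationPatterns all follow from the 2*cap(store) + n reallocation rule. A back-of-the-envelope check of two of them (plain arithmetic, not part of the package):

    package main

    import "fmt"

    func main() {
        // First reallocation: the flushed store still has cap 64 and rune 65
        // is requested, so the new store gets capacity 2*64 + 65.
        fmt.Println(2*64 + 65) // 193, the 65+128 asserted in "read cap + 1"
        // After flushing 50 runes, the buffer keeps 15 runes plus room for
        // 128 more, so 143 runes fit without another allocation.
        fmt.Println(15 + 128) // 143, matching "read fill cache after partial flush"
    }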

View File

@@ -201,7 +201,7 @@ func (i *API) Dispose() {
func (i *API) clearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []*Token{}
i.result.tokens = []Token{}
i.result.err = nil
}

View File

@@ -55,11 +55,11 @@ func ExampleAPI_Result() {
fmt.Printf("API result runes: %q\n", api.Result().Runes())
fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
result.AddTokens(&tokenize.Token{
result.AddTokens(tokenize.Token{
Runes: []rune("demo 1"),
Type: 42,
Value: "towel"})
result.AddTokens(&tokenize.Token{
result.AddTokens(tokenize.Token{
Runes: []rune("demo 2"),
Type: 73,
Value: "Zaphod"})

View File

@@ -712,11 +712,11 @@ func MatchEndOfFile() Handler {
}
}
// MatchUntilEndOfLine creates a Handler function that accepts any rune
// until the end of the line (or file when that's the case).
// MatchUntilEndOfLine creates a Handler function that accepts one or
// more runes until the end of the line (or file when that's the case).
// The newline itself is not included in the match.
func MatchUntilEndOfLine() Handler {
return MatchZeroOrMore(MatchNot(MatchEndOfLine()))
return MatchOneOrMore(MatchNot(MatchEndOfLine()))
}
// MatchAnyRune creates a Handler function that checks if a rune can be
@@ -1437,7 +1437,7 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
// token to the child here, the order would have been "year", "month", "day", "date".
token := &Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
t.Result().AddTokens(token)
child.Merge()
@@ -1454,7 +1454,7 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
child := t.Fork()
if handler(child) {
result := child.Result()
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
result.SetTokens(token)
child.Merge()
return true
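
The MatchUntilEndOfLine change above is a semantic fix as much as a performance one: a zero-or-more matcher succeeds with zero width on an empty line, which is exactly the kind of no-progress match the loop detection in the parse package guards against. A standalone analogy of the difference (plain Go, not the parsekit Handler API):

    package main

    import "fmt"

    // matchWhile consumes leading runes of s for which pred holds and reports
    // how many were consumed; it stands in for repeating MatchNot(newline).
    func matchWhile(s string, pred func(rune) bool) int {
        n := 0
        for _, r := range s {
            if !pred(r) {
                break
            }
            n++
        }
        return n
    }

    func main() {
        notNewline := func(r rune) bool { return r != '\n' }
        // Zero-or-more semantics: an empty line still "matches" with width 0,
        // so a surrounding repetition can loop without making progress.
        fmt.Println(matchWhile("\nrest", notNewline) >= 0) // true (0 runes)
        // One-or-more semantics: only match after real progress.
        fmt.Println(matchWhile("\nrest", notNewline) >= 1) // false
        fmt.Println(matchWhile("abc\n", notNewline) >= 1)  // true (3 runes)
    }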

View File

@@ -10,7 +10,7 @@ import (
type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []*Token // Tokens as added to the result by tokenize.Handler functions
tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor *Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input
@@ -69,7 +69,7 @@ func (t Token) String() string {
func newResult() *Result {
return &Result{
runes: []rune{},
tokens: []*Token{},
tokens: []Token{},
cursor: &Cursor{},
}
}
@@ -122,29 +122,26 @@ func (r *Result) String() string {
// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
r.tokens = []*Token{}
r.tokens = []Token{}
}
// SetTokens replaces the Tokens from the Result with the provided tokens.
func (r *Result) SetTokens(tokens ...*Token) {
r.ClearTokens()
for _, t := range tokens {
r.AddTokens(t)
}
func (r *Result) SetTokens(tokens ...Token) {
r.tokens = tokens
}
// AddTokens is used to add Tokens to the Result.
func (r *Result) AddTokens(tokens ...*Token) {
func (r *Result) AddTokens(tokens ...Token) {
r.tokens = append(r.tokens, tokens...)
}
// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token {
func (r *Result) Tokens() []Token {
return r.tokens
}
// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token {
func (r *Result) Token(idx int) Token {
return r.tokens[idx]
}
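
Switching the token slice from []*Token to []Token stores tokens inline in the slice's backing array: AddTokens appends values instead of allocating every Token separately on the heap behind a pointer. A minimal sketch of the difference, using a stand-in struct rather than the real Token type:

    package main

    import "fmt"

    type token struct {
        Type  interface{}
        Value string
    }

    func main() {
        // Pointer slice: each element is its own heap allocation, and every
        // access chases a pointer.
        byPtr := []*token{{Type: 42, Value: "towel"}}

        // Value slice: elements live contiguously in one backing array;
        // append copies the struct, which is cheap while tokens stay small.
        byVal := []token{{Type: 42, Value: "towel"}}

        fmt.Println(byPtr[0].Value, byVal[0].Value)
    }

The trade-off is that Token(idx) and range loops now copy the struct on each read, which stays cheap as long as Token holds only a type, a rune slice header, and a value.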