Backup changes for performance fixes.
This commit is contained in:
parent
5e9879326a
commit
7bc7fda593
|
@ -19,7 +19,7 @@ type API struct {
|
||||||
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
|
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
|
||||||
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
|
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
|
||||||
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
|
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
|
||||||
loopCheck map[filepos]bool // used for parser loop detection
|
loopCheck map[uintptr]bool // used for parser loop detection
|
||||||
err error // parse error, retrieved by Error(), using API methods is denied when set
|
err error // parse error, retrieved by Error(), using API methods is denied when set
|
||||||
stopped bool // a boolean set to true by Stop()
|
stopped bool // a boolean set to true by Stop()
|
||||||
}
|
}
|
||||||
|
@ -125,14 +125,14 @@ func (p *API) IsStoppedOrInError() bool {
|
||||||
// When Accept() is called, and the parser moved forward in the input data,
|
// When Accept() is called, and the parser moved forward in the input data,
|
||||||
// this method is called to reset the map for the new read cursor position.
|
// this method is called to reset the map for the new read cursor position.
|
||||||
func (p *API) initLoopCheck() {
|
func (p *API) initLoopCheck() {
|
||||||
p.loopCheck = make(map[filepos]bool)
|
p.loopCheck = make(map[uintptr]bool)
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkForLoops checks if the line of code from which Accept() or Peek()
|
// checkForLoops checks if the line of code from which Accept() or Peek()
|
||||||
// was called has been seen before for the current read cursor position.
|
// was called has been seen before for the current read cursor position.
|
||||||
// If yes, then the parser is in a loop and the method will panic.
|
// If yes, then the parser is in a loop and the method will panic.
|
||||||
func (p *API) checkForLoops(name string) {
|
func (p *API) checkForLoops(name string) {
|
||||||
filepos := callerFilepos(3)
|
filepos := callerPointer(3)
|
||||||
if _, ok := p.loopCheck[filepos]; ok {
|
if _, ok := p.loopCheck[filepos]; ok {
|
||||||
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
|
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,12 @@ func callerFilepos(depth int) filepos {
|
||||||
return filepos{file, line}
|
return filepos{file, line}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func callerPointer(depth int) uintptr {
|
||||||
|
// No error handling, because we call this method ourselves with safe depth values.
|
||||||
|
p, _, _, _ := runtime.Caller(depth + 1)
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
func callerPanic(name, f string, data ...interface{}) {
|
func callerPanic(name, f string, data ...interface{}) {
|
||||||
filepos := callerBefore(name)
|
filepos := callerBefore(name)
|
||||||
m := fmt.Sprintf(f, data...)
|
m := fmt.Sprintf(f, data...)
|
||||||
|
|
|
@ -49,7 +49,7 @@ func new(startHandler Handler, sanityChecksEnabled bool) Func {
|
||||||
return func(input interface{}) error {
|
return func(input interface{}) error {
|
||||||
api := &API{
|
api := &API{
|
||||||
tokenAPI: tokenize.NewAPI(input),
|
tokenAPI: tokenize.NewAPI(input),
|
||||||
loopCheck: make(map[filepos]bool),
|
loopCheck: make(map[uintptr]bool),
|
||||||
sanityChecksEnabled: sanityChecksEnabled,
|
sanityChecksEnabled: sanityChecksEnabled,
|
||||||
}
|
}
|
||||||
if api.Handle(startHandler) {
|
if api.Handle(startHandler) {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package read_test
|
package read
|
||||||
|
|
||||||
// This file contains some tools that are used for writing tests.
|
// This file contains some tools that are used for writing tests.
|
||||||
|
|
||||||
|
@ -8,11 +8,18 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func AssertEqual(t *testing.T, expected interface{}, actual interface{}) {
|
func assertEqual(t *testing.T, expected interface{}, actual interface{}) {
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
|
switch expected.(type) {
|
||||||
|
case rune:
|
||||||
t.Errorf(
|
t.Errorf(
|
||||||
"Unexpected value at %s:\nexpected: %q\nactual: %q",
|
"Unexpected value at %s:\nexpected: %q\nactual: %q",
|
||||||
callerFilepos(1), expected, actual)
|
callerFilepos(1), expected, actual)
|
||||||
|
default:
|
||||||
|
t.Errorf(
|
||||||
|
"Unexpected value at %s:\nexpected: %v\nactual: %v",
|
||||||
|
callerFilepos(1), expected, actual)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +29,7 @@ func callerFilepos(depth int) string {
|
||||||
return fmt.Sprintf("%s:%d", file, line)
|
return fmt.Sprintf("%s:%d", file, line)
|
||||||
}
|
}
|
||||||
|
|
||||||
func AssertPanic(t *testing.T, code func(), expected string) {
|
func assertPanic(t *testing.T, code func(), expected string) {
|
||||||
defer func() {
|
defer func() {
|
||||||
if r := recover(); r != nil {
|
if r := recover(); r != nil {
|
||||||
if expected != r.(string) {
|
if expected != r.(string) {
|
||||||
|
@ -36,3 +43,19 @@ func AssertPanic(t *testing.T, code func(), expected string) {
|
||||||
}()
|
}()
|
||||||
code()
|
code()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func assertCache(t *testing.T, name string, r *Buffer, code func(), storeLen, storeCap, bufLen, bufCap int) {
|
||||||
|
code()
|
||||||
|
if storeLen != len(r.store) {
|
||||||
|
t.Errorf("[%s] Unexpected store len (expected %d, got %d)", name, storeLen, len(r.store))
|
||||||
|
}
|
||||||
|
if storeCap != cap(r.store) {
|
||||||
|
t.Errorf("[%s] Unexpected store cap (expected %d, got %d)", name, storeCap, cap(r.store))
|
||||||
|
}
|
||||||
|
if bufLen != len(r.buffer) {
|
||||||
|
t.Errorf("[%s] Unexpected buffer len (expected %d, got %d)", name, bufLen, len(r.buffer))
|
||||||
|
}
|
||||||
|
if bufCap != cap(r.buffer) {
|
||||||
|
t.Errorf("[%s] Unexpected buffer cap (expected %d, got %d)", name, bufCap, cap(r.buffer))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
36
read/read.go
36
read/read.go
|
@ -151,7 +151,6 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
|
||||||
r.buffer[writeAt] = readRune
|
r.buffer[writeAt] = readRune
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return r.buffer[offset], nil
|
return r.buffer[offset], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,29 +172,40 @@ func (r *Buffer) grow(n int) {
|
||||||
if b < n {
|
if b < n {
|
||||||
b = n
|
b = n
|
||||||
}
|
}
|
||||||
r.store = make([]rune, n, b)
|
r.store = make([]rune, 0, b)
|
||||||
r.buffer = r.store
|
r.buffer = r.store[:n]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
l := len(r.buffer)
|
|
||||||
c := cap(r.buffer)
|
lenBuffer := len(r.buffer)
|
||||||
|
capBuffer := cap(r.buffer)
|
||||||
|
freeBuffer := capBuffer - lenBuffer
|
||||||
|
newSize := lenBuffer + n
|
||||||
|
|
||||||
// Grow the buffer store by reslicing within the available capacity.
|
// Grow the buffer store by reslicing within the available capacity.
|
||||||
if n <= c-l {
|
if freeBuffer >= n {
|
||||||
r.buffer = r.buffer[:l+n]
|
r.buffer = r.buffer[:newSize]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
capStore := cap(r.store)
|
||||||
|
freeAtStartOfStore := capStore - capBuffer
|
||||||
|
|
||||||
// Grow the buffer by moving the data to the start of the store.
|
// Grow the buffer by moving the data to the start of the store.
|
||||||
if cap(r.store)-l-n > 0 {
|
// Note: according to the spec, overlapping slices are allowed with copy().
|
||||||
|
if freeAtStartOfStore > 0 && newSize <= capStore {
|
||||||
|
r.store = r.store[0:newSize]
|
||||||
copy(r.store, r.buffer)
|
copy(r.store, r.buffer)
|
||||||
r.buffer = r.store[:l+n]
|
r.buffer = r.store[:newSize]
|
||||||
|
r.store = r.store[:0]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grow the buffer store by allocating a new one and copying the data.
|
// Grow the buffer store by allocating a new one and copying the data.
|
||||||
buf := makeSlice(2*cap(r.store) + n)
|
buf := makeSlice(2*capStore + n)
|
||||||
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
|
|
||||||
copy(buf, r.buffer)
|
copy(buf, r.buffer)
|
||||||
r.store = buf
|
r.store = buf
|
||||||
r.buffer = r.store[:l+n]
|
r.buffer = r.store[:newSize]
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||||
|
@ -207,7 +217,7 @@ func makeSlice(n int) []rune {
|
||||||
panic(ErrTooLarge)
|
panic(ErrTooLarge)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
return make([]rune, n)
|
return make([]rune, 0, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush deletes the provided number of runes from the start of the Buffer.
|
// Flush deletes the provided number of runes from the start of the Buffer.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
package read_test
|
package read
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
@ -7,13 +7,11 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func ExampleNew() {
|
func ExampleNew() {
|
||||||
printFirstRuneOf := func(input interface{}) {
|
printFirstRuneOf := func(input interface{}) {
|
||||||
r := read.New(input)
|
r := New(input)
|
||||||
c, _ := r.RuneAt(0)
|
c, _ := r.RuneAt(0)
|
||||||
fmt.Printf("%q\n", c)
|
fmt.Printf("%q\n", c)
|
||||||
}
|
}
|
||||||
|
@ -47,7 +45,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||||||
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
|
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
|
||||||
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
||||||
} {
|
} {
|
||||||
r := read.New(test.input)
|
r := New(test.input)
|
||||||
firstRune, _ := r.RuneAt(0)
|
firstRune, _ := r.RuneAt(0)
|
||||||
if firstRune != 'H' {
|
if firstRune != 'H' {
|
||||||
t.Errorf("[%s] first rune not 'H'", test.name)
|
t.Errorf("[%s] first rune not 'H'", test.name)
|
||||||
|
@ -60,42 +58,42 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
||||||
AssertPanic(t,
|
assertPanic(t,
|
||||||
func() { read.New(12345) },
|
func() { New(12345) },
|
||||||
"parsekit.read.New(): no support for input of type int")
|
"parsekit.read.New(): no support for input of type int")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt(t *testing.T) {
|
func TestBuffer_RuneAt(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
// It is possible to go back and forth while reading the input.
|
// It is possible to go back and forth while reading the input.
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||||
AssertEqual(t, "H!wH", result)
|
assertEqual(t, "H!wH", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
rn, err := r.RuneAt(13)
|
rn, err := r.RuneAt(13)
|
||||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
AssertEqual(t, "'<27>' EOF true", result)
|
assertEqual(t, "'<27>' EOF true", result)
|
||||||
|
|
||||||
rn, err = r.RuneAt(20)
|
rn, err = r.RuneAt(20)
|
||||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||||
AssertEqual(t, "'<27>' EOF true", result)
|
assertEqual(t, "'<27>' EOF true", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, \xcdworld!"))
|
r := New(strings.NewReader("Hello, \xcdworld!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||||
AssertEqual(t, " <20>wo", result)
|
assertEqual(t, " <20>wo", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleBuffer_RuneAt() {
|
func ExampleBuffer_RuneAt() {
|
||||||
reader := read.New(strings.NewReader("Hello, world!"))
|
reader := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
fmt.Printf("Runes: ")
|
fmt.Printf("Runes: ")
|
||||||
for i := 0; ; i++ {
|
for i := 0; ; i++ {
|
||||||
|
@ -113,21 +111,21 @@ func ExampleBuffer_RuneAt() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("\uFEFFBommetje!"))
|
r := New(strings.NewReader("\uFEFFBommetje!"))
|
||||||
b, _ := r.RuneAt(0)
|
b, _ := r.RuneAt(0)
|
||||||
o, _ := r.RuneAt(1)
|
o, _ := r.RuneAt(1)
|
||||||
m, _ := r.RuneAt(2)
|
m, _ := r.RuneAt(2)
|
||||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||||
AssertEqual(t, "Bom", bom)
|
assertEqual(t, "Bom", bom)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuffer_Flush(t *testing.T) {
|
func TestBuffer_Flush(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||||
result := fmt.Sprintf("%c", at(7))
|
result := fmt.Sprintf("%c", at(7))
|
||||||
AssertEqual(t, "w", result)
|
assertEqual(t, "w", result)
|
||||||
|
|
||||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||||
r.Flush(4)
|
r.Flush(4)
|
||||||
|
@ -135,11 +133,11 @@ func TestBuffer_Flush(t *testing.T) {
|
||||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||||
// We can continue reading from there.
|
// We can continue reading from there.
|
||||||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||||||
AssertEqual(t, "o, wor", result)
|
assertEqual(t, "o, wor", result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleBuffer_Flush() {
|
func ExampleBuffer_Flush() {
|
||||||
r := read.New(strings.NewReader("dog eat dog!"))
|
r := New(strings.NewReader("dog eat dog!"))
|
||||||
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
||||||
|
|
||||||
// Read from the first 4 runes of the input.
|
// Read from the first 4 runes of the input.
|
||||||
|
@ -162,33 +160,33 @@ func ExampleBuffer_Flush() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
|
|
||||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||||
r.RuneAt(10)
|
r.RuneAt(10)
|
||||||
|
|
||||||
// However, we flush 12 runes, which exceeds the buffer size.
|
// However, we flush 12 runes, which exceeds the buffer size.
|
||||||
AssertPanic(t,
|
assertPanic(t,
|
||||||
func() { r.Flush(12) },
|
func() { r.Flush(12) },
|
||||||
"parsekit.read.Buffer.Flush(): number of runes to flush "+
|
"parsekit.read.Buffer.Flush(): number of runes to flush "+
|
||||||
"(12) exceeds size of the buffer (11)")
|
"(12) exceeds size of the buffer (11)")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
||||||
r := read.New(strings.NewReader("Hello, world!"))
|
r := New(strings.NewReader("Hello, world!"))
|
||||||
_, err := r.RuneAt(13)
|
_, err := r.RuneAt(13)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(13)
|
_, err = r.RuneAt(13)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(14)
|
_, err = r.RuneAt(14)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
r.Flush(13)
|
r.Flush(13)
|
||||||
_, err = r.RuneAt(0)
|
_, err = r.RuneAt(0)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(1)
|
_, err = r.RuneAt(1)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
_, err = r.RuneAt(2)
|
_, err = r.RuneAt(2)
|
||||||
AssertEqual(t, err.Error(), "EOF")
|
assertEqual(t, err.Error(), "EOF")
|
||||||
}
|
}
|
||||||
|
|
||||||
// In this test, I want to make sure that once a Buffer returns an error,
|
// In this test, I want to make sure that once a Buffer returns an error,
|
||||||
|
@ -202,79 +200,156 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
|
||||||
io.ErrUnexpectedEOF, // This error must never popup in the tests below.
|
io.ErrUnexpectedEOF, // This error must never popup in the tests below.
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
r := read.New(input)
|
r := New(input)
|
||||||
|
|
||||||
// Read the last availble rune.
|
// Read the last availble rune.
|
||||||
readRune, _ := r.RuneAt(3)
|
readRune, _ := r.RuneAt(3)
|
||||||
AssertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// Reading the next offset must result in the io.EOF error from the stub.
|
// Reading the next offset must result in the io.EOF error from the stub.
|
||||||
readRune, err := r.RuneAt(4)
|
readRune, err := r.RuneAt(4)
|
||||||
AssertEqual(t, utf8.RuneError, readRune)
|
assertEqual(t, utf8.RuneError, readRune)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// Reading even further should yield the same io.EOF error.
|
// Reading even further should yield the same io.EOF error.
|
||||||
readRune, err = r.RuneAt(5)
|
readRune, err = r.RuneAt(5)
|
||||||
AssertEqual(t, utf8.RuneError, readRune)
|
assertEqual(t, utf8.RuneError, readRune)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// After an error, we must still be able to read the last rune.
|
// After an error, we must still be able to read the last rune.
|
||||||
readRune, _ = r.RuneAt(3)
|
readRune, _ = r.RuneAt(3)
|
||||||
AssertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// Flushing updates the error index too.
|
// Flushing updates the error index too.
|
||||||
r.Flush(3)
|
r.Flush(3)
|
||||||
|
|
||||||
// The last rune is now at offset 0.
|
// The last rune is now at offset 0.
|
||||||
readRune, _ = r.RuneAt(0)
|
readRune, _ = r.RuneAt(0)
|
||||||
AssertEqual(t, 'd', readRune)
|
assertEqual(t, 'd', readRune)
|
||||||
|
|
||||||
// The io.EOF is now at offset 1.
|
// The io.EOF is now at offset 1.
|
||||||
_, err = r.RuneAt(1)
|
_, err = r.RuneAt(1)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// Let's flush that last rune too.
|
// Let's flush that last rune too.
|
||||||
r.Flush(1)
|
r.Flush(1)
|
||||||
|
|
||||||
// The io.EOF is now at offset 0.
|
// The io.EOF is now at offset 0.
|
||||||
_, err = r.RuneAt(0)
|
_, err = r.RuneAt(0)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
|
|
||||||
// And reading beyond that offset also yields io.EOF.
|
// And reading beyond that offset also yields io.EOF.
|
||||||
_, err = r.RuneAt(1)
|
_, err = r.RuneAt(1)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := read.New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, err := r.RuneAt(0)
|
readRune, err := r.RuneAt(0)
|
||||||
AssertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
readRune, err = r.RuneAt(size - 1)
|
readRune, err = r.RuneAt(size - 1)
|
||||||
AssertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
readRune, err = r.RuneAt(size)
|
readRune, err = r.RuneAt(size)
|
||||||
AssertEqual(t, io.EOF, err)
|
assertEqual(t, io.EOF, err)
|
||||||
readRune, err = r.RuneAt(10)
|
readRune, err = r.RuneAt(10)
|
||||||
AssertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
|
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := read.New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, _ := r.RuneAt(size - 200)
|
readRune, _ := r.RuneAt(size - 200)
|
||||||
AssertEqual(t, 'X', readRune)
|
assertEqual(t, 'X', readRune)
|
||||||
readRune, _ = r.RuneAt(size - 1)
|
readRune, _ = r.RuneAt(size - 1)
|
||||||
AssertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
||||||
input, size := makeLargeStubReader()
|
input, size := makeLargeStubReader()
|
||||||
r := read.New(input)
|
r := New(input)
|
||||||
|
|
||||||
readRune, _ := r.RuneAt(size - 1)
|
readRune, _ := r.RuneAt(size - 1)
|
||||||
AssertEqual(t, 'Y', readRune)
|
assertEqual(t, 'Y', readRune)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAllocationPatterns(t *testing.T) {
|
||||||
|
input, _ := makeLargeStubReader()
|
||||||
|
r := New(input)
|
||||||
|
|
||||||
|
// The first read will create the standard cache.
|
||||||
|
// store |x 64 |
|
||||||
|
// buffer |x 64 |
|
||||||
|
assertCache(t, "read 1", r, func() { r.RuneAt(0) }, 0, 64, 1, 64)
|
||||||
|
|
||||||
|
// The first 64 reads will fit in the standard cache.
|
||||||
|
// store |xxxx64xxxxx|
|
||||||
|
// buffer |xxxx64xxxxx|
|
||||||
|
assertCache(t, "read fill cache", r, func() { r.RuneAt(63) }, 0, 64, 64, 64)
|
||||||
|
|
||||||
|
// Flushing zero input keeps everything as-is.
|
||||||
|
// store |xxxx64xxxxx|
|
||||||
|
// buffer |xxxx64xxxxx|
|
||||||
|
assertCache(t, "flush zero", r, func() { r.Flush(0) }, 0, 64, 64, 64)
|
||||||
|
|
||||||
|
// Flushing all cached input truncates the cache.
|
||||||
|
// store | 64 |
|
||||||
|
// buffer | 64 |
|
||||||
|
assertCache(t, "flush full cache", r, func() { r.Flush(64) }, 0, 64, 0, 64)
|
||||||
|
|
||||||
|
// Reading 65 chars will allocate a new store of 2 * size + n.
|
||||||
|
// store |xxxxx65xxxxx 128 |
|
||||||
|
// buffer |xxxxx65xxxxx 128 |
|
||||||
|
assertCache(t, "read cap + 1", r, func() { r.RuneAt(64) }, 0, 65+128, 65, 65+128)
|
||||||
|
|
||||||
|
// A partial flush frees the start of the store and moves
|
||||||
|
// the buffer slice.
|
||||||
|
// store | 50 x15x 128 |
|
||||||
|
// buffer |x15x 128 |
|
||||||
|
assertCache(t, "flush partial", r, func() { r.Flush(50) }, 0, 50+15+128, 15, 15+128)
|
||||||
|
|
||||||
|
// The capacity for the buffer is now 2*64 + 15
|
||||||
|
// This number of runes can be read, filling up the store
|
||||||
|
// without a new allocation.
|
||||||
|
// store | 50 xxxxxxxxx143xxxxxxxx|
|
||||||
|
// buffer |xxxxxxxxx143xxxxxxxx|
|
||||||
|
assertCache(t, "read fill cache after partial flush", r, func() { r.RuneAt(142) }, 0, 50+143, 143, 143)
|
||||||
|
|
||||||
|
// Flush the full input.
|
||||||
|
// store | 193 |
|
||||||
|
// buffer | |
|
||||||
|
assertCache(t, "flush full cache after partial flush", r, func() { r.Flush(143) }, 0, 193, 0, 193)
|
||||||
|
|
||||||
|
// Read a bit more than half the capacity.
|
||||||
|
// store |xxxxxx101xxxxxxxx 92 |
|
||||||
|
// buffer |xxxxxx101xxxxxxxx 92 |
|
||||||
|
assertCache(t, "read more than half the cap", r, func() { r.RuneAt(100) }, 0, 193, 101, 193)
|
||||||
|
|
||||||
|
// Then flush almost all input.
|
||||||
|
// store | 100 x1x 92 |
|
||||||
|
// buffer |x1x 92 |
|
||||||
|
assertCache(t, "flush almost all input", r, func() { r.Flush(100) }, 0, 193, 1, 93)
|
||||||
|
|
||||||
|
// Again read a bit more than half the capacity. This does not fit at the
|
||||||
|
// end of the store, but by moving the current buffer to the start of the
|
||||||
|
// store (where it fits), space is freed up for the read operation.
|
||||||
|
// store |xxxxx100xxxxxx 93 |
|
||||||
|
// buffer |xxxxx100xxxxxx 93 |
|
||||||
|
assertCache(t, "read beyond cap with free space at start of store", r, func() { r.RuneAt(99) }, 0, 193, 100, 193)
|
||||||
|
|
||||||
|
// Now flush only one rune from the cache.
|
||||||
|
// store |1 xxxx99xxxxx 93 |
|
||||||
|
// buffer |xxxx99xxxxx 93 |
|
||||||
|
assertCache(t, "flush 1", r, func() { r.Flush(1) }, 0, 193, 99, 192)
|
||||||
|
|
||||||
|
// Now read one more than the capacity. This will not fit, so space has
|
||||||
|
// to be made. Since there's 1 free space at the start of the store,
|
||||||
|
// the data is moved to the start and no reallocation is needed.
|
||||||
|
// store |1 xxxx99xxxxx 93 |
|
||||||
|
// buffer |xxxx99xxxxx 93 |
|
||||||
|
assertCache(t, "read 1 more than cap with 1 free at start", r, func() { r.RuneAt(192) }, 0, 193, 193, 193)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeLargeStubReader() (*StubReader, int) {
|
func makeLargeStubReader() (*StubReader, int) {
|
||||||
|
|
|
@ -201,7 +201,7 @@ func (i *API) Dispose() {
|
||||||
func (i *API) clearResults() {
|
func (i *API) clearResults() {
|
||||||
i.result.lastRune = nil
|
i.result.lastRune = nil
|
||||||
i.result.runes = []rune{}
|
i.result.runes = []rune{}
|
||||||
i.result.tokens = []*Token{}
|
i.result.tokens = []Token{}
|
||||||
i.result.err = nil
|
i.result.err = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -55,11 +55,11 @@ func ExampleAPI_Result() {
|
||||||
fmt.Printf("API result runes: %q\n", api.Result().Runes())
|
fmt.Printf("API result runes: %q\n", api.Result().Runes())
|
||||||
fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
|
fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
|
||||||
|
|
||||||
result.AddTokens(&tokenize.Token{
|
result.AddTokens(tokenize.Token{
|
||||||
Runes: []rune("demo 1"),
|
Runes: []rune("demo 1"),
|
||||||
Type: 42,
|
Type: 42,
|
||||||
Value: "towel"})
|
Value: "towel"})
|
||||||
result.AddTokens(&tokenize.Token{
|
result.AddTokens(tokenize.Token{
|
||||||
Runes: []rune("demo 2"),
|
Runes: []rune("demo 2"),
|
||||||
Type: 73,
|
Type: 73,
|
||||||
Value: "Zaphod"})
|
Value: "Zaphod"})
|
||||||
|
|
|
@ -712,11 +712,11 @@ func MatchEndOfFile() Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchUntilEndOfLine creates a Handler function that accepts any rune
|
// MatchUntilEndOfLine creates a Handler function that accepts one or
|
||||||
// until the end of the line (or file when that's the case).
|
// more runes until the end of the line (or file when that's the case).
|
||||||
// The newline itself is not included in the match.
|
// The newline itself is not included in the match.
|
||||||
func MatchUntilEndOfLine() Handler {
|
func MatchUntilEndOfLine() Handler {
|
||||||
return MatchZeroOrMore(MatchNot(MatchEndOfLine()))
|
return MatchOneOrMore(MatchNot(MatchEndOfLine()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchAnyRune creates a Handler function that checks if a rune can be
|
// MatchAnyRune creates a Handler function that checks if a rune can be
|
||||||
|
@ -1437,7 +1437,7 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
|
||||||
// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
||||||
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
|
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
|
||||||
// token to the child here, the order would have been "year", "month", "day", "date".
|
// token to the child here, the order would have been "year", "month", "day", "date".
|
||||||
token := &Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
|
token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
|
||||||
t.Result().AddTokens(token)
|
t.Result().AddTokens(token)
|
||||||
child.Merge()
|
child.Merge()
|
||||||
|
|
||||||
|
@ -1454,7 +1454,7 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
if handler(child) {
|
if handler(child) {
|
||||||
result := child.Result()
|
result := child.Result()
|
||||||
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
|
token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
|
||||||
result.SetTokens(token)
|
result.SetTokens(token)
|
||||||
child.Merge()
|
child.Merge()
|
||||||
return true
|
return true
|
||||||
|
|
|
@ -10,7 +10,7 @@ import (
|
||||||
type Result struct {
|
type Result struct {
|
||||||
lastRune *runeInfo // Information about the last rune read using NextRune()
|
lastRune *runeInfo // Information about the last rune read using NextRune()
|
||||||
runes []rune // runes as added to the result by tokenize.Handler functions
|
runes []rune // runes as added to the result by tokenize.Handler functions
|
||||||
tokens []*Token // Tokens as added to the result by tokenize.Handler functions
|
tokens []Token // Tokens as added to the result by tokenize.Handler functions
|
||||||
cursor *Cursor // current read cursor position, relative to the start of the file
|
cursor *Cursor // current read cursor position, relative to the start of the file
|
||||||
offset int // current rune offset relative to the Reader's sliding window
|
offset int // current rune offset relative to the Reader's sliding window
|
||||||
err error // can be used by a Handler to report a specific issue with the input
|
err error // can be used by a Handler to report a specific issue with the input
|
||||||
|
@ -69,7 +69,7 @@ func (t Token) String() string {
|
||||||
func newResult() *Result {
|
func newResult() *Result {
|
||||||
return &Result{
|
return &Result{
|
||||||
runes: []rune{},
|
runes: []rune{},
|
||||||
tokens: []*Token{},
|
tokens: []Token{},
|
||||||
cursor: &Cursor{},
|
cursor: &Cursor{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -122,29 +122,26 @@ func (r *Result) String() string {
|
||||||
|
|
||||||
// ClearTokens clears the tokens in the Result.
|
// ClearTokens clears the tokens in the Result.
|
||||||
func (r *Result) ClearTokens() {
|
func (r *Result) ClearTokens() {
|
||||||
r.tokens = []*Token{}
|
r.tokens = []Token{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTokens replaces the Tokens from the Result with the provided tokens.
|
// SetTokens replaces the Tokens from the Result with the provided tokens.
|
||||||
func (r *Result) SetTokens(tokens ...*Token) {
|
func (r *Result) SetTokens(tokens ...Token) {
|
||||||
r.ClearTokens()
|
r.tokens = tokens
|
||||||
for _, t := range tokens {
|
|
||||||
r.AddTokens(t)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddTokens is used to add Tokens to the Result.
|
// AddTokens is used to add Tokens to the Result.
|
||||||
func (r *Result) AddTokens(tokens ...*Token) {
|
func (r *Result) AddTokens(tokens ...Token) {
|
||||||
r.tokens = append(r.tokens, tokens...)
|
r.tokens = append(r.tokens, tokens...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tokens retrieves the Tokens from the Result.
|
// Tokens retrieves the Tokens from the Result.
|
||||||
func (r *Result) Tokens() []*Token {
|
func (r *Result) Tokens() []Token {
|
||||||
return r.tokens
|
return r.tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
// Token retrieves a single Token from the Result at the specified index.
|
// Token retrieves a single Token from the Result at the specified index.
|
||||||
func (r *Result) Token(idx int) *Token {
|
func (r *Result) Token(idx int) Token {
|
||||||
return r.tokens[idx]
|
return r.tokens[idx]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue