Backup work to performance tuning.

This commit is contained in:
Maurice Makaay 2019-07-05 08:08:42 +00:00
parent 583197c37a
commit 5e9879326a
3 changed files with 76 additions and 16 deletions

View File

@ -67,8 +67,10 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
forkedAPI.Merge()
p.result = p.tokenAPI.Result()
forkedAPI.Dispose()
if p.sanityChecksEnabled && p.tokenAPI.FlushInput() {
p.initLoopCheck()
if p.tokenAPI.FlushInput() {
if p.sanityChecksEnabled {
p.initLoopCheck()
}
}
}
return ok

View File

@ -90,6 +90,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
// The parsekit.read.Buffer is used internally by tokenize.API.
type Buffer struct {
bufio *bufio.Reader // used for ReadRune()
store []rune // buffer store, the buffer field is a slice on top of this one
buffer []rune // input buffer, holding runes that were read from input
err error // a read error, if one occurred
errOffset int // the offset in the buffer at which the read error was encountered
@ -121,7 +122,11 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
// Rune at provided offset is not yet available in the input buffer.
// Read runes until we have enough runes to satisfy the offset.
l := len(r.buffer)
n := offset - l + 1 // nr of runes to add to the buffer to get to offset
// Number of runes to add to the buffer to have enough space to store
// the rune at the offset
n := offset - l + 1
if n > 0 {
r.grow(n)
for writeAt := l; writeAt <= offset; writeAt++ {
@ -162,26 +167,35 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
// It returns nothing: on return, len(r.buffer) has increased by n runes,
// and the caller is expected to fill the newly added slots.
// If the buffer can't grow it will panic with ErrTooLarge.
func (r *Buffer) grow(n int) {
// Instantiate new buffer.
if r.buffer == nil {
// Instantiate new buffer store
if r.store == nil {
b := smallBufferSize
if b < n {
b = n
}
r.buffer = make([]rune, n, b)
r.store = make([]rune, n, b)
r.buffer = r.store
return
}
l := len(r.buffer)
c := cap(r.buffer)
// Grow the buffer by reslicing within the available capacity.
// Grow the buffer store by reslicing within the available capacity.
if n <= c-l {
r.buffer = r.buffer[:l+n]
return
}
// Grow the buffer by allocating a new one and copying the data.
buf := makeSlice(2*c + n)
// Grow the buffer by moving the data to the start of the store.
if cap(r.store)-l-n > 0 {
copy(r.store, r.buffer)
r.buffer = r.store[:l+n]
return
}
// Grow the buffer store by allocating a new one and copying the data.
buf := makeSlice(2*cap(r.store) + n)
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
copy(buf, r.buffer)
r.buffer = buf[:l+n]
r.store = buf
r.buffer = r.store[:l+n]
}
// makeSlice allocates a slice of size n. If the allocation fails, it panics
@ -201,13 +215,22 @@ func makeSlice(n int) []rune {
// the rune that comes after the runes that were flushed.
// So what this basically does, is turn the Buffer into a sliding window.
func (r *Buffer) Flush(numberOfRunes int) {
// Asking to flush more runes than the buffer currently holds is treated
// as a programming error and results in a panic.
if numberOfRunes > len(r.buffer) {
l := len(r.buffer)
if numberOfRunes > l {
panic(fmt.Sprintf(
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
"exceeds size of the buffer (%d)", numberOfRunes, l))
}
// Nothing to flush: leave the buffer and the error offset untouched.
if numberOfRunes == 0 {
return
}
// The complete buffer is flushed: restart with an empty buffer at the
// very start of the store, instead of sliding the window further forward.
if l == numberOfRunes {
r.buffer = r.store[:0]
r.errOffset = 0
return
}
// Partial flush: slide the window forward, past the flushed runes.
r.buffer = r.buffer[numberOfRunes:]
// Keep the recorded read error position relative to the new buffer start.
if r.err != nil {
r.errOffset -= numberOfRunes
r.errOffset = r.errOffset - numberOfRunes
}
}

View File

@ -39,6 +39,7 @@ var C = struct {
Except func(except Handler, handler Handler) Handler
FollowedBy func(lookAhead Handler, handler Handler) Handler
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
FlushInput func(Handler) Handler
}{
Any: MatchAny,
Not: MatchNot,
@ -54,6 +55,7 @@ var C = struct {
Except: MatchExcept,
FollowedBy: MatchFollowedBy,
NotFollowedBy: MatchNotFollowedBy,
FlushInput: MakeInputFlusher,
}
// A provides convenient access to a range of atoms or functions to build atoms.
@ -70,7 +72,9 @@ var A = struct {
RuneRange func(rune, rune) Handler
Str func(string) Handler
StrNoCase func(string) Handler
EndOfLine Handler
EndOfFile Handler
UntilEndOfLine Handler
AnyRune Handler
ValidRune Handler
InvalidRune Handler
@ -124,7 +128,6 @@ var A = struct {
Blanks Handler
Whitespace Handler
UnicodeSpace Handler
EndOfLine Handler
Digit Handler
DigitNotZero Handler
Digits Handler
@ -156,6 +159,8 @@ var A = struct {
Str: MatchStr,
StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(),
EndOfLine: MatchEndOfLine(),
UntilEndOfLine: MatchUntilEndOfLine(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
InvalidRune: MatchInvalidRune(),
@ -209,7 +214,6 @@ var A = struct {
Blanks: MatchBlanks(),
Whitespace: MatchWhitespace(),
UnicodeSpace: MatchUnicodeSpace(),
EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
@ -641,6 +645,31 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
}
}
// MakeInputFlusher wraps the provided handler in a Handler that flushes the
// input buffer each time the wrapped handler reports a match. When the
// wrapped handler does not match, nothing is flushed. The result of the
// wrapped handler is returned unchanged.
//
// It is exposed to grammar builders as C.FlushInput.
//
// This is meant for grammars that are built using only parsekit.tokenize
// functionality (parsekit.parse automatically flushes the input for you)
// and that have to process large input data.
//
// Without flushing, the input reader keeps allocating memory while parsing,
// eventually enough to hold the full input. Wrapping a Handler with
// MakeInputFlusher tells parsekit that the input accumulated so far is no
// longer needed once that Handler matches, so it can be released.
//
// Rule of thumb: only use it when you actually have to fix a memory
// hogging issue for your use case.
func MakeInputFlusher(handler Handler) Handler {
	return func(t *API) bool {
		matched := handler(t)
		if matched {
			t.FlushInput()
		}
		return matched
	}
}
// MatchSigned creates a Handler that checks if the provided Handler is
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
// atoms into a signed version, e.g.
@ -683,6 +712,13 @@ func MatchEndOfFile() Handler {
}
}
// MatchUntilEndOfLine creates a Handler function that is composed as zero
// or more repetitions of "not an end of line", using MatchZeroOrMore,
// MatchNot and MatchEndOfLine.
// The intent is to accept everything up to the end of the line (or the end
// of the file, when that comes first), without including the newline itself
// in the match.
func MatchUntilEndOfLine() Handler {
	endOfLine := MatchEndOfLine()
	notEndOfLine := MatchNot(endOfLine)
	return MatchZeroOrMore(notEndOfLine)
}
// MatchAnyRune creates a Handler function that checks if a rune can be
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
@ -1421,7 +1457,6 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
result.SetTokens(token)
child.Merge()
return true
}
return false