Backup of work on performance tuning.
This commit is contained in:
parent
583197c37a
commit
5e9879326a
|
@ -67,8 +67,10 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
|
|||
forkedAPI.Merge()
|
||||
p.result = p.tokenAPI.Result()
|
||||
forkedAPI.Dispose()
|
||||
if p.sanityChecksEnabled && p.tokenAPI.FlushInput() {
|
||||
p.initLoopCheck()
|
||||
if p.tokenAPI.FlushInput() {
|
||||
if p.sanityChecksEnabled {
|
||||
p.initLoopCheck()
|
||||
}
|
||||
}
|
||||
}
|
||||
return ok
|
||||
|
|
45
read/read.go
45
read/read.go
|
@ -90,6 +90,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
|||
// The parsekit.read.Buffer is used internally by tokenize.API.
|
||||
type Buffer struct {
|
||||
bufio *bufio.Reader // used for ReadRune()
|
||||
store []rune // buffer store, the buffer field is a slice on top of this one
|
||||
buffer []rune // input buffer, holding runes that were read from input
|
||||
err error // a read error, if one occurred
|
||||
errOffset int // the offset in the buffer at which the read error was encountered
|
||||
|
@ -121,7 +122,11 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
|
|||
// Rune at provided offset is not yet available in the input buffer.
|
||||
// Read runes until we have enough runes to satisfy the offset.
|
||||
l := len(r.buffer)
|
||||
n := offset - l + 1 // nr of runes to add to the buffer to get to offset
|
||||
|
||||
// Number of runes to add to the buffer to have enough space to store
|
||||
// the rune at the offset
|
||||
n := offset - l + 1
|
||||
|
||||
if n > 0 {
|
||||
r.grow(n)
|
||||
for writeAt := l; writeAt <= offset; writeAt++ {
|
||||
|
@ -162,26 +167,35 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
|||
// It does not return a value; the buffer is resliced in place.
|
||||
// If the buffer can't grow it will panic with ErrTooLarge.
|
||||
func (r *Buffer) grow(n int) {
|
||||
// Instantiate new buffer.
|
||||
if r.buffer == nil {
|
||||
// Instantiate new buffer store
|
||||
if r.store == nil {
|
||||
b := smallBufferSize
|
||||
if b < n {
|
||||
b = n
|
||||
}
|
||||
r.buffer = make([]rune, n, b)
|
||||
r.store = make([]rune, n, b)
|
||||
r.buffer = r.store
|
||||
return
|
||||
}
|
||||
l := len(r.buffer)
|
||||
c := cap(r.buffer)
|
||||
// Grow the buffer by reslicing within the available capacity.
|
||||
// Grow the buffer store by reslicing within the available capacity.
|
||||
if n <= c-l {
|
||||
r.buffer = r.buffer[:l+n]
|
||||
return
|
||||
}
|
||||
// Grow the buffer by allocating a new one and copying the data.
|
||||
buf := makeSlice(2*c + n)
|
||||
// Grow the buffer by moving the data to the start of the store.
|
||||
if cap(r.store)-l-n > 0 {
|
||||
copy(r.store, r.buffer)
|
||||
r.buffer = r.store[:l+n]
|
||||
return
|
||||
}
|
||||
// Grow the buffer store by allocating a new one and copying the data.
|
||||
buf := makeSlice(2*cap(r.store) + n)
|
||||
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
|
||||
copy(buf, r.buffer)
|
||||
r.buffer = buf[:l+n]
|
||||
r.store = buf
|
||||
r.buffer = r.store[:l+n]
|
||||
}
|
||||
|
||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||
|
@ -201,13 +215,22 @@ func makeSlice(n int) []rune {
|
|||
// the rune that comes after the runes that were flushed.
|
||||
// So what this basically does, is turn the Buffer into a sliding window.
|
||||
func (r *Buffer) Flush(numberOfRunes int) {
|
||||
if numberOfRunes > len(r.buffer) {
|
||||
l := len(r.buffer)
|
||||
if numberOfRunes > l {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
||||
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
|
||||
"exceeds size of the buffer (%d)", numberOfRunes, l))
|
||||
}
|
||||
if numberOfRunes == 0 {
|
||||
return
|
||||
}
|
||||
if l == numberOfRunes {
|
||||
r.buffer = r.store[:0]
|
||||
r.errOffset = 0
|
||||
return
|
||||
}
|
||||
r.buffer = r.buffer[numberOfRunes:]
|
||||
if r.err != nil {
|
||||
r.errOffset -= numberOfRunes
|
||||
r.errOffset = r.errOffset - numberOfRunes
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,7 @@ var C = struct {
|
|||
Except func(except Handler, handler Handler) Handler
|
||||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
FlushInput func(Handler) Handler
|
||||
}{
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
|
@ -54,6 +55,7 @@ var C = struct {
|
|||
Except: MatchExcept,
|
||||
FollowedBy: MatchFollowedBy,
|
||||
NotFollowedBy: MatchNotFollowedBy,
|
||||
FlushInput: MakeInputFlusher,
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||
|
@ -70,7 +72,9 @@ var A = struct {
|
|||
RuneRange func(rune, rune) Handler
|
||||
Str func(string) Handler
|
||||
StrNoCase func(string) Handler
|
||||
EndOfLine Handler
|
||||
EndOfFile Handler
|
||||
UntilEndOfLine Handler
|
||||
AnyRune Handler
|
||||
ValidRune Handler
|
||||
InvalidRune Handler
|
||||
|
@ -124,7 +128,6 @@ var A = struct {
|
|||
Blanks Handler
|
||||
Whitespace Handler
|
||||
UnicodeSpace Handler
|
||||
EndOfLine Handler
|
||||
Digit Handler
|
||||
DigitNotZero Handler
|
||||
Digits Handler
|
||||
|
@ -156,6 +159,8 @@ var A = struct {
|
|||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
EndOfLine: MatchEndOfLine(),
|
||||
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
ValidRune: MatchValidRune(),
|
||||
InvalidRune: MatchInvalidRune(),
|
||||
|
@ -209,7 +214,6 @@ var A = struct {
|
|||
Blanks: MatchBlanks(),
|
||||
Whitespace: MatchWhitespace(),
|
||||
UnicodeSpace: MatchUnicodeSpace(),
|
||||
EndOfLine: MatchEndOfLine(),
|
||||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
|
@ -641,6 +645,31 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
||||
// provided handler matches.
|
||||
//
|
||||
// This is useful when constructing a grammar using only parsekit.tokenize
|
||||
// functionality (parsekit.parse will automatically flush the input for you)
|
||||
// that has to process large input data.
|
||||
//
|
||||
// Without flushing the input, the input reader will allocate memory
|
||||
// during the parsing process, eventually enough to hold the full input
|
||||
// in memory. By wrapping Handlers with MakeInputFlusher, you can tell parsekit
|
||||
// that the accumulated input so far will no longer be needed, allowing
|
||||
// this input to be flushed from memory.
|
||||
//
|
||||
// Rule of thumb is: only use it when you have to actually fix a memory
|
||||
// hogging issue for your use case.
|
||||
func MakeInputFlusher(handler Handler) Handler {
|
||||
return func(t *API) bool {
|
||||
if handler(t) {
|
||||
t.FlushInput()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchSigned creates a Handler that checks if the provided Handler is
|
||||
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
||||
// atoms into a signed version, e.g.
|
||||
|
@ -683,6 +712,13 @@ func MatchEndOfFile() Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// MatchUntilEndOfLine creates a Handler function that accepts any rune
|
||||
// until the end of the line (or file when that's the case).
|
||||
// The newline itself is not included in the match.
|
||||
func MatchUntilEndOfLine() Handler {
|
||||
return MatchZeroOrMore(MatchNot(MatchEndOfLine()))
|
||||
}
|
||||
|
||||
// MatchAnyRune creates a Handler function that checks if a rune can be
|
||||
// read from the input. Invalid runes on the input are replaced with the UTF8
|
||||
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
||||
|
@ -1421,7 +1457,6 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
|
|||
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
|
||||
result.SetTokens(token)
|
||||
child.Merge()
|
||||
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
Loading…
Reference in New Issue