Backup of work on performance tuning.
This commit is contained in:
parent
583197c37a
commit
5e9879326a
|
@ -67,8 +67,10 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
|
||||||
forkedAPI.Merge()
|
forkedAPI.Merge()
|
||||||
p.result = p.tokenAPI.Result()
|
p.result = p.tokenAPI.Result()
|
||||||
forkedAPI.Dispose()
|
forkedAPI.Dispose()
|
||||||
if p.sanityChecksEnabled && p.tokenAPI.FlushInput() {
|
if p.tokenAPI.FlushInput() {
|
||||||
p.initLoopCheck()
|
if p.sanityChecksEnabled {
|
||||||
|
p.initLoopCheck()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ok
|
return ok
|
||||||
|
|
45
read/read.go
45
read/read.go
|
@ -90,6 +90,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
||||||
// The parsekit.read.Buffer is used internally by tokenize.API.
|
// The parsekit.read.Buffer is used internally by tokenize.API.
|
||||||
type Buffer struct {
|
type Buffer struct {
|
||||||
bufio *bufio.Reader // used for ReadRune()
|
bufio *bufio.Reader // used for ReadRune()
|
||||||
|
store []rune // buffer store, the buffer field is a slice on top of this one
|
||||||
buffer []rune // input buffer, holding runes that were read from input
|
buffer []rune // input buffer, holding runes that were read from input
|
||||||
err error // a read error, if one occurred
|
err error // a read error, if one occurred
|
||||||
errOffset int // the offset in the buffer at which the read error was encountered
|
errOffset int // the offset in the buffer at which the read error was encountered
|
||||||
|
@ -121,7 +122,11 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
|
||||||
// Rune at provided offset is not yet available in the input buffer.
|
// Rune at provided offset is not yet available in the input buffer.
|
||||||
// Read runes until we have enough runes to satisfy the offset.
|
// Read runes until we have enough runes to satisfy the offset.
|
||||||
l := len(r.buffer)
|
l := len(r.buffer)
|
||||||
n := offset - l + 1 // nr of runes to add to the buffer to get to offset
|
|
||||||
|
// Number of runes to add to the buffer to have enough space to store
|
||||||
|
// the rune at the offset
|
||||||
|
n := offset - l + 1
|
||||||
|
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
r.grow(n)
|
r.grow(n)
|
||||||
for writeAt := l; writeAt <= offset; writeAt++ {
|
for writeAt := l; writeAt <= offset; writeAt++ {
|
||||||
|
@ -162,26 +167,35 @@ var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
|
||||||
// Afterwards, the last n runes of the buffer are available for writing.
|
// Afterwards, the last n runes of the buffer are available for writing.
|
||||||
// If the buffer can't grow it will panic with ErrTooLarge.
|
// If the buffer can't grow it will panic with ErrTooLarge.
|
||||||
func (r *Buffer) grow(n int) {
|
func (r *Buffer) grow(n int) {
|
||||||
// Instantiate new buffer.
|
// Instantiate new buffer store
|
||||||
if r.buffer == nil {
|
if r.store == nil {
|
||||||
b := smallBufferSize
|
b := smallBufferSize
|
||||||
if b < n {
|
if b < n {
|
||||||
b = n
|
b = n
|
||||||
}
|
}
|
||||||
r.buffer = make([]rune, n, b)
|
r.store = make([]rune, n, b)
|
||||||
|
r.buffer = r.store
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
l := len(r.buffer)
|
l := len(r.buffer)
|
||||||
c := cap(r.buffer)
|
c := cap(r.buffer)
|
||||||
// Grow the buffer by reslicing within the available capacity.
|
// Grow the buffer store by reslicing within the available capacity.
|
||||||
if n <= c-l {
|
if n <= c-l {
|
||||||
r.buffer = r.buffer[:l+n]
|
r.buffer = r.buffer[:l+n]
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Grow the buffer by allocating a new one and copying the data.
|
// Grow the buffer by moving the data to the start of the store.
|
||||||
buf := makeSlice(2*c + n)
|
if cap(r.store)-l-n > 0 {
|
||||||
|
copy(r.store, r.buffer)
|
||||||
|
r.buffer = r.store[:l+n]
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Grow the buffer store by allocating a new one and copying the data.
|
||||||
|
buf := makeSlice(2*cap(r.store) + n)
|
||||||
|
fmt.Printf("ALLOC %d\n", 2*cap(r.store)+n)
|
||||||
copy(buf, r.buffer)
|
copy(buf, r.buffer)
|
||||||
r.buffer = buf[:l+n]
|
r.store = buf
|
||||||
|
r.buffer = r.store[:l+n]
|
||||||
}
|
}
|
||||||
|
|
||||||
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
// makeSlice allocates a slice of size n. If the allocation fails, it panics
|
||||||
|
@ -201,13 +215,22 @@ func makeSlice(n int) []rune {
|
||||||
// the rune that comes after the runes that were flushed.
|
// the rune that comes after the runes that were flushed.
|
||||||
// So what this basically does, is turn the Buffer into a sliding window.
|
// So what this basically does, is turn the Buffer into a sliding window.
|
||||||
func (r *Buffer) Flush(numberOfRunes int) {
|
func (r *Buffer) Flush(numberOfRunes int) {
|
||||||
if numberOfRunes > len(r.buffer) {
|
l := len(r.buffer)
|
||||||
|
if numberOfRunes > l {
|
||||||
panic(fmt.Sprintf(
|
panic(fmt.Sprintf(
|
||||||
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
|
||||||
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
|
"exceeds size of the buffer (%d)", numberOfRunes, l))
|
||||||
|
}
|
||||||
|
if numberOfRunes == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if l == numberOfRunes {
|
||||||
|
r.buffer = r.store[:0]
|
||||||
|
r.errOffset = 0
|
||||||
|
return
|
||||||
}
|
}
|
||||||
r.buffer = r.buffer[numberOfRunes:]
|
r.buffer = r.buffer[numberOfRunes:]
|
||||||
if r.err != nil {
|
if r.err != nil {
|
||||||
r.errOffset -= numberOfRunes
|
r.errOffset = r.errOffset - numberOfRunes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,7 @@ var C = struct {
|
||||||
Except func(except Handler, handler Handler) Handler
|
Except func(except Handler, handler Handler) Handler
|
||||||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||||
|
FlushInput func(Handler) Handler
|
||||||
}{
|
}{
|
||||||
Any: MatchAny,
|
Any: MatchAny,
|
||||||
Not: MatchNot,
|
Not: MatchNot,
|
||||||
|
@ -54,6 +55,7 @@ var C = struct {
|
||||||
Except: MatchExcept,
|
Except: MatchExcept,
|
||||||
FollowedBy: MatchFollowedBy,
|
FollowedBy: MatchFollowedBy,
|
||||||
NotFollowedBy: MatchNotFollowedBy,
|
NotFollowedBy: MatchNotFollowedBy,
|
||||||
|
FlushInput: MakeInputFlusher,
|
||||||
}
|
}
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms or functions to build atoms.
|
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||||
|
@ -70,7 +72,9 @@ var A = struct {
|
||||||
RuneRange func(rune, rune) Handler
|
RuneRange func(rune, rune) Handler
|
||||||
Str func(string) Handler
|
Str func(string) Handler
|
||||||
StrNoCase func(string) Handler
|
StrNoCase func(string) Handler
|
||||||
|
EndOfLine Handler
|
||||||
EndOfFile Handler
|
EndOfFile Handler
|
||||||
|
UntilEndOfLine Handler
|
||||||
AnyRune Handler
|
AnyRune Handler
|
||||||
ValidRune Handler
|
ValidRune Handler
|
||||||
InvalidRune Handler
|
InvalidRune Handler
|
||||||
|
@ -124,7 +128,6 @@ var A = struct {
|
||||||
Blanks Handler
|
Blanks Handler
|
||||||
Whitespace Handler
|
Whitespace Handler
|
||||||
UnicodeSpace Handler
|
UnicodeSpace Handler
|
||||||
EndOfLine Handler
|
|
||||||
Digit Handler
|
Digit Handler
|
||||||
DigitNotZero Handler
|
DigitNotZero Handler
|
||||||
Digits Handler
|
Digits Handler
|
||||||
|
@ -156,6 +159,8 @@ var A = struct {
|
||||||
Str: MatchStr,
|
Str: MatchStr,
|
||||||
StrNoCase: MatchStrNoCase,
|
StrNoCase: MatchStrNoCase,
|
||||||
EndOfFile: MatchEndOfFile(),
|
EndOfFile: MatchEndOfFile(),
|
||||||
|
EndOfLine: MatchEndOfLine(),
|
||||||
|
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||||
AnyRune: MatchAnyRune(),
|
AnyRune: MatchAnyRune(),
|
||||||
ValidRune: MatchValidRune(),
|
ValidRune: MatchValidRune(),
|
||||||
InvalidRune: MatchInvalidRune(),
|
InvalidRune: MatchInvalidRune(),
|
||||||
|
@ -209,7 +214,6 @@ var A = struct {
|
||||||
Blanks: MatchBlanks(),
|
Blanks: MatchBlanks(),
|
||||||
Whitespace: MatchWhitespace(),
|
Whitespace: MatchWhitespace(),
|
||||||
UnicodeSpace: MatchUnicodeSpace(),
|
UnicodeSpace: MatchUnicodeSpace(),
|
||||||
EndOfLine: MatchEndOfLine(),
|
|
||||||
Digit: MatchDigit(),
|
Digit: MatchDigit(),
|
||||||
DigitNotZero: MatchDigitNotZero(),
|
DigitNotZero: MatchDigitNotZero(),
|
||||||
Digits: MatchDigits(),
|
Digits: MatchDigits(),
|
||||||
|
@ -641,6 +645,31 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
||||||
|
// provided handler matches.
|
||||||
|
//
|
||||||
|
// This is useful when constructing a grammar using only parsekit.tokenize
|
||||||
|
// functionality (parsekit.parse will automatically flush the input for you)
|
||||||
|
// that has to process large input data.
|
||||||
|
//
|
||||||
|
// Without flushing the input, the input reader will allocate memory
|
||||||
|
// during the parsing process, eventually enough to hold the full input
|
||||||
|
// in memory. By wrapping Handlers with MakeInputFlusher (C.FlushInput), you can tell parsekit
|
||||||
|
// that the accumulated input so far will no longer be needed, allowing
|
||||||
|
// this input to be flushed from memory.
|
||||||
|
//
|
||||||
|
// Rule of thumb is: only use it when you have to actually fix a memory
|
||||||
|
// hogging issue for your use case.
|
||||||
|
func MakeInputFlusher(handler Handler) Handler {
|
||||||
|
return func(t *API) bool {
|
||||||
|
if handler(t) {
|
||||||
|
t.FlushInput()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MatchSigned creates a Handler that checks if the provided Handler is
|
// MatchSigned creates a Handler that checks if the provided Handler is
|
||||||
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
||||||
// atoms into a signed version, e.g.
|
// atoms into a signed version, e.g.
|
||||||
|
@ -683,6 +712,13 @@ func MatchEndOfFile() Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchUntilEndOfLine creates a Handler function that accepts any rune
|
||||||
|
// until the end of the line (or file when that's the case).
|
||||||
|
// The newline itself is not included in the match.
|
||||||
|
func MatchUntilEndOfLine() Handler {
|
||||||
|
return MatchZeroOrMore(MatchNot(MatchEndOfLine()))
|
||||||
|
}
|
||||||
|
|
||||||
// MatchAnyRune creates a Handler function that checks if a rune can be
|
// MatchAnyRune creates a Handler function that checks if a rune can be
|
||||||
// read from the input. Invalid runes on the input are replaced with the UTF8
|
// read from the input. Invalid runes on the input are replaced with the UTF8
|
||||||
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
||||||
|
@ -1421,7 +1457,6 @@ func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
|
||||||
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
|
token := &Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
|
||||||
result.SetTokens(token)
|
result.SetTokens(token)
|
||||||
child.Merge()
|
child.Merge()
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
Loading…
Reference in New Issue