Getting rid of forking, the new system delivers more performance.
This commit is contained in:
parent
87cdadae78
commit
4c94374107
|
@ -93,7 +93,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
|||
// To minimize memory use, it is also possible to flush the read buffer when there is
|
||||
// no more need to go back to previously read data.
|
||||
//
|
||||
// This parserkit.reader.Reader is used internally by tokenize.API.
|
||||
// This buffer is used internally by tokenize.API.
|
||||
type Buffer struct {
|
||||
bufio *bufio.Reader // used for ReadRune()
|
||||
buffer []byte // input buffer, holding bytes that were read from input
|
||||
|
|
174
tokenize/api.go
174
tokenize/api.go
|
@ -71,20 +71,14 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
stackFrame *stackFrame // the current stack frame
|
||||
|
||||
reader *read.Buffer // the buffered input reader
|
||||
Input Input // provides input-related functionality
|
||||
Byte InputByteMode // access to a set of byte-based input methods
|
||||
Rune InputRuneMode // access to a set of rune-based input methods
|
||||
|
||||
Output Output // provides output-related functionality
|
||||
outputTokens []Token // accepted tokens
|
||||
outputBytes []byte // accepted bytes
|
||||
|
||||
snapshot [9]int // storage for the Snapshot() / RestoreSnapshot() feature
|
||||
reader *read.Buffer // the buffered input reader
|
||||
pointers stackFrame // various pointers for keeping track of input, output, cursor.
|
||||
Input Input // access to a set of general input-related methods
|
||||
Byte InputByteMode // access to a set of byte-based input methods
|
||||
Rune InputRuneMode // access to a set of rune-based input methods
|
||||
Output Output // access to a set of output-related functionality
|
||||
outputTokens []Token // storage for accepted tokens
|
||||
outputBytes []byte // storage for accepted bytes
|
||||
}
|
||||
|
||||
type stackFrame struct {
|
||||
|
@ -96,14 +90,10 @@ type stackFrame struct {
|
|||
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||
|
||||
// TODO
|
||||
err error // can be used by a Handler to report a specific issue with the input
|
||||
}
|
||||
|
||||
const initialStackDepth = 64
|
||||
const initialTokenStoreLength = 64
|
||||
const initialByteStoreLength = 1024
|
||||
const initialByteStoreLength = 128
|
||||
|
||||
// NewAPI initializes a new API struct, wrapped around the provided input.
|
||||
// For an overview of allowed inputs, take a look at the documentation
|
||||
|
@ -111,7 +101,6 @@ const initialByteStoreLength = 1024
|
|||
func NewAPI(input interface{}) *API {
|
||||
reader := read.New(input)
|
||||
tokenAPI := &API{
|
||||
stackFrames: make([]stackFrame, initialStackDepth),
|
||||
outputBytes: make([]byte, initialByteStoreLength),
|
||||
outputTokens: make([]Token, initialTokenStoreLength),
|
||||
reader: reader,
|
||||
|
@ -120,154 +109,15 @@ func NewAPI(input interface{}) *API {
|
|||
tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader}
|
||||
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
|
||||
tokenAPI.Output = Output{api: tokenAPI}
|
||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[0]
|
||||
tokenAPI.snapshot[0] = -1
|
||||
|
||||
return tokenAPI
|
||||
}
|
||||
|
||||
// Fork forks off a child of the API struct. It will reuse the same
|
||||
// read buffer and cursor position, but for the rest this can be considered
|
||||
// a fresh API.
|
||||
//
|
||||
// By forking an API, you can freely work with the forked child, without
|
||||
// affecting the parent API. This is for example useful when you must perform
|
||||
// some form of lookahead.
|
||||
//
|
||||
// When processing of the Handler was successful and you want to add the results
|
||||
// to the parent API, you can call Merge() on the forked child.
|
||||
// This will add the results to the results of the parent (runes, tokens).
|
||||
// It also updates the read cursor position of the parent to that of the child.
|
||||
//
|
||||
// When the lookahead was unsuccessful, then the forked child API can be
|
||||
// disposed by calling Dispose() on the forked child. This is not mandatory.
|
||||
// Garbage collection will take care of this automatically.
|
||||
// The parent API was never modified, so it can safely be used after disposal
|
||||
// as if the lookahead never happened.
|
||||
func (tokenAPI *API) Fork() int {
|
||||
tokenAPI.stackLevel++
|
||||
newStackLevel := tokenAPI.stackLevel
|
||||
|
||||
// Grow the stack frames capacity when needed.
|
||||
frames := tokenAPI.stackFrames
|
||||
if cap(frames) < (newStackLevel + 1) {
|
||||
newFrames := make([]stackFrame, cap(frames)*2)
|
||||
copy(newFrames, frames)
|
||||
tokenAPI.stackFrames = newFrames
|
||||
}
|
||||
|
||||
parent := tokenAPI.stackFrame
|
||||
tokenAPI.stackFrames[newStackLevel] = stackFrame{
|
||||
offset: parent.offset,
|
||||
bytesStart: parent.bytesEnd,
|
||||
bytesEnd: parent.bytesEnd,
|
||||
tokenStart: parent.tokenEnd,
|
||||
tokenEnd: parent.tokenEnd,
|
||||
}
|
||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[newStackLevel]
|
||||
|
||||
return newStackLevel
|
||||
}
|
||||
|
||||
// Merge appends the results of a forked child API (runes, tokens) to the
|
||||
// results of its parent. The read cursor of the parent is also updated
|
||||
// to that of the forked child.
|
||||
//
|
||||
// After the merge operation, the child results are reset so it can immediately
|
||||
// be reused for performing another match. This means that all Result data are
|
||||
// cleared, but the read cursor position is kept at its current position.
|
||||
// This allows a child to feed results in chunks to its parent.
|
||||
//
|
||||
// Once the child is no longer needed, it can be disposed of by using the
|
||||
// method Dispose(), which will return the tokenizer to the parent.
|
||||
func (tokenAPI *API) Merge(stackLevel int) {
|
||||
tokenAPI.checkStackLevelForMethod("Merge", stackLevel)
|
||||
parent := &tokenAPI.stackFrames[stackLevel-1]
|
||||
f := tokenAPI.stackFrame
|
||||
|
||||
// The end of the parent slice aligns with the start of the child slice.
|
||||
// Because of this, to merge the parent slice can simply be expanded
|
||||
// to include the child slice.
|
||||
// parent : |----------|
|
||||
// child: |------|
|
||||
// After merge operation:
|
||||
// parent: |-----------------|
|
||||
// child: |---> continue reading from here
|
||||
parent.bytesEnd = f.bytesEnd
|
||||
f.bytesStart = f.bytesEnd
|
||||
|
||||
// The same logic applies to tokens.
|
||||
parent.tokenEnd = f.tokenEnd
|
||||
f.tokenStart = f.tokenEnd
|
||||
|
||||
// Update the parent read offset.
|
||||
parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
|
||||
parent.offset = f.offset
|
||||
|
||||
// Update the parent cursor position.
|
||||
if f.line > parent.line {
|
||||
parent.line += f.line
|
||||
parent.column = f.column
|
||||
} else {
|
||||
parent.column += f.column
|
||||
}
|
||||
|
||||
f.line = 0
|
||||
f.column = 0
|
||||
f.err = nil
|
||||
}
|
||||
|
||||
func (tokenAPI *API) Dispose(stackLevel int) {
|
||||
tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
|
||||
tokenAPI.stackLevel = stackLevel - 1
|
||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
|
||||
}
|
||||
|
||||
func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
|
||||
if stackLevel == 0 {
|
||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on the top-level API stack level 0")
|
||||
}
|
||||
if stackLevel != tokenAPI.stackLevel {
|
||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on API stack level %d, but the current stack level is %d "+
|
||||
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
|
||||
}
|
||||
}
|
||||
|
||||
type Snapshot [9]int
|
||||
type Snapshot stackFrame
|
||||
|
||||
func (tokenAPI *API) MakeSnapshot() Snapshot {
|
||||
f := tokenAPI.stackFrame
|
||||
|
||||
return Snapshot{
|
||||
tokenAPI.stackLevel,
|
||||
f.bytesStart,
|
||||
f.bytesEnd,
|
||||
f.tokenStart,
|
||||
f.tokenEnd,
|
||||
f.offset,
|
||||
f.offsetLocal,
|
||||
f.line,
|
||||
f.column,
|
||||
}
|
||||
return Snapshot(tokenAPI.pointers)
|
||||
}
|
||||
|
||||
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) {
|
||||
f := tokenAPI.stackFrame
|
||||
|
||||
if snap[0] != tokenAPI.stackLevel {
|
||||
callerPanic("RestoreSnapshot", "tokenize.API.{name}(): {name}() called at {caller} "+
|
||||
"on API stack level %d, but the provided snapshot was created for stack level %d",
|
||||
tokenAPI.stackLevel, snap[0])
|
||||
}
|
||||
|
||||
f.bytesStart = snap[1]
|
||||
f.bytesEnd = snap[2]
|
||||
f.tokenStart = snap[3]
|
||||
f.tokenEnd = snap[4]
|
||||
f.offset = snap[5]
|
||||
f.offsetLocal = snap[6]
|
||||
f.line = snap[7]
|
||||
f.column = snap[8]
|
||||
tokenAPI.pointers = stackFrame(snap)
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ type InputByteMode struct {
|
|||
// When an offset is requested that is beyond the length of the available input
|
||||
// data, then the error will be io.EOF.
|
||||
func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
||||
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
|
||||
return byteMode.reader.ByteAt(byteMode.api.pointers.offset + offset)
|
||||
}
|
||||
|
||||
// PeekMulti returns at max the provided maximum number of bytes at the provided
|
||||
|
@ -22,7 +22,7 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
|||
// error as such. The returned error can in such case be set to io.EOF to indicate
|
||||
// that the end of the input was reached though.
|
||||
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
||||
return byteMode.reader.BytesAt(byteMode.api.stackFrame.offset+offset, count)
|
||||
return byteMode.reader.BytesAt(byteMode.api.pointers.offset+offset, count)
|
||||
}
|
||||
|
||||
func (byteMode InputByteMode) Accept(b byte) {
|
||||
|
@ -53,16 +53,16 @@ func (byteMode InputByteMode) AcceptMulti(bytes ...byte) {
|
|||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first byte at the new cursor position.
|
||||
func (byteMode InputByteMode) MoveCursor(b byte) {
|
||||
f := byteMode.api.stackFrame
|
||||
a := byteMode.api
|
||||
if b == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
a.pointers.column = 0
|
||||
a.pointers.line++
|
||||
} else {
|
||||
f.column++
|
||||
a.pointers.column++
|
||||
}
|
||||
|
||||
f.offset++
|
||||
f.offsetLocal++
|
||||
a.pointers.offset++
|
||||
a.pointers.offsetLocal++
|
||||
}
|
||||
|
||||
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
||||
|
|
|
@ -15,29 +15,10 @@ type Input struct {
|
|||
|
||||
// Cursor returns a string that describes the current read cursor position.
|
||||
func (i Input) Cursor() string {
|
||||
column, line := 0, 0
|
||||
for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
|
||||
if f.line > 0 {
|
||||
column = f.column
|
||||
line += f.line
|
||||
} else {
|
||||
column += f.column
|
||||
}
|
||||
}
|
||||
if line == 0 && column == 0 {
|
||||
if i.api.pointers.line == 0 && i.api.pointers.column == 0 {
|
||||
return fmt.Sprintf("start of file")
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", line+1, column+1)
|
||||
}
|
||||
|
||||
func (i Input) Reset() {
|
||||
f := i.api.stackFrame
|
||||
if f.offsetLocal > 0 {
|
||||
f.column = 0
|
||||
f.line = 0
|
||||
f.offset -= f.offsetLocal
|
||||
f.offsetLocal = 0
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", i.api.pointers.line+1, i.api.pointers.column+1)
|
||||
}
|
||||
|
||||
// Flush flushes input data from the read buffer up to the current
|
||||
|
@ -47,11 +28,11 @@ func (i Input) Reset() {
|
|||
// Parsekit will call this method at points where it knows it is a
|
||||
// safe thing to do.
|
||||
func (i Input) Flush() bool {
|
||||
f := i.api.stackFrame
|
||||
if f.offset > 0 {
|
||||
i.reader.Flush(f.offset)
|
||||
f.offset = 0
|
||||
f.offsetLocal = 0
|
||||
a := i.api
|
||||
if a.pointers.offset > 0 {
|
||||
i.reader.Flush(a.pointers.offset)
|
||||
a.pointers.offset = 0
|
||||
a.pointers.offsetLocal = 0
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
|
@ -11,8 +11,7 @@ type Output struct {
|
|||
|
||||
func (o Output) String() string {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
bytes := a.outputBytes[f.bytesStart:f.bytesEnd]
|
||||
bytes := a.outputBytes[a.pointers.bytesStart:a.pointers.bytesEnd]
|
||||
return string(bytes)
|
||||
}
|
||||
|
||||
|
@ -22,36 +21,35 @@ func (o Output) Runes() []rune {
|
|||
|
||||
func (o Output) Rune(offset int) rune {
|
||||
a := o.api
|
||||
r, _ := utf8.DecodeRune(a.outputBytes[a.stackFrame.bytesStart+offset:])
|
||||
r, _ := utf8.DecodeRune(a.outputBytes[a.pointers.bytesStart+offset:])
|
||||
return r
|
||||
}
|
||||
|
||||
type Split [2]int
|
||||
|
||||
func (o Output) Split() Split {
|
||||
f := o.api.stackFrame
|
||||
split := Split{f.bytesStart, f.tokenStart}
|
||||
f.bytesStart = f.bytesEnd
|
||||
f.tokenStart = f.tokenEnd
|
||||
a := o.api
|
||||
split := Split{a.pointers.bytesStart, a.pointers.tokenStart}
|
||||
a.pointers.bytesStart = a.pointers.bytesEnd
|
||||
a.pointers.tokenStart = a.pointers.tokenEnd
|
||||
return split
|
||||
}
|
||||
|
||||
func (o Output) MergeSplit(split Split) {
|
||||
f := o.api.stackFrame
|
||||
f.bytesStart = split[0]
|
||||
f.tokenStart = split[1]
|
||||
a := o.api
|
||||
a.pointers.bytesStart = split[0]
|
||||
a.pointers.tokenStart = split[1]
|
||||
}
|
||||
|
||||
func (o Output) Reset() {
|
||||
f := o.api.stackFrame
|
||||
f.bytesEnd = f.bytesStart
|
||||
f.tokenEnd = f.tokenStart
|
||||
f.err = nil
|
||||
a := o.api
|
||||
a.pointers.bytesEnd = a.pointers.bytesStart
|
||||
a.pointers.tokenEnd = a.pointers.tokenStart
|
||||
}
|
||||
|
||||
func (o Output) ClearData() {
|
||||
f := o.api.stackFrame
|
||||
f.bytesEnd = f.bytesStart
|
||||
a := o.api
|
||||
a.pointers.bytesEnd = a.pointers.bytesStart
|
||||
}
|
||||
|
||||
func (o Output) SetBytes(bytes ...byte) {
|
||||
|
@ -61,11 +59,10 @@ func (o Output) SetBytes(bytes ...byte) {
|
|||
|
||||
func (o Output) AddByte(b byte) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
curBytesEnd := f.bytesEnd
|
||||
curBytesEnd := a.pointers.bytesEnd
|
||||
a.growOutputData(curBytesEnd + 1)
|
||||
a.outputBytes[curBytesEnd] = b
|
||||
f.bytesEnd++
|
||||
a.pointers.bytesEnd++
|
||||
}
|
||||
|
||||
func (o Output) SetRunes(runes ...rune) {
|
||||
|
@ -75,22 +72,20 @@ func (o Output) SetRunes(runes ...rune) {
|
|||
|
||||
func (o Output) AddBytes(bytes ...byte) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
curBytesEnd := f.bytesEnd
|
||||
curBytesEnd := a.pointers.bytesEnd
|
||||
newBytesEnd := curBytesEnd + len(bytes)
|
||||
a.growOutputData(newBytesEnd)
|
||||
copy(a.outputBytes[curBytesEnd:], bytes)
|
||||
f.bytesEnd = newBytesEnd
|
||||
a.pointers.bytesEnd = newBytesEnd
|
||||
}
|
||||
|
||||
func (o Output) AddRunes(runes ...rune) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
runesAsString := string(runes)
|
||||
newBytesEnd := f.bytesEnd + len(runesAsString)
|
||||
newBytesEnd := a.pointers.bytesEnd + len(runesAsString)
|
||||
a.growOutputData(newBytesEnd)
|
||||
copy(a.outputBytes[f.bytesEnd:], runesAsString)
|
||||
f.bytesEnd = newBytesEnd
|
||||
copy(a.outputBytes[a.pointers.bytesEnd:], runesAsString)
|
||||
a.pointers.bytesEnd = newBytesEnd
|
||||
}
|
||||
|
||||
func (o Output) AddString(s string) {
|
||||
|
@ -104,23 +99,22 @@ func (o Output) SetString(s string) {
|
|||
|
||||
func (o Output) Tokens() []Token {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
return a.outputTokens[f.tokenStart:f.tokenEnd]
|
||||
return a.outputTokens[a.pointers.tokenStart:a.pointers.tokenEnd]
|
||||
}
|
||||
|
||||
func (o Output) Token(offset int) Token {
|
||||
a := o.api
|
||||
return a.outputTokens[a.stackFrame.tokenStart+offset]
|
||||
return a.outputTokens[a.pointers.tokenStart+offset]
|
||||
}
|
||||
|
||||
func (o Output) TokenValue(offset int) interface{} {
|
||||
a := o.api
|
||||
return a.outputTokens[a.stackFrame.tokenStart+offset].Value
|
||||
return a.outputTokens[a.pointers.tokenStart+offset].Value
|
||||
}
|
||||
|
||||
func (o Output) ClearTokens() {
|
||||
f := o.api.stackFrame
|
||||
f.tokenEnd = f.tokenStart
|
||||
a := o.api
|
||||
a.pointers.tokenEnd = a.pointers.tokenStart
|
||||
}
|
||||
|
||||
func (o Output) SetTokens(tokens ...Token) {
|
||||
|
@ -130,18 +124,16 @@ func (o Output) SetTokens(tokens ...Token) {
|
|||
|
||||
func (o Output) AddToken(token Token) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
tokenEnd := f.tokenEnd
|
||||
tokenEnd := a.pointers.tokenEnd
|
||||
a.growOutputTokens(tokenEnd + 1)
|
||||
a.outputTokens[tokenEnd] = token
|
||||
f.tokenEnd++
|
||||
a.pointers.tokenEnd++
|
||||
}
|
||||
|
||||
func (o Output) InsertTokenAtStart(token Token) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
tokenEnd := f.tokenEnd
|
||||
tokenStart := f.tokenStart
|
||||
tokenEnd := a.pointers.tokenEnd
|
||||
tokenStart := a.pointers.tokenStart
|
||||
a.growOutputTokens(tokenEnd + 1)
|
||||
if tokenStart == tokenEnd {
|
||||
a.outputTokens[tokenEnd] = token
|
||||
|
@ -149,16 +141,15 @@ func (o Output) InsertTokenAtStart(token Token) {
|
|||
copy(a.outputTokens[tokenStart+1:], a.outputTokens[tokenStart:tokenEnd])
|
||||
a.outputTokens[tokenStart] = token
|
||||
}
|
||||
f.tokenEnd++
|
||||
a.pointers.tokenEnd++
|
||||
}
|
||||
|
||||
func (o Output) AddTokens(tokens ...Token) {
|
||||
a := o.api
|
||||
f := a.stackFrame
|
||||
a.growOutputTokens(f.tokenEnd + len(tokens))
|
||||
a.growOutputTokens(a.pointers.tokenEnd + len(tokens))
|
||||
for _, t := range tokens {
|
||||
a.outputTokens[f.tokenEnd] = t
|
||||
f.tokenEnd++
|
||||
a.outputTokens[a.pointers.tokenEnd] = t
|
||||
a.pointers.tokenEnd++
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ type InputRuneMode struct {
|
|||
// When an offset is requested that is beyond the length of the available input
|
||||
// data, then the error will be io.EOF.
|
||||
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
||||
return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
|
||||
return runeMode.reader.RuneAt(runeMode.api.pointers.offset + offset)
|
||||
}
|
||||
|
||||
// Accept is used to accept a single rune that was read from the input.
|
||||
|
@ -42,14 +42,11 @@ func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
|||
// the first byte after the accepted rune.
|
||||
func (runeMode InputRuneMode) Accept(r rune) {
|
||||
a := runeMode.api
|
||||
f := a.stackFrame
|
||||
|
||||
curBytesEnd := f.bytesEnd
|
||||
curBytesEnd := a.pointers.bytesEnd
|
||||
maxRequiredBytes := curBytesEnd + utf8.UTFMax
|
||||
a.growOutputData(maxRequiredBytes)
|
||||
w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
|
||||
f.bytesEnd += w
|
||||
|
||||
a.pointers.bytesEnd += w
|
||||
runeMode.MoveCursor(r)
|
||||
}
|
||||
|
||||
|
@ -66,9 +63,7 @@ func (runeMode InputRuneMode) Accept(r rune) {
|
|||
// the first byte after the accepted runes.
|
||||
func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
||||
a := runeMode.api
|
||||
f := a.stackFrame
|
||||
|
||||
curBytesEnd := f.bytesEnd
|
||||
curBytesEnd := a.pointers.bytesEnd
|
||||
maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
|
||||
a.growOutputData(maxBytes)
|
||||
|
||||
|
@ -77,7 +72,7 @@ func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
|||
curBytesEnd += w
|
||||
runeMode.MoveCursor(r)
|
||||
}
|
||||
f.bytesEnd = curBytesEnd
|
||||
a.pointers.bytesEnd = curBytesEnd
|
||||
}
|
||||
|
||||
// MoveCursor updates the position of the read cursor, based on the provided rune.
|
||||
|
@ -87,17 +82,17 @@ func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
|||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||
// the first rune at the new cursor position.
|
||||
func (runeMode InputRuneMode) MoveCursor(r rune) int {
|
||||
f := runeMode.api.stackFrame
|
||||
a := runeMode.api
|
||||
if r == '\n' {
|
||||
f.column = 0
|
||||
f.line++
|
||||
a.pointers.column = 0
|
||||
a.pointers.line++
|
||||
} else {
|
||||
f.column++
|
||||
a.pointers.column++
|
||||
}
|
||||
|
||||
width := utf8.RuneLen(r)
|
||||
f.offset += width
|
||||
f.offsetLocal += width
|
||||
a.pointers.offset += width
|
||||
a.pointers.offsetLocal += width
|
||||
return width
|
||||
}
|
||||
|
||||
|
|
|
@ -707,27 +707,15 @@ func MatchOptional(handler Handler) Handler {
|
|||
// reports successful match.
|
||||
func MatchSeq(handlers ...Handler) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
f := tokenAPI.stackFrame
|
||||
snap := tokenAPI.MakeSnapshot()
|
||||
for _, handler := range handlers {
|
||||
tokenAPI.Output.Split()
|
||||
// Move forward the output pointers, so the handler that we're about
|
||||
// to call will make use of a fresh output buffer.
|
||||
f.bytesStart = f.bytesEnd
|
||||
f.tokenStart = f.tokenEnd
|
||||
|
||||
split := tokenAPI.Output.Split()
|
||||
if !handler(tokenAPI) {
|
||||
tokenAPI.RestoreSnapshot(snap)
|
||||
return false
|
||||
}
|
||||
tokenAPI.Output.MergeSplit(split)
|
||||
}
|
||||
|
||||
// Move back the output pointers to where they were originally. This
|
||||
// stitches together all the pieces of output that were generated by
|
||||
// the individual handlers in the sequence.
|
||||
f.bytesStart = snap[1]
|
||||
f.tokenStart = snap[3]
|
||||
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -842,7 +830,10 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
|||
snap := tokenAPI.MakeSnapshot()
|
||||
for total < min {
|
||||
total++
|
||||
if !handler(tokenAPI) {
|
||||
split := tokenAPI.Output.Split()
|
||||
ok := handler(tokenAPI)
|
||||
tokenAPI.Output.MergeSplit(split)
|
||||
if !ok {
|
||||
tokenAPI.RestoreSnapshot(snap)
|
||||
return false
|
||||
}
|
||||
|
@ -853,7 +844,10 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
|||
//child.Merge()
|
||||
for max < 0 || total < max {
|
||||
total++
|
||||
if !handler(tokenAPI) {
|
||||
split := tokenAPI.Output.Split()
|
||||
ok := handler(tokenAPI)
|
||||
tokenAPI.Output.MergeSplit(split)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -1522,13 +1516,13 @@ func MatchIPv6Net(normalize bool) Handler {
|
|||
// In both cases, it would match the first form.
|
||||
func ModifyDrop(handler Handler) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
runeEnd := tokenAPI.stackFrame.bytesEnd
|
||||
tokenEnd := tokenAPI.stackFrame.tokenEnd
|
||||
runeEnd := tokenAPI.pointers.bytesEnd
|
||||
tokenEnd := tokenAPI.pointers.tokenEnd
|
||||
if handler(tokenAPI) {
|
||||
// We keep offset and cursor updates, but roll back any runes / tokens
|
||||
// that were added by the handler.
|
||||
tokenAPI.stackFrame.bytesEnd = runeEnd
|
||||
tokenAPI.stackFrame.tokenEnd = tokenEnd
|
||||
tokenAPI.pointers.bytesEnd = runeEnd
|
||||
tokenAPI.pointers.tokenEnd = tokenEnd
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -1921,8 +1915,6 @@ func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) H
|
|||
// its input and must return the token value.
|
||||
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
snap := tokenAPI.MakeSnapshot()
|
||||
split := tokenAPI.Output.Split()
|
||||
if handler(tokenAPI) {
|
||||
// When a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
||||
// tokens must end up in the order "date", "year", "month", "day" and not
|
||||
|
@ -1931,11 +1923,9 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(to
|
|||
// that were already created by the handler call.
|
||||
token := Token{Type: toktype, Value: makeValue(tokenAPI)}
|
||||
tokenAPI.Output.InsertTokenAtStart(token)
|
||||
tokenAPI.Output.MergeSplit(split)
|
||||
return true
|
||||
}
|
||||
|
||||
tokenAPI.RestoreSnapshot(snap)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue