Getting rid of forking, the new system delivers more performance.
This commit is contained in:
parent
87cdadae78
commit
4c94374107
|
@ -93,7 +93,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
||||||
// To minimize memory use, it is also possible to flush the read buffer when there is
|
// To minimize memory use, it is also possible to flush the read buffer when there is
|
||||||
// no more need to go back to previously read data.
|
// no more need to go back to previously read data.
|
||||||
//
|
//
|
||||||
// This parserkit.reader.Reader is used internally by tokenize.API.
|
// This buffer is used internally by tokenize.API.
|
||||||
type Buffer struct {
|
type Buffer struct {
|
||||||
bufio *bufio.Reader // used for ReadRune()
|
bufio *bufio.Reader // used for ReadRune()
|
||||||
buffer []byte // input buffer, holding bytes that were read from input
|
buffer []byte // input buffer, holding bytes that were read from input
|
||||||
|
|
174
tokenize/api.go
174
tokenize/api.go
|
@ -71,20 +71,14 @@ import (
|
||||||
// can lead to hard to track bugs. I much prefer this forking method, since
|
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||||
// no bookkeeping has to be implemented when implementing a parser.
|
// no bookkeeping has to be implemented when implementing a parser.
|
||||||
type API struct {
|
type API struct {
|
||||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
reader *read.Buffer // the buffered input reader
|
||||||
stackLevel int // the current stack level
|
pointers stackFrame // various pointers for keeping track of input, output, cursor.
|
||||||
stackFrame *stackFrame // the current stack frame
|
Input Input // access to a set of general input-related methods
|
||||||
|
Byte InputByteMode // access to a set of byte-based input methods
|
||||||
reader *read.Buffer // the buffered input reader
|
Rune InputRuneMode // access to a set of rune-based input methods
|
||||||
Input Input // provides input-related functionality
|
Output Output // access to a set of output-related functionality
|
||||||
Byte InputByteMode // access to a set of byte-based input methods
|
outputTokens []Token // storage for accepted tokens
|
||||||
Rune InputRuneMode // access to a set of rune-based input methods
|
outputBytes []byte // storage for accepted bytes
|
||||||
|
|
||||||
Output Output // provides output-related functionality
|
|
||||||
outputTokens []Token // accepted tokens
|
|
||||||
outputBytes []byte // accepted bytes
|
|
||||||
|
|
||||||
snapshot [9]int // storage for the Snapshot() / RestoreSnapshot() feature
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type stackFrame struct {
|
type stackFrame struct {
|
||||||
|
@ -96,14 +90,10 @@ type stackFrame struct {
|
||||||
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||||
|
|
||||||
// TODO
|
|
||||||
err error // can be used by a Handler to report a specific issue with the input
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const initialStackDepth = 64
|
|
||||||
const initialTokenStoreLength = 64
|
const initialTokenStoreLength = 64
|
||||||
const initialByteStoreLength = 1024
|
const initialByteStoreLength = 128
|
||||||
|
|
||||||
// NewAPI initializes a new API struct, wrapped around the provided input.
|
// NewAPI initializes a new API struct, wrapped around the provided input.
|
||||||
// For an overview of allowed inputs, take a look at the documentation
|
// For an overview of allowed inputs, take a look at the documentation
|
||||||
|
@ -111,7 +101,6 @@ const initialByteStoreLength = 1024
|
||||||
func NewAPI(input interface{}) *API {
|
func NewAPI(input interface{}) *API {
|
||||||
reader := read.New(input)
|
reader := read.New(input)
|
||||||
tokenAPI := &API{
|
tokenAPI := &API{
|
||||||
stackFrames: make([]stackFrame, initialStackDepth),
|
|
||||||
outputBytes: make([]byte, initialByteStoreLength),
|
outputBytes: make([]byte, initialByteStoreLength),
|
||||||
outputTokens: make([]Token, initialTokenStoreLength),
|
outputTokens: make([]Token, initialTokenStoreLength),
|
||||||
reader: reader,
|
reader: reader,
|
||||||
|
@ -120,154 +109,15 @@ func NewAPI(input interface{}) *API {
|
||||||
tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader}
|
tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader}
|
||||||
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
|
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
|
||||||
tokenAPI.Output = Output{api: tokenAPI}
|
tokenAPI.Output = Output{api: tokenAPI}
|
||||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[0]
|
|
||||||
tokenAPI.snapshot[0] = -1
|
|
||||||
|
|
||||||
return tokenAPI
|
return tokenAPI
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fork forks off a child of the API struct. It will reuse the same
|
type Snapshot stackFrame
|
||||||
// read buffer and cursor position, but for the rest this can be considered
|
|
||||||
// a fresh API.
|
|
||||||
//
|
|
||||||
// By forking an API, you can freely work with the forked child, without
|
|
||||||
// affecting the parent API. This is for example useful when you must perform
|
|
||||||
// some form of lookahead.
|
|
||||||
//
|
|
||||||
// When processing of the Handler was successful and you want to add the results
|
|
||||||
// to the parent API, you can call Merge() on the forked child.
|
|
||||||
// This will add the results to the results of the parent (runes, tokens).
|
|
||||||
// It also updates the read cursor position of the parent to that of the child.
|
|
||||||
//
|
|
||||||
// When the lookahead was unsuccessful, then the forked child API can be
|
|
||||||
// disposed by calling Dispose() on the forked child. This is not mandatory.
|
|
||||||
// Garbage collection will take care of this automatically.
|
|
||||||
// The parent API was never modified, so it can safely be used after disposal
|
|
||||||
// as if the lookahead never happened.
|
|
||||||
func (tokenAPI *API) Fork() int {
|
|
||||||
tokenAPI.stackLevel++
|
|
||||||
newStackLevel := tokenAPI.stackLevel
|
|
||||||
|
|
||||||
// Grow the stack frames capacity when needed.
|
|
||||||
frames := tokenAPI.stackFrames
|
|
||||||
if cap(frames) < (newStackLevel + 1) {
|
|
||||||
newFrames := make([]stackFrame, cap(frames)*2)
|
|
||||||
copy(newFrames, frames)
|
|
||||||
tokenAPI.stackFrames = newFrames
|
|
||||||
}
|
|
||||||
|
|
||||||
parent := tokenAPI.stackFrame
|
|
||||||
tokenAPI.stackFrames[newStackLevel] = stackFrame{
|
|
||||||
offset: parent.offset,
|
|
||||||
bytesStart: parent.bytesEnd,
|
|
||||||
bytesEnd: parent.bytesEnd,
|
|
||||||
tokenStart: parent.tokenEnd,
|
|
||||||
tokenEnd: parent.tokenEnd,
|
|
||||||
}
|
|
||||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[newStackLevel]
|
|
||||||
|
|
||||||
return newStackLevel
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge appends the results of a forked child API (runes, tokens) to the
|
|
||||||
// results of its parent. The read cursor of the parent is also updated
|
|
||||||
// to that of the forked child.
|
|
||||||
//
|
|
||||||
// After the merge operation, the child results are reset so it can immediately
|
|
||||||
// be reused for performing another match. This means that all Result data are
|
|
||||||
// cleared, but the read cursor position is kept at its current position.
|
|
||||||
// This allows a child to feed results in chunks to its parent.
|
|
||||||
//
|
|
||||||
// Once the child is no longer needed, it can be disposed of by using the
|
|
||||||
// method Dispose(), which will return the tokenizer to the parent.
|
|
||||||
func (tokenAPI *API) Merge(stackLevel int) {
|
|
||||||
tokenAPI.checkStackLevelForMethod("Merge", stackLevel)
|
|
||||||
parent := &tokenAPI.stackFrames[stackLevel-1]
|
|
||||||
f := tokenAPI.stackFrame
|
|
||||||
|
|
||||||
// The end of the parent slice aligns with the start of the child slice.
|
|
||||||
// Because of this, to merge the parent slice can simply be expanded
|
|
||||||
// to include the child slice.
|
|
||||||
// parent : |----------|
|
|
||||||
// child: |------|
|
|
||||||
// After merge operation:
|
|
||||||
// parent: |-----------------|
|
|
||||||
// child: |---> continue reading from here
|
|
||||||
parent.bytesEnd = f.bytesEnd
|
|
||||||
f.bytesStart = f.bytesEnd
|
|
||||||
|
|
||||||
// The same logic applies to tokens.
|
|
||||||
parent.tokenEnd = f.tokenEnd
|
|
||||||
f.tokenStart = f.tokenEnd
|
|
||||||
|
|
||||||
// Update the parent read offset.
|
|
||||||
parent.offsetLocal = parent.offsetLocal + (f.offset - parent.offset)
|
|
||||||
parent.offset = f.offset
|
|
||||||
|
|
||||||
// Update the parent cursor position.
|
|
||||||
if f.line > parent.line {
|
|
||||||
parent.line += f.line
|
|
||||||
parent.column = f.column
|
|
||||||
} else {
|
|
||||||
parent.column += f.column
|
|
||||||
}
|
|
||||||
|
|
||||||
f.line = 0
|
|
||||||
f.column = 0
|
|
||||||
f.err = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tokenAPI *API) Dispose(stackLevel int) {
|
|
||||||
tokenAPI.checkStackLevelForMethod("Dispose", stackLevel)
|
|
||||||
tokenAPI.stackLevel = stackLevel - 1
|
|
||||||
tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tokenAPI *API) checkStackLevelForMethod(name string, stackLevel int) {
|
|
||||||
if stackLevel == 0 {
|
|
||||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
|
||||||
"on the top-level API stack level 0")
|
|
||||||
}
|
|
||||||
if stackLevel != tokenAPI.stackLevel {
|
|
||||||
callerPanic(name, "tokenize.API.{name}(): {name}() called at {caller} "+
|
|
||||||
"on API stack level %d, but the current stack level is %d "+
|
|
||||||
"(forgot to Dispose() a forked child?)", stackLevel, tokenAPI.stackLevel)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type Snapshot [9]int
|
|
||||||
|
|
||||||
func (tokenAPI *API) MakeSnapshot() Snapshot {
|
func (tokenAPI *API) MakeSnapshot() Snapshot {
|
||||||
f := tokenAPI.stackFrame
|
return Snapshot(tokenAPI.pointers)
|
||||||
|
|
||||||
return Snapshot{
|
|
||||||
tokenAPI.stackLevel,
|
|
||||||
f.bytesStart,
|
|
||||||
f.bytesEnd,
|
|
||||||
f.tokenStart,
|
|
||||||
f.tokenEnd,
|
|
||||||
f.offset,
|
|
||||||
f.offsetLocal,
|
|
||||||
f.line,
|
|
||||||
f.column,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) {
|
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) {
|
||||||
f := tokenAPI.stackFrame
|
tokenAPI.pointers = stackFrame(snap)
|
||||||
|
|
||||||
if snap[0] != tokenAPI.stackLevel {
|
|
||||||
callerPanic("RestoreSnapshot", "tokenize.API.{name}(): {name}() called at {caller} "+
|
|
||||||
"on API stack level %d, but the provided snapshot was created for stack level %d",
|
|
||||||
tokenAPI.stackLevel, snap[0])
|
|
||||||
}
|
|
||||||
|
|
||||||
f.bytesStart = snap[1]
|
|
||||||
f.bytesEnd = snap[2]
|
|
||||||
f.tokenStart = snap[3]
|
|
||||||
f.tokenEnd = snap[4]
|
|
||||||
f.offset = snap[5]
|
|
||||||
f.offsetLocal = snap[6]
|
|
||||||
f.line = snap[7]
|
|
||||||
f.column = snap[8]
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ type InputByteMode struct {
|
||||||
// When an offset is requested that is beyond the length of the available input
|
// When an offset is requested that is beyond the length of the available input
|
||||||
// data, then the error will be io.EOF.
|
// data, then the error will be io.EOF.
|
||||||
func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
||||||
return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
|
return byteMode.reader.ByteAt(byteMode.api.pointers.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
// PeekMulti returns at max the provided maximum number of bytes at the provided
|
// PeekMulti returns at max the provided maximum number of bytes at the provided
|
||||||
|
@ -22,7 +22,7 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
||||||
// error as such. The returned error can in such case be set to io.EOF to indicate
|
// error as such. The returned error can in such case be set to io.EOF to indicate
|
||||||
// that the end of the input was reached though.
|
// that the end of the input was reached though.
|
||||||
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
||||||
return byteMode.reader.BytesAt(byteMode.api.stackFrame.offset+offset, count)
|
return byteMode.reader.BytesAt(byteMode.api.pointers.offset+offset, count)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (byteMode InputByteMode) Accept(b byte) {
|
func (byteMode InputByteMode) Accept(b byte) {
|
||||||
|
@ -53,16 +53,16 @@ func (byteMode InputByteMode) AcceptMulti(bytes ...byte) {
|
||||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||||
// the first byte at the new cursor position.
|
// the first byte at the new cursor position.
|
||||||
func (byteMode InputByteMode) MoveCursor(b byte) {
|
func (byteMode InputByteMode) MoveCursor(b byte) {
|
||||||
f := byteMode.api.stackFrame
|
a := byteMode.api
|
||||||
if b == '\n' {
|
if b == '\n' {
|
||||||
f.column = 0
|
a.pointers.column = 0
|
||||||
f.line++
|
a.pointers.line++
|
||||||
} else {
|
} else {
|
||||||
f.column++
|
a.pointers.column++
|
||||||
}
|
}
|
||||||
|
|
||||||
f.offset++
|
a.pointers.offset++
|
||||||
f.offsetLocal++
|
a.pointers.offsetLocal++
|
||||||
}
|
}
|
||||||
|
|
||||||
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
||||||
|
|
|
@ -15,29 +15,10 @@ type Input struct {
|
||||||
|
|
||||||
// Cursor returns a string that describes the current read cursor position.
|
// Cursor returns a string that describes the current read cursor position.
|
||||||
func (i Input) Cursor() string {
|
func (i Input) Cursor() string {
|
||||||
column, line := 0, 0
|
if i.api.pointers.line == 0 && i.api.pointers.column == 0 {
|
||||||
for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
|
|
||||||
if f.line > 0 {
|
|
||||||
column = f.column
|
|
||||||
line += f.line
|
|
||||||
} else {
|
|
||||||
column += f.column
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if line == 0 && column == 0 {
|
|
||||||
return fmt.Sprintf("start of file")
|
return fmt.Sprintf("start of file")
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("line %d, column %d", line+1, column+1)
|
return fmt.Sprintf("line %d, column %d", i.api.pointers.line+1, i.api.pointers.column+1)
|
||||||
}
|
|
||||||
|
|
||||||
func (i Input) Reset() {
|
|
||||||
f := i.api.stackFrame
|
|
||||||
if f.offsetLocal > 0 {
|
|
||||||
f.column = 0
|
|
||||||
f.line = 0
|
|
||||||
f.offset -= f.offsetLocal
|
|
||||||
f.offsetLocal = 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush flushes input data from the read buffer up to the current
|
// Flush flushes input data from the read buffer up to the current
|
||||||
|
@ -47,11 +28,11 @@ func (i Input) Reset() {
|
||||||
// Parsekit will call this method at points where it knows it is a
|
// Parsekit will call this method at points where it knows it is a
|
||||||
// safe thing to do.
|
// safe thing to do.
|
||||||
func (i Input) Flush() bool {
|
func (i Input) Flush() bool {
|
||||||
f := i.api.stackFrame
|
a := i.api
|
||||||
if f.offset > 0 {
|
if a.pointers.offset > 0 {
|
||||||
i.reader.Flush(f.offset)
|
i.reader.Flush(a.pointers.offset)
|
||||||
f.offset = 0
|
a.pointers.offset = 0
|
||||||
f.offsetLocal = 0
|
a.pointers.offsetLocal = 0
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
|
@ -11,8 +11,7 @@ type Output struct {
|
||||||
|
|
||||||
func (o Output) String() string {
|
func (o Output) String() string {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
bytes := a.outputBytes[a.pointers.bytesStart:a.pointers.bytesEnd]
|
||||||
bytes := a.outputBytes[f.bytesStart:f.bytesEnd]
|
|
||||||
return string(bytes)
|
return string(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,36 +21,35 @@ func (o Output) Runes() []rune {
|
||||||
|
|
||||||
func (o Output) Rune(offset int) rune {
|
func (o Output) Rune(offset int) rune {
|
||||||
a := o.api
|
a := o.api
|
||||||
r, _ := utf8.DecodeRune(a.outputBytes[a.stackFrame.bytesStart+offset:])
|
r, _ := utf8.DecodeRune(a.outputBytes[a.pointers.bytesStart+offset:])
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
type Split [2]int
|
type Split [2]int
|
||||||
|
|
||||||
func (o Output) Split() Split {
|
func (o Output) Split() Split {
|
||||||
f := o.api.stackFrame
|
a := o.api
|
||||||
split := Split{f.bytesStart, f.tokenStart}
|
split := Split{a.pointers.bytesStart, a.pointers.tokenStart}
|
||||||
f.bytesStart = f.bytesEnd
|
a.pointers.bytesStart = a.pointers.bytesEnd
|
||||||
f.tokenStart = f.tokenEnd
|
a.pointers.tokenStart = a.pointers.tokenEnd
|
||||||
return split
|
return split
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) MergeSplit(split Split) {
|
func (o Output) MergeSplit(split Split) {
|
||||||
f := o.api.stackFrame
|
a := o.api
|
||||||
f.bytesStart = split[0]
|
a.pointers.bytesStart = split[0]
|
||||||
f.tokenStart = split[1]
|
a.pointers.tokenStart = split[1]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) Reset() {
|
func (o Output) Reset() {
|
||||||
f := o.api.stackFrame
|
a := o.api
|
||||||
f.bytesEnd = f.bytesStart
|
a.pointers.bytesEnd = a.pointers.bytesStart
|
||||||
f.tokenEnd = f.tokenStart
|
a.pointers.tokenEnd = a.pointers.tokenStart
|
||||||
f.err = nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) ClearData() {
|
func (o Output) ClearData() {
|
||||||
f := o.api.stackFrame
|
a := o.api
|
||||||
f.bytesEnd = f.bytesStart
|
a.pointers.bytesEnd = a.pointers.bytesStart
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) SetBytes(bytes ...byte) {
|
func (o Output) SetBytes(bytes ...byte) {
|
||||||
|
@ -61,11 +59,10 @@ func (o Output) SetBytes(bytes ...byte) {
|
||||||
|
|
||||||
func (o Output) AddByte(b byte) {
|
func (o Output) AddByte(b byte) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
curBytesEnd := a.pointers.bytesEnd
|
||||||
curBytesEnd := f.bytesEnd
|
|
||||||
a.growOutputData(curBytesEnd + 1)
|
a.growOutputData(curBytesEnd + 1)
|
||||||
a.outputBytes[curBytesEnd] = b
|
a.outputBytes[curBytesEnd] = b
|
||||||
f.bytesEnd++
|
a.pointers.bytesEnd++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) SetRunes(runes ...rune) {
|
func (o Output) SetRunes(runes ...rune) {
|
||||||
|
@ -75,22 +72,20 @@ func (o Output) SetRunes(runes ...rune) {
|
||||||
|
|
||||||
func (o Output) AddBytes(bytes ...byte) {
|
func (o Output) AddBytes(bytes ...byte) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
curBytesEnd := a.pointers.bytesEnd
|
||||||
curBytesEnd := f.bytesEnd
|
|
||||||
newBytesEnd := curBytesEnd + len(bytes)
|
newBytesEnd := curBytesEnd + len(bytes)
|
||||||
a.growOutputData(newBytesEnd)
|
a.growOutputData(newBytesEnd)
|
||||||
copy(a.outputBytes[curBytesEnd:], bytes)
|
copy(a.outputBytes[curBytesEnd:], bytes)
|
||||||
f.bytesEnd = newBytesEnd
|
a.pointers.bytesEnd = newBytesEnd
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) AddRunes(runes ...rune) {
|
func (o Output) AddRunes(runes ...rune) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
|
||||||
runesAsString := string(runes)
|
runesAsString := string(runes)
|
||||||
newBytesEnd := f.bytesEnd + len(runesAsString)
|
newBytesEnd := a.pointers.bytesEnd + len(runesAsString)
|
||||||
a.growOutputData(newBytesEnd)
|
a.growOutputData(newBytesEnd)
|
||||||
copy(a.outputBytes[f.bytesEnd:], runesAsString)
|
copy(a.outputBytes[a.pointers.bytesEnd:], runesAsString)
|
||||||
f.bytesEnd = newBytesEnd
|
a.pointers.bytesEnd = newBytesEnd
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) AddString(s string) {
|
func (o Output) AddString(s string) {
|
||||||
|
@ -104,23 +99,22 @@ func (o Output) SetString(s string) {
|
||||||
|
|
||||||
func (o Output) Tokens() []Token {
|
func (o Output) Tokens() []Token {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
return a.outputTokens[a.pointers.tokenStart:a.pointers.tokenEnd]
|
||||||
return a.outputTokens[f.tokenStart:f.tokenEnd]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) Token(offset int) Token {
|
func (o Output) Token(offset int) Token {
|
||||||
a := o.api
|
a := o.api
|
||||||
return a.outputTokens[a.stackFrame.tokenStart+offset]
|
return a.outputTokens[a.pointers.tokenStart+offset]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) TokenValue(offset int) interface{} {
|
func (o Output) TokenValue(offset int) interface{} {
|
||||||
a := o.api
|
a := o.api
|
||||||
return a.outputTokens[a.stackFrame.tokenStart+offset].Value
|
return a.outputTokens[a.pointers.tokenStart+offset].Value
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) ClearTokens() {
|
func (o Output) ClearTokens() {
|
||||||
f := o.api.stackFrame
|
a := o.api
|
||||||
f.tokenEnd = f.tokenStart
|
a.pointers.tokenEnd = a.pointers.tokenStart
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) SetTokens(tokens ...Token) {
|
func (o Output) SetTokens(tokens ...Token) {
|
||||||
|
@ -130,18 +124,16 @@ func (o Output) SetTokens(tokens ...Token) {
|
||||||
|
|
||||||
func (o Output) AddToken(token Token) {
|
func (o Output) AddToken(token Token) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
tokenEnd := a.pointers.tokenEnd
|
||||||
tokenEnd := f.tokenEnd
|
|
||||||
a.growOutputTokens(tokenEnd + 1)
|
a.growOutputTokens(tokenEnd + 1)
|
||||||
a.outputTokens[tokenEnd] = token
|
a.outputTokens[tokenEnd] = token
|
||||||
f.tokenEnd++
|
a.pointers.tokenEnd++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) InsertTokenAtStart(token Token) {
|
func (o Output) InsertTokenAtStart(token Token) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
tokenEnd := a.pointers.tokenEnd
|
||||||
tokenEnd := f.tokenEnd
|
tokenStart := a.pointers.tokenStart
|
||||||
tokenStart := f.tokenStart
|
|
||||||
a.growOutputTokens(tokenEnd + 1)
|
a.growOutputTokens(tokenEnd + 1)
|
||||||
if tokenStart == tokenEnd {
|
if tokenStart == tokenEnd {
|
||||||
a.outputTokens[tokenEnd] = token
|
a.outputTokens[tokenEnd] = token
|
||||||
|
@ -149,16 +141,15 @@ func (o Output) InsertTokenAtStart(token Token) {
|
||||||
copy(a.outputTokens[tokenStart+1:], a.outputTokens[tokenStart:tokenEnd])
|
copy(a.outputTokens[tokenStart+1:], a.outputTokens[tokenStart:tokenEnd])
|
||||||
a.outputTokens[tokenStart] = token
|
a.outputTokens[tokenStart] = token
|
||||||
}
|
}
|
||||||
f.tokenEnd++
|
a.pointers.tokenEnd++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o Output) AddTokens(tokens ...Token) {
|
func (o Output) AddTokens(tokens ...Token) {
|
||||||
a := o.api
|
a := o.api
|
||||||
f := a.stackFrame
|
a.growOutputTokens(a.pointers.tokenEnd + len(tokens))
|
||||||
a.growOutputTokens(f.tokenEnd + len(tokens))
|
|
||||||
for _, t := range tokens {
|
for _, t := range tokens {
|
||||||
a.outputTokens[f.tokenEnd] = t
|
a.outputTokens[a.pointers.tokenEnd] = t
|
||||||
f.tokenEnd++
|
a.pointers.tokenEnd++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ type InputRuneMode struct {
|
||||||
// When an offset is requested that is beyond the length of the available input
|
// When an offset is requested that is beyond the length of the available input
|
||||||
// data, then the error will be io.EOF.
|
// data, then the error will be io.EOF.
|
||||||
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
||||||
return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
|
return runeMode.reader.RuneAt(runeMode.api.pointers.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accept is used to accept a single rune that was read from the input.
|
// Accept is used to accept a single rune that was read from the input.
|
||||||
|
@ -42,14 +42,11 @@ func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
||||||
// the first byte after the accepted rune.
|
// the first byte after the accepted rune.
|
||||||
func (runeMode InputRuneMode) Accept(r rune) {
|
func (runeMode InputRuneMode) Accept(r rune) {
|
||||||
a := runeMode.api
|
a := runeMode.api
|
||||||
f := a.stackFrame
|
curBytesEnd := a.pointers.bytesEnd
|
||||||
|
|
||||||
curBytesEnd := f.bytesEnd
|
|
||||||
maxRequiredBytes := curBytesEnd + utf8.UTFMax
|
maxRequiredBytes := curBytesEnd + utf8.UTFMax
|
||||||
a.growOutputData(maxRequiredBytes)
|
a.growOutputData(maxRequiredBytes)
|
||||||
w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
|
w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
|
||||||
f.bytesEnd += w
|
a.pointers.bytesEnd += w
|
||||||
|
|
||||||
runeMode.MoveCursor(r)
|
runeMode.MoveCursor(r)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,9 +63,7 @@ func (runeMode InputRuneMode) Accept(r rune) {
|
||||||
// the first byte after the accepted runes.
|
// the first byte after the accepted runes.
|
||||||
func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
||||||
a := runeMode.api
|
a := runeMode.api
|
||||||
f := a.stackFrame
|
curBytesEnd := a.pointers.bytesEnd
|
||||||
|
|
||||||
curBytesEnd := f.bytesEnd
|
|
||||||
maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
|
maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
|
||||||
a.growOutputData(maxBytes)
|
a.growOutputData(maxBytes)
|
||||||
|
|
||||||
|
@ -77,7 +72,7 @@ func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
||||||
curBytesEnd += w
|
curBytesEnd += w
|
||||||
runeMode.MoveCursor(r)
|
runeMode.MoveCursor(r)
|
||||||
}
|
}
|
||||||
f.bytesEnd = curBytesEnd
|
a.pointers.bytesEnd = curBytesEnd
|
||||||
}
|
}
|
||||||
|
|
||||||
// MoveCursor updates the position of the read cursor, based on the provided rune.
|
// MoveCursor updates the position of the read cursor, based on the provided rune.
|
||||||
|
@ -87,17 +82,17 @@ func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
|
||||||
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
// After the call, byte offset 0 for Peek() and PeekMulti() will point at
|
||||||
// the first rune at the new cursor position.
|
// the first rune at the new cursor position.
|
||||||
func (runeMode InputRuneMode) MoveCursor(r rune) int {
|
func (runeMode InputRuneMode) MoveCursor(r rune) int {
|
||||||
f := runeMode.api.stackFrame
|
a := runeMode.api
|
||||||
if r == '\n' {
|
if r == '\n' {
|
||||||
f.column = 0
|
a.pointers.column = 0
|
||||||
f.line++
|
a.pointers.line++
|
||||||
} else {
|
} else {
|
||||||
f.column++
|
a.pointers.column++
|
||||||
}
|
}
|
||||||
|
|
||||||
width := utf8.RuneLen(r)
|
width := utf8.RuneLen(r)
|
||||||
f.offset += width
|
a.pointers.offset += width
|
||||||
f.offsetLocal += width
|
a.pointers.offsetLocal += width
|
||||||
return width
|
return width
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -707,27 +707,15 @@ func MatchOptional(handler Handler) Handler {
|
||||||
// reports successful match.
|
// reports successful match.
|
||||||
func MatchSeq(handlers ...Handler) Handler {
|
func MatchSeq(handlers ...Handler) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
f := tokenAPI.stackFrame
|
|
||||||
snap := tokenAPI.MakeSnapshot()
|
snap := tokenAPI.MakeSnapshot()
|
||||||
for _, handler := range handlers {
|
for _, handler := range handlers {
|
||||||
tokenAPI.Output.Split()
|
split := tokenAPI.Output.Split()
|
||||||
// Move forward the output pointers, so the handler that we're about
|
|
||||||
// to call will make use of a fresh output buffer.
|
|
||||||
f.bytesStart = f.bytesEnd
|
|
||||||
f.tokenStart = f.tokenEnd
|
|
||||||
|
|
||||||
if !handler(tokenAPI) {
|
if !handler(tokenAPI) {
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
tokenAPI.RestoreSnapshot(snap)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
tokenAPI.Output.MergeSplit(split)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move back the output pointers to where they were originally. This
|
|
||||||
// stitches together all the pieces of output that were generated by
|
|
||||||
// the individual handlers in the sequence.
|
|
||||||
f.bytesStart = snap[1]
|
|
||||||
f.tokenStart = snap[3]
|
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -842,7 +830,10 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
||||||
snap := tokenAPI.MakeSnapshot()
|
snap := tokenAPI.MakeSnapshot()
|
||||||
for total < min {
|
for total < min {
|
||||||
total++
|
total++
|
||||||
if !handler(tokenAPI) {
|
split := tokenAPI.Output.Split()
|
||||||
|
ok := handler(tokenAPI)
|
||||||
|
tokenAPI.Output.MergeSplit(split)
|
||||||
|
if !ok {
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
tokenAPI.RestoreSnapshot(snap)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -853,7 +844,10 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
||||||
//child.Merge()
|
//child.Merge()
|
||||||
for max < 0 || total < max {
|
for max < 0 || total < max {
|
||||||
total++
|
total++
|
||||||
if !handler(tokenAPI) {
|
split := tokenAPI.Output.Split()
|
||||||
|
ok := handler(tokenAPI)
|
||||||
|
tokenAPI.Output.MergeSplit(split)
|
||||||
|
if !ok {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1522,13 +1516,13 @@ func MatchIPv6Net(normalize bool) Handler {
|
||||||
// In both cases, it would match the first form.
|
// In both cases, it would match the first form.
|
||||||
func ModifyDrop(handler Handler) Handler {
|
func ModifyDrop(handler Handler) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
runeEnd := tokenAPI.stackFrame.bytesEnd
|
runeEnd := tokenAPI.pointers.bytesEnd
|
||||||
tokenEnd := tokenAPI.stackFrame.tokenEnd
|
tokenEnd := tokenAPI.pointers.tokenEnd
|
||||||
if handler(tokenAPI) {
|
if handler(tokenAPI) {
|
||||||
// We keep offset and cursor updates, but rollback any runes / tokens
|
// We keep offset and cursor updates, but rollback any runes / tokens
|
||||||
// that were added by the handler.
|
// that were added by the handler.
|
||||||
tokenAPI.stackFrame.bytesEnd = runeEnd
|
tokenAPI.pointers.bytesEnd = runeEnd
|
||||||
tokenAPI.stackFrame.tokenEnd = tokenEnd
|
tokenAPI.pointers.tokenEnd = tokenEnd
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -1921,8 +1915,6 @@ func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) H
|
||||||
// its input and must return the token value.
|
// its input and must return the token value.
|
||||||
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler {
|
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
snap := tokenAPI.MakeSnapshot()
|
|
||||||
split := tokenAPI.Output.Split()
|
|
||||||
if handler(tokenAPI) {
|
if handler(tokenAPI) {
|
||||||
// When a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
// When a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
||||||
// tokens must end up in the order "date", "year", "month", "day" and not
|
// tokens must end up in the order "date", "year", "month", "day" and not
|
||||||
|
@ -1931,11 +1923,9 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(to
|
||||||
// that were already created by the handler call.
|
// that were already created by the handler call.
|
||||||
token := Token{Type: toktype, Value: makeValue(tokenAPI)}
|
token := Token{Type: toktype, Value: makeValue(tokenAPI)}
|
||||||
tokenAPI.Output.InsertTokenAtStart(token)
|
tokenAPI.Output.InsertTokenAtStart(token)
|
||||||
tokenAPI.Output.MergeSplit(split)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue