211 lines
7.4 KiB
Go
211 lines
7.4 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
)
|
|
|
|
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and
|
|
// to report back results. For easy lookahead support, a forking strategy is
|
|
// provided.
|
|
//
|
|
// BASIC OPERATION:
|
|
//
|
|
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
|
|
//
|
|
// When the rune is to be accepted as input, call the method Accept(). The rune
|
|
// is then added to the results of the TokenAPI and the read cursor is moved
|
|
// forward. Runes collected this way can later on be retrieved using for
|
|
// example the method Result().Runes().
|
|
//
|
|
// It is mandatory to call Accept() after retrieving a rune, before calling
|
|
// NextRune() again. Failing to do so will result in a panic.
|
|
//
|
|
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
|
// with as many runes as needed.
|
|
//
|
|
// Next to adding runes to the output, it is also possible to modify the
|
|
// already collected runes or to produce lexical Tokens. For all things
|
|
// concerning results, take a look at the Result struct, which can be
|
|
// accessed though the method Result().
|
|
//
|
|
// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT:
|
|
//
|
|
// Sometimes, we must be able to perform a lookahead, which might either
|
|
// succeed or fail. In case of a failing lookahead, the state of the
|
|
// TokenAPI must be brought back to the original state, so we can try
|
|
// a different route.
|
|
//
|
|
// The way in which this is supported, is by forking a TokenAPI struct by
|
|
// calling method Fork(). This will return a forked child TokenAPI, with
|
|
// an empty result buffer, but using the same read cursor position as the
|
|
// forked parent.
|
|
//
|
|
// After forking, the same interface as described for BASIC OPERATION can be
|
|
// used to fill the result buffer. When the lookahead was successful, then
|
|
// Merge() can be called on the forked child to append the child's result
|
|
// buffer to the parent's result buffer, and to move the read cursor position
|
|
// to that of the child.
|
|
//
|
|
// When the lookahead was unsuccessful, then the forked child TokenAPI can
|
|
// simply be discarded. The parent TokenAPI was never modified, so it can
|
|
// safely be used as if the lookahead never happened.
|
|
//
|
|
// Note:
|
|
// Many tokenizers/parsers take a different approach on lookaheads by using
|
|
// peeks and by moving the read cursor position back and forth, or by putting
|
|
// read input back on the input stream. That often leads to code that is
|
|
// efficient, however, in my opinion, not very intuitive to read.
|
|
type TokenAPI struct {
|
|
reader *reader
|
|
cursor *Cursor // current read cursor position, rel. to the input start
|
|
offset int // current rune offset rel. to the Reader's sliding window
|
|
result *TokenResult // results as produced by a TokenHandler (runes, Tokens)
|
|
root *TokenAPI // the root TokenAPI
|
|
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
|
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
|
}
|
|
|
|
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
|
func NewTokenAPI(r io.Reader) *TokenAPI {
|
|
input := &TokenAPI{
|
|
reader: newReader(r),
|
|
cursor: &Cursor{},
|
|
result: newTokenResult(),
|
|
}
|
|
input.root = input
|
|
return input
|
|
}
|
|
|
|
// NextRune returns the rune at the current read offset.
|
|
//
|
|
// When an invalid UTF8 rune is encountered on the input, it is replaced with
|
|
// the utf.RuneError rune. It's up to the caller to handle this as an error
|
|
// when needed.
|
|
//
|
|
// After reading a rune it must be Accept()-ed to move the read cursor forward
|
|
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
|
|
// without explicitly accepting, this method will panic.
|
|
func (i *TokenAPI) NextRune() (rune, error) {
|
|
if i.result.lastRune != nil {
|
|
_, linepos := getCaller(1)
|
|
panic(fmt.Sprintf(
|
|
"parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+
|
|
"prior call to Accept()", linepos))
|
|
}
|
|
i.detachChilds()
|
|
|
|
readRune, err := i.reader.runeAt(i.offset)
|
|
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
|
return readRune, err
|
|
}
|
|
|
|
// Accept the last rune as read by NextRune() into the result buffer and move
|
|
// the cursor forward.
|
|
//
|
|
// It is not allowed to call Accept() when the previous call to NextRune()
|
|
// returned an error. Calling Accept() in such case will result in a panic.
|
|
func (i *TokenAPI) Accept() {
|
|
if i.result.lastRune == nil {
|
|
_, linepos := getCaller(1)
|
|
panic(fmt.Sprintf(
|
|
"parsekit.TokenAPI.Accept(): Accept() called at %s without "+
|
|
"first calling NextRune()", linepos))
|
|
} else if i.result.lastRune.err != nil {
|
|
_, linepos := getCaller(1)
|
|
panic(fmt.Sprintf(
|
|
"parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+
|
|
"prior call to NextRune() failed", linepos))
|
|
}
|
|
i.result.runes = append(i.result.runes, i.result.lastRune.r)
|
|
i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
|
|
i.offset++
|
|
i.result.lastRune = nil
|
|
}
|
|
|
|
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
|
|
// read cursor position, but for the rest this is a fresh TokenAPI.
|
|
//
|
|
// By forking a TokenAPI, you can freely work with the forked child, without
|
|
// affecting the parent TokenAPI. This is for example useful when you must perform
|
|
// some form of lookahead.
|
|
//
|
|
// When such lookahead turned out successful and you want to accept the results
|
|
// into the parent TokenAPI, you can call TokenAPIold.Merge() on the forked
|
|
// child. This will add the runes in the result buffer to the result buffer of
|
|
// the parent. It also updates the read cursor position of the parent to that
|
|
// of the child.
|
|
//
|
|
// When the lookahead failed, or you don't the results as produced by that
|
|
// lookahead, the forked child can simply be discarded. You can continue to work
|
|
// with the parent TokenAPI as if nothing ever happened.
|
|
func (i *TokenAPI) Fork() *TokenAPI {
|
|
i.detachChilds()
|
|
|
|
// Create the new fork.
|
|
child := &TokenAPI{
|
|
reader: i.reader,
|
|
cursor: &Cursor{},
|
|
offset: i.offset,
|
|
root: i.root,
|
|
parent: i,
|
|
}
|
|
child.result = newTokenResult()
|
|
*child.cursor = *i.cursor
|
|
i.child = child
|
|
i.result.lastRune = nil
|
|
return child
|
|
}
|
|
|
|
// Merge appends the Result of a forked child TokenAPI to the Result of its
|
|
// parent. The read cursor position of the parent is also updated to that of
|
|
// the forked child.
|
|
//
|
|
// After the merge operation, the child is reset so it can immediately be
|
|
// reused for performing another match. This means that all Result data are
|
|
// cleared, but the read cursor position is kept at its current position.
|
|
// This allows a child to feed results in chunks to its parent.
|
|
func (i *TokenAPI) Merge() {
|
|
if i.parent == nil {
|
|
_, filepos := getCaller(1)
|
|
panic(fmt.Sprintf(
|
|
"parsekit.TokenAPI.Merge(): Merge() called at %s "+
|
|
"on a non-forked TokenAPI", filepos))
|
|
}
|
|
|
|
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
|
|
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
|
|
i.parent.offset = i.offset
|
|
i.parent.cursor = i.cursor
|
|
|
|
i.detachChilds()
|
|
i.result = newTokenResult()
|
|
}
|
|
|
|
// Result returns the TokenResult data for the TokenAPI. The returned struct
|
|
// can be used to retrieve and to modify result data.
|
|
func (i *TokenAPI) Result() *TokenResult {
|
|
return i.result
|
|
}
|
|
|
|
// Cursor retrieves the current read cursor data.
|
|
// TODO make this and offset part of Result struct?
|
|
func (i *TokenAPI) Cursor() Cursor {
|
|
return *i.cursor
|
|
}
|
|
|
|
func (i *TokenAPI) detachChilds() {
|
|
if i.child != nil {
|
|
i.child.detachChildsRecurse()
|
|
i.child = nil
|
|
}
|
|
}
|
|
|
|
func (i *TokenAPI) detachChildsRecurse() {
|
|
if i.child != nil {
|
|
i.child.detachChildsRecurse()
|
|
}
|
|
i.child = nil
|
|
i.parent = nil
|
|
}
|