193 lines
7.9 KiB
Go
193 lines
7.9 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
)
|
|
|
|
// TokenHandler is the function type that is involved in turning a low-level
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the match.
//
// A TokenHandler is to be used in conjunction with parsekit.P.On() or
// parsekit.Matcher().
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean
// to indicate whether or not it found a match on the input. The TokenAPI is
// used both for retrieving input data to match against and for reporting back
// the results of the match.
type TokenHandler func(t *TokenAPI) bool
|
|
|
|
// TokenAPI is used by TokenHandler functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the TokenHandler function can call
// the TokenAPI.NextRune() method.
//
// The TokenHandler function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
// the rune will not be added to the output. It is mandatory for a TokenHandler
// to call either Accept() or Skip() after retrieving a rune, before calling
// NextRune() again.
//
// Eventually, the TokenHandler function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the TokenAPI's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the TokenAPI must be brought back to the original state.
//
// The way in which this is supported, is by forking a TokenAPI by calling
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
// output buffer, but using the same input cursor position as the forked parent.
//
// The TokenHandler function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the resulting
// output. When the TokenHandler function decides that the lookahead was
// successful, then the method TokenAPI.Merge() can be called on the forked
// child to append the resulting output from the child to the parent's
// resulting output, and to update the parent input cursor position to that of
// the child.
//
// When the TokenHandler function decides that the lookahead was unsuccessful,
// then it can simply discard the forked child. The parent TokenAPI was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type TokenAPI struct {
	p           *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
	inputOffset int       // the byte offset into the input at which the next rune will be read
	input       []rune    // the runes that were successfully read from the input through NextRune()
	output      []rune    // the runes that were accepted into the resulting output through Accept()
	currRune    *runeInfo // information on the last rune read by NextRune(); nil once it has been accepted or skipped
	parent      *TokenAPI // the TokenAPI that this one was forked from; nil when this TokenAPI is not a fork
}
|
|
|
|
// runeInfo describes a single rune, as read from the input by
// TokenAPI.NextRune(), together with its metadata.
type runeInfo struct {
	Rune     rune // a UTF8 rune
	ByteSize int  // the number of bytes in the rune; the input offset advances by this amount on Accept() or Skip()
	OK       bool // false when the rune represents an invalid UTF8 rune or EOF
}
|
|
|
|
// NextRune retrieves the next rune from the input.
|
|
//
|
|
// It returns the rune and a boolean. The boolean will be false in case an
|
|
// invalid UTF8 rune or the end of the file was encountered.
|
|
//
|
|
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
|
// to respectively add the rune to the TokenAPI's resulting output or to
|
|
// fully ignore it. This way, a TokenHandler has full control over what runes are
|
|
// significant for the resulting output of that TokenHandler.
|
|
//
|
|
// After using NextRune(), this method can not be reinvoked, until the last read
|
|
// rune is explicitly accepted or skipped as described above.
|
|
func (t *TokenAPI) NextRune() (rune, bool) {
|
|
if t.currRune != nil {
|
|
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
|
}
|
|
r, w, ok := t.p.peek(t.inputOffset)
|
|
t.currRune = &runeInfo{r, w, ok}
|
|
if ok {
|
|
t.input = append(t.input, r)
|
|
}
|
|
return r, ok
|
|
}
|
|
|
|
// Fork splits off a child TokenAPI, containing the same input cursor position
|
|
// as the parent TokenAPI, but with all other data in a fresh state.
|
|
//
|
|
// By forking, a TokenHandler function can freely work with a TokenAPI, without
|
|
// affecting the parent TokenAPI. This is for example useful when the
|
|
// TokenHandler function must perform some form of lookahead.
|
|
//
|
|
// When a successful match was found, the TokenHandler function can call
|
|
// TokenAPI.Merge() on the forked child to have the resulting output added
|
|
// to the parent TokenAPI.
|
|
//
|
|
// When no match was found, the forked child can simply be discarded.
|
|
//
|
|
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
|
// This is done in 4 steps and only after finishing all steps, the TokenHandler
|
|
// function can confirm a successful match. The TokenHandler function for this
|
|
// case could look like this (yes, it's naive, but it shows the point):
|
|
// TODO make proper tested example
|
|
//
|
|
// func MatchAbcd(t *TokenAPI) bool {
|
|
// child := t.Fork() // fork to keep m from input untouched
|
|
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
|
// if r, ok := t.NextRune(); !ok || r != letter {
|
|
// return false // report mismatch, t is left untouched
|
|
// }
|
|
// child.Accept() // add rune to child output
|
|
// }
|
|
// child.Merge() // we have a match, add resulting output to parent
|
|
// return true // and report the successful match
|
|
// }
|
|
func (t *TokenAPI) Fork() *TokenAPI {
|
|
return &TokenAPI{
|
|
p: t.p,
|
|
inputOffset: t.inputOffset,
|
|
parent: t,
|
|
}
|
|
}
|
|
|
|
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
|
|
// output of the TokenAPI.
|
|
func (t *TokenAPI) Accept() {
|
|
t.checkAllowedCall("Accept()")
|
|
t.output = append(t.output, t.currRune.Rune)
|
|
t.inputOffset += t.currRune.ByteSize
|
|
t.currRune = nil
|
|
}
|
|
|
|
// Skip will ignore the last rune as read by NextRune().
|
|
func (t *TokenAPI) Skip() {
|
|
t.checkAllowedCall("Skip()")
|
|
t.inputOffset += t.currRune.ByteSize
|
|
t.currRune = nil
|
|
}
|
|
|
|
func (t *TokenAPI) checkAllowedCall(name string) {
|
|
if t.currRune == nil {
|
|
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
|
}
|
|
if !t.currRune.OK {
|
|
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
|
|
}
|
|
}
|
|
|
|
// Merge merges the resulting output from a forked child TokenAPI back into
|
|
// its parent: The runes that are accepted in the child are added to the parent
|
|
// runes and the parent's input cursor position is advanced to the child's
|
|
// cursor position.
|
|
//
|
|
// After the merge, the child TokenAPI is reset so it can immediately be
|
|
// reused for performing another match (all data are cleared, except for the
|
|
// input offset which is kept at its current position).
|
|
func (t *TokenAPI) Merge() bool {
|
|
if t.parent == nil {
|
|
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
|
}
|
|
t.parent.input = append(t.parent.input, t.input...)
|
|
t.parent.output = append(t.parent.output, t.output...)
|
|
t.parent.inputOffset = t.inputOffset
|
|
t.ClearOutput()
|
|
t.ClearInput()
|
|
return true
|
|
}
|
|
|
|
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
|
|
// the input and input offset as-is.
|
|
func (t *TokenAPI) ClearOutput() {
|
|
t.output = []rune{}
|
|
}
|
|
|
|
// ClearInput clears the input for the TokenAPI, but it keeps the output
|
|
// and input offset as-is.
|
|
func (t *TokenAPI) ClearInput() {
|
|
t.input = []rune{}
|
|
}
|