140 lines
5.6 KiB
Go
140 lines
5.6 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"runtime"
|
|
)
|
|
|
|
// TokenHandler is the function type that is involved in turning a low level
|
|
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
|
// data matches some kind of pattern and to report back the token(s).
|
|
//
|
|
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
|
// indicate whether or not it found a match on the input. The TokenAPI is used
|
|
// for retrieving input data to match against and for reporting back results.
|
|
// NOTE(review): TokenAPI is not declared in the visible part of this file, while
// the commented-out methods further down still target TokenAPIold. Confirm that
// their documentation still matches the TokenAPI that handlers receive.
type TokenHandler func(t *TokenAPI) bool
|
|
|
|
// NextRune retrieves the next rune from the input.
|
|
//
|
|
// It returns the rune and a boolean. The boolean will be false in case an
|
|
// invalid UTF8 rune or the end of the file was encountered.
|
|
//
|
|
// After retrieving a rune, Accept() or Skip() can be called to respectively add
|
|
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
|
|
// a TokenHandler has full control over what runes are significant for the
|
|
// resulting output of that TokenHandler.
|
|
//
|
|
// After using NextRune(), this method cannot be invoked again until the last read
|
|
// rune is explicitly accepted or skipped as described above.
|
|
// func (t *TokenAPIold) NextRune() (rune, bool) {
|
|
// if t.lastRune != nil {
|
|
// caller, filepos := getCaller(1)
|
|
// panic(fmt.Sprintf(
|
|
// "TokenHandler bug: NextRune() was called from %s at %s "+
|
|
// "without accepting or skipping the previously read rune", caller, filepos))
|
|
// }
|
|
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
|
|
// t.lastRune = &runeInfo{r, w, ok}
|
|
// if ok {
|
|
// t.result.Input = append(t.result.Input, r)
|
|
// }
|
|
// return r, ok
|
|
// }
|
|
|
|
// Fork splits off a child TokenAPIold, containing the same input cursor position
|
|
// as the parent TokenAPIold, but with all other data in a fresh state.
|
|
//
|
|
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
|
|
// affecting the parent TokenAPIold. This is for example useful when the
|
|
// TokenHandler function must perform some form of lookahead.
|
|
//
|
|
// When a successful match was found, the TokenHandler function can call
|
|
// TokenAPIold.Merge() on the forked child to have the resulting output added
|
|
// to the parent TokenAPIold.
|
|
//
|
|
// When no match was found, the forked child can simply be discarded.
|
|
//
|
|
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
|
// This is done in 4 steps and only after finishing all steps, the TokenHandler
|
|
// function can confirm a successful match. The TokenHandler function for this
|
|
// case could look like this (yes, it's naive, but it shows the point):
|
|
// TODO make proper tested example
|
|
//
|
|
// func MatchAbcd(t *TokenAPIold) bool {
|
|
// child := t.Fork() // fork, so the parent t is left untouched on a mismatch
|
|
// for _, letter := range []rune{'a', 'b', 'c', 'd'} {
|
|
// if r, ok := child.NextRune(); !ok || r != letter {
|
|
// return false // report mismatch, t is left untouched
|
|
// }
|
|
// child.Accept() // add rune to child output
|
|
// }
|
|
// child.Merge() // we have a match, add resulting output to parent
|
|
// return true // and report the successful match
|
|
// }
|
|
|
|
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
|
|
// output of the TokenAPIold.
|
|
// func (t *TokenAPIold) Accept() {
|
|
// t.checkAllowedCall("Accept()")
|
|
// t.buffer = append(t.buffer, t.lastRune.Rune)
|
|
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
|
|
// t.inputOffset += t.lastRune.ByteSize
|
|
// t.lastRune = nil
|
|
// }
|
|
|
|
// Skip will ignore the last rune as read by NextRune().
|
|
// func (t *TokenAPIold) Skip() {
|
|
// t.checkAllowedCall("Skip()")
|
|
// t.inputOffset += t.lastRune.ByteSize
|
|
// t.lastRune = nil
|
|
// }
|
|
|
|
// func (t *TokenAPIold) checkAllowedCall(name string) {
|
|
// if t.lastRune == nil {
|
|
// caller, filepos := getCaller(2)
|
|
// panic(fmt.Sprintf(
|
|
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
|
// name, caller, filepos))
|
|
// }
|
|
// if !t.lastRune.OK {
|
|
// caller, filepos := getCaller(2)
|
|
// panic(fmt.Sprintf(
|
|
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
|
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
|
|
// }
|
|
// }
|
|
|
|
// AddToken is used to add a token to the results of the TokenHandler.
|
|
// func (t *TokenAPIold) AddToken(tok *Token) {
|
|
// t.result.Tokens = append(t.result.Tokens, tok)
|
|
// }
|
|
|
|
// Merge merges the resulting output from a forked child TokenAPIold back into
|
|
// its parent: The runes that are accepted in the child are added to the parent
|
|
// runes and the parent's input cursor position is advanced to the child's
|
|
// cursor position.
|
|
//
|
|
// After the merge, the child TokenAPIold is reset so it can immediately be
|
|
// reused for performing another match (all data are cleared, except for the
|
|
// input offset which is kept at its current position).
|
|
// func (t *TokenAPIold) Merge() bool {
|
|
// if t.parent == nil {
|
|
// panic("TokenHandler bug: Cannot call Merge on a non-forked MatchDialog")
|
|
// }
|
|
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
|
|
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
|
|
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
|
|
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
|
|
// t.parent.inputOffset = t.inputOffset
|
|
// t.result = &TokResult{}
|
|
// return true
|
|
// }
|
|
|
|
// getCaller looks up a stack frame relative to the function that calls
// getCaller: depth 0 is that function itself, depth 1 is its caller, and so on.
//
// It returns the name of the function owning the frame and the source
// position of the frame, formatted as "file:line".
func getCaller(depth int) (string, string) {
	// One extra frame is skipped to step over getCaller itself. The ok result
	// of runtime.Caller is deliberately discarded: this helper is only called
	// from within this package, with depth values that are known to be valid.
	pc, file, line, _ := runtime.Caller(depth + 1)
	return runtime.FuncForPC(pc).Name(), fmt.Sprintf("%s:%d", file, line)
}
|