// go-parsekit/matcher.go

package parsekit

import (
	"fmt"
)

// Matcher is the function type that must be implemented to create a function
// that can be used in conjunction with parsekit.P.On() or parsekit.New().
// Its purpose is to check whether the input data matches some kind of pattern
// and to report back the match.
//
// A Matcher function receives a MatchDialog as its only argument and returns
// a boolean that indicates whether or not it found a match on the input.
// The MatchDialog is used both for retrieving the input data to match against
// and for reporting back the results.
type Matcher func(m *MatchDialog) bool

// MatchDialog is used by Matcher functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the Matcher function can call
// the MatchDialog.NextRune() method.
//
// The Matcher function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
// the rune is added to the output of the MatchDialog. When using
// MatchDialog.Skip(), the rune will not be added to the output. It is
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
// a rune, before calling NextRune() again.
//
// Eventually, the Matcher function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the MatchDialog's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a Matcher function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the MatchDialog must be brought back to the original state.
//
// This is supported by forking a MatchDialog through a call to
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
// output buffer, but using the same input offset as the forked parent.
//
// The Matcher function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the output
// buffer. When the Matcher function decides that the lookahead was successful,
// then the method MatchDialog.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input offset to that of the child.
//
// When the Matcher function decides that the lookahead was unsuccessful, then
// it can simply discard the forked child. The parent MatchDialog was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type MatchDialog struct {
	p           *P           // parser state, used to retrieve input data to match against (TODO should be interface)
	inputOffset int          // the byte offset into the input at which NextRune() reads; advanced by Accept() and Skip()
	input       []rune       // the runes that NextRune() successfully read from the input for the Matcher
	output      []rune       // the runes that the Matcher accepted using Accept()
	currRune    *runeToken   // the last rune read by NextRune() that awaits Accept() or Skip(); nil when none is pending
	parent      *MatchDialog // the MatchDialog that this one was forked from; nil when it was not forked
}

// runeToken holds the outcome of a single read from the input, as stored
// by MatchDialog.NextRune() until the rune is accepted or skipped.
//
// NOTE: NextRune() fills this struct using a positional composite literal,
// so the order of the fields must not be changed.
type runeToken struct {
	Rune     rune // the rune that was read from the input
	ByteSize int  // the number of bytes by which Accept() and Skip() advance the input offset for this rune
	OK       bool // false when the read failed (reported by the package as EOF or invalid rune)
}

// NextRune reads the rune at the current input offset of the MatchDialog.
//
// The return values are the rune that was read and a boolean. The boolean
// is false when no valid rune could be read (an invalid UTF8 rune or the
// end of the file was encountered).
//
// A successfully read rune is recorded in the MatchDialog's input. The
// caller must follow up with either Accept(), to add the rune to the
// resulting output, or Skip(), to ignore it. This gives a Matcher full
// control over which runes are significant for its resulting output.
//
// Calling NextRune() again before the previously read rune was explicitly
// accepted or skipped is a programming error and results in a panic.
func (m *MatchDialog) NextRune() (rune, bool) {
	if m.currRune != nil {
		panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
	}
	r, size, valid := m.p.peek(m.inputOffset)
	// The read is remembered even when it failed, so Accept() and Skip()
	// can detect that there is no valid rune to act on.
	m.currRune = &runeToken{Rune: r, ByteSize: size, OK: valid}
	if !valid {
		return r, false
	}
	m.input = append(m.input, r)
	return r, true
}

// Fork creates a child MatchDialog that shares the parser state and the
// input offset of the MatchDialog it is called on, and that keeps a
// reference to that MatchDialog as its parent. All other data of the child
// start out in a fresh state.
//
// A Matcher function can work freely with the forked child without
// affecting the parent MatchDialog, which is for example useful when some
// form of lookahead must be performed.
//
// When a successful match was found, the Matcher function can call
// child.Merge() to have the resulting output added to the parent MatchDialog.
// When no match was found, the forked child can simply be discarded.
//
// Example case: A Matcher checks for a sequence of runes: 'a', 'b', 'c', 'd'.
// This takes 4 steps, and only after finishing all of them the Matcher
// function can confirm a successful match. A (naive, but illustrative)
// Matcher function for this case could look like this:
//
//	func MatchAbcd(m *MatchDialog) bool {
//	    child := m.Fork() // fork, to keep m untouched
//	    for _, letter := range []rune{'a', 'b', 'c', 'd'} {
//	        if r, ok := child.NextRune(); !ok || r != letter {
//	            return false // report mismatch, m is left untouched
//	        }
//	        child.Accept() // add rune to child output
//	    }
//	    child.Merge() // we have a match, add resulting output to parent
//	    return true   // and report the successful match
//	}
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{
		parent:      m,
		p:           m.p,
		inputOffset: m.inputOffset,
	}
}

// Accept appends the rune that was last read by NextRune() to the resulting
// output of the MatchDialog and moves the input offset past that rune.
//
// It panics when there is no pending rune from a successful NextRune() call.
func (m *MatchDialog) Accept() {
	m.checkAllowedCall("Accept()")
	tok := m.currRune
	m.currRune = nil // the rune is handled, NextRune() may be called again
	m.inputOffset += tok.ByteSize
	m.output = append(m.output, tok.Rune)
}

// Skip discards the rune that was last read by NextRune(): the input offset
// is moved past the rune, but the rune is not added to the resulting output.
//
// It panics when there is no pending rune from a successful NextRune() call.
func (m *MatchDialog) Skip() {
	m.checkAllowedCall("Skip()")
	width := m.currRune.ByteSize
	m.currRune = nil // the rune is handled, NextRune() may be called again
	m.inputOffset += width
}

// checkAllowedCall verifies that the MatchDialog holds a pending, valid rune,
// which is the precondition for calling Accept() and Skip().
//
// The name argument is the name of the calling method (e.g. "Accept()"),
// which is included in the panic message to identify the faulty call.
//
// It panics when NextRune() was not called beforehand, or when the preceding
// NextRune() call did not return OK (EOF or invalid rune). Both cases are
// programming errors in the Matcher function.
func (m *MatchDialog) checkAllowedCall(name string) {
	if m.currRune == nil {
		panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
	}
	if !m.currRune.OK {
		panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
	}
}

// Merge merges the results from a forked child MatchDialog back into its
// parent: the input and output runes of the child are appended to the input
// and output runes of the parent, and the parent's input offset is set to
// the child's input offset.
//
// After the merge, the input and output buffers of the child are cleared, so
// the child can be reused for performing another match. The child's input
// offset is kept at its current position.
//
// Merge always returns true. It panics when it is called on a MatchDialog
// that was not created using Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("internal parser error: Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.input = append(m.parent.input, m.input...)
	m.parent.output = append(m.parent.output, m.output...)
	m.parent.inputOffset = m.inputOffset
	m.ClearOutput()
	m.ClearInput()
	return true
}

// ClearOutput resets the resulting output of the MatchDialog to an empty
// rune slice. The input and the input offset are left untouched.
func (m *MatchDialog) ClearOutput() {
	m.output = make([]rune, 0)
}

// ClearInput resets the recorded input of the MatchDialog to an empty rune
// slice. The output and the input offset are left untouched.
func (m *MatchDialog) ClearInput() {
	m.input = make([]rune, 0)
}