go-parsekit/tokenapi.go

package parsekit

import (
	"fmt"
	"io"
)

// TokenAPI wraps a parsekit.Reader. Its purpose is to retrieve input data and
// to report back results. For easy lookahead support, a forking strategy is
// provided.
//
// BASIC OPERATION:
//
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result buffer of the TokenAPI struct.
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
// with as many runes as needed.
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
// brought back to the original state, so we can try a different route.
//
// The way in which this is supported, is by forking a TokenAPI struct by calling
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
// but using the same input cursor position as the forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result
// buffer to the parent's result buffer, and to move the input cursor position
// to that of the child.
//
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
// be discarded. The parent TokenAPI was never modified, so it can safely be used
// as if the lookahead never happened.
//
// Note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the input cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient, however, in my opinion, not very intuitive to read.
type TokenAPI struct {
	// reader is the input source; a fork reuses the reader of its parent.
	reader *Reader
	cursor *Cursor   // current read cursor position, relative to the input start
	offset int       // current rune offset, relative to the Reader's sliding window
	result *Result   // results as produced by a TokenHandler (runes, Tokens)
	root   *TokenAPI // the root of the fork chain (a TokenAPI from NewTokenAPI is its own root)
	parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child, nil otherwise
	child  *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent, nil otherwise
}

// NewTokenAPI creates a root TokenAPI that reads its input from the provided
// io.Reader. The returned TokenAPI starts with a zero-value cursor, an empty
// result and itself registered as the root of the fork chain.
func NewTokenAPI(r io.Reader) *TokenAPI {
	api := new(TokenAPI)
	api.reader = NewReader(r)
	api.cursor = new(Cursor)
	api.result = NewResult()
	// A freshly created TokenAPI is the root of its own fork chain.
	api.root = api
	return api
}

// NextRune reads the rune at the current read offset and returns it together
// with the error as reported by the Reader.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// The rune that was read is remembered as the last read rune. It must be
// Accept()-ed to move the read cursor forward to the next rune. Doing so is
// mandatory: a second call to NextRune() without an Accept() in between
// results in a panic.
//
// Calling NextRune() detaches any forked child from this TokenAPI.
func (i *TokenAPI) NextRune() (rune, error) {
	if pending := i.result.lastRune; pending != nil {
		callerName, where := getCaller(1)
		message := fmt.Sprintf(
			"parsekit.TokenAPI.NextRune(): NextRune() called without a prior call to Accept() from %s at %s",
			callerName, where)
		panic(message)
	}

	// Reading on this TokenAPI invalidates the children that were forked from it.
	i.detachChilds()

	r, readErr := i.reader.RuneAt(i.offset)
	// Remember the outcome, so Accept() knows what to accept (or to refuse).
	i.result.lastRune = &runeInfo{r: r, err: readErr}
	return r, readErr
}

// Accept the last rune as read by NextRune() into the result buffer and move
// the cursor forward.
//
// The accepted rune is appended to the result runes, the cursor is moved over
// the rune, the read offset is incremented by one and the last read rune is
// cleared, so NextRune() can be called again.
//
// Accept() panics when it is called without a preceding call to NextRune().
// It is also not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() {
	last := i.result.lastRune
	switch {
	case last == nil:
		caller, linepos := getCaller(1)
		panic(fmt.Sprintf(
			"parsekit.TokenAPI.Accept(): Accept() called without first "+
				"calling NextRune() from %s at %s", caller, linepos))
	case last.err != nil:
		caller, linepos := getCaller(1)
		panic(fmt.Sprintf(
			"parsekit.TokenAPI.Accept(): Accept() called while the previous "+
				"call to NextRune() failed from %s at %s", caller, linepos))
	}

	i.result.runes = append(i.result.runes, last.r)
	// A plain conversion yields the same UTF8 text as fmt's %c verb, without
	// going through the formatting machinery for every accepted rune.
	i.cursor.move(string(last.r))
	i.offset++
	i.result.lastRune = nil
}

// Fork creates a child TokenAPI for this TokenAPI. The child shares the Reader
// and the root of its parent and starts at the same read offset, with its own
// copy of the parent's cursor and a fresh, empty result.
//
// Forking detaches a previously forked child from this TokenAPI and clears
// the last read rune of this TokenAPI.
func (i *TokenAPI) Fork() *TokenAPI {
	i.detachChilds()

	// The child gets a private copy of the cursor, so moving the child's
	// cursor leaves the cursor of the parent untouched.
	cursorCopy := *i.cursor
	fork := &TokenAPI{
		reader: i.reader,
		cursor: &cursorCopy,
		offset: i.offset,
		result: NewResult(),
		root:   i.root,
		parent: i,
	}

	i.result.lastRune = nil
	i.child = fork
	return fork
}

// Merge adds the data of the forked child TokenAPI that Merge() is called on to the
// data of its parent (results and read cursor position).
//
// The runes and tokens of the child are appended to those of the parent, and
// the parent's read offset and cursor are brought in line with the child's.
// After merging, the child is left with a fresh, empty result and without
// forked children of its own. The child stays linked to its parent.
//
// Merge() panics when it is called on a TokenAPI that has no parent.
func (i *TokenAPI) Merge() {
	if i.parent == nil {
		panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI")
	}

	i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
	i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
	i.parent.offset = i.offset
	// Copy the cursor by value, like Fork() does. Sharing the pointer would
	// make every later cursor move in the child show up in the parent as
	// well, even when that input is never merged.
	*i.parent.cursor = *i.cursor

	i.detachChilds()
	i.result = NewResult()
}

// Result gives access to the result data of the TokenAPI. A pointer to the
// internal struct is returned, so it can be used both to retrieve and to
// modify the result data.
func (i *TokenAPI) Result() *Result {
	current := i.result
	return current
}

// Cursor returns a copy of the current read cursor data. Changing the
// returned value does not affect the cursor of the TokenAPI.
// TODO make this and offset part of Result struct?
func (i *TokenAPI) Cursor() Cursor {
	snapshot := *i.cursor
	return snapshot
}

// FlushReaderBuffer delegates to the Flush() method of the contained
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
// reader cache.
//
// This method can only be called on the root TokenAPI. Calling it on a forked
// child results in a panic. Any forked child is detached, since its read
// offset would no longer be valid after the flush.
//
// After the flush, the read offset is shifted back by the number of flushed
// runes, so it keeps pointing at the same input rune. When at least as many
// runes are flushed as were read so far, the offset becomes 0.
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
	if i != i.root {
		panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
	}
	i.detachChilds()
	i.reader.Flush(numberOfRunes)

	// The offset is relative to the Reader's sliding window.
	// NOTE(review): this assumes that Flush() drops the runes from the start
	// of that window - confirm against the Reader implementation. Under that
	// assumption, the offset must move back by the flushed amount; an
	// unconditional reset to 0 is only right when exactly offset runes were
	// flushed.
	i.offset -= numberOfRunes
	if i.offset < 0 {
		i.offset = 0
	}
}

// detachChilds disconnects the forked child (if any) from this TokenAPI.
// The full chain of forks below the child is taken apart as well.
func (i *TokenAPI) detachChilds() {
	forked := i.child
	if forked == nil {
		// Nothing was forked from this TokenAPI.
		return
	}
	forked.detachChildsRecurse()
	i.child = nil
}

// detachChildsRecurse clears the parent and child links of this TokenAPI and
// of every TokenAPI further down its chain of forked children.
func (i *TokenAPI) detachChildsRecurse() {
	for node := i; ; {
		// Remember the next link before it is cut loose.
		next := node.child
		node.child, node.parent = nil, nil
		if next == nil {
			return
		}
		node = next
	}
}