// go-parsekit/statehandler.go

package parsekit

import "unicode/utf8"

// StateHandler defines the type of function that must be implemented to
// handle a parsing state.
//
// A StateHandler function receives a pointer to a P struct as its only
// argument. This struct holds all the internal state for the parsing state
// machine and provides the interface that the StateHandler must use to
// interact with the parser. A StateHandler has no return value.
type StateHandler func(*P)

// P holds the internal state of a parse run and provides an API to
// StateHandler methods to communicate with the parser.
//
// All fields except LastMatch are unexported.
type P struct {
	state        StateHandler   // the function that handles the current state
	nextState    StateHandler   // the function that will handle the next state (set by the Route* methods)
	routeStack   []StateHandler // route stack, for handling nested parsing (see pushRoute / popRoute)
	input        string         // the scanned input
	inputPos     int            // current byte cursor position in the input
	cursorLine   int            // current rune cursor row number in the input
	cursorColumn int            // current rune cursor column position in the input
	len          int            // the total length of the input in bytes
	newline      bool           // keep track of when we have scanned a newline
	expecting    string         // a description of what the current state expects to find (set by Expects)
	buffer       stringBuffer   // an efficient buffer, used to build string values
	items        chan Item      // channel of resulting Parser items
	item         Item           // the current item as reached by Next() and retrieved by Get()
	err          *Error         // an error when lexing failed, retrieved by Error()

	LastMatch string // a string representation of the last matched input data
}

// Expects is used to let a state function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
// handle unexpected input yourself. When the end of the function is reached
// without setting the next state, an automatic error will be emitted.
// This error can differentiate between the following issues:
//
//   - there is valid data on input, but it was not accepted by the function
//   - there is an invalid UTF8 character on input
//   - the end of the file was reached
//
// The description is stored on the parser as-is; a later call to Expects
// replaces the previously stored description.
func (p *P) Expects(description string) {
	p.expecting = description
}

// peek decodes the rune that starts byteOffset bytes past the current
// cursor position, without moving the cursor.
//
// It returns the rune, its width in bytes and a boolean. The boolean is
// false when no upcoming rune could be peeked (end of data or invalid
// UTF8 character).
//
// The offset is not range-checked here: p.inputPos+byteOffset must not lie
// beyond the end of the input, otherwise the slice expression panics.
func (p *P) peek(byteOffset int) (rune, int, bool) {
	upcoming := p.input[p.inputPos+byteOffset:]
	char, width := utf8.DecodeRuneInString(upcoming)
	return handleRuneError(char, width)
}

// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
//
//	if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1

// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError

// handleRuneError is used to normalize the rune value in case of errors.
//
// The arguments r and w are the rune and byte width as returned by the
// decoding functions of unicode/utf8. When decoding fails, those functions
// put utf8.RuneError in the rune, and the width tells what went wrong:
//
//   - w == 0: the end of the file was reached
//   - w == 1: there is an invalid UTF8 character on the input
//
// This function lets these two cases return respectively the package's own
// EOF or INVALID runes (with a false boolean), to make it easy for client
// code to distinguish between them.
//
// Any other width means that the input really contains a correctly encoded
// U+FFFD replacement character. That is valid input, so it is returned
// unchanged with a true boolean. Because INVALID has the same rune value as
// U+FFFD, callers must rely on the returned boolean, not on the rune value
// alone, to detect invalid input.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return EOF, 0, false
	case 1:
		return INVALID, w, false
	default:
		// A wider RuneError is a literal U+FFFD in the input, not a
		// decoding failure.
		return r, w, true
	}
}

// RouteTo registers state as the StateHandler function that the parser
// must invoke in the next parsing cycle.
//
// The returned routeFollowupAction wraps a chainAction that is bound to
// this parser.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	followup := routeFollowupAction{chainAction: chainAction{p, true}}
	return &followup
}

// RouteRepeat schedules the StateHandler that is currently running to be
// invoked again on the next parsing cycle.
func (p *P) RouteRepeat() *chainAction {
	// The follow-up action returned by RouteTo is not needed here.
	p.RouteTo(p.state)

	// NOTE(review): unlike RouteTo, this chainAction is built with nil
	// instead of p — confirm that this is intentional.
	repeat := chainAction{nil, true}
	return &repeat
}

// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
// That handler is removed from the route stack in the process.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() *chainAction {
	returnTo := p.popRoute()
	p.nextState = returnTo
	return &chainAction{nil, true}
}

// pushRoute adds the StateHandler to the end of the route stack, making it
// the handler that the next popRoute call returns.
// This is used for implementing nested parsing.
func (p *P) pushRoute(state StateHandler) {
	p.routeStack = append(p.routeStack, state)
}

// popRoute pops the last pushed StateHandler from the route stack.
func (p *P) popRoute() StateHandler {
	last := len(p.routeStack) - 1
	head, tail := p.routeStack[:last], p.routeStack[last]
	p.routeStack = head
	return tail
}