go-parsekit/statehandler.go

141 lines
5.5 KiB
Go

package parsekit
import "unicode/utf8"
// StateHandler defines the type of function that must be implemented to
// handle a parsing state.
//
// A StateHandler function gets a P struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the StateHandler must use to interact with the parser.
//
// A handler selects the state for the next parsing cycle through P's routing
// methods: RouteTo (another handler), RouteRep (the current handler again)
// or RouteReturn (the handler that was last pushed on the route stack).
type StateHandler func(*P)
// P holds the internal state of a parse run and provides an API to
// StateHandler methods to communicate with the parser.
//
// LastMatch is the only exported field; all other state is unexported and
// is reached through P's methods (for example Expects, RouteTo, RouteRep,
// RouteReturn and ExpectEndOfFile).
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
// NOTE: this field shares its name with the builtin len, but it is only
// reachable as p.len, so calls like len(p.routeStack) still use the builtin.
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
}
// Expects registers a description of the input that the calling
// StateHandler is prepared to accept. The description is stored on the
// parser so that error messages can mention what was expected.
//
// A StateHandler that declares an expectation does not have to deal with
// unexpected input itself. If the handler returns without having selected
// a next state, an error is emitted automatically. That error is able to
// tell apart these situations:
//
// * the input holds valid data, but the handler did not accept it
//
// * the input holds an invalid UTF8 character
//
// * the end of the file was reached.
func (p *P) Expects(description string) {
	p.expecting = description
}
// peek decodes the rune that starts byteOffset bytes beyond the current
// cursor position, without advancing the cursor.
//
// It returns the rune, its width in bytes and a boolean. The boolean is
// false when no rune can be peeked at that position, which is the case at
// the end of the data and for an invalid UTF8 character. The returned
// values are normalized by handleRuneError.
//
// An offset that points at or past the end of the input is reported as
// end of data instead of causing a slice-bounds panic. A negative position
// is still a programming error and panics.
func (p *P) peek(byteOffset int) (rune, int, bool) {
	pos := p.inputPos + byteOffset
	if pos >= len(p.input) {
		// Nothing left to decode. Report this exactly like decoding an
		// empty string would: utf8.RuneError with a width of zero.
		return handleRuneError(utf8.RuneError, 0)
	}
	return handleRuneError(utf8.DecodeRuneInString(p.input[pos:]))
}
// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1

// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
//
// Its value equals utf8.RuneError (U+FFFD). A correctly encoded U+FFFD in
// the input has the same rune value, so the boolean returned by
// handleRuneError is what tells the two apart.
const INVALID rune = utf8.RuneError

// handleRuneError normalizes the rune and width returned by the utf8
// decoding functions into the values that this package works with.
//
// The decoder returns utf8.RuneError in three situations, which can be
// told apart by the width w:
//
// * w == 0: the end of the file is reached; EOF, 0, false is returned
//
// * w == 1: invalid UTF8 on input; INVALID, 1, false is returned
//
// * w > 1: a correctly encoded U+FFFD character; this is valid input and
// is returned unchanged with a true boolean
//
// Every other rune is returned unchanged, together with its width and a
// true boolean. This makes it easy for calling code to distinguish between
// the end of file and invalid input cases.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r == utf8.RuneError {
		switch w {
		case 0:
			return EOF, 0, false
		case 1:
			return INVALID, w, false
		}
		// Any wider RuneError is a real U+FFFD from the input and falls
		// through to the regular return below.
	}
	return r, w, true
}
// RouteTo schedules the provided StateHandler function as the handler
// to invoke in the next parsing cycle.
//
// The returned routeFollowupAction wraps a chainAction that is bound to
// this parser and that is marked as successful.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	followup := routeFollowupAction{chainAction: chainAction{p, true}}
	return &followup
}
// RouteRep schedules the StateHandler that is currently active to be
// invoked again in the next parsing cycle.
//
// The routing itself is delegated to RouteTo; the followup action that
// RouteTo returns is not used. The chainAction returned here is marked
// as successful and carries no parser reference.
func (p *P) RouteRep() *chainAction {
	current := p.state
	p.RouteTo(current)
	action := chainAction{nil, true}
	return &action
}
// RouteReturn schedules, for the next parsing cycle, the StateHandler
// that was most recently pushed on the route stack. That handler is
// removed from the stack in the process.
//
// Calling this method is optional. When implementing a StateHandler that
// acts as a kind of subroutine (through constructions like
// p.RouteTo(subroutine).ThenReturnHere()), the handler may omit an
// explicit routing decision. In that case, the parser automatically
// assumes a RouteReturn().
func (p *P) RouteReturn() *chainAction {
	returnTo := p.popRoute()
	p.nextState = returnTo
	action := chainAction{nil, true}
	return &action
}
// pushRoute places the StateHandler on top of the route stack.
// The route stack is what makes nested parsing possible.
func (p *P) pushRoute(state StateHandler) {
	stack := append(p.routeStack, state)
	p.routeStack = stack
}
// popRoute removes the StateHandler that was pushed last from the route
// stack and returns it.
//
// The route stack must not be empty: popping from an empty stack results
// in a runtime panic.
func (p *P) popRoute() StateHandler {
	top := len(p.routeStack) - 1
	// Take the remainder before the top element, so the order of
	// evaluation on the stack stays: slice first, index second.
	remaining := p.routeStack[:top]
	handler := p.routeStack[top]
	p.routeStack = remaining
	return handler
}
// ExpectEndOfFile lets a StateHandler function signal that the parser
// must have arrived at the end of the file. The check is not performed
// right away: a StateHandler provided by parsekit is scheduled through
// RouteTo, and that handler performs the actual check.
//
// The scheduled handler registers "end of file" as its expectation and
// emits an ItemEOF item with the value "EOF" when the end of the file is
// matched. When there is no match, it returns without making a routing
// decision.
func (p *P) ExpectEndOfFile() {
	checkEndOfFile := func(ps *P) {
		ps.Expects("end of file")
		atEnd := ps.On(A.EndOfFile).Stay().End()
		if !atEnd {
			return
		}
		ps.Emit(ItemEOF, "EOF")
	}
	p.RouteTo(checkEndOfFile)
}