129 lines
5.1 KiB
Go
129 lines
5.1 KiB
Go
package parsekit
|
|
|
|
import "unicode/utf8"
|
|
|
|
// StateHandler defines the type of function that must be implemented to
|
|
// handle a parsing state.
|
|
//
|
|
// A StateHandler function gets a P struct as its input. This struct holds
|
|
// all the internal state for the parsing state machine and provides the
|
|
// interface that the StateHandler must use to interact with the parser.
|
|
type StateHandler func(*P)
|
|
|
|
// P holds the internal state of a parse run and provides an API to
|
|
// StateHandler methods to communicate with the parser.
|
|
type P struct {
|
|
state StateHandler // the function that handles the current state
|
|
nextState StateHandler // the function that will handle the next state
|
|
routeStack []StateHandler // route stack, for handling nested parsing
|
|
input string // the scanned input
|
|
inputPos int // current byte cursor position in the input
|
|
cursorLine int // current rune cursor row number in the input
|
|
cursorColumn int // current rune cursor column position in the input
|
|
len int // the total length of the input in bytes
|
|
newline bool // keep track of when we have scanned a newline
|
|
expecting string // a description of what the current state expects to find
|
|
buffer stringBuffer // an efficient buffer, used to build string values
|
|
items chan Item // channel of resulting Parser items
|
|
item Item // the current item as reached by Next() and retrieved by Get()
|
|
err *Error // an error when lexing failed, retrieved by Error()
|
|
|
|
LastMatch string // a string representation of the last matched input data
|
|
}
|
|
|
|
// Expects is used to let a state function describe what input it is expecting.
|
|
// This expectation is used in error messages to make them more descriptive.
|
|
//
|
|
// When defining an expectation inside a StateHandler, you do not need to
|
|
// handle unexpected input yourself. When the end of the function is reached
|
|
// without setting the next state, an automatic error will be emitted.
|
|
// This error can differentiate between the following issues:
|
|
//
|
|
// * there is valid data on input, but it was not accepted by the function
|
|
//
|
|
// * there is an invalid UTF8 character on input
|
|
//
|
|
// * the end of the file was reached.
|
|
func (p *P) Expects(description string) {
|
|
p.expecting = description
|
|
}
|
|
|
|
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
|
// Returns the rune, its width in bytes and a boolean.
|
|
// The boolean will be false in case no upcoming rune can be peeked
|
|
// (end of data or invalid UTF8 character).
|
|
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
|
return handleRuneError(r, w)
|
|
}
|
|
|
|
// Special runes, used for reporting conditions that are not regular input
// characters.
const (
	// EOF is a special rune, which indicates that the end of the file was
	// reached when reading a character from the input.
	// It can be treated as a rune when writing parsing rules, so a valid
	// way to say 'I now expect the end of the file' is using something like:
	// if (p.On(c.Rune(EOF)).Skip()) { ... }
	EOF rune = -1

	// INVALID is a special rune, which indicates an invalid UTF8 rune on
	// the input.
	INVALID rune = utf8.RuneError
)
|
|
|
|
// handleRuneError is used to normale rune value in case of errors.
|
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
|
// This can however indicate one of two situations:
|
|
// * w == 0: end of file is reached
|
|
// * w == 1: invalid UTF character on input
|
|
// This function lets these two cases return respectively the
|
|
// package's own EOF or INVALID runes, to make it easy for client
|
|
// code to distinct between these two cases.
|
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
|
if r == utf8.RuneError {
|
|
if w == 0 {
|
|
return EOF, 0, false
|
|
}
|
|
return INVALID, w, false
|
|
}
|
|
return r, w, true
|
|
}
|
|
|
|
// RouteTo tells the parser what StateHandler function to invoke
|
|
// in the next parsing cycle.
|
|
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
|
p.nextState = state
|
|
return &routeFollowupAction{chainAction: chainAction{p, true}}
|
|
}
|
|
|
|
// RouteRepeat indicates that on the next parsing cycle, the current
|
|
// StateHandler must be reinvoked.
|
|
func (p *P) RouteRepeat() *chainAction {
|
|
p.RouteTo(p.state)
|
|
return &chainAction{nil, true}
|
|
}
|
|
|
|
// RouteReturn tells the parser that on the next cycle the last
|
|
// StateHandler that was pushed on the route stack must be invoked.
|
|
//
|
|
// Using this method is optional. When implementating a StateHandler that
|
|
// is used as a sort of subroutine (using constructions like
|
|
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
|
// providing an explicit routing decision from that handler. The parser will
|
|
// automatically assume a RouteReturn() in that case.
|
|
func (p *P) RouteReturn() *chainAction {
|
|
p.nextState = p.popRoute()
|
|
return &chainAction{nil, true}
|
|
}
|
|
|
|
// pushRoute adds the StateHandler to the route stack.
|
|
// This is used for implementing nested parsing.
|
|
func (p *P) pushRoute(state StateHandler) {
|
|
p.routeStack = append(p.routeStack, state)
|
|
}
|
|
|
|
// popRoute pops the last pushed StateHandler from the route stack.
|
|
func (p *P) popRoute() StateHandler {
|
|
last := len(p.routeStack) - 1
|
|
head, tail := p.routeStack[:last], p.routeStack[last]
|
|
p.routeStack = head
|
|
return tail
|
|
}
|