// Source: go-parsekit/parsehandler.go (70 lines, 3.1 KiB, Go)

package parsekit
import "unicode/utf8"
// ParseHandler is the function type that must be implemented to handle a
// parsing state in a Parser state machine.
//
// A ParseHandler receives a pointer to a ParseAPI struct. That struct holds
// the internal state of the parsing state machine and provides the interface
// through which the ParseHandler interacts with the parser.
type ParseHandler func(*ParseAPI)
// ParseAPI carries the internal state of a single parse run. It is the value
// handed to every ParseHandler, which uses it as its API for communicating
// with the parser.
type ParseAPI struct {
	// State machine routing.
	state      ParseHandler   // handler function for the current state
	nextState  ParseHandler   // handler function for the state that comes next
	routeStack []ParseHandler // stack of routes, used for handling nested parsing

	// Input and cursor tracking.
	input        string // the input that the parser is scanning
	inputPos     int    // current cursor position in the input, in bytes
	cursorLine   int    // current row number of the rune cursor in the input
	cursorColumn int    // current column position of the rune cursor in the input
	len          int    // total length of the input, in bytes
	newline      bool   // keeps track of when a newline has been scanned

	// Expectations, results and errors.
	expecting string       // description of what the current state expects to find (see P.Expects())
	buffer    stringBuffer // efficient buffer used for building string values (see P.Accept())
	items     []Item       // the resulting Parser items (see P.Emit())
	item      Item         // the current item, as reached by Next() and retrieved by Get()
	err       *Error       // set when parsing failed; can be retrieved by Error()

	// LastMatch is a string representation of the last matched input data.
	LastMatch string
}
// peek returns the rune that is found byteOffset bytes past the current
// cursor position, without advancing the cursor.
//
// It returns the rune, its width in bytes and a boolean.
//
// The boolean will be false in case no upcoming rune can be peeked
// (end of data or invalid UTF8 character). In this case, the returned rune
// will be one of eofRune or invalidRune.
//
// When the requested position lies outside the input, this is reported
// as end of data (eofRune, width 0, false) instead of triggering a
// slice-bounds panic.
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
	pos := p.inputPos + byteOffset
	// Slicing the input with an out-of-range index would panic, so
	// positions that are not within the input are handled up front.
	if pos < 0 || pos > len(p.input) {
		return eofRune, 0, false
	}
	r, w := utf8.DecodeRuneInString(p.input[pos:])
	return handleRuneError(r, w)
}
const (
	// eofRune is a special rune that is used to indicate an end of file when
	// reading a character from the input.
	eofRune rune = -1

	// invalidRune is a special rune that is used to indicate an invalid UTF8
	// rune on the input. Its value equals utf8.RuneError (U+FFFD), which can
	// also appear as a regular, correctly encoded character in the input.
	// Therefore, always use the boolean returned by handleRuneError to tell
	// these two situations apart.
	invalidRune rune = utf8.RuneError
)

// handleRuneError translates the result of a UTF8 decoding call into the
// package's own rune values for error situations.
//
// The arguments r and w are the rune and the width in bytes, as returned
// by the decoder. When the rune is utf8.RuneError, one of three situations
// applies:
//
//  1. w == 0: end of file is reached
//  2. w == 1: invalid UTF8 character on input
//  3. w > 1:  a correctly encoded U+FFFD character was read from the input
//
// For the first two cases, respectively eofRune or invalidRune is returned,
// together with the width and the boolean false, to make it easy for
// calling code to distinguish between them. For the third case and for all
// other runes, the rune and width are returned unmodified, together with
// the boolean true.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return eofRune, 0, false
	case 1:
		return invalidRune, w, false
	default:
		// The decoder only reports decoding failures with a width of 0 or 1.
		// A wider RuneError is the literal replacement character, which is
		// valid input and must not be flagged as an error.
		return r, w, true
	}
}