70 lines
3.1 KiB
Go
70 lines
3.1 KiB
Go
package parsekit
|
|
|
|
import "unicode/utf8"
|
|
|
|
// StateHandler defines the type of function that must be implemented to
|
|
// handle a parsing state.
|
|
//
|
|
// A StateHandler function gets a P struct as its input. This struct holds
|
|
// all the internal state for the parsing state machine and provides the
|
|
// interface that the StateHandler must use to interact with the parser.
|
|
type StateHandler func(*P)
|
|
|
|
// P holds the internal state of a parse run and provides an API to
|
|
// StateHandler methods to communicate with the parser.
|
|
type P struct {
|
|
state StateHandler // the function that handles the current state
|
|
nextState StateHandler // the function that will handle the next state
|
|
routeStack []StateHandler // route stack, for handling nested parsing
|
|
input string // the input that is being scanned by the parser
|
|
inputPos int // current byte cursor position in the input
|
|
cursorLine int // current rune cursor row number in the input
|
|
cursorColumn int // current rune cursor column position in the input
|
|
len int // the total length of the input in bytes
|
|
newline bool // keep track of when we have scanned a newline
|
|
expecting string // a description of what the current state expects to find (see P.Expects())
|
|
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
|
items chan Item // channel of resulting Parser items (see P.Emit())
|
|
item Item // the current item as reached by Next() and retrieved by Get()
|
|
err *Error // an error when lexing failed, retrieved by Error()
|
|
|
|
LastMatch string // a string representation of the last matched input data
|
|
}
|
|
|
|
// peek returns but does not advance the cursor to the next rune in the input.
|
|
// Returns the rune, its width in bytes and a boolean.
|
|
//
|
|
// The boolean will be false in case no upcoming rune can be peeked
|
|
// (end of data or invalid UTF8 character). In this case, the returned rune
|
|
// will be one of eofRune or invalidRune.
|
|
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
|
return handleRuneError(r, w)
|
|
}
|
|
|
|
// Special rune values, returned when no regular rune could be read from the
// input.
const (
	// eofRune is a special rune, which is used to indicate an end of file
	// when reading a character from the input.
	eofRune rune = -1

	// invalidRune is a special rune, which is used to indicate an invalid
	// UTF8 rune on the input. Its value equals utf8.RuneError (U+FFFD), so
	// it cannot be told apart from a valid U+FFFD character by value alone.
	invalidRune rune = utf8.RuneError
)

// handleRuneError translates the result of a utf8 decoding call into the
// (rune, width, ok) triple that is used by this package.
//
// When decoding fails, utf8.RuneError is returned as the rune. The width
// tells which situation applies:
//
//   - w == 0: the end of the input was reached; eofRune, 0, false is returned
//   - w == 1: an invalid UTF8 sequence was found; invalidRune, w, false is
//     returned
//
// Returning the package's own eofRune or invalidRune makes it easy for
// calling code to distinguish between these two cases.
//
// A utf8.RuneError with the width of its own encoding (3 bytes) is not an
// error: it is a correctly encoded U+FFFD replacement character that is
// literally present in the input. It is returned as a regular rune with
// ok == true. Callers must therefore inspect the boolean, not only the rune
// value, to detect invalid input.
//
// Every rune other than utf8.RuneError is returned unchanged, with
// ok == true.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return eofRune, 0, false
	case utf8.RuneLen(r):
		// A well-formed U+FFFD from the input, not a decoding failure.
		return r, w, true
	default:
		return invalidRune, w, false
	}
}
|