96 lines
3.4 KiB
Go
96 lines
3.4 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// P holds the internal state of the parser.
|
|
type P struct {
|
|
state StateFn // the function that handles the current state
|
|
nextState StateFn // the function that will handle the next state
|
|
stack []StateFn // state function stack, for nested parsing
|
|
input string // the scanned input
|
|
len int // the total length of the input in bytes
|
|
pos int // current byte scanning position in the input
|
|
newline bool // keep track of when we have scanned a newline
|
|
cursorRow int // current row number in the input
|
|
cursorColumn int // current column position in the input
|
|
expecting string // a description of what the current state expects to find
|
|
buffer stringBuffer // an efficient buffer, used to build string values
|
|
items chan Item // channel of resulting Parser items
|
|
item Item // the current item as reached by Next() and retrieved by Get()
|
|
err *Error // an error when lexing failed, retrieved by Error()
|
|
}
|
|
|
|
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
|
// Returns the rune, its width in bytes and a boolean.
|
|
// The boolean will be false in case no upcoming rune can be peeked
|
|
// (end of data or invalid UTF8 character).
|
|
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
|
return handleRuneError(r, w)
|
|
}
|
|
|
|
// handleRuneError is used to normale rune value in case of errors.
|
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
|
// This can however indicate one of two situations:
|
|
// * w == 0: end of file is reached
|
|
// * w == 1: invalid UTF character on input
|
|
// This function lets these two cases return respectively the
|
|
// package's own EOF or INVALID runes, to make it easy for client
|
|
// code to distinct between these two cases.
|
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
|
if r == utf8.RuneError {
|
|
if w == 0 {
|
|
return EOF, 0, false
|
|
}
|
|
return INVALID, w, false
|
|
}
|
|
return r, w, true
|
|
}
|
|
|
|
// EOF is a special rune that indicates an end of file when reading a
// character from the input.
//
// It can be treated as a rune when writing parsing rules, so a valid way
// to say 'I now expect the end of the file' is using something like:
//
//	if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF = rune(-1)
|
|
|
|
// INVALID is a special rune that indicates an invalid UTF8 rune on the
// input. Its value is that of utf8.RuneError.
const INVALID = rune(utf8.RuneError)
|
|
|
|
// StateFn defines the type of function that can be used to
|
|
// handle a parser state.
|
|
type StateFn func(*P)
|
|
|
|
// ItemType is the type of a parser Item.
type ItemType int

// Built-in parser item types.
const (
	// ItemEOF flags that the end of the input was reached.
	ItemEOF ItemType = -1

	// ItemError flags that an error has occurred during parsing.
	ItemError ItemType = -2
)
|
|
|
|
// Item represents an item returned from the parser.
|
|
type Item struct {
|
|
Type ItemType
|
|
Value string
|
|
}
|
|
|
|
// Error is the error type that is used when parsing errors occur.
//
// Besides the error message, it holds a row and a column as extra meta
// information, to allow for useful error messages to the user.
type Error struct {
	Message     string
	Row, Column int
}
|
|
|
|
func (err *Error) Error() string {
|
|
return err.Message
|
|
}
|