112 lines
4.3 KiB
Go
112 lines
4.3 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"runtime"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// ParseHandler defines the type of function that must be implemented to handle
|
|
// a parsing state in a Parser state machine.
|
|
//
|
|
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
|
|
// all the internal state for the parsing state machine and provides the
|
|
// interface that the ParseHandler uses to interact with the parser.
|
|
type ParseHandler func(*ParseAPI)
|
|
|
|
// ParseAPI holds the internal state of a parse run and provides an API to
|
|
// ParseHandler methods to communicate with the parser.
|
|
type ParseAPI struct {
|
|
input string // the input that is being scanned by the parser
|
|
inputPos int // current byte cursor position in the input
|
|
loopCheck map[string]bool // used for parser loop detection
|
|
cursorLine int // current rune cursor row number in the input
|
|
cursorColumn int // current rune cursor column position in the input
|
|
len int // the total length of the input in bytes
|
|
newline bool // keep track of when we have scanned a newline
|
|
expecting string // a description of what the current state expects to find (see P.Expects())
|
|
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
|
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
|
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
|
|
|
LastMatch string // a string representation of the last matched input data
|
|
}
|
|
|
|
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
|
// or when it has been stopped. It is used from the ParseAPI methods, to
|
|
// prevent further calls to the ParseAPI on these occasions.
|
|
//
|
|
// Basically, this guard ensures proper coding of parsers, making sure
|
|
// that clean routes are followed. You can consider this check a runtime
|
|
// unit test.
|
|
func (p *ParseAPI) panicWhenStoppedOrInError() {
|
|
if !p.isStoppedOrInError() {
|
|
return
|
|
}
|
|
// No error handling, because it's private known-to-work use only.
|
|
pc, _, _, _ := runtime.Caller(1)
|
|
call := runtime.FuncForPC(pc)
|
|
pc, _, _, _ = runtime.Caller(2)
|
|
caller := runtime.FuncForPC(pc)
|
|
|
|
after := "Error()"
|
|
if p.stopped {
|
|
after = "Stop()"
|
|
}
|
|
parts := strings.Split(call.Name(), ".")
|
|
name := parts[len(parts)-1]
|
|
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s: no calls allowed after ParseAPI.%s", name, caller.Name(), after))
|
|
}
|
|
|
|
func (p *ParseAPI) isStoppedOrInError() bool {
|
|
return p.stopped || p.err != nil
|
|
}
|
|
|
|
func (p *ParseAPI) checkForLoops() {
|
|
pc, file, line, _ := runtime.Caller(2)
|
|
id := fmt.Sprintf("%s:%d", file, line)
|
|
if _, ok := p.loopCheck[id]; ok {
|
|
caller := runtime.FuncForPC(pc)
|
|
panic(fmt.Sprintf("Loop detected in parser in %s at %s, line %d", caller.Name(), file, line))
|
|
}
|
|
p.loopCheck[id] = true
|
|
}
|
|
|
|
// peek returns but does not advance the cursor to the next rune in the input.
|
|
// Returns the rune, its width in bytes and a boolean.
|
|
//
|
|
// The boolean will be false in case no upcoming rune can be peeked
|
|
// (end of data or invalid UTF8 character). In this case, the returned rune
|
|
// will be one of eofRune or invalidRune.
|
|
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
|
return handleRuneError(r, w)
|
|
}
|
|
|
|
// Special rune values, returned when no regular rune could be read from the
// input.
const (
	// eofRune is a special rune that is used to indicate an end of file when
	// reading a character from the input.
	eofRune rune = -1

	// invalidRune is a special rune that is used to indicate an invalid UTF8
	// rune on the input.
	invalidRune rune = utf8.RuneError
)

// handleRuneError translates the result of a utf8 decoding call into the
// package's own special rune values.
//
// The arguments r and w are the rune and the width in bytes, as returned by
// the decoder. When decoding fails, r is utf8.RuneError and w tells the two
// failure cases apart:
//
//  1. w == 0: end of file is reached
//  2. w == 1: invalid UTF8 character on input
//
// For these two cases, respectively eofRune and invalidRune are returned,
// together with the width and the boolean false, to make it easy for calling
// code to distinguish between them.
//
// In all other cases, the rune and width are returned unchanged, together
// with the boolean true. This includes a correctly encoded U+FFFD replacement
// character: it decodes to the same rune value as utf8.RuneError, but with a
// width of 3 bytes, and it is valid input rather than a decoding error.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return eofRune, 0, false
	case 1:
		return invalidRune, w, false
	default:
		// A literal U+FFFD on the input: valid UTF8, not an error.
		return r, w, true
	}
}
|