go-parsekit/parsehandler.go

117 lines
4.3 KiB
Go

package parsekit
import (
"fmt"
"runtime"
"strings"
"unicode/utf8"
)
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A ParseHandler function receives a pointer to a ParseAPI struct as its only
// argument and returns nothing. That struct holds all the internal state for
// the parsing state machine and provides the interface that the ParseHandler
// uses to interact with the parser.
type ParseHandler func(*ParseAPI)
// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler functions to communicate with the parser.
//
// LastMatch is the only exported field; all other fields are internal
// bookkeeping of the parser.
type ParseAPI struct {
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
loopCheck map[string]bool // used for parser loop detection, keyed by "file:line" call position (see checkForLoops())
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keeps track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
err *Error // error during parsing, retrieved by Error(); non-nil makes isStoppedOrInError() report true
stopped bool // a boolean set to true by Stop(); true makes isStoppedOrInError() report true
LastMatch string // a string representation of the last matched input data
}
// panicWhenStoppedOrInError is a guard for ParseAPI methods: it panics when
// the parser has already produced an error or has been stopped, and is a
// no-op otherwise.
//
// The guard enforces proper coding of parsers by making sure that no further
// ParseAPI calls are made once a parse run has ended. Think of it as a unit
// test that is executed at runtime.
//
// The panic message names the ParseAPI method that invoked the guard, the
// function that called that method (with its file:line position), and
// whether the parse run ended through Stop() or Error().
func (p *ParseAPI) panicWhenStoppedOrInError() {
	if !p.isStoppedOrInError() {
		return
	}

	// One frame up: the ParseAPI method that invoked this guard. Only the
	// part after the last dot of the fully qualified name is reported.
	method, _ := p.getCaller(1)
	method = method[strings.LastIndex(method, ".")+1:]

	// Two frames up: the code that made the illegal ParseAPI call.
	origin, location := p.getCaller(2)

	// Stop() takes precedence over Error() when both conditions hold.
	var ended string
	switch {
	case p.stopped:
		ended = "Stop()"
	default:
		ended = "Error()"
	}

	panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", method, origin, location, ended))
}
// isStoppedOrInError reports whether the parse run has ended, either because
// the stopped flag was set or because an error was recorded.
func (p *ParseAPI) isStoppedOrInError() bool {
	if p.stopped {
		return true
	}
	return p.err != nil
}
// checkForLoops panics when it is reached a second time from the same call
// position. The position is the file:line of the code two stack frames above
// this method, i.e. the caller of the method that invoked checkForLoops.
//
// Every position seen is recorded in p.loopCheck, so a repeated position
// means that the same call site was hit again while the record was kept.
func (p *ParseAPI) checkForLoops() {
	origin, position := p.getCaller(2)
	_, seen := p.loopCheck[position]
	if seen {
		panic(fmt.Sprintf("Loop detected in parser in %s at %s", origin, position))
	}
	p.loopCheck[position] = true
}
// peek decodes the rune that starts byteOffset bytes past the current cursor
// position, without advancing the cursor.
//
// It returns the rune, its width in bytes and a boolean. The boolean is false
// when no upcoming rune could be peeked (end of data or invalid UTF8
// character); the returned rune is then one of eofRune or invalidRune.
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
	start := p.inputPos + byteOffset
	decoded, width := utf8.DecodeRuneInString(p.input[start:])
	return handleRuneError(decoded, width)
}
// Special rune values that are handed out when no regular rune could be read
// from the input.
const (
	// eofRune is a special rune that is used to indicate an end of file when
	// reading a character from the input.
	eofRune rune = -1

	// invalidRune is a special rune that is used to indicate an invalid UTF8
	// rune on the input.
	invalidRune rune = utf8.RuneError
)

// handleRuneError translates the result of a UTF8 decoding step into the
// package's own conventions.
//
// The arguments r and w are the rune and the width in bytes as returned by
// the decoder. The return values are the rune, its width in bytes and a
// boolean that tells whether a valid rune was read.
//
// When decoding fails, utf8.RuneError is in the rune. That value alone is
// ambiguous, so the width is used to tell the possible situations apart:
//  1. w == 0: end of file is reached; eofRune is returned
//  2. w == 1: invalid UTF8 character on input; invalidRune is returned
//  3. otherwise: the input holds a correctly encoded U+FFFD replacement
//     character, which is regular data and is returned as a valid rune
//
// Because invalidRune has the same value as a valid U+FFFD rune, calling code
// must use the returned boolean (not the rune value) to detect invalid input.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return eofRune, 0, false
	case 1:
		return invalidRune, w, false
	default:
		// The decoder only reports widths 0 and 1 for errors, so a wider
		// utf8.RuneError is a literal U+FFFD that was present in the input.
		return r, w, true
	}
}
// getCaller looks up a function on the call stack and returns its fully
// qualified name together with the source position ("file:line") at which
// that function is currently executing.
//
// A depth of 0 identifies the function that called getCaller, 1 identifies
// that function's caller, and so on.
func (p *ParseAPI) getCaller(depth int) (string, string) {
	// One extra frame is skipped to step over getCaller itself. The ok result
	// is deliberately discarded: this method is only called from within the
	// package, with depth values that are known to be safe.
	pc, file, line, _ := runtime.Caller(depth + 1)
	fn := runtime.FuncForPC(pc)
	return fn.Name(), fmt.Sprintf("%s:%d", file, line)
}