270 lines
9.4 KiB
Go
270 lines
9.4 KiB
Go
package parse
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
|
)
|
|
|
|
// API holds the internal state of a parse run and provides an API that
|
|
// parse.Handler functions can use to:
|
|
//
|
|
// • communicate with tokenize.Handler functions (Peek, Accept, ExpectEndOfFile, Result)
|
|
//
|
|
// • update the parser status (Error, Expected, Stop)
|
|
//
|
|
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
|
|
type API struct {
|
|
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
|
|
Result TokenizeResult // a struct, holding the results of the last Peek() or Accept() call
|
|
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
|
|
loopCheck map[uintptr]bool // used for parser loop detection
|
|
err error // parse error, retrieved by Error(), using API methods is denied when set
|
|
stopped bool // a boolean set to true by Stop()
|
|
}
|
|
|
|
// TokenizeResult holds the results of the last Peek() or Accept() call.
|
|
type TokenizeResult struct {
|
|
Tokens []tokenize.Token // the resulting tokens from the last call to Peek() or Accept()
|
|
Runes []rune // the resulting runes from the last call to Peek() or Accept()
|
|
}
|
|
|
|
func (result *TokenizeResult) String() string {
|
|
return string(result.Runes)
|
|
}
|
|
|
|
// DisableSanityChecks disables the built-in parser implementation sanity checks,
|
|
// which detects parser implementation errors like loops and continuing parsing
|
|
// after an error or invoking Stop().
|
|
//
|
|
// These tests do cause a performance hit. When your parser has to handle a lot
|
|
// of input data and is fairly complex, you might want to disable the sanity
|
|
// checks. When you're not sure, You probably don't want to use this method,
|
|
// and enjoy the added safety of the built-in checks.
|
|
func (p *API) DisableSanityChecks() {
|
|
p.sanityChecksEnabled = true
|
|
}
|
|
|
|
// Peek checks if the upcoming input data matches the provided tokenize.Handler.
|
|
// If it does, then true will be returned, false otherwise. The read cursor
|
|
// will be kept at the same position, so the next call to Peek() or Accept()
|
|
// will start from the same cursor position.
|
|
func (p *API) Peek(tokenHandler tokenize.Handler) bool {
|
|
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
|
|
if ok {
|
|
p.Result.Tokens = p.tokenAPI.Tokens()
|
|
p.Result.Runes = p.tokenAPI.Runes()
|
|
}
|
|
p.tokenAPI.Dispose(forkedAPI)
|
|
return ok
|
|
}
|
|
|
|
// Accept checks if the upcoming input data matches the provided tokenize.Handler.
|
|
// If it does, then true will be returned and the read cursor will be moved
|
|
// forward to beyond the match that was found. Otherwise false will be
|
|
// and the read cursor will stay at the same position.
|
|
//
|
|
// After calling this method, you can retrieve the results using the Result() method.
|
|
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
|
|
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
|
|
if ok {
|
|
// Keep track of the results.
|
|
p.Result.Tokens = p.tokenAPI.Tokens()
|
|
p.Result.Runes = p.tokenAPI.Runes()
|
|
|
|
// Merge to the parent level.
|
|
p.tokenAPI.Merge(forkedAPI)
|
|
p.tokenAPI.Dispose(forkedAPI)
|
|
|
|
// And flush the input reader buffer.
|
|
if p.tokenAPI.FlushInput() {
|
|
if p.sanityChecksEnabled {
|
|
p.initLoopCheck()
|
|
}
|
|
}
|
|
} else {
|
|
p.tokenAPI.Dispose(forkedAPI)
|
|
}
|
|
return ok
|
|
}
|
|
|
|
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
|
|
if p.sanityChecksEnabled {
|
|
p.panicWhenStoppedOrInError(name)
|
|
p.checkForLoops(name)
|
|
if tokenHandler == nil {
|
|
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
|
|
}
|
|
}
|
|
|
|
p.tokenAPI.Reset()
|
|
child := p.tokenAPI.Fork()
|
|
ok := tokenHandler(p.tokenAPI)
|
|
|
|
return child, ok
|
|
}
|
|
|
|
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
|
// or when it has been stopped. It is used from the API methods, to
|
|
// prevent further calls to the API on these occasions.
|
|
//
|
|
// Basically, this guard helps with proper coding of parsers, making sure
|
|
// that clean routes are followed. You can consider this check a runtime
|
|
// unit test.
|
|
func (p *API) panicWhenStoppedOrInError(name string) {
|
|
if !p.IsStoppedOrInError() {
|
|
return
|
|
}
|
|
|
|
after := "Error()"
|
|
if p.stopped {
|
|
after = "Stop()"
|
|
}
|
|
callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+
|
|
"no calls allowed after API.%s", after)
|
|
}
|
|
|
|
// IsStoppedOrInError checks if the parser has stopped or if an error was set.
|
|
// When true, then the parser can no longer continue. If your parser tries to
|
|
// call parse.API methods when true is returned, this will result in a panic.
|
|
func (p *API) IsStoppedOrInError() bool {
|
|
return p.stopped || p.err != nil
|
|
}
|
|
|
|
// initLoopCheck clears the loop check data, a map in which we keep
|
|
// track of the lines of code from which Accept() and/or Peek() are called.
|
|
// When Accept() is called, and the parser moved forward in the input data,
|
|
// this method is called to reset the map for the new read cursor position.
|
|
func (p *API) initLoopCheck() {
|
|
p.loopCheck = make(map[uintptr]bool)
|
|
}
|
|
|
|
// checkForLoops checks if the line of code from which Accept() or Peek()
|
|
// was called has been seen before for the current read cursor position.
|
|
// If yes, then the parser is in a loop and the method will panic.
|
|
func (p *API) checkForLoops(name string) {
|
|
filepos := callerPointer(3)
|
|
if _, ok := p.loopCheck[filepos]; ok {
|
|
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
|
|
}
|
|
p.loopCheck[filepos] = true
|
|
}
|
|
|
|
// Handle executes other parse.Handler functions from within the active
|
|
// parse.Handler function.
|
|
//
|
|
// The boolean return value is true when the parser can still continue.
|
|
// It will be false when either an error was set using Error(), or the
|
|
// parser was stopped using Stop().
|
|
//
|
|
// When multiple parse.Handler functions are provided as arguments, they
|
|
// will be executed in the provided order. When one of those handlers stops
|
|
// the parser or sets an error, then the following handlers will not be called.
|
|
//
|
|
// Instead of calling another handler using this method, you can also call
|
|
// that other handler directly. However, it is generally advised to make use
|
|
// of this method, because it performs some sanity checks and it will return
|
|
// an easy to use boolean indicating whether the parser can continue or not.
|
|
func (p *API) Handle(parseHandler ...Handler) bool {
|
|
if p.sanityChecksEnabled {
|
|
p.panicWhenStoppedOrInError("Handle")
|
|
}
|
|
for _, handler := range parseHandler {
|
|
if p.sanityChecksEnabled {
|
|
p.panicWhenHandlerNil("Handle", handler)
|
|
}
|
|
handler(p)
|
|
if p.IsStoppedOrInError() {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) {
|
|
if parseHandler == nil {
|
|
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}")
|
|
}
|
|
}
|
|
|
|
// Stop tells the parser that the parsing process has been completed.
|
|
//
|
|
// When the initial parse.Handler function returns without stopping first
|
|
// and without running into an error, the method ExpectEndOfFile() is automatically
|
|
// called to verify if the end of the file was reached. If not, then things will
|
|
// end in an unexpected input error.
|
|
//
|
|
// Note:
|
|
// Even though this fallback mechanism will work in a lot of cases, try to make
|
|
// your parser explicit about things and call Stop() actively yourself.
|
|
//
|
|
// After stopping, no more calls to API methods are allowed.
|
|
// Calling a method in this state will result in a panic.
|
|
func (p *API) Stop() {
|
|
p.stopped = true
|
|
}
|
|
|
|
// Error sets the error message in the API.
|
|
//
|
|
// After setting an error, no more calls to API methods are allowed.
|
|
// Calling a method in this state will result in a panic.
|
|
// TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example?
|
|
func (p *API) Error(format string, data ...interface{}) {
|
|
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
|
// set a different error message when needed.
|
|
message := fmt.Sprintf(format, data...)
|
|
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor())
|
|
}
|
|
|
|
// ExpectEndOfFile can be used to check if the input is at end of file.
|
|
//
|
|
// When it finds that the end of the file was indeed reached, then the parser
|
|
// will be stopped through Stop(). Otherwise, the unexpected input is reported
|
|
// using Expected("end of file").
|
|
func (p *API) ExpectEndOfFile() {
|
|
if p.sanityChecksEnabled {
|
|
p.panicWhenStoppedOrInError("ExpectEndofFile")
|
|
}
|
|
if p.Peek(tokenize.A.EndOfFile) {
|
|
p.Stop()
|
|
} else {
|
|
p.Expected("end of file")
|
|
}
|
|
}
|
|
|
|
// Expected sets a parser error that indicates that some unexpected
|
|
// input was encountered.
|
|
//
|
|
// The 'expected' argument can be an empty string. In that case the error
|
|
// message will not contain a description of the expected input.
|
|
//
|
|
// This method automatically produces an error message for a couple of situations:
|
|
//
|
|
// • the input simply didn't match the expectation
|
|
//
|
|
// • the end of the input was reached
|
|
//
|
|
// • there was an error while reading the input.
|
|
func (p *API) Expected(expected string) {
|
|
if p.sanityChecksEnabled {
|
|
p.panicWhenStoppedOrInError("Expected")
|
|
}
|
|
_, err := p.tokenAPI.NextRune()
|
|
switch {
|
|
case err == nil:
|
|
p.Error("unexpected input%s", fmtExpects(expected))
|
|
case err == io.EOF:
|
|
p.Error("unexpected end of file%s", fmtExpects(expected))
|
|
default:
|
|
p.Error("unexpected error '%s'%s", err, fmtExpects(expected))
|
|
}
|
|
}
|
|
|
|
// fmtExpects formats the description of the expected input as a suffix for
// an error message: " (expected <description>)". When the description is
// an empty string, an empty string is returned.
func fmtExpects(expected string) string {
	if expected != "" {
		return fmt.Sprintf(" (expected %s)", expected)
	}
	return ""
}
|