go-parsekit/parse/api.go

package parse

import (
	"fmt"
	"io"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// API holds the internal state of a parse run and provides an API that
// parse.Handler functions can use to:
//
// • communicate with tokenize.Handler functions (Peek, Accept, ExpectEndOfFile, Result)
//
// • update the parser status (Error, Expected, Stop)
//
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
//
// Once an error has been set through Error() or the parser has been stopped
// through Stop(), the methods Peek, Accept, Handle, ExpectEndOfFile and
// Expected refuse to run and panic instead.
type API struct {
	tokenAPI  *tokenize.API    // the tokenize.API on which tokenize.Handler functions are run
	result    *tokenize.Result // result of the last successful Peek() or Accept(); nil when there is none
	loopCheck map[string]bool  // parser loop detection: the caller file positions recorded by checkForLoops()
	err       error            // parse error as set by Error(); most API methods panic when this is non-nil
	stopped   bool             // set to true by Stop(); most API methods panic when this is true
}

// Peek reports whether the upcoming input matches the provided
// tokenize.Handler, without consuming that input: a following call to
// Peek() or Accept() starts from the same read cursor position.
//
// When the handler matches, true is returned and the tokenize.Result that
// it produced can be retrieved through the Result() method. When it does
// not match, false is returned and no result is available.
func (p *API) Peek(tokenHandler tokenize.Handler) bool {
	p.result = nil
	child, matched := p.invokeHandler("Peek", tokenHandler)
	if !matched {
		return false
	}

	// Keep the result that the handler produced on the fork, then reset
	// the tokenize.API so that peeking leaves it as it was.
	p.result = child.Result()
	p.tokenAPI.Reset()
	return true
}

// Accept reports whether the upcoming input matches the provided
// tokenize.Handler and, if it does, consumes that input.
//
// On a match, true is returned and the read cursor is moved forward to
// beyond the match that was found. Otherwise false is returned and the
// read cursor stays at the same position.
//
// After a successful call, the tokenize.Result can be retrieved using the
// Result() method.
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
	p.result = nil
	child, matched := p.invokeHandler("Accept", tokenHandler)
	if !matched {
		return false
	}

	// Merge the fork back into the tokenize.API, take the result from
	// there and dispose of the fork.
	child.Merge()
	p.result = p.tokenAPI.Result()
	child.Dispose()

	// The loop detection data is only reset when FlushInput() reports true.
	if flushed := p.tokenAPI.FlushInput(); flushed {
		p.initLoopCheck()
	}
	return true
}

// invokeHandler holds the logic that Peek() and Accept() have in common.
// The name argument is the name of the calling method and is only used
// in panic messages.
//
// It panics when the parser was stopped or is in error, when a parser loop
// is detected or when tokenHandler is nil. Otherwise, it clears the stored
// result, resets the tokenize.API, forks it and runs the tokenHandler
// against the fork. The fork is returned together with the outcome of the
// handler; it is up to the caller to decide what to do with the fork.
//
// NOTE(review): the skip values used here and in checkForLoops() look like
// they assume that this method is called directly from Peek() or Accept();
// confirm against callerPanic()/callerFilepos() before adding a call level.
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) {
	p.panicWhenStoppedOrInError(name)
	p.checkForLoops()
	if tokenHandler == nil {
		callerPanic(2, "parsekit.parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
	}

	p.result = nil
	p.tokenAPI.Reset()

	fork := p.tokenAPI.Fork()
	matched := tokenHandler(fork)
	return fork, matched
}

// panicWhenStoppedOrInError guards the API methods against being used
// after the parser has been stopped or after an error has been set.
// In either of those states, it panics with a message that mentions the
// offending method (the name argument) and the call that ended the parse
// run: Stop() when the parser was stopped, Error() otherwise.
//
// This guard enforces clean code paths in parsers. It can be seen as a
// unit test that is executed at runtime.
func (p *API) panicWhenStoppedOrInError(name string) {
	var after string
	switch {
	case p.stopped:
		// Being stopped takes precedence when an error was set as well.
		after = "Stop()"
	case p.err != nil:
		after = "Error()"
	default:
		// Not stopped and not in error: the call is allowed.
		return
	}

	callerPanic(2, "parsekit.parse.API.%s(): Illegal call to %s() at {caller}: "+
		"no calls allowed after API.%s", name, name, after)
}

// isStoppedOrInError reports whether the parse run has ended, either
// because an error was set or because the parser was stopped.
func (p *API) isStoppedOrInError() bool {
	if p.err != nil {
		return true
	}
	return p.stopped
}

// initLoopCheck resets the loop detection data by replacing it with a
// new, empty map. That map is used by checkForLoops() to keep track of the
// file positions from which Accept() and/or Peek() were called.
// Accept() calls this method after a successful match, when flushing the
// input of the tokenize.API reports true.
func (p *API) initLoopCheck() {
	p.loopCheck = make(map[string]bool)
}

// checkForLoops implements parser loop detection. It looks up the file
// position of the code that called Accept() or Peek() and panics when that
// position was already recorded since the loop check data was last reset.
// Otherwise the position is recorded for subsequent checks.
func (p *API) checkForLoops() {
	// Skip 3 levels; this method is reached via invokeHandler().
	pos := callerFilepos(3)

	_, seen := p.loopCheck[pos]
	if seen {
		callerPanic(3, "parsekit.parse.API: Loop detected in parser at {caller}")
	}
	p.loopCheck[pos] = true
}

// Result returns the tokenize.Result struct that was produced by the last
// successful Peek() or Accept() call.
//
// When no result is available, because neither Peek() nor Accept() was
// called yet or because the last call did not produce a match, this
// method panics.
func (p *API) Result() *tokenize.Result {
	if p.result == nil {
		callerPanic(1, "parsekit.parse.API.Result(): Result() called "+
			"at {caller} without calling API.Peek() or API.Accept() on beforehand")
	}
	return p.result
}

// Handle runs another parse.Handler function from within the currently
// active parse.Handler function.
//
// The returned boolean tells whether the parser can still continue after
// the handler has run: it is false when an error was set using Error() or
// when the parser was stopped using Stop(), true otherwise.
//
// Calling the other handler directly is possible too, but using this
// method is generally advised: it panics when the parser is already
// stopped or in error or when the provided handler is nil, and it hands
// back an easy to use "can continue" boolean.
func (p *API) Handle(parseHandler Handler) bool {
	p.panicWhenStoppedOrInError("Handle")
	p.panicWhenHandlerNil(parseHandler)

	parseHandler(p)

	canContinue := !p.isStoppedOrInError()
	return canContinue
}

// panicWhenHandlerNil panics when the parse.Handler that was provided to
// Handle() is nil. For a non-nil handler, it does nothing.
func (p *API) panicWhenHandlerNil(parseHandler Handler) {
	if parseHandler != nil {
		return
	}
	callerPanic(2, "parsekit.parse.API.Handle(): Handle() called with nil input at {caller}")
}

// Stop marks the parsing process as completed.
//
// A parser does not strictly have to call Stop(): when the initial
// parse.Handler function returns without stopping and without an error,
// ExpectEndOfFile() is called automatically to verify that the end of the
// file was reached. When that is not the case, the parse run ends with an
// unexpected input error.
//
// Note:
// That fallback works in a lot of cases, but it is better to be explicit
// and have the parser call Stop() actively.
//
// After stopping, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
func (p *API) Stop() {
	p.stopped = true
}

// Error sets the parse error in the API. The error message is built from
// the provided format and args (fmt.Sprintf style), followed by " at " and
// the cursor of the current tokenize.API result.
//
// After setting an error, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
//
// TODO: the error is not read back through this method, it is only set
// here. Is Error() a good name, or would for example SetError() be better?
func (p *API) Error(format string, args ...interface{}) {
	// Deliberately no call to p.panicWhenStoppedOrInError() here. This
	// allows a parser to set a different error message when needed.
	text := fmt.Sprintf(format, args...)
	cursor := *p.tokenAPI.Result().Cursor()
	p.err = fmt.Errorf("%s at %s", text, cursor)
}

// ExpectEndOfFile can be used to check if the input is at end of file.
//
// When it finds that the end of the file was indeed reached, then the parser
// will be stopped through Stop(). Otherwise, the unexpected input is reported
// using Expected("end of file").
//
// Like the other API methods, it panics when it is called after the parser
// was stopped or after an error was set.
func (p *API) ExpectEndOfFile() {
	// The name passed here ends up in the panic message, so it must match
	// the actual method name.
	p.panicWhenStoppedOrInError("ExpectEndOfFile")
	if p.Peek(tokenize.A.EndOfFile) {
		p.Stop()
	} else {
		p.Expected("end of file")
	}
}

// Expected sets a parser error that reports that unexpected input was
// encountered.
//
// The 'expected' argument describes what the parser was expecting. It can
// be an empty string, in which case the error message will not contain a
// description of the expected input.
//
// The error message is selected based on the outcome of reading the next
// rune from the input:
//
// • a rune could be read: the input simply didn't match the expectation
//
// • io.EOF: the end of the input was reached
//
// • any other error: there was an error while reading the input.
func (p *API) Expected(expected string) {
	p.panicWhenStoppedOrInError("Expected")

	_, err := p.tokenAPI.NextRune()
	suffix := fmtExpects(expected)

	switch err {
	case nil:
		p.Error("unexpected input%s", suffix)
	case io.EOF:
		p.Error("unexpected end of file%s", suffix)
	default:
		p.Error("unexpected error '%s'%s", err, suffix)
	}
}

// fmtExpects formats the description of the expected input as a suffix for
// an error message, in the form " (expected <description>)". For an empty
// description, the suffix is an empty string.
func fmtExpects(expected string) string {
	var suffix string
	if expected != "" {
		suffix = fmt.Sprintf(" (expected %s)", expected)
	}
	return suffix
}