package parse import ( "fmt" "io" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) // API holds the internal state of a parse run and provides an API that // parse.Handler functions can use to: // // • communicate with tokenize.Handler functions (Peek, Accept, ExpectEndOfFile, Result) // // • update the parser status (Error, Expected, Stop) // // • call other parse.Handler functions, the core of recursive-descent parsing (Handle) type API struct { tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek() loopCheck map[string]bool // used for parser loop detection err error // parse error, retrieved by Error(), using API methods is denied when set stopped bool // a boolean set to true by Stop(), using API methods is denied when true } // Peek checks if the upcoming input data matches the provided tokenize.Handler. // If it does, then true will be returned, false otherwise. The read cursor // will be kept at the same position, so the next call to Peek() or Accept() // will start from the same cursor position. // // After calling this method, you can retrieve the produced tokenize.Result // struct using the Result() method. func (p *API) Peek(tokenHandler tokenize.Handler) bool { p.result = nil forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) if ok { p.result = forkedAPI.Result() p.tokenAPI.Reset() } return ok } // Accept checks if the upcoming input data matches the provided tokenize.Handler. // If it does, then true will be returned and the read cursor will be moved // forward to beyond the match that was found. Otherwise false will be // and the read cursor will stay at the same position. // // After calling this method, you can retrieve the tokenize.Result // using the Result() method. func (p *API) Accept(tokenHandler tokenize.Handler) bool { p.result = nil forkedAPI, ok := p.invokeHandler("Accept", tokenHandler) if ok { forkedAPI.Merge() p.result = p.tokenAPI.Result() forkedAPI.Dispose() if p.tokenAPI.FlushInput() { p.initLoopCheck() } } return ok } func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) { p.panicWhenStoppedOrInError(name) p.checkForLoops() if tokenHandler == nil { callerPanic(2, "parsekit.parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, name) } p.result = nil p.tokenAPI.Reset() child := p.tokenAPI.Fork() ok := tokenHandler(child) return child, ok } // panicWhenStoppedOrInError will panic when the parser has produced an error // or when it has been stopped. It is used from the API methods, to // prevent further calls to the API on these occasions. // // Basically, this guard helps with proper coding of parsers, making sure // that clean routes are followed. You can consider this check a runtime // unit test. func (p *API) panicWhenStoppedOrInError(name string) { if !p.isStoppedOrInError() { return } after := "Error()" if p.stopped { after = "Stop()" } callerPanic(2, "parsekit.parse.API.%s(): Illegal call to %s() at {caller}: "+ "no calls allowed after API.%s", name, name, after) } func (p *API) isStoppedOrInError() bool { return p.stopped || p.err != nil } // initLoopCheck clears the loop check data, a map in which we keep // track of the lines of code from which Accept() and/or Peek() are called. // When Accept() is called, and the parser moved forward in the input data, // this method is called to reset the map for the new read cursor position. func (p *API) initLoopCheck() { p.loopCheck = map[string]bool{} } // checkForLoops checks if the line of code from which Accept() or Peek() // was called has been seen before for the current read cursor position. // If yes, then the parser is in a loop and the method will panic. func (p *API) checkForLoops() { filepos := callerFilepos(3) if _, ok := p.loopCheck[filepos]; ok { callerPanic(3, "parsekit.parse.API: Loop detected in parser at {caller}") } p.loopCheck[filepos] = true } // Result returns the tokenize.Result struct, containing results as produced by the // last Peek() or Accept() call. // // When Result() is called without first doing a Peek() or Accept(), then no // result will be available and the method will panic. func (p *API) Result() *tokenize.Result { result := p.result if p.result == nil { callerPanic(1, "parsekit.parse.API.Result(): Result() called "+ "at {caller} without calling API.Peek() or API.Accept() on beforehand") } return result } // Handle executes another parse.Handler function from within the active // parse.Handler function. // // The boolean return value is true when the parser can still continue. // It will be false when either an error was set using Error(), or the // parser was stopped using Stop(). // // Instead of calling another handler using this method, you can also call // that other handler directly. However, it is generally advised to make use // of this method, because it performs some sanity checks and it will return // an easy to use boolean indicating whether the parser can continue or not. func (p *API) Handle(parseHandler Handler) bool { p.panicWhenStoppedOrInError("Handle") p.panicWhenHandlerNil(parseHandler) parseHandler(p) return !p.isStoppedOrInError() } func (p *API) panicWhenHandlerNil(parseHandler Handler) { if parseHandler == nil { callerPanic(2, "parsekit.parse.API.Handle(): Handle() called with nil input at {caller}") } } // Stop tells the parser that the parsing process has been completed. // // When the initial parse.Handler function returns without stopping first // and without running into an error, the method ExpectEndOfFile() is automatically // called to verify if the end of the file was reached. If not, then things will // end in an unexpected input error. // // Note: // Even though this fallback mechanism will work in a lot of cases, try to make // your parser explicit about things and call Stop() actively yourself. // // After stopping, no more calls to API methods are allowed. // Calling a method in this state will result in a panic. func (p *API) Stop() { p.stopped = true } // Error sets the error message in the API. // // After setting an error, no more calls to API methods are allowed. // Calling a method in this state will result in a panic. // TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example? func (p *API) Error(format string, args ...interface{}) { // No call to p.panicWhenStoppedOrInError(), to allow a parser to // set a different error message when needed. message := fmt.Sprintf(format, args...) p.err = fmt.Errorf("%s at %s", message, *p.tokenAPI.Result().Cursor()) } // ExpectEndOfFile can be used to check if the input is at end of file. // // When it finds that the end of the file was indeed reached, then the parser // will be stopped through Stop(). Otherwise, the unexpected input is reported // using Expected("end of file"). func (p *API) ExpectEndOfFile() { p.panicWhenStoppedOrInError("ExpectEndofFile") if p.Peek(tokenize.A.EndOfFile) { p.Stop() } else { p.Expected("end of file") } } // Expected sets a parser error that indicates that some unexpected // input was encountered. // // The 'expected' argument can be an empty string. In that case the error // message will not contain a description of the expected input. // // This method automatically produces an error message for a couple of situations: // // • the input simply didn't match the expectation // // • the end of the input was reached // // • there was an error while reading the input. func (p *API) Expected(expected string) { p.panicWhenStoppedOrInError("Expected") _, err := p.tokenAPI.NextRune() switch { case err == nil: p.Error("unexpected input%s", fmtExpects(expected)) case err == io.EOF: p.Error("unexpected end of file%s", fmtExpects(expected)) default: p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) } } func fmtExpects(expected string) string { if expected == "" { return "" } return fmt.Sprintf(" (expected %s)", expected) }