go-parsekit/parsekit.go

210 lines
6.9 KiB
Go

package parsekit
import (
"fmt"
"reflect"
"runtime"
)
// Parser is the top-level struct that holds the configuration for a parser:
// the ParseHandler that is invoked first when parsing starts.
// A Parser can be instantiated using the parsekit.NewParser() function.
type Parser struct {
	// startHandler is the function that handles the very first state.
	startHandler ParseHandler
}
// NewParser creates a new Parser that starts parsing at startHandler.
//
// The Parser is a recursive descent parser that is modelled as a state
// machine: ParseHandler functions move the state machine forward while the
// input is being parsed. Parsers of this kind are typically used for
// programming languages and structured data formats (JSON, XML, TOML, etc.)
//
// To parse input data, call the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
	parser := new(Parser)
	parser.startHandler = startHandler
	return parser
}
// ParseRun represents a single parse run for a Parser.
//
// Deprecated: ParseRun belongs to the deprecated Parser.Parse() API;
// use Parser.Execute() instead.
type ParseRun struct {
	// p holds the parser state and provides an API to ParseHandler functions.
	p *ParseAPI
}
// Execute runs the parser on the provided input.
//
// A fresh ParseAPI is set up for the input, with the cursor at line 1,
// column 1, and the start handler of the Parser is invoked with it.
// When the start handler returns without the ParseAPI having been stopped,
// ParseAPI.UnexpectedInput() is called on it.
//
// The error that is registered in the ParseAPI is returned. This is nil
// when no error occurred during parsing.
func (p *Parser) Execute(input string) *Error {
	state := new(ParseAPI)
	state.input = input
	state.len = len(input)
	state.cursorLine, state.cursorColumn = 1, 1
	state.nextState = p.startHandler

	p.startHandler(state)
	if state.stopped {
		return state.err
	}

	// The handler returned without stopping the parser.
	state.UnexpectedInput()
	return state.err
}
// Parse used to start a parse run on the provided input data, returning a
// ParseRun from which emitted parser Items could be retrieved by means of
// the ParseRun.Next() method. It no longer does so: every call panics.
//
// Deprecated: use Parser.Execute() instead.
func (p *Parser) Parse(input string) *ParseRun {
	const deprecationMessage = "Parse() is deprecated, use Execute()"
	panic(deprecationMessage)
}
// Next retrieves the next parsed item for a parse run.
//
// The boolean return value is true when a valid item was found. It is false
// on error and when the end of the input was reached successfully. In case
// of an error, the error return value is set as well (it is nil otherwise).
func (run *ParseRun) Next() (Item, *Error, bool) {
	// Keep invoking state handlers for as long as no emitted Item is
	// waiting to be returned.
	for len(run.p.items) == 0 {
		run.runNextParseHandler()
	}

	// Take the oldest emitted Item off the queue and hand it to the caller.
	next := run.p.items[0]
	run.p.items = run.p.items[1:]
	return run.makeReturnValues(next)
}
// makeReturnValues translates an emitted Item into the return values
// for ParseRun.Next():
//
//   - an ItemEOF item is returned without an error, flagged as not valid;
//   - an ItemError item is turned into an Error that carries the item value
//     and the current cursor line and column. That Error is stored in the
//     parser state and returned, with the item flagged as not valid;
//   - any other item is stored in the parser state and returned as valid.
func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
	if i.Type == ItemEOF {
		return i, nil, false
	}
	if i.Type == ItemError {
		parseErr := &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
		run.p.err = parseErr
		return i, parseErr, false
	}
	run.p.item = i
	return i, nil, true
}
// runNextParseHandler moves the parser, which is basically a state machine,
// to its next state. The next state is represented by a ParseHandler
// function, which is looked up using getNextParseHandler. When a handler
// is available, it is invoked. That handler is responsible for moving the
// parser forward, depending on the parsed input data. When no handler is
// available, nothing is invoked.
func (run *ParseRun) runNextParseHandler() {
	handler, found := run.getNextParseHandler()
	if !found {
		return
	}
	run.invokeNextParseHandler(handler)
}
// getNextParseHandler determines which ParseHandler must be invoked next
// to move the parsing state machine one step further. The boolean return
// value tells whether or not such a handler is available.
//
// When implementing a parser, every invocation of a ParseHandler function
// must result in a routing decision. A routing decision is one of:
//
//   - An explicit route: the next ParseHandler function to invoke is
//     registered during the ParseHandler function invocation.
//     For example: p.RouteTo(nextStatus)
//
//   - An implicit route: a previous ParseHandler invocation has registered
//     the followup route for the current state.
//     For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
//     In this example, the nextStatus ParseHandler does not have to specify
//     a route explicitly. After nextStatus has returned, otherStatus is
//     used implicitly.
//
//   - An expectation that is registered by the ParseHandler.
//     For example: p.Expects("a cool thing")
//     When the ParseHandler returns without having specified a route, the
//     expectation is used for generating an "unexpected input" error
//     message. In this case, no handler is returned.
//
// A ParseHandler that provides no routing decision at all is considered
// to be a bug in that state handler, and makes the parser panic.
func (run *ParseRun) getNextParseHandler() (ParseHandler, bool) {
	api := run.p

	// Explicit route.
	if api.nextState != nil {
		return api.nextState, true
	}

	// Implicit route, registered by an earlier handler.
	if len(api.routeStack) > 0 {
		return api.popRoute(), true
	}

	// No route, but an expectation was registered.
	if api.expecting != "" {
		api.UnexpectedInput()
		return nil, false
	}

	// No routing decision: report the name of the offending handler.
	handlerName := runtime.FuncForPC(reflect.ValueOf(api.state).Pointer()).Name()
	panic(fmt.Sprintf("internal parser error: ParseHandler %s did not provide a routing decision", handlerName))
}
// invokeNextParseHandler makes the provided state the current state of the
// parser and invokes its ParseHandler function. Before the invocation, the
// registered next state and expectation are cleared, so the handler starts
// without a routing decision from an earlier handler.
func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
	api := run.p
	api.state, api.nextState, api.expecting = state, nil, ""
	state(api)
}
// Matcher is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// A Matcher can be instantiated using the parsekit.NewMatcher() function.
//
// To match input data against the wrapped TokenHandler function, use the
// method Matcher.Execute(). The older Matcher.Parse() method goes through
// the deprecated Parser.Parse() method, which panics.
type Matcher struct {
	// parser is the Parser that runs the wrapped TokenHandler function.
	parser *Parser
	// match is the matched input, as stored by the parser on a match.
	match string
}
// NewMatcher creates a new Matcher.
//
// A Matcher is a simple wrapper around a TokenHandler function. It makes it
// straightforward to match an input string against that TokenHandler
// function and to retrieve the results.
//
// The 'expects' parameter is used for creating an error message in case the
// parsed input does not match the TokenHandler.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
	m := new(Matcher)

	handler := func(api *ParseAPI) {
		if !api.On(tokenHandler).Accept() {
			// No match: register what was expected and flag the input.
			api.Expects(expects)
			api.UnexpectedInput()
			return
		}
		// Match: remember the matched input and stop the parser.
		m.match = api.BufLiteral()
		api.Stop()
	}

	m.parser = NewParser(handler)
	return m
}
// Execute feeds the input to the wrapped TokenHandler function.
//
// It returns the matched input string and an error. When an error occurred
// during parsing, the error will be set, nil otherwise.
//
// The stored match is cleared before every run. Without this, a Matcher
// that is reused after a successful run would return the match of that
// earlier run alongside the error of a later, failing run.
func (m *Matcher) Execute(input string) (string, *Error) {
	// Drop the result of any previous run; the parser only stores a new
	// match when the input actually matches.
	m.match = ""
	err := m.parser.Execute(input)
	return m.match, err
}
// Parse checks for a match on the provided input data, by starting a parse
// run on the wrapped parser and retrieving the first item from that run.
// When a valid item is retrieved, its value is returned. Otherwise, an empty
// string is returned, together with the error from the parse run.
//
// Deprecated: this method goes through Parser.Parse(), which panics on
// every call; use Matcher.Execute() instead.
func (m *Matcher) Parse(input string) (string, *Error) {
	run := m.parser.Parse(input)
	item, err, ok := run.Next()
	if ok {
		return item.Value, nil
	}
	return "", err
}