179 lines
6.1 KiB
Go
179 lines
6.1 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"reflect"
|
|
"runtime"
|
|
)
|
|
|
|
// Parser is the top-level struct that holds the configuration for a parser.
|
|
// The Parser can be instantiated using the parsekit.New() method.
|
|
//
|
|
// To start parsing input data, use the method Parser.Parse().
|
|
type Parser struct {
|
|
startState StateHandler // the function that handles the very first state
|
|
}
|
|
|
|
// New instantiates a new Parser.
|
|
// The logic parameter provides the parsing logic to apply. This can be:
|
|
//
|
|
// 1) A StateHandler function: in this case, a state machine-style
|
|
// recursive descent parser is created, in which StateHandler functions
|
|
// are used to move the state machine forward during parsing.
|
|
// This type of parser offers a lot of flexibility and it is possible to
|
|
// emit multiple items from the parse flow.
|
|
//
|
|
// This style of parser is typically used for parsing languages and
|
|
// structured data formats (like json, toml, etc.)
|
|
//
|
|
// 2) A Matcher function: in this case, a parser/combinator-style parser
|
|
// is created, which can be used to match against the provided logic.
|
|
// The parser can only check input against the Matcher function, and
|
|
// reports back a successful match or a failure.
|
|
//
|
|
// This style of parser can typically be used for validation and normalization
|
|
// of input data. However, when you are about to use parsekit for that
|
|
// task, consider using regular expressions instead. They might serve
|
|
// you better.
|
|
func New(logic interface{}) *Parser {
|
|
switch logic := logic.(type) {
|
|
case func(*P):
|
|
return makeParserForStateHandler(logic)
|
|
case StateHandler:
|
|
return makeParserForStateHandler(logic)
|
|
case func(m *MatchDialog) bool:
|
|
return makeParserForMatcher(logic)
|
|
case Matcher:
|
|
return makeParserForMatcher(logic)
|
|
default:
|
|
panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
|
|
}
|
|
}
|
|
|
|
func makeParserForStateHandler(handler StateHandler) *Parser {
|
|
return &Parser{startState: handler}
|
|
}
|
|
|
|
func makeParserForMatcher(matcher Matcher) *Parser {
|
|
return New(StateHandler(func(p *P) {
|
|
p.Expects("match")
|
|
if p.On(matcher).Accept().RouteRep().End() {
|
|
p.EmitLiteral(MatchedItem)
|
|
}
|
|
}))
|
|
}
|
|
|
|
// Run represents a single parse run for a Parser.
|
|
type Run struct {
|
|
p *P // a struct holding the internal state of a parse run
|
|
}
|
|
|
|
// Parse starts a parse run on the provided input data.
|
|
// To retrieve parse items from the run, make use of the Run.Next() method.
|
|
func (p *Parser) Parse(input string) *Run {
|
|
return &Run{
|
|
p: &P{
|
|
input: input,
|
|
len: len(input),
|
|
cursorLine: 1,
|
|
cursorColumn: 1,
|
|
nextState: p.startState,
|
|
items: make(chan Item, 2),
|
|
},
|
|
}
|
|
}
|
|
|
|
// Next retrieves the next parsed item for a parse run.
|
|
//
|
|
// When a valid item was found, then the boolean return parameter will be true.
|
|
// On error or when successfully reaching the end of the input, false is returned.
|
|
// When an error occurred, false will be returned and the error return value will
|
|
// be set (default is nil).
|
|
func (run *Run) Next() (Item, *Error, bool) {
|
|
// State handling loop: we handle states, until an Item is ready to be returned.
|
|
for {
|
|
select {
|
|
// If a state handler has emitted an (error) Item, then the state handling
|
|
// loop is stopped and the Item is returned to the caller.
|
|
case i := <-run.p.items:
|
|
return run.makeReturnValues(i)
|
|
// Otherwise, the next state handler is looked up and invoked.
|
|
default:
|
|
run.runNextStateHandler()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
|
switch {
|
|
case i.Type == ItemEOF:
|
|
return i, nil, false
|
|
case i.Type == ItemError:
|
|
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
|
return i, run.p.err, false
|
|
default:
|
|
run.p.item = i
|
|
return i, nil, true
|
|
}
|
|
}
|
|
|
|
// runNextStateHandler moves the parser, which is bascially a state machine,
|
|
// to its next status. It does so by invoking a function of the
|
|
// type StateHandler. This function represents the current status and
|
|
// is responsible for moving the parser to its next status, depending
|
|
// on the parsed input data.
|
|
func (run *Run) runNextStateHandler() {
|
|
if state, ok := run.getNextStateHandler(); ok {
|
|
run.invokeNextStateHandler(state)
|
|
}
|
|
}
|
|
|
|
// getNextStateHandler determines the next StateHandler to invoke in order
|
|
// to move the parsing state machine one step further.
|
|
//
|
|
// When implementing a parser, the StateHandler functions must provide
|
|
// a routing decision in every invocation. A routing decision is one
|
|
// of the following:
|
|
//
|
|
// * A route is specified explicitly, which means that the next StateHandler
|
|
// function to invoke is registered during the StateHandler function
|
|
// invocation. For example: p.RouteTo(nextStatus)
|
|
//
|
|
// * A route is specified implicitly, which means that a previous StateHandler
|
|
// invocation has registered the followup route for the current state.
|
|
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
|
|
// In this example, the nextStatus StateHandler will not have to specify
|
|
// a route explicitly, but otherStatus will be used implicitly after
|
|
// the nextStatus function has returned.
|
|
//
|
|
// * An expectation is registered by the StateHandler.
|
|
// For example: p.Expects("a cool thing")
|
|
// When the StateHandler returns without having specified a route, this
|
|
// expectation is used to generate an "unexpected input" error message.
|
|
//
|
|
// When no routing decision is provided by a StateHandler, then this is
|
|
// considered a bug in the state handler, and the parser will panic.
|
|
func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
|
switch {
|
|
case run.p.nextState != nil:
|
|
return run.p.nextState, true
|
|
case len(run.p.routeStack) > 0:
|
|
return run.p.popRoute(), true
|
|
case run.p.expecting != "":
|
|
run.p.UnexpectedInput()
|
|
return nil, false
|
|
default:
|
|
name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
|
|
panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
|
|
}
|
|
}
|
|
|
|
// invokeNextStateHandler moves the parser state to the provided state
|
|
// and invokes the StateHandler function.
|
|
func (run *Run) invokeNextStateHandler(state StateHandler) {
|
|
run.p.state = state
|
|
run.p.nextState = nil
|
|
run.p.expecting = ""
|
|
run.p.state(run.p)
|
|
}
|