go-parsekit/parsekit.go

193 lines
6.8 KiB
Go

package parsekit
import (
"fmt"
"reflect"
"runtime"
)
// New creates a Parser around the provided parsing logic.
//
// The logic parameter must be one of the following:
//
// 1) A StateHandler (or a plain func(*P)): the result is a state
// machine-style recursive descent parser, in which StateHandler functions
// drive the state machine forward while parsing. This kind of parser is
// very flexible and is able to emit multiple items from the parse flow.
// It is typically used for parsing languages and structured data formats
// (like json, toml, etc.)
//
// 2) A Matcher (or a plain func(m *MatchDialog) bool): the result is a
// parser/combinator-style parser that can only check the input against
// the Matcher, reporting back either a successful match or a failure.
// This kind of parser is typically useful for validation and normalization
// of input data. Before using parsekit for such a task, do consider
// regular expressions; they might serve you better.
//
// New panics when logic is of any other type.
func New(logic interface{}) *Parser {
	var handler StateHandler
	switch impl := logic.(type) {
	case StateHandler:
		handler = impl
	case func(*P):
		handler = impl
	case Matcher:
		return makeParserForMatcher(impl)
	case func(m *MatchDialog) bool:
		return makeParserForMatcher(impl)
	default:
		panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
	}
	return makeParserForStateHandler(handler)
}
// makeParserForStateHandler returns a new Parser whose start state is
// the provided handler.
func makeParserForStateHandler(handler StateHandler) *Parser {
	parser := new(Parser)
	parser.startState = handler
	return parser
}
// makeParserForMatcher returns a new Parser that wraps the provided matcher
// in a single StateHandler. That handler registers the expectation "match"
// and emits a MatchedItem literal when the
// p.On(matcher).Accept().RouteRepeat().End() chain reports true.
func makeParserForMatcher(matcher Matcher) *Parser {
	var matchHandler StateHandler = func(p *P) {
		p.Expects("match")
		if !p.On(matcher).Accept().RouteRepeat().End() {
			return
		}
		p.EmitLiteral(MatchedItem)
	}
	return New(matchHandler)
}
// Parser is the top-level parser. It only holds the StateHandler at which
// a parse run starts; the state of an actual parse run lives in the Run
// that is returned by Parse.
type Parser struct {
startState StateHandler // the function that handles the very first state
}
// Parse prepares a parse run for the provided input data and returns it.
// The run starts out with the cursor at line 1, column 1, with the
// start state of the Parser queued as the next state, and with an items
// channel that buffers up to 2 items. No state handler is invoked here;
// that happens when Next is called on the returned Run.
func (p *Parser) Parse(input string) *Run {
	state := &P{
		input:        input,
		len:          len(input),
		cursorLine:   1,
		cursorColumn: 1,
		nextState:    p.startState,
		items:        make(chan Item, 2),
	}
	return &Run{p: state}
}
// Run represents a single parse run for a Parser. A Run is created by
// Parser.Parse and is moved forward by calling Next.
type Run struct {
p *P // a struct holding the internal state of a parse run
}
// P holds the internal state of a parse run. A pointer to this struct
// is passed to every StateHandler that is invoked during the run.
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state (reset to nil before each handler invocation)
routeStack []StateHandler // route stack, for handling nested parsing (consulted when nextState is nil)
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorLine int // current row number in the input (starts at 1)
cursorColumn int // current column position in the input (starts at 1)
expecting string // a description of what the current state expects to find (reset to "" before each handler invocation)
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items (created with a buffer of 2 by Parse)
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
}
// Next retrieves the next parsed item for a parse run.
//
// It loops until an item is available on the items channel: as long as
// the channel is empty, the state machine is moved one step forward.
//
// The boolean return value is true when a valid item was found. It is
// false on error and when the end of the input was reached successfully.
// The error return value is set when an error occurred, and nil otherwise.
func (run *Run) Next() (Item, *Error, bool) {
	for {
		// Non-blocking receive: hand out a pending item when there is one.
		select {
		case item := <-run.p.items:
			return run.makeReturnValues(item)
		default:
		}
		// Nothing pending, so let the state machine produce more.
		run.runStatusHandler()
	}
}
// StateHandler defines the type of function that can be used to
// handle a parser state. The function receives the *P that holds the
// internal state of the parse run that it is invoked for.
type StateHandler func(*P)
// runStatusHandler moves the parser, which is basically a state machine,
// one step forward. It looks up the StateHandler that represents the next
// state and invokes it. That StateHandler is in turn responsible for
// deciding, based on the parsed input data, which state follows.
// When no next StateHandler is available, nothing is invoked.
func (run *Run) runStatusHandler() {
	next, ok := run.getNextStateHandler()
	if !ok {
		return
	}
	run.invokeNextStatusHandler(next)
}
// getNextStateHandler determines the next StateHandler to invoke in order
// to move the parsing state machine one step further. The boolean return
// value tells whether or not a StateHandler was found.
//
// When implementing a parser, every StateHandler invocation must provide
// a routing decision. A routing decision is one of the following, checked
// in this order:
//
// * An explicit route: the next StateHandler to invoke was registered
// during the StateHandler invocation.
// For example: p.RouteTo(nextStatus)
//
// * An implicit route: a previous StateHandler invocation has registered
// the followup route for the current state.
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
// Here, the nextStatus StateHandler does not have to specify a route
// explicitly; otherStatus is used implicitly after nextStatus returns.
//
// * An expectation that was registered by the StateHandler.
// For example: p.Expects("a cool thing")
// When the StateHandler returns without having specified a route, the
// expectation is used to report unexpected input, and no StateHandler
// is returned.
//
// A StateHandler that provides none of these is considered to be buggy,
// and the parser panics, naming the offending StateHandler.
func (run *Run) getNextStateHandler() (StateHandler, bool) {
	p := run.p

	// Explicit route.
	if p.nextState != nil {
		return p.nextState, true
	}

	// Implicit route, taken from the route stack.
	if len(p.routeStack) > 0 {
		return p.popRoute(), true
	}

	// No route, but an expectation was registered.
	if p.expecting != "" {
		p.UnexpectedInput()
		return nil, false
	}

	// No routing decision at all: resolve the handler's name for the panic.
	handlerPC := reflect.ValueOf(p.state).Pointer()
	name := runtime.FuncForPC(handlerPC).Name()
	panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
}
// invokeNextStatusHandler makes the provided StateHandler the current
// state of the parse run and invokes it. Before the invocation, the
// registered next state and expectation are cleared, so the StateHandler
// starts out without a routing decision.
func (run *Run) invokeNextStatusHandler(state StateHandler) {
	p := run.p
	p.expecting = ""
	p.nextState = nil
	p.state = state
	state(p)
}
// makeReturnValues translates an item that was received from the items
// channel into the return values for Next:
//
// * an ItemEOF item yields the item, a nil error and false;
//
// * an ItemError item yields the item, an Error built from the item value
// and the current cursor line and column (which is also stored in the
// parse run state) and false;
//
// * any other item is stored as the current item of the parse run and
// yields the item, a nil error and true.
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
	if i.Type == ItemEOF {
		return i, nil, false
	}
	if i.Type == ItemError {
		parseErr := &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
		run.p.err = parseErr
		return i, parseErr, false
	}
	run.p.item = i
	return i, nil, true
}