package parsekit

import (
	"fmt"
	"reflect"
	"runtime"
)

// Parser is the top-level struct that holds the configuration for a parser.
// A Parser is instantiated using the parsekit.New() function.
//
// To start parsing input data, use the method Parser.Parse(). Every call to
// Parse() starts a new parse run, which begins at the Parser's start state.
type Parser struct {
	startState StateHandler // the function that handles the very first state
}

// New instantiates a new Parser.
// The logic parameter provides the parsing logic to apply. This can be:
//
// 1) A StateHandler function: in this case, a state machine-style
// recursive descent parser is created, in which StateHandler functions
// are used to move the state machine forward during parsing.
// This type of parser offers a lot of flexibility and it is possible to
// emit multiple items from the parse flow.
//
// This style of parser is typically used for parsing languages and
// structured data formats (like json, toml, etc.)
//
// 2) A Matcher function: in this case, a parser/combinator-style parser
// is created, which can be used to match against the provided logic.
// The parser can only check input against the Matcher function, and
// reports back a successful match or a failure.
//
// This style of parser can typically be used for validation and normalization
// of input data. However, when you are about to use parsekit for that
// task, consider using regular expressions instead. They might serve
// you better.
//
// New panics when the logic parameter is of any other type.
func New(logic interface{}) *Parser { switch logic := logic.(type) { case func(*P): return makeParserForStateHandler(logic) case StateHandler: return makeParserForStateHandler(logic) case func(m *MatchDialog) bool: return makeParserForMatcher(logic) case Matcher: return makeParserForMatcher(logic) default: panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic)) } } func makeParserForStateHandler(handler StateHandler) *Parser { return &Parser{startState: handler} } func makeParserForMatcher(matcher Matcher) *Parser { return New(StateHandler(func(p *P) { p.Expects("match") if p.On(matcher).Accept().RouteRep().End() { p.EmitLiteral(MatchedItem) } })) } // Run represents a single parse run for a Parser. type Run struct { p *P // a struct holding the internal state of a parse run } // Parse starts a parse run on the provided input data. // To retrieve parse items from the run, make use of the Run.Next() method. func (p *Parser) Parse(input string) *Run { return &Run{ p: &P{ input: input, len: len(input), cursorLine: 1, cursorColumn: 1, nextState: p.startState, items: make(chan Item, 2), }, } } // Next retrieves the next parsed item for a parse run. // // When a valid item was found, then the boolean return parameter will be true. // On error or when successfully reaching the end of the input, false is returned. // When an error occurred, false will be returned and the error return value will // be set (default is nil). func (run *Run) Next() (Item, *Error, bool) { // State handling loop: we handle states, until an Item is ready to be returned. for { select { // If a state handler has emitted an (error) Item, then the state handling // loop is stopped and the Item is returned to the caller. case i := <-run.p.items: return run.makeReturnValues(i) // Otherwise, the next state handler is looked up and invoked. 
default: run.runNextStateHandler() } } } func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) { switch { case i.Type == ItemEOF: return i, nil, false case i.Type == ItemError: run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} return i, run.p.err, false default: run.p.item = i return i, nil, true } } // runNextStateHandler moves the parser, which is bascially a state machine, // to its next status. It does so by invoking a function of the // type StateHandler. This function represents the current status and // is responsible for moving the parser to its next status, depending // on the parsed input data. func (run *Run) runNextStateHandler() { if state, ok := run.getNextStateHandler(); ok { run.invokeNextStateHandler(state) } } // getNextStateHandler determines the next StateHandler to invoke in order // to move the parsing state machine one step further. // // When implementing a parser, the StateHandler functions must provide // a routing decision in every invocation. A routing decision is one // of the following: // // * A route is specified explicitly, which means that the next StateHandler // function to invoke is registered during the StateHandler function // invocation. For example: p.RouteTo(nextStatus) // // * A route is specified implicitly, which means that a previous StateHandler // invocation has registered the followup route for the current state. // For example: p.RouteTo(nextStatus).ThenTo(otherStatus) // In this example, the nextStatus StateHandler will not have to specify // a route explicitly, but otherStatus will be used implicitly after // the nextStatus function has returned. // // * An expectation is registered by the StateHandler. // For example: p.Expects("a cool thing") // When the StateHandler returns without having specified a route, this // expectation is used to generate an "unexpected input" error message. 
// // When no routing decision is provided by a StateHandler, then this is // considered a bug in the state handler, and the parser will panic. func (run *Run) getNextStateHandler() (StateHandler, bool) { switch { case run.p.nextState != nil: return run.p.nextState, true case len(run.p.routeStack) > 0: return run.p.popRoute(), true case run.p.expecting != "": run.p.UnexpectedInput() return nil, false default: name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name() panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name)) } } // invokeNextStateHandler moves the parser state to the provided state // and invokes the StateHandler function. func (run *Run) invokeNextStateHandler(state StateHandler) { run.p.state = state run.p.nextState = nil run.p.expecting = "" run.p.state(run.p) }