package parsekit import ( "fmt" "reflect" "runtime" ) // Parser is the top-level struct that holds the configuration for a parser. // The Parser can be instantiated using the parsekit.NewParser() method. type Parser struct { startState StateHandler // the function that handles the very first state } // NewParser instantiates a new Parser. // // The Parser is a state machine-style recursive descent parser, in which // StateHandler functions are used to move the state machine forward during // parsing. This style of parser is typically used for parsing languages and // structured data formats (like json, toml, etc.) // // To start parsing input data, use the method Parser.Parse(). func NewParser(startState StateHandler) *Parser { return &Parser{startState: startState} } // Run represents a single parse run for a Parser. // TODO rename to ParseRun type Run struct { p *P // a struct holding the internal state of a parse run } // Parse starts a parse run on the provided input data. // To retrieve parse items from the run, make use of the Run.Next() method. func (p *Parser) Parse(input string) *Run { return &Run{ p: &P{ input: input, len: len(input), cursorLine: 1, cursorColumn: 1, nextState: p.startState, items: make(chan Item, 2), }, } } // Next retrieves the next parsed item for a parse run. // // When a valid item was found, then the boolean return parameter will be true. // On error or when successfully reaching the end of the input, false is returned. // When an error occurred, false will be returned and the error return value will // be set (default is nil). func (run *Run) Next() (Item, *Error, bool) { // State handling loop: we handle states, until an Item is ready to be returned. for { select { // If a state handler has emitted an (error) Item, then the state handling // loop is stopped and the Item is returned to the caller. case i := <-run.p.items: return run.makeReturnValues(i) // Otherwise, the next state handler is looked up and invoked. default: run.runNextStateHandler() } } } func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) { switch { case i.Type == ItemEOF: return i, nil, false case i.Type == ItemError: run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} return i, run.p.err, false default: run.p.item = i return i, nil, true } } // runNextStateHandler moves the parser, which is bascially a state machine, // to its next status. It does so by invoking a function of the // type StateHandler. This function represents the current status and // is responsible for moving the parser to its next status, depending // on the parsed input data. func (run *Run) runNextStateHandler() { if state, ok := run.getNextStateHandler(); ok { run.invokeNextStateHandler(state) } } // getNextStateHandler determines the next StateHandler to invoke in order // to move the parsing state machine one step further. // // When implementing a parser, the StateHandler functions must provide // a routing decision in every invocation. A routing decision is one // of the following: // // * A route is specified explicitly, which means that the next StateHandler // function to invoke is registered during the StateHandler function // invocation. For example: p.RouteTo(nextStatus) // // * A route is specified implicitly, which means that a previous StateHandler // invocation has registered the followup route for the current state. // For example: p.RouteTo(nextStatus).ThenTo(otherStatus) // In this example, the nextStatus StateHandler will not have to specify // a route explicitly, but otherStatus will be used implicitly after // the nextStatus function has returned. // // * An expectation is registered by the StateHandler. // For example: p.Expects("a cool thing") // When the StateHandler returns without having specified a route, this // expectation is used to generate an "unexpected input" error message. // // When no routing decision is provided by a StateHandler, then this is // considered a bug in the state handler, and the parser will panic. func (run *Run) getNextStateHandler() (StateHandler, bool) { switch { case run.p.nextState != nil: return run.p.nextState, true case len(run.p.routeStack) > 0: return run.p.popRoute(), true case run.p.expecting != "": run.p.UnexpectedInput() return nil, false default: name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name() panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name)) } } // invokeNextStateHandler moves the parser state to the provided state // and invokes the StateHandler function. func (run *Run) invokeNextStateHandler(state StateHandler) { run.p.state = state run.p.nextState = nil run.p.expecting = "" run.p.state(run.p) } // MatcherWrapper is the top-level struct that holds the configuration for // a parser that is based solely on a Wrapper function. // The MatcherWrapper can be instantiated using the parsekit.NewMatcher() // method. // // To match input data against the wrapped Matcher function, use the method // MatcherWrapper.Match(). type MatcherWrapper struct { parser *Parser } // NewMatcherWrapper instantiates a new MatcherWrapper. // // This is a simple wrapper around a Matcher function. It can be used to // match an input string against that Matcher function and retrieve the // results in a straight forward way. func NewMatcherWrapper(matcher Matcher) *MatcherWrapper { handler := func(p *P) { p.Expects("match") if p.On(matcher).Accept().End() { p.EmitLiteral(0) // ItemType is irrelevant } } return &MatcherWrapper{parser: NewParser(handler)} } // Match runs the wrapped Matcher function against the provided input data. func (w *MatcherWrapper) Match(input string) (string, *Error, bool) { item, err, ok := w.parser.Parse(input).Next() if !ok { return "", err, false } return item.Value, nil, true }