package parsekit

import "unicode/utf8"

// StateHandler is the signature of a function that handles a single
// parsing state.
//
// A StateHandler receives a *P. That struct holds all the internal state
// of the parsing state machine and is the interface through which the
// handler interacts with the parser.
type StateHandler func(*P)

// P holds the internal state of a parse run and provides an API to
// StateHandler functions to communicate with the parser.
type P struct {
	// state is the function that handles the current state.
	state StateHandler
	// nextState is the function that will handle the next state.
	nextState StateHandler
	// routeStack is the route stack, used for handling nested parsing.
	routeStack []StateHandler

	// input is the scanned input.
	input string
	// inputPos is the current byte cursor position in the input.
	inputPos int
	// cursorLine is the current rune cursor row number in the input.
	cursorLine int
	// cursorColumn is the current rune cursor column position in the input.
	cursorColumn int
	// len is the total length of the input in bytes.
	len int
	// newline keeps track of when a newline has been scanned.
	newline bool

	// expecting is a description of what the current state expects to find.
	expecting string
	// buffer is an efficient buffer, used to build string values.
	buffer stringBuffer

	// items is the channel of resulting Parser items.
	items chan Item
	// item is the current item as reached by Next() and retrieved by Get().
	item Item
	// err is an error when lexing failed, retrieved by Error().
	err *Error

	// LastMatch is a string representation of the last matched input data.
	LastMatch string
}

// Expects is used to let a state function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
// handle unexpected input yourself. When the end of the function is reached
// without setting the next state, an automatic error will be emitted.
// This error can differentiate between the following issues: // // * there is valid data on input, but it was not accepted by the function // // * there is an invalid UTF8 character on input // // * the end of the file was reached. func (p *P) Expects(description string) { p.expecting = description } // peek returns but does not advance the cursor to the next rune(s) in the input. // Returns the rune, its width in bytes and a boolean. // The boolean will be false in case no upcoming rune can be peeked // (end of data or invalid UTF8 character). func (p *P) peek(byteOffset int) (rune, int, bool) { r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:]) return handleRuneError(r, w) } // EOF is a special rune, which is used to indicate an end of file when // reading a character from the input. // It can be treated as a rune when writing parsing rules, so a valid way to // say 'I now expect the end of the file' is using something like: // if (p.On(c.Rune(EOF)).Skip()) { ... } const EOF rune = -1 // INVALID is a special rune, which is used to indicate an invalid UTF8 // rune on the input. const INVALID rune = utf8.RuneError // handleRuneError is used to normale rune value in case of errors. // When an error occurs, then utf8.RuneError will be in the rune. // This can however indicate one of two situations: // * w == 0: end of file is reached // * w == 1: invalid UTF character on input // This function lets these two cases return respectively the // package's own EOF or INVALID runes, to make it easy for client // code to distinct between these two cases. func handleRuneError(r rune, w int) (rune, int, bool) { if r == utf8.RuneError { if w == 0 { return EOF, 0, false } return INVALID, w, false } return r, w, true } // RouteTo tells the parser what StateHandler function to invoke // in the next parsing cycle. 
func (p *P) RouteTo(state StateHandler) *routeFollowupAction { p.nextState = state return &routeFollowupAction{chainAction: chainAction{p, true}} } // RouteRepeat indicates that on the next parsing cycle, the current // StateHandler must be reinvoked. func (p *P) RouteRepeat() *chainAction { p.RouteTo(p.state) return &chainAction{nil, true} } // RouteReturn tells the parser that on the next cycle the last // StateHandler that was pushed on the route stack must be invoked. // // Using this method is optional. When implementating a StateHandler that // is used as a sort of subroutine (using constructions like // p.RouteTo(subroutine).ThenReturnHere()), you can refrain from // providing an explicit routing decision from that handler. The parser will // automatically assume a RouteReturn() in that case. func (p *P) RouteReturn() *chainAction { p.nextState = p.popRoute() return &chainAction{nil, true} } // pushRoute adds the StateHandler to the route stack. // This is used for implementing nested parsing. func (p *P) pushRoute(state StateHandler) { p.routeStack = append(p.routeStack, state) } // popRoute pops the last pushed StateHandler from the route stack. func (p *P) popRoute() StateHandler { last := len(p.routeStack) - 1 head, tail := p.routeStack[:last], p.routeStack[last] p.routeStack = head return tail } // ExpectEndOfFile can be used from a StateHandler function to indicate that // your parser expects to be at the end of the file. This will schedule // a parsekit-provided StateHandler which will do the actual check for this. func (p *P) ExpectEndOfFile() { p.RouteTo(func(p *P) { p.Expects("end of file") if p.On(A.EndOfFile).Stay().End() { p.Emit(ItemEOF, "EOF") } }) }