package parsekit

import "unicode/utf8"

// StateHandler is the function type that has to be implemented in order to
// handle a parsing state.
//
// A StateHandler receives a P struct as its input. That struct carries all
// the internal state of the parsing state machine and offers the interface
// through which the StateHandler interacts with the parser.
type StateHandler func(*P)

// P holds the internal state of a parse run and provides the API that
// StateHandler functions use to communicate with the parser.
type P struct {
	// State machine routing.
	state      StateHandler   // handler function for the current state
	nextState  StateHandler   // handler function for the next state
	routeStack []StateHandler // route stack, used for nested parsing

	// Input and cursor bookkeeping.
	input        string // the input that is scanned by the parser
	inputPos     int    // current cursor position in the input, in bytes
	cursorLine   int    // current cursor row number in the input, in runes
	cursorColumn int    // current cursor column position in the input, in runes
	len          int    // total length of the input, in bytes
	newline      bool   // tracks when a newline has been scanned

	// Expectations, buffering and results.
	expecting string       // description of what the current state expects to find (see P.Expects())
	buffer    stringBuffer // efficient buffer for building string values (see P.Accept())
	items     chan Item    // channel of resulting Parser items (see P.Emit())
	item      Item         // the current item, as reached by Next() and retrieved by Get()
	err       *Error       // the error when lexing failed, retrieved by Error()
	LastMatch string       // string representation of the last matched input data
}

// peek returns the rune that is found byteOffset bytes past the cursor,
// without advancing the cursor.
// It returns the rune, its width in bytes and a boolean.
//
// The boolean is false when no upcoming rune can be peeked (end of data or
// an invalid UTF8 character). In that case, the returned rune is one of
// eofRune or invalidRune.
func (p *P) peek(byteOffset int) (rune, int, bool) { r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:]) return handleRuneError(r, w) } // eofRune is a special rune, which is used to indicate an end of file when // reading a character from the input. const eofRune rune = -1 // invalidRune is a special rune, which is used to indicate an invalid UTF8 // rune on the input. const invalidRune rune = utf8.RuneError // handleRuneError is used to create specific rune value in case of errors. // When an error occurs, then utf8.RuneError will be in the rune. // This can however indicate one of two situations: // 1) w == 0: end of file is reached // 2) w == 1: invalid UTF character on input // This function lets these two cases return respectively the // package's own eofRune or invalidRune, to make it easy for calling code // to distinct between these two cases. func handleRuneError(r rune, w int) (rune, int, bool) { if r == utf8.RuneError { if w == 0 { return eofRune, 0, false } return invalidRune, w, false } return r, w, true }