package parsekit import ( "unicode/utf8" ) // P holds the internal state of the parser. type P struct { state StateFn // the function that handles the current state nextState StateFn // the function that will handle the next state stack []StateFn // state function stack, for nested parsing input string // the scanned input len int // the total length of the input in bytes pos int // current byte scanning position in the input newline bool // keep track of when we have scanned a newline cursorRow int // current row number in the input cursorColumn int // current column position in the input expecting string // a description of what the current state expects to find buffer stringBuffer // an efficient buffer, used to build string values items chan Item // channel of resulting Parser items item Item // the current item as reached by Next() and retrieved by Get() err *Error // an error when lexing failed, retrieved by Error() } // peek returns but does not advance the cursor to the next rune(s) in the input. // Returns the rune, its width in bytes and a boolean. // The boolean will be false in case no upcoming rune can be peeked // (end of data or invalid UTF8 character). func (p *P) peek(offsetInBytes int) (rune, int, bool) { r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:]) return handleRuneError(r, w) } // handleRuneError is used to normale rune value in case of errors. // When an error occurs, then utf8.RuneError will be in the rune. // This can however indicate one of two situations: // * w == 0: end of file is reached // * w == 1: invalid UTF character on input // This function lets these two cases return respectively the // package's own EOF or INVALID runes, to make it easy for client // code to distinct between these two cases. func handleRuneError(r rune, w int) (rune, int, bool) { if r == utf8.RuneError { if w == 0 { return EOF, 0, false } return INVALID, w, false } return r, w, true } // EOF is a special rune, which is used to indicate an end of file when // reading a character from the input. // It can be treated as a rune when writing parsing rules, so a valid way to // say 'I now expect the end of the file' is using something like: // if (p.On(c.Rune(EOF)).Skip()) { ... } const EOF rune = -1 // INVALID is a special rune, which is used to indicate an invalid UTF8 // rune on the input. const INVALID rune = utf8.RuneError // StateFn defines the type of function that can be used to // handle a parser state. type StateFn func(*P) // ItemType represents the type of a parser Item. type ItemType int // ItemEOF is a built-in parser item type that is used for flagging that the // end of the input was reached. const ItemEOF ItemType = -1 // ItemError is a built-in parser item type that is used for flagging that // an error has occurred during parsing. const ItemError ItemType = -2 // Item represents an item returned from the parser. type Item struct { Type ItemType Value string } // Error is used as the error type when parsing errors occur. // The error includes some extra meta information to allow for useful // error messages to the user. type Error struct { Message string Row int Column int } func (err *Error) Error() string { return err.Message }