package parsekit import ( "fmt" "runtime" "strings" "unicode/utf8" ) // ParseHandler defines the type of function that must be implemented to handle // a parsing state in a Parser state machine. // // A ParseHandler function gets a ParseAPI struct as its input. This struct holds // all the internal state for the parsing state machine and provides the // interface that the ParseHandler uses to interact with the parser. type ParseHandler func(*ParseAPI) // ParseAPI holds the internal state of a parse run and provides an API to // ParseHandler methods to communicate with the parser. type ParseAPI struct { input string // the input that is being scanned by the parser inputPos int // current byte cursor position in the input loopCheck map[string]bool // used for parser loop detection cursorLine int // current rune cursor row number in the input cursorColumn int // current rune cursor column position in the input len int // the total length of the input in bytes newline bool // keep track of when we have scanned a newline expecting string // a description of what the current state expects to find (see P.Expects()) buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored LastMatch string // a string representation of the last matched input data } // panicWhenStoppedOrInError will panic when the parser has produced an error // or when it has been stopped. It is used from the ParseAPI methods, to // prevent further calls to the ParseAPI on these occasions. // // Basically, this guard ensures proper coding of parsers, making sure // that clean routes are followed. You can consider this check a runtime // unit test. func (p *ParseAPI) panicWhenStoppedOrInError() { if !p.isStoppedOrInError() { return } // No error handling, because it's private known-to-work use only. pc, _, _, _ := runtime.Caller(1) call := runtime.FuncForPC(pc) pc, _, _, _ = runtime.Caller(2) caller := runtime.FuncForPC(pc) after := "Error()" if p.stopped { after = "Stop()" } parts := strings.Split(call.Name(), ".") name := parts[len(parts)-1] panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s: no calls allowed after ParseAPI.%s", name, caller.Name(), after)) } func (p *ParseAPI) isStoppedOrInError() bool { return p.stopped || p.err != nil } func (p *ParseAPI) checkForLoops() { pc, file, line, _ := runtime.Caller(2) id := fmt.Sprintf("%s:%d", file, line) if _, ok := p.loopCheck[id]; ok { caller := runtime.FuncForPC(pc) panic(fmt.Sprintf("Loop detected in parser in %s at %s, line %d", caller.Name(), file, line)) } p.loopCheck[id] = true } // peek returns but does not advance the cursor to the next rune in the input. // Returns the rune, its width in bytes and a boolean. // // The boolean will be false in case no upcoming rune can be peeked // (end of data or invalid UTF8 character). In this case, the returned rune // will be one of eofRune or invalidRune. func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) { r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:]) return handleRuneError(r, w) } // eofRune is a special rune that is used to indicate an end of file when // reading a character from the input. const eofRune rune = -1 // invalidRune is a special rune that is used to indicate an invalid UTF8 // rune on the input. const invalidRune rune = utf8.RuneError // handleRuneError is used to create specific rune value in case of errors. // When an error occurs, then utf8.RuneError will be in the rune. // This can however indicate one of two situations: // 1) w == 0: end of file is reached // 2) w == 1: invalid UTF character on input // This function lets these two cases return respectively the // package's own eofRune or invalidRune, to make it easy for calling code // to distinct between these two cases. func handleRuneError(r rune, w int) (rune, int, bool) { if r == utf8.RuneError { if w == 0 { return eofRune, 0, false } return invalidRune, w, false } return r, w, true }