package parsekit

import (
	"fmt"
	"runtime"
	"strings"
	"unicode/utf8"
)

// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the ParseHandler uses to interact with the parser.
type ParseHandler func(*ParseAPI)

// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser.
type ParseAPI struct {
	input        string          // the input that is being scanned by the parser
	inputPos     int             // current byte cursor position in the input
	loopCheck    map[string]bool // used for parser loop detection; keyed by the "file:line" position of a call site
	cursorLine   int             // current rune cursor row number in the input
	cursorColumn int             // current rune cursor column position in the input
	len          int             // the total length of the input in bytes
	newline      bool            // keep track of when we have scanned a newline
	expecting    string          // a description of what the current state expects to find (see P.Expects())
	buffer       stringBuffer    // an efficient buffer, used to build string values (see P.Accept())
	err          *Error          // error during parsing, retrieved by Error(); once non-nil, no further ParseAPI calls are allowed
	stopped      bool            // a boolean set to true by Stop(); once true, no further ParseAPI calls are allowed
	LastMatch    string          // a string representation of the last matched input data
}

// panicWhenStoppedOrInError will panic when the parser has produced an error
// or when it has been stopped. It is used from the ParseAPI methods, to
// prevent further calls to the ParseAPI on these occasions.
//
// Basically, this guard ensures proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *ParseAPI) panicWhenStoppedOrInError() { if !p.isStoppedOrInError() { return } called, _ := p.getCaller(1) parts := strings.Split(called, ".") calledShort := parts[len(parts)-1] caller, filepos := p.getCaller(2) after := "Error()" if p.stopped { after = "Stop()" } panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after)) } func (p *ParseAPI) isStoppedOrInError() bool { return p.stopped || p.err != nil } func (p *ParseAPI) checkForLoops() { caller, filepos := p.getCaller(2) if _, ok := p.loopCheck[filepos]; ok { panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos)) } p.loopCheck[filepos] = true } // peek returns but does not advance the cursor to the next rune in the input. // Returns the rune, its width in bytes and a boolean. // // The boolean will be false in case no upcoming rune can be peeked // (end of data or invalid UTF8 character). In this case, the returned rune // will be one of eofRune or invalidRune. func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) { r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:]) return handleRuneError(r, w) } // eofRune is a special rune that is used to indicate an end of file when // reading a character from the input. const eofRune rune = -1 // invalidRune is a special rune that is used to indicate an invalid UTF8 // rune on the input. const invalidRune rune = utf8.RuneError // handleRuneError is used to create specific rune value in case of errors. // When an error occurs, then utf8.RuneError will be in the rune. // This can however indicate one of two situations: // 1) w == 0: end of file is reached // 2) w == 1: invalid UTF character on input // This function lets these two cases return respectively the // package's own eofRune or invalidRune, to make it easy for calling code // to distinct between these two cases. 
func handleRuneError(r rune, w int) (rune, int, bool) { if r == utf8.RuneError { if w == 0 { return eofRune, 0, false } return invalidRune, w, false } return r, w, true } func (p *ParseAPI) getCaller(depth int) (string, string) { // No error handling, because we call this method ourselves with safe depth values. pc, file, line, _ := runtime.Caller(depth + 1) filepos := fmt.Sprintf("%s:%d", file, line) caller := runtime.FuncForPC(pc) return caller.Name(), filepos }