diff --git a/example_basiccalculator1_test.go b/example_basiccalculator1_test.go index 4191f9d..4b79f70 100644 --- a/example_basiccalculator1_test.go +++ b/example_basiccalculator1_test.go @@ -82,7 +82,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) { value, err := strconv.ParseInt(p.BufLiteral(), 10, 64) p.BufClear() if err != nil { - p.EmitError("invalid value: %s", err) + p.Error("invalid value: %s", err) } else { c.Result += c.op * value p.Handle(c.operatorOrEndOfFile) diff --git a/example_basiccalculator2_test.go b/example_basiccalculator2_test.go index 1b5cda0..d2f9404 100644 --- a/example_basiccalculator2_test.go +++ b/example_basiccalculator2_test.go @@ -84,10 +84,10 @@ func Compute(input string) (float64, *parsekit.Error) { } func (c *calculator) computation(p *parsekit.ParseAPI) { - p.Handle(c.expr) - p.ExpectEndOfFile() - - c.result = c.interpreter.result + if p.Handle(c.expr) { + p.ExpectEndOfFile() + c.result = c.interpreter.result + } } // expr : term ((ADD|SUB) term)* @@ -95,11 +95,14 @@ func (c *calculator) expr(p *parsekit.ParseAPI) { c.interpreter.push() var pc, a = parsekit.C, parsekit.A - p.Handle(c.term) - for p.On(pc.Any(a.Add, a.Subtract)).Skip() { - c.interpreter.pushOperator(p.LastMatch) - p.Handle(c.term) - c.interpreter.eval() + if p.Handle(c.term) { + for p.On(pc.Any(a.Add, a.Subtract)).Skip() { + c.interpreter.pushOperator(p.LastMatch) + if !p.Handle(c.term) { + return + } + c.interpreter.eval() + } } c.interpreter.pop() @@ -110,11 +113,14 @@ func (c *calculator) term(p *parsekit.ParseAPI) { c.interpreter.push() var pc, a = parsekit.C, parsekit.A - p.Handle(c.factor) - for p.On(pc.Any(a.Multiply, a.Divide)).Skip() { - c.interpreter.pushOperator(p.LastMatch) - p.Handle(c.factor) - c.interpreter.eval() + if p.Handle(c.factor) { + for p.On(pc.Any(a.Multiply, a.Divide)).Skip() { + c.interpreter.pushOperator(p.LastMatch) + if !p.Handle(c.factor) { + return + } + c.interpreter.eval() + } } c.interpreter.pop() @@ -130,18 +136,23 
@@ func (c *calculator) factor(p *parsekit.ParseAPI) { p.BufClear() value, err := strconv.ParseFloat(floatStr, 64) if err != nil { - p.EmitError("invalid number %s: %s", floatStr, err) + p.Error("invalid number %s: %s", floatStr, err) + return } else { c.interpreter.pushValue(value) } case p.On(a.LeftParen).Skip(): - p.Handle(c.expr) + if !p.Handle(c.expr) { + return + } if !p.On(a.RightParen).Skip() { p.Expects("')'") p.UnexpectedInput() + return } default: p.UnexpectedInput() + return } p.On(a.Whitespace).Skip() } diff --git a/example_dutchpostcode_test.go b/example_dutchpostcode_test.go index 82de3e3..cd7c549 100644 --- a/example_dutchpostcode_test.go +++ b/example_dutchpostcode_test.go @@ -25,7 +25,7 @@ func Example_dutchPostcodeUsingMatcher() { } { output, err := parser.Execute(input) if err != nil { - fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull()) + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full()) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) } diff --git a/example_hellomatcher_test.go b/example_hellomatcher_test.go index acc33f1..ee3f294 100644 --- a/example_hellomatcher_test.go +++ b/example_hellomatcher_test.go @@ -27,7 +27,7 @@ func Example_helloWorldUsingMatcher() { } { output, err := parser.Execute(input) if err != nil { - fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull()) + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full()) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) } diff --git a/example_helloparser1_test.go b/example_helloparser1_test.go index 3e0abfe..b2cc184 100644 --- a/example_helloparser1_test.go +++ b/example_helloparser1_test.go @@ -117,7 +117,7 @@ func (h *helloparser1) end(p *parsekit.ParseAPI) { h.greetee = strings.TrimSpace(p.BufLiteral()) if h.greetee == "" { - p.EmitError("The name cannot be empty") + p.Error("The name cannot be empty") } else { p.Stop() } diff --git a/example_helloparser2_test.go 
b/example_helloparser2_test.go index 0c3bdb2..5d2e05c 100644 --- a/example_helloparser2_test.go +++ b/example_helloparser2_test.go @@ -81,7 +81,7 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) { // Note: // For efficiency, we could have either: // -// 1) added a return after every call to p.EmitError() +// 1) added a return after every call to p.Error() // 2) done an 'else if' for every 'if' after the first // // For code readability, I omitted these however. The ParseAPI knows it @@ -91,19 +91,19 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) { func (h *helloparser2) start(p *parsekit.ParseAPI) { c, a, m := parsekit.C, parsekit.A, parsekit.M if !p.On(c.StrNoCase("hello")).Skip() { - p.EmitError("the greeting is not being friendly") + p.Error("the greeting is not being friendly") } else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() { - p.EmitError("the greeting is not properly separated") + p.Error("the greeting is not properly separated") } else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() { - p.EmitError("the greeting is targeted at thin air") + p.Error("the greeting is targeted at thin air") } else if !p.On(a.Excl).Skip() { - p.EmitError("the greeting is not loud enough") + p.Error("the greeting is not loud enough") } else if !p.On(a.EndOfFile).Stay() { - p.EmitError("too much stuff going on after the closing '!'") + p.Error("too much stuff going on after the closing '!'") } else { h.greetee = p.BufLiteral() if h.greetee == "" { - p.EmitError("the name cannot be empty") + p.Error("the name cannot be empty") } p.Stop() } diff --git a/examples_test.go b/examples_test.go index 1211225..7d1e607 100644 --- a/examples_test.go +++ b/examples_test.go @@ -15,7 +15,7 @@ func ExampleError() { fmt.Println(err.Error()) fmt.Printf("%s\n", err) - fmt.Println(err.ErrorFull()) + fmt.Println(err.Full()) // Output: // it broke down // it broke down @@ -36,14 +36,14 @@ func 
ExampleError_Error() { // it broke down } -func ExampleError_ErrorFull() { +func ExampleError_Full() { err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } - fmt.Println(err.ErrorFull()) + fmt.Println(err.Full()) // Output: // it broke down at line 10, column 42 } diff --git a/parsehandler.go b/parsehandler.go index 00ecb8c..c9dd150 100644 --- a/parsehandler.go +++ b/parsehandler.go @@ -1,6 +1,11 @@ package parsekit -import "unicode/utf8" +import ( + "fmt" + "runtime" + "strings" + "unicode/utf8" +) // ParseHandler defines the type of function that must be implemented to handle // a parsing state in a Parser state machine. @@ -13,25 +18,50 @@ type ParseHandler func(*ParseAPI) // ParseAPI holds the internal state of a parse run and provides an API to // ParseHandler methods to communicate with the parser. type ParseAPI struct { - state ParseHandler // the function that handles the current state - nextState ParseHandler // the function that will handle the next state - routeStack []ParseHandler // route stack, for handling nested parsing - input string // the input that is being scanned by the parser - inputPos int // current byte cursor position in the input - cursorLine int // current rune cursor row number in the input - cursorColumn int // current rune cursor column position in the input - len int // the total length of the input in bytes - newline bool // keep track of when we have scanned a newline - expecting string // a description of what the current state expects to find (see P.Expects()) - buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) - items []Item // a slice of resulting Parser items (see P.Emit()) - item Item // the current item as reached by Next(), retrieved by Get() - err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored - stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored + input string // the input that is being 
scanned by the parser + inputPos int // current byte cursor position in the input + cursorLine int // current rune cursor row number in the input + cursorColumn int // current rune cursor column position in the input + len int // the total length of the input in bytes + newline bool // keep track of when we have scanned a newline + expecting string // a description of what the current state expects to find (see P.Expects()) + buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) + err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored + stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored LastMatch string // a string representation of the last matched input data } +// panicWhenStoppedOrInError will panic when the parser has produced an error +// or when it has been stopped. It is used from the ParseAPI methods, to +// prevent further calls to the ParseAPI on these occasions. +// +// Basically, this guard ensures proper coding of parsers, making sure +// that clean routes are followed. You can consider this check a runtime +// unit test. +func (p *ParseAPI) panicWhenStoppedOrInError() { + if !p.isStoppedOrInError() { + return + } + // No error handling, because it's private known-to-work use only. + pc, _, _, _ := runtime.Caller(1) + call := runtime.FuncForPC(pc) + pc, _, _, _ = runtime.Caller(2) + caller := runtime.FuncForPC(pc) + + after := "Error()" + if p.stopped { + after = "Stop()" + } + parts := strings.Split(call.Name(), ".") + name := parts[len(parts)-1] + panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s: no calls allowed after ParseAPI.%s", name, caller.Name(), after)) +} + +func (p *ParseAPI) isStoppedOrInError() bool { + return p.stopped || p.err != nil +} + // peek returns but does not advance the cursor to the next rune in the input. // Returns the rune, its width in bytes and a boolean. 
// diff --git a/parsehandler_emit.go b/parsehandler_emit.go deleted file mode 100644 index c1c9c8e..0000000 --- a/parsehandler_emit.go +++ /dev/null @@ -1,176 +0,0 @@ -package parsekit - -import ( - "fmt" -) - -// Item represents an item that can be emitted from a ParseHandler function. -type Item struct { - Type ItemType - Value string -} - -// ItemType represents the type of a parser Item. -// -// When creating your own ItemType values, then make use of positive integer -// values. Negative values are possible, but they are reserved for internal -// use by parsekit. -type ItemType int - -// ItemEOF is a built-in parser item type that is used for flagging that the -// end of the input was reached. -const ItemEOF ItemType = -1 - -// ItemError is a built-in parser item type that is used for flagging that -// an error has occurred during parsing. -const ItemError ItemType = -2 - -// Emit passes a Parser item to the client, including the provided string. -// Deprecated -func (p *ParseAPI) Emit(t ItemType, v string) { - p.items = append(p.items, Item{t, v}) - p.buffer.reset() -} - -// BufLiteral retrieves the contents of the parser's string buffer (all the -// runes that were added to it using ParseAPI.Accept()) as a literal string. -// -// Literal means that if the input had for example the subsequent runes '\' and 'n' -// in it, then the literal string would have a backslash and an 'n' it in, not a -// linefeed (ASCII char 10). -// -// Retrieving the buffer contents will not affect the buffer itself. New runes can -// still be added to it. Only when calling P.BufClear(), the buffer will be cleared. -func (p *ParseAPI) BufLiteral() string { - return p.buffer.asLiteralString() -} - -// EmitLiteral passes a parser Item to the client, including the accumulated -// string buffer data as a literal string. -// Deprecated -func (p *ParseAPI) EmitLiteral(t ItemType) { - p.Emit(t, p.BufLiteral()) -} - -// BufClear clears the contents of the parser string buffer. 
-func (p *ParseAPI) BufClear() { - p.buffer.reset() -} - -// BufInterpreted retrieves the contents of the parser's string buffer (all -// the runes that were added to it using ParseAPI.Accept()) as an -// interpreted string. -// -// Interpreted means that the contents are treated as a Go double quoted -// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the -// input had for example the subsequent runes '\' and 'n' in it, then the interpreted -// string would have an actual linefeed (ASCII char 10) in it. -// -// This method returns a boolean value, indicating whether or not the string -// interpretation was successful. On invalid string data, an error will -// automatically be emitted and the boolean return value will be false. -// -// Retrieving the buffer contents will not affect the buffer itself. New runes can -// still be added to it. Only when calling P.Emit(), the buffer will be cleared. -func (p *ParseAPI) BufInterpreted() (string, bool) { - s, err := p.buffer.asInterpretedString() - if err != nil { - p.EmitError( - "invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)", - p.buffer.asLiteralString(), err) - return "", false - } - return s, true -} - -// EmitInterpreted passes a Parser item to the client, including accumulated -// string buffer data a Go double quoted interpreted string (handling escape -// codes like \n, \t, \uXXXX, etc.) -// This method returns a boolean value, indicating whether or not the string -// interpretation was successful. On invalid string data, an error will -// automatically be emitted and false will be returned. -func (p *ParseAPI) EmitInterpreted(t ItemType) bool { - if s, ok := p.BufInterpreted(); ok { - p.Emit(t, s) - return true - } - return false -} - -// Error is used as the error type when parsing errors occur. -// The error includes some extra meta information to allow for useful -// error messages to the user. 
-type Error struct { - Message string - Line int - Column int -} - -func (err *Error) Error() string { - if err == nil { - panic("internal parser error: Error() method called on the parser, but no error was set") - } - return err.Message -} - -// ErrorFull returns the current error message, including information about -// the position in the input where the error occurred. -func (err *Error) ErrorFull() string { - return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column) -} - -// EmitError sets an error message in the parser API. This error message -// will eventually be returned by the Parser.Execute() method. -func (p *ParseAPI) EmitError(format string, args ...interface{}) { - message := fmt.Sprintf(format, args...) - p.err = &Error{message, p.cursorLine, p.cursorColumn} -} - -// Stop is used by the parser impementation to tell the API that it has -// completed the parsing process successfully. -// -// When the parser implementation returns without stopping first, the -// Parser.Execute() will assume that something went wrong and calls -// ParserAPI.UnexpectedInput() to report an error about this. -// -// The parser implementation can define what was being expected, by -// providing a description to ParseAPI.Expecting(). -func (p *ParseAPI) Stop() { - p.stopped = true -} - -// UnexpectedInput is used to set an error that tells the user that some -// unexpected input was encountered. -// -// It can automatically produce an error message for a couple of situations: -// 1) input simply didn't match the expectation -// 2) the end of the input was reached -// 3) there was an invalid UTF8 character on the input. -// -// The parser implementation can provide some feedback for this error by -// calling ParseAPI.Expects() to set the expectation. When set, the -// expectation is included in the error message. -func (p *ParseAPI) UnexpectedInput() { - // When some previous parsing step yielded an error, skip this operation. 
- if p.err != nil || p.stopped { - return - } - r, _, ok := p.peek(0) - switch { - case ok: - p.EmitError("unexpected character %q%s", r, fmtExpects(p)) - case r == eofRune: - p.EmitError("unexpected end of file%s", fmtExpects(p)) - case r == invalidRune: - p.EmitError("invalid UTF8 character in input%s", fmtExpects(p)) - default: - panic("parsekit bug: Unhandled output from peek()") - } -} - -func fmtExpects(p *ParseAPI) string { - if p.expecting == "" { - return "" - } - return fmt.Sprintf(" (expected %s)", p.expecting) -} diff --git a/parsehandler_error.go b/parsehandler_error.go new file mode 100644 index 0000000..5a5ac2e --- /dev/null +++ b/parsehandler_error.go @@ -0,0 +1,33 @@ +package parsekit + +import ( + "fmt" +) + +// Error is used as the error type when parsing errors occur. +// The error includes some context information to allow for useful +// error messages to the user. +type Error struct { + Message string + Line int + Column int +} + +func (err *Error) Error() string { + return err.Message +} + +// Full returns the current error message, including information about +// the position in the input where the error occurred. +func (err *Error) Full() string { + return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column) +} + +// Error sets the error message in the parser API. This error message +// will eventually be returned by the Parser.Execute() method. +func (p *ParseAPI) Error(format string, args ...interface{}) { + // No call to p.panicWhenStoppedOrInError(), to allow a parser to + // set a different error message when needed. + message := fmt.Sprintf(format, args...) + p.err = &Error{message, p.cursorLine, p.cursorColumn} +} diff --git a/parsehandler_expects.go b/parsehandler_expects.go deleted file mode 100644 index 266510e..0000000 --- a/parsehandler_expects.go +++ /dev/null @@ -1,23 +0,0 @@ -package parsekit - -// Expects is used to let a ParseHandler function describe what input it is expecting. 
-// This expectation is used in error messages to make them more descriptive. -// -// When defining an expectation inside a ParseHandler, you do not need to -// handle unexpected input yourself. When the end of the function is reached -// without setting the next state, an automatic error will be emitted. -// This error can differentiate between the following issues: -// -// 1) there is valid data on input, but it was not accepted by the function -// -// 2) there is an invalid UTF8 character on input -// -// 3) the end of the file was reached. -func (p *ParseAPI) Expects(description string) { - // TODO make this into some debugging tool? - // fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn) - if p.err != nil || p.stopped { - return - } - p.expecting = description -} diff --git a/parsehandler_on.go b/parsehandler_on.go index f0d65a4..2062984 100644 --- a/parsehandler_on.go +++ b/parsehandler_on.go @@ -37,13 +37,7 @@ package parsekit // p.Emit(SomeItemType, p.BufLiteral()) // } func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction { - // When some previous parsing step yielded an error, skip this operation. - if p.err != nil || p.stopped { - return &MatchAction{ - p: p, - ok: false, - } - } + p.panicWhenStoppedOrInError() // Perform the matching operation. m := &TokenAPI{p: p} diff --git a/parsehandler_route.go b/parsehandler_route.go deleted file mode 100644 index 52356a5..0000000 --- a/parsehandler_route.go +++ /dev/null @@ -1,135 +0,0 @@ -package parsekit - -// Handle is used to execute other ParseHandler functions from within your -// ParseHandler function. -func (p *ParseAPI) Handle(handlers ...ParseHandler) { - for _, handler := range handlers { - // When some previous parsing step yielded an error, skip this operation. - if p.err != nil || p.stopped { - break - } - handler(p) - } -} - -// RouteTo tells the parser what ParseHandler function to invoke on -// the next parse cycle. 
-// Deprecated -func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction { - p.nextState = handler - return &RouteFollowupAction{p} -} - -// RouteRepeat tells the parser that on the next parsing cycle, the current -// ParseHandler must be reinvoked. -// Deprecated -func (p *ParseAPI) RouteRepeat() { - p.RouteTo(p.state) -} - -// RouteReturn tells the parser that on the next cycle the last ParseHandler -// that was pushed on the route stack must be invoked. -// -// Using this method is optional. When implementating a ParseHandler that -// is used as a sort of subroutine (using constructions like -// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from -// providing an explicit routing decision from that handler. The parser will -// automatically assume a RouteReturn() in that case. -// Deprecated -func (p *ParseAPI) RouteReturn() { - p.nextState = p.popRoute() -} - -// RouteFollowupAction chains parsing routes. -// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB). -// Deprecated -type RouteFollowupAction struct { - p *ParseAPI -} - -// ThenTo schedules a ParseHandler that must be invoked after the RouteTo -// ParseHandler has been completed. -// For example: -// -// p.RouteTo(handlerA).ThenTo(handlerB) -// Deprecated -func (a *RouteFollowupAction) ThenTo(state ParseHandler) { - a.p.pushRoute(state) -} - -// ThenReturnHere schedules the current ParseHandler to be invoked after -// the RouteTo ParseHandler has been completed. -// For example: -// -// p.RouteTo(handlerA).ThenReturnHere() -// Deprecated -func (a *RouteFollowupAction) ThenReturnHere() { - a.p.pushRoute(a.p.state) -} - -// pushRoute adds the ParseHandler to the route stack. -// This is used for implementing nested parsing. -// Deprecated -func (p *ParseAPI) pushRoute(state ParseHandler) { - p.routeStack = append(p.routeStack, state) -} - -// popRoute pops the last pushed ParseHandler from the route stack. 
-// Deprecated -func (p *ParseAPI) popRoute() ParseHandler { - last := len(p.routeStack) - 1 - head, tail := p.routeStack[:last], p.routeStack[last] - p.routeStack = head - return tail -} - -// ExpectEndOfFile can be used to check if the input is at end of file. -// Intended use: -// -// func yourParseHandler(p *parsekit.ParseAPI) { -// ... -// p.ExpectEndOfFile() -// } -// -// This will execute the end of file test right away. If you want to -// use the end of file check as a StateHandler instead, you can also -// make use of another form, for example: -// -// func yourParseHandler(p *parsekit.ParseAPI) { -// p.RouteTo(yourHandler).ThenTo(parsekit.ExpectEndOfFile) -// } -func (p *ParseAPI) ExpectEndOfFile() { - // When some previous parsing step yielded an error, skip this operation. - if p.err != nil || p.stopped { - return - } - if p.On(A.EndOfFile).Stay() { - p.Stop() - } else { - p.Expects("end of file") - p.UnexpectedInput() - } -} - -// ExpectEndOfFile can be scheduled as a ParseHandler function. -// It makes sure that the input is at the end of file. -// Intended use: -// -// func yourParseHandler(p *parsekit.ParseAPI) { -// ... -// p.RouteTo(parsekit.ExpectEndOfFile) -// } -// -// It is not mandatory to use this ParseHandler. You can take care fo EOF -// yourself too. Simply emit an ItemEOF when the end of the input was reached -// to stop the parser loop: -// -// p.Stop() -// TODO meh, get rid of this one, once we don't use state scheduling anymore. -// Deprecated -func ExpectEndOfFile(p *ParseAPI) { - p.Expects("end of file") - if p.On(A.EndOfFile).Stay() { - p.Stop() - } -} diff --git a/parsehandler_routing.go b/parsehandler_routing.go new file mode 100644 index 0000000..d6167df --- /dev/null +++ b/parsehandler_routing.go @@ -0,0 +1,89 @@ +package parsekit + +import "fmt" + +// Handle is used to execute other ParseHandler functions from within your +// ParseHandler function. +// +// The boolean return value is true when the parser can still continue. 
+// It will be false when either an error was set (using ParseAPI.Error()), +// or the parser was stopped (using ParseAPI.Stop()). +func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { + p.panicWhenStoppedOrInError() + parseHandler(p) + return !p.isStoppedOrInError() +} + +// Expects is used to let a ParseHandler function describe what input it is +// expecting. This expectation is used in error messages to provide some +// context to them. +// +// When defining an expectation inside a ParseHandler, you do not need to +// handle unexpected input yourself. When the end of the parser is reached +// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(), +// an automatic error will be emitted using ParseAPI.UnexpectedInput(). +func (p *ParseAPI) Expects(description string) { + p.panicWhenStoppedOrInError() + p.expecting = description +} + +// Stop is used by the parser implementation to tell the API that it has +// completed the parsing process successfully. +// +// When the parser implementation returns without stopping first, the +// Parser.Execute() will assume that something went wrong and calls +// ParseAPI.UnexpectedInput() to report an error about this. +// +// The parser implementation can define what was being expected, by +// providing a description to ParseAPI.Expects(). +func (p *ParseAPI) Stop() { + p.stopped = true +} + +// ExpectEndOfFile can be used to check if the input is at end of file. +// Intended use: +// +// When it finds that the end of the file was indeed reached, then the +// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected +// input is reported through ParseAPI.UnexpectedInput() with "end of file" +// as the expectation. 
+func (p *ParseAPI) ExpectEndOfFile() { + p.panicWhenStoppedOrInError() + if p.On(A.EndOfFile).Stay() { + p.Stop() + } else { + p.Expects("end of file") + p.UnexpectedInput() + } +} + +// UnexpectedInput is used to set an error that tells the user that some +// unexpected input was encountered. +// +// It can automatically produce an error message for a couple of situations: +// 1) input simply didn't match the expectation +// 2) the end of the input was reached +// 3) there was an invalid UTF8 character on the input. +// +// The parser implementation can provide some feedback for this error by +// calling ParseAPI.Expects() to set the expectation. When set, the +// expectation is included in the error message. +func (p *ParseAPI) UnexpectedInput() { + p.panicWhenStoppedOrInError() + r, _, ok := p.peek(0) + switch { + case ok: + p.Error("unexpected character %q%s", r, fmtExpects(p)) + case r == eofRune: + p.Error("unexpected end of file%s", fmtExpects(p)) + case r == invalidRune: + p.Error("invalid UTF8 character in input%s", fmtExpects(p)) + } +} + +func fmtExpects(p *ParseAPI) string { + if p.expecting == "" { + return "" + } + return fmt.Sprintf(" (expected %s)", p.expecting) +} diff --git a/parsehandler_stringbuf.go b/parsehandler_stringbuf.go new file mode 100644 index 0000000..50f928c --- /dev/null +++ b/parsehandler_stringbuf.go @@ -0,0 +1,47 @@ +package parsekit + +// BufLiteral retrieves the contents of the parser's string buffer (all the +// runes that were added to it using ParseAPI.Accept()) as a literal string. +// +// Literal means that if the input had for example the subsequent runes '\' and +// 'n' in it, then the literal string would have a backslash and an 'n' in it, +// not a linefeed (ASCII char 10). +// +// Retrieving the buffer contents will not affect the buffer itself. New runes +// can still be added to it. Only when calling P.BufClear(), the buffer will be +// cleared. 
+func (p *ParseAPI) BufLiteral() string { + return p.buffer.asLiteralString() +} + +// BufInterpreted retrieves the contents of the parser's string buffer (all the +// runes that were added to it using ParseAPI.Accept()) as an interpreted +// string. +// +// Interpreted means that the contents are treated as a Go double quoted +// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). If the +// input had for example the subsequent runes '\' and 'n' in it, then the +// interpreted string would have an actual linefeed (ASCII char 10) in it. +// +// This method returns a boolean value, indicating whether or not the string +// interpretation was successful. On invalid string data, an error will +// automatically be emitted and the boolean return value will be false. +// +// Retrieving the buffer contents will not affect the buffer itself. New runes +// can still be added to it. Only when calling P.BufClear(), the buffer will be +// cleared. +func (p *ParseAPI) BufInterpreted() (string, bool) { + s, err := p.buffer.asInterpretedString() + if err != nil { + p.Error( + "invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)", + p.buffer.asLiteralString(), err) + return "", false + } + return s, true +} + +// BufClear clears the contents of the parser's string buffer. 
+func (p *ParseAPI) BufClear() { + p.buffer.reset() +} diff --git a/parsehandler_test.go b/parsehandler_test.go new file mode 100644 index 0000000..f516926 --- /dev/null +++ b/parsehandler_test.go @@ -0,0 +1,75 @@ +package parsekit_test + +import ( + "testing" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.On(nil) + }) + RunPanicTest(t, PanicTest{ + func() { p.Execute("") }, + "internal parser error: tokenHandler argument for On() is nil"}) +} + +func TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics(t *testing.T) { + otherHandler := func(p *parsekit.ParseAPI) { + panic("This is not the handler you're looking for") + } + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Stop() + p.Handle(otherHandler) + }) + RunPanicTest(t, PanicTest{ + func() { p.Execute("") }, + "Illegal call to ParseAPI.Handle() from git.makaay.nl/mauricem/go-parsekit_test." + + "TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics.func2: " + + "no calls allowed after ParseAPI.Stop()"}) +} + +func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) { + otherHandler := func(p *parsekit.ParseAPI) { + panic("This is not the handler you're looking for") + } + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Error("It ends here") + p.Handle(otherHandler) + }) + RunPanicTest(t, PanicTest{ + func() { p.Execute("") }, + "Illegal call to ParseAPI.Handle() from git.makaay.nl/mauricem/go-parsekit_test." 
+ + "TestGivenParserWithError_WhenCallingHandle_ParsekitPanics.func2: " + + "no calls allowed after ParseAPI.Error()"}) +} + +func TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString(t *testing.T) { + var interpreted string + var literal string + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept() + literal = p.BufLiteral() + interpreted, _ = p.BufInterpreted() + }) + p.Execute(`This\tis\ta\tcool\tstring`) + + if literal != `This\tis\ta\tcool\tstring` { + t.Fatal("literal string is incorrect") + } + if interpreted != "This\tis\ta\tcool\tstring" { + t.Fatal("interpreted string is incorrect") + } +} + +func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept() + p.BufInterpreted() + }) + err := p.Execute(`This \is wrongly escaped`) + if err.Error() != `invalid string: This \is wrongly escaped (invalid syntax, forgot to escape a double quote or backslash maybe?)` { + t.Fatalf("Got unexpected error: %s", err.Error()) + } +} diff --git a/parsekit.go b/parsekit.go index f3d9ff2..6ed61c8 100644 --- a/parsekit.go +++ b/parsekit.go @@ -1,11 +1,5 @@ package parsekit -import ( - "fmt" - "reflect" - "runtime" -) - // Parser is the top-level struct that holds the configuration for a parser. // The Parser can be instantiated using the parsekit.NewParser() method. type Parser struct { @@ -24,12 +18,6 @@ func NewParser(startHandler ParseHandler) *Parser { return &Parser{startHandler: startHandler} } -// ParseRun represents a single parse run for a Parser. -// Deprecated -type ParseRun struct { - p *ParseAPI // holds parser state and provides an API to ParseHandler functions -} - // Execute starts the parser for the provided input. // When an error occurs during parsing, then this error is returned. Nil otherwise. 
func (p *Parser) Execute(input string) *Error { @@ -38,125 +26,14 @@ func (p *Parser) Execute(input string) *Error { len: len(input), cursorLine: 1, cursorColumn: 1, - nextState: p.startHandler, } - p.startHandler(api) - if !api.stopped { + api.Handle(p.startHandler) + if !api.stopped && api.err == nil { api.UnexpectedInput() } return api.err } -// Parse starts a parse run on the provided input data. -// To retrieve emitted parser Items from the run, make use of the ParseRun.Next() method. -// Deprecated -func (p *Parser) Parse(input string) *ParseRun { - panic("Parse() is deprecated, use Execute()") - // return &ParseRun{ - // p: &ParseAPI{ - // input: input, - // len: len(input), - // cursorLine: 1, - // cursorColumn: 1, - // nextState: p.startHandler, - // }, - // } -} - -// Next retrieves the next parsed item for a parse run. -// -// When a valid item was found, then the boolean return parameter will be true. -// On error or when successfully reaching the end of the input, false is returned. -// When an error occurred, false will be returned and the error return value will -// be set (default is nil). -func (run *ParseRun) Next() (Item, *Error, bool) { - // State handling loop: we handle states, until an Item is ready to be returned. - for { - // If a state handler has emitted one or more parser Items, then the next - // available Item is returned to the caller. - if len(run.p.items) > 0 { - item, rest := run.p.items[0], run.p.items[1:] - run.p.items = rest - return run.makeReturnValues(item) - } - // Otherwise, the next state handler is looked up and invoked. 
- run.runNextParseHandler() - } -} - -func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) { - switch { - case i.Type == ItemEOF: - return i, nil, false - case i.Type == ItemError: - run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} - return i, run.p.err, false - default: - run.p.item = i - return i, nil, true - } -} - -// runNextParseHandler moves the parser, that is bascially a state machine, -// to its next status. It does so by invoking a function of the -// type ParseHandler. This function represents the current status and -// is responsible for moving the parser to its next status, depending -// on the parsed input data. -func (run *ParseRun) runNextParseHandler() { - if state, ok := run.getNextParseHandler(); ok { - run.invokeNextParseHandler(state) - } -} - -// getNextParseHandler determines the next ParseHandler to invoke in order -// to move the parsing state machine one step further. -// -// When implementing a parser, the ParseHandler functions must provide -// a routing decision in every invocation. A routing decision is one -// of the following: -// -// * A route is specified explicitly, which means that the next ParseHandler -// function to invoke is registered during the ParseHandler function -// invocation. For example: p.RouteTo(nextStatus) -// -// * A route is specified implicitly, which means that a previous ParseHandler -// invocation has registered the followup route for the current state. -// For example: p.RouteTo(nextStatus).ThenTo(otherStatus) -// In this example, the nextStatus ParseHandler will not have to specify -// a route explicitly, but otherStatus will be used implicitly after -// the nextStatus function has returned. -// -// * An expectation is registered by the ParseHandler. -// For example: p.Expects("a cool thing") -// When the ParseHandler returns without having specified a route, this -// expectation is used to generate an "unexpected input" error message. 
-// -// When no routing decision is provided by a ParseHandler, then this is -// considered a bug in the state handler, and the parser will panic. -func (run *ParseRun) getNextParseHandler() (ParseHandler, bool) { - switch { - case run.p.nextState != nil: - return run.p.nextState, true - case len(run.p.routeStack) > 0: - return run.p.popRoute(), true - case run.p.expecting != "": - run.p.UnexpectedInput() - return nil, false - default: - name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name() - panic(fmt.Sprintf("internal parser error: ParseHandler %s did not provide a routing decision", name)) - } -} - -// invokeNextParseHandler moves the parser state to the provided state -// and invokes the ParseHandler function. -func (run *ParseRun) invokeNextParseHandler(state ParseHandler) { - run.p.state = state - run.p.nextState = nil - run.p.expecting = "" - run.p.state(run.p) -} - // Matcher is the top-level struct that holds the configuration for // a parser that is based solely on a TokenHandler function. // The Matcher can be instantiated using the parsekit.NewMatcher() @@ -198,12 +75,3 @@ func (m *Matcher) Execute(input string) (string, *Error) { err := m.parser.Execute(input) return m.match, err } - -// Parse checks for a match on the provided input data. -func (m *Matcher) Parse(input string) (string, *Error) { - item, err, ok := m.parser.Parse(input).Next() - if !ok { - return "", err - } - return item.Value, nil -} diff --git a/parsekit_test.go b/parsekit_test.go index f70c416..8698693 100644 --- a/parsekit_test.go +++ b/parsekit_test.go @@ -9,16 +9,14 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -const TestItem parsekit.ItemType = 1 - // Easy access to the parsekit definitions. 
var c, a, m = parsekit.C, parsekit.A, parsekit.M type TokenHandlerTest struct { - input string - tokenHandler parsekit.TokenHandler - mustMatch bool - expected string + Input string + TokenHandler parsekit.TokenHandler + MustMatch bool + Expected string } func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) { @@ -28,17 +26,40 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) { } func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) { - output, err := parsekit.NewMatcher(test.tokenHandler, "a match").Execute(test.input) - - if test.mustMatch { + output, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input) + if test.MustMatch { if err != nil { - t.Errorf("Test %q failed with error: %s", test.input, err) - } else if output != test.expected { - t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output) + t.Errorf("Test %q failed with error: %s", test.Input, err) + } else if output != test.Expected { + t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output) } } else { if err == nil { - t.Errorf("Test %q failed: should not match, but it did", test.input) + t.Errorf("Test %q failed: should not match, but it did", test.Input) } } } + +type PanicTest struct { + function func() + epxected string +} + +func RunPanicTest(t *testing.T, p PanicTest) { + defer func() { + if r := recover(); r != nil { + if r != p.epxected { + t.Errorf("Function did panic, but unexpected panic message received:\nexpected: %q\nactual: %q\n", p.epxected, r) + } + } else { + t.Errorf("Function did not panic (expected panic message: %s)", p.epxected) + } + }() + p.function() +} + +func RunPanicTests(t *testing.T, testSet []PanicTest) { + for _, test := range testSet { + RunPanicTest(t, test) + } +} diff --git a/tokenhandler.go b/tokenhandler.go index 95d0663..8089710 100644 --- a/tokenhandler.go +++ b/tokenhandler.go @@ -155,7 
+155,7 @@ func (t *TokenAPI) checkAllowedCall(name string) { panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name)) } if !t.currRune.OK { - panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name)) + panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name)) } } diff --git a/tokenhandler_test.go b/tokenhandler_test.go new file mode 100644 index 0000000..69fdc45 --- /dev/null +++ b/tokenhandler_test.go @@ -0,0 +1,75 @@ +package parsekit_test + +import ( + "testing" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput(t *testing.T) { + parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + for i := 0; i < 33; i++ { + t.NextRune() + t.Accept() + t.NextRune() + t.Skip() + } + return true + }, "test") + output, _ := parser.Execute("Txhxixsx xsxhxoxuxlxdx xbxexcxoxmxex xqxuxixtxex xrxexaxdxaxbxlxex") + if output != "This should become quite readable" { + t.Fatalf("Got unexpected output from TokenHandler: %s", output) + } +} + +func TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics(t *testing.T) { + parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + t.NextRune() + t.NextRune() + return false + }, "test") + RunPanicTest(t, PanicTest{ + func() { parser.Execute("input string") }, + "internal Matcher error: NextRune() was called without accepting or skipping the previously read rune"}) +} + +func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) { + parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + t.Accept() + return false + }, "test") + RunPanicTest(t, PanicTest{ + func() { parser.Execute("input string") }, + "internal Matcher error: Accept() was called without a prior call to NextRune()"}) +} + +func TestGivenNextRuneNotCalled_CallToSkipPanics(t 
*testing.T) { + parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + t.Skip() + return false + }, "test") + RunPanicTest(t, PanicTest{ + func() { parser.Execute("input string") }, + "internal Matcher error: Skip() was called without a prior call to NextRune()"}) +} + +func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) { + parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + t.NextRune() + t.Accept() + return false + }, "test") + RunPanicTest(t, PanicTest{ + func() { parser.Execute("\xcd") }, + "internal Matcher error: Accept() was called, but prior call to NextRune() did not return OK (EOF or invalid rune)"}) +} + +func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) { + RunPanicTest(t, PanicTest{ + func() { + a := parsekit.TokenAPI{} + a.Merge() + }, + "internal parser error: Cannot call Merge a a non-forked MatchDialog", + }) +} diff --git a/tokenhandlers_builtin.go b/tokenhandlers_builtin.go index bfd5ce0..6f0de4b 100644 --- a/tokenhandlers_builtin.go +++ b/tokenhandlers_builtin.go @@ -93,10 +93,10 @@ func MatchRunes(expected ...rune) TokenHandler { // // creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'. func MatchRuneRange(start rune, end rune) TokenHandler { + if end < start { + panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end)) + } return func(t *TokenAPI) bool { - if end < start { - panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end)) - } input, ok := t.NextRune() if ok && input >= start && input <= end { t.Accept() @@ -202,14 +202,17 @@ func MatchNot(handler TokenHandler) TokenHandler { // will not match input "XXX", it will match input "XXXX", but also "XXXXXX". // In that last case, there will be a remainder "XX" on the input. 
func MatchRep(times int, handler TokenHandler) TokenHandler { - return matchMinMax(times, times, handler) + return matchMinMax(times, times, handler, "MatchRep") } // MatchMin creates a TokenHandler that checks if the provided TokenHandler can be // applied at least the provided minimum number of times. // When more matches are possible, these will be included in the output. func MatchMin(min int, handler TokenHandler) TokenHandler { - return matchMinMax(min, -1, handler) + if min < 0 { + panic("internal parser error: MatchMin definition error: min must be >= 0") + } + return matchMinMax(min, -1, handler, "MatchMin") } // MatchMax creates a TokenHandler that checks if the provided TokenHandler can be @@ -217,20 +220,23 @@ func MatchMin(min int, handler TokenHandler) TokenHandler { // When more matches are possible, these will be included in the output. // Zero matches are considered a successful match. func MatchMax(max int, handler TokenHandler) TokenHandler { - return matchMinMax(0, max, handler) + if max < 0 { + panic("internal parser error: MatchMax definition error: max must be >= 0") + } + return matchMinMax(0, max, handler, "MatchMax") } // MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can // be applied zero or more times. All matches will be included in the output. // Zero matches are considered a successful match. func MatchZeroOrMore(handler TokenHandler) TokenHandler { - return matchMinMax(0, -1, handler) + return matchMinMax(0, -1, handler, "MatchZeroOfMore") } // MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can // be applied one or more times. All matches will be included in the output. 
func MatchOneOrMore(handler TokenHandler) TokenHandler { - return matchMinMax(1, -1, handler) + return matchMinMax(1, -1, handler, "MatchOneOrMore") } // MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can @@ -238,20 +244,20 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler { // inclusive. All matches will be included in the output. func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler { if max < 0 { - panic("internal parser error: MatchMinMax definition error: max must be >= 0 ") + panic("internal parser error: MatchMinMax definition error: max must be >= 0") } if min < 0 { - panic("internal parser error: MatchMinMax definition error: min must be >= 0 ") + panic("internal parser error: MatchMinMax definition error: min must be >= 0") } - return matchMinMax(min, max, handler) + return matchMinMax(min, max, handler, "MatchMinMax") } -func matchMinMax(min int, max int, handler TokenHandler) TokenHandler { +func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler { + if max >= 0 && min > max { + panic(fmt.Sprintf("internal parser error: %s definition error: max %d must not be < min %d", name, max, min)) + } return func(t *TokenAPI) bool { child := t.Fork() - if max >= 0 && min > max { - panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min)) - } total := 0 // Check for the minimum required amount of matches. 
for total < min { diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index f7e4c38..c5afbdd 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -78,6 +78,23 @@ func TestCombinators(t *testing.T) { }) } +func TestCombinatorPanics(t *testing.T) { + RunPanicTests(t, []PanicTest{ + {func() { parsekit.C.RuneRange('z', 'a') }, + "internal parser error: MatchRuneRange definition error: start 'z' must not be < end 'a'"}, + {func() { parsekit.C.MinMax(-1, 1, parsekit.A.Space) }, + "internal parser error: MatchMinMax definition error: min must be >= 0"}, + {func() { parsekit.C.MinMax(1, -1, parsekit.A.Space) }, + "internal parser error: MatchMinMax definition error: max must be >= 0"}, + {func() { parsekit.C.MinMax(10, 5, parsekit.A.Space) }, + "internal parser error: MatchMinMax definition error: max 5 must not be < min 10"}, + {func() { parsekit.C.Min(-10, parsekit.A.Space) }, + "internal parser error: MatchMin definition error: min must be >= 0"}, + {func() { parsekit.C.Max(-42, parsekit.A.Space) }, + "internal parser error: MatchMax definition error: max must be >= 0"}, + }) +} + func TestAtoms(t *testing.T) { RunTokenHandlerTests(t, []TokenHandlerTest{ {"", a.EndOfFile, true, ""},