Cleanup of stuff that I don't need anymore, because it has been fully deprecated. Also added some tests for panic() calls in parsekit, which brings test coverage to 100%. It's not a goal as such, but it's good to know that I got there without cheaty tests :)

This commit is contained in:
Maurice Makaay 2019-05-28 13:41:58 +00:00
parent 3dfa99c965
commit 2d851103e5
22 changed files with 482 additions and 550 deletions

View File

@ -82,7 +82,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
value, err := strconv.ParseInt(p.BufLiteral(), 10, 64)
p.BufClear()
if err != nil {
p.EmitError("invalid value: %s", err)
p.Error("invalid value: %s", err)
} else {
c.Result += c.op * value
p.Handle(c.operatorOrEndOfFile)

View File

@ -84,10 +84,10 @@ func Compute(input string) (float64, *parsekit.Error) {
}
func (c *calculator) computation(p *parsekit.ParseAPI) {
p.Handle(c.expr)
p.ExpectEndOfFile()
c.result = c.interpreter.result
if p.Handle(c.expr) {
p.ExpectEndOfFile()
c.result = c.interpreter.result
}
}
// expr : term ((ADD|SUB) term)*
@ -95,11 +95,14 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
c.interpreter.push()
var pc, a = parsekit.C, parsekit.A
p.Handle(c.term)
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
c.interpreter.pushOperator(p.LastMatch)
p.Handle(c.term)
c.interpreter.eval()
if p.Handle(c.term) {
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
c.interpreter.pushOperator(p.LastMatch)
if !p.Handle(c.term) {
return
}
c.interpreter.eval()
}
}
c.interpreter.pop()
@ -110,11 +113,14 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
c.interpreter.push()
var pc, a = parsekit.C, parsekit.A
p.Handle(c.factor)
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
c.interpreter.pushOperator(p.LastMatch)
p.Handle(c.factor)
c.interpreter.eval()
if p.Handle(c.factor) {
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
c.interpreter.pushOperator(p.LastMatch)
if !p.Handle(c.factor) {
return
}
c.interpreter.eval()
}
}
c.interpreter.pop()
@ -130,18 +136,23 @@ func (c *calculator) factor(p *parsekit.ParseAPI) {
p.BufClear()
value, err := strconv.ParseFloat(floatStr, 64)
if err != nil {
p.EmitError("invalid number %s: %s", floatStr, err)
p.Error("invalid number %s: %s", floatStr, err)
return
} else {
c.interpreter.pushValue(value)
}
case p.On(a.LeftParen).Skip():
p.Handle(c.expr)
if !p.Handle(c.expr) {
return
}
if !p.On(a.RightParen).Skip() {
p.Expects("')'")
p.UnexpectedInput()
return
}
default:
p.UnexpectedInput()
return
}
p.On(a.Whitespace).Skip()
}

View File

@ -25,7 +25,7 @@ func Example_dutchPostcodeUsingMatcher() {
} {
output, err := parser.Execute(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full())
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}

View File

@ -27,7 +27,7 @@ func Example_helloWorldUsingMatcher() {
} {
output, err := parser.Execute(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full())
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}

View File

@ -117,7 +117,7 @@ func (h *helloparser1) end(p *parsekit.ParseAPI) {
h.greetee = strings.TrimSpace(p.BufLiteral())
if h.greetee == "" {
p.EmitError("The name cannot be empty")
p.Error("The name cannot be empty")
} else {
p.Stop()
}

View File

@ -81,7 +81,7 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
// Note:
// For efficiency, we could have either:
//
// 1) added a return after every call to p.EmitError()
// 1) added a return after every call to p.Error()
// 2) done an 'else if' for every 'if' after the first
//
// For code readability, I omitted these however. The ParseAPI knows it
@ -91,19 +91,19 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
func (h *helloparser2) start(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
if !p.On(c.StrNoCase("hello")).Skip() {
p.EmitError("the greeting is not being friendly")
p.Error("the greeting is not being friendly")
} else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
p.EmitError("the greeting is not properly separated")
p.Error("the greeting is not properly separated")
} else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
p.EmitError("the greeting is targeted at thin air")
p.Error("the greeting is targeted at thin air")
} else if !p.On(a.Excl).Skip() {
p.EmitError("the greeting is not loud enough")
p.Error("the greeting is not loud enough")
} else if !p.On(a.EndOfFile).Stay() {
p.EmitError("too much stuff going on after the closing '!'")
p.Error("too much stuff going on after the closing '!'")
} else {
h.greetee = p.BufLiteral()
if h.greetee == "" {
p.EmitError("the name cannot be empty")
p.Error("the name cannot be empty")
}
p.Stop()
}

View File

@ -15,7 +15,7 @@ func ExampleError() {
fmt.Println(err.Error())
fmt.Printf("%s\n", err)
fmt.Println(err.ErrorFull())
fmt.Println(err.Full())
// Output:
// it broke down
// it broke down
@ -36,14 +36,14 @@ func ExampleError_Error() {
// it broke down
}
func ExampleError_ErrorFull() {
func ExampleError_Full() {
err := &parsekit.Error{
Message: "it broke down",
Line: 10,
Column: 42,
}
fmt.Println(err.ErrorFull())
fmt.Println(err.Full())
// Output:
// it broke down at line 10, column 42
}

View File

@ -1,6 +1,11 @@
package parsekit
import "unicode/utf8"
import (
"fmt"
"runtime"
"strings"
"unicode/utf8"
)
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
@ -13,25 +18,50 @@ type ParseHandler func(*ParseAPI)
// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser.
type ParseAPI struct {
state ParseHandler // the function that handles the current state
nextState ParseHandler // the function that will handle the next state
routeStack []ParseHandler // route stack, for handling nested parsing
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
items []Item // a slice of resulting Parser items (see P.Emit())
item Item // the current item as reached by Next(), retrieved by Get()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
LastMatch string // a string representation of the last matched input data
}
// panicWhenStoppedOrInError is a guard for ParseAPI methods. It returns
// silently while the parser is still running, and panics as soon as the
// parser has been stopped or an error has been set.
//
// The guard enforces that parser implementations follow a clean route:
// once Stop() or Error() has been called, no further ParseAPI calls are
// allowed. Think of it as a unit test that is executed at runtime.
//
// The panic message names the ParseAPI method that was called illegally
// (the direct caller of this guard) and the function that called that
// method. The guard must therefore be invoked directly from the ParseAPI
// method that it protects.
func (p *ParseAPI) panicWhenStoppedOrInError() {
	if !p.isStoppedOrInError() {
		return
	}

	// The lookup results are not checked for errors: this is private code,
	// used only in a way that is known to work.
	calleePC, _, _, _ := runtime.Caller(1)
	callerPC, _, _, _ := runtime.Caller(2)
	callee := runtime.FuncForPC(calleePC).Name()
	caller := runtime.FuncForPC(callerPC).Name()

	// Keep only the part after the last dot: the bare method name.
	method := callee[strings.LastIndex(callee, ".")+1:]

	var after string
	if p.stopped {
		after = "Stop()"
	} else {
		after = "Error()"
	}

	panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s: no calls allowed after ParseAPI.%s", method, caller, after))
}
// isStoppedOrInError reports whether the parser has been stopped or has
// an error set.
func (p *ParseAPI) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
// peek returns but does not advance the cursor to the next rune in the input.
// Returns the rune, its width in bytes and a boolean.
//

View File

@ -1,176 +0,0 @@
package parsekit
import (
"fmt"
)
// Item is a single result that a ParseHandler function can emit.
// It combines the type of the result with its string value.
type Item struct {
	Type  ItemType // the kind of item
	Value string   // the string data that belongs to the item
}

// ItemType identifies the kind of a parser Item.
//
// Use positive integer values when defining your own ItemType values.
// Negative values are technically possible, but they are reserved for
// internal use by parsekit.
type ItemType int

// Built-in item types. These use the negative values that are reserved
// for parsekit.
const (
	// ItemEOF flags that the end of the input was reached.
	ItemEOF ItemType = -1

	// ItemError flags that an error has occurred during parsing.
	ItemError ItemType = -2
)
// Emit queues a parser Item of type t with string value v for the client
// and resets the parser's string buffer afterwards.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) Emit(t ItemType, v string) {
	emitted := Item{Type: t, Value: v}
	p.items = append(p.items, emitted)
	p.buffer.reset()
}
// BufLiteral retrieves the contents of the parser's string buffer (all the
// runes that were added to it using ParseAPI.Accept()) as a literal string.
//
// Literal means that if the input had for example the subsequent runes '\' and 'n'
// in it, then the literal string would have a backslash and an 'n' in it, not a
// linefeed (ASCII char 10).
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. The buffer is only cleared when calling P.BufClear().
func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString()
}
// EmitLiteral emits a parser Item of type t, using the literal contents
// of the string buffer (see BufLiteral) as the item value.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) EmitLiteral(t ItemType) {
	literal := p.BufLiteral()
	p.Emit(t, literal)
}
// BufClear clears the contents of the parser's string buffer by
// resetting it.
func (p *ParseAPI) BufClear() {
p.buffer.reset()
}
// BufInterpreted returns the contents of the parser's string buffer (all
// the runes that were added to it using ParseAPI.Accept()) as an
// interpreted string.
//
// Interpreted means that the buffer is handled like a Go double quoted
// interpreted string, in which escape codes like \n, \t and \uXXXX are
// processed. For example, the subsequent runes '\' and 'n' in the input
// end up as an actual linefeed (ASCII char 10) in the returned string.
//
// The boolean return value tells whether the interpretation succeeded.
// On invalid string data, an error is emitted automatically and the
// return values are an empty string and false.
//
// Retrieving the buffer contents does not affect the buffer itself. New
// runes can still be added to it.
func (p *ParseAPI) BufInterpreted() (string, bool) {
	interpreted, err := p.buffer.asInterpretedString()
	if err == nil {
		return interpreted, true
	}
	p.EmitError(
		"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
		p.buffer.asLiteralString(), err)
	return "", false
}
// EmitInterpreted emits a parser Item of type t, using the string buffer
// contents as a Go double quoted interpreted string (in which escape codes
// like \n, \t, \uXXXX, etc. are processed) for the item value.
//
// The boolean return value tells whether the string interpretation
// succeeded. On invalid string data, no item is emitted, an error is set
// by BufInterpreted() and false is returned.
func (p *ParseAPI) EmitInterpreted(t ItemType) bool {
	s, ok := p.BufInterpreted()
	if !ok {
		return false
	}
	p.Emit(t, s)
	return true
}
// Error is the error type that is used for reporting parsing errors.
// Besides the error message, it carries the line and column that go
// with the error, to allow for useful error messages to the user.
type Error struct {
	Message string // description of what went wrong
	Line    int    // line number that belongs to the error
	Column  int    // column number that belongs to the error
}

// Error returns the bare error message, without position information.
// It panics when it is called on a nil *Error.
func (err *Error) Error() string {
	if err != nil {
		return err.Message
	}
	panic("internal parser error: Error() method called on the parser, but no error was set")
}

// ErrorFull returns the error message, extended with the line and column
// that go with the error.
func (err *Error) ErrorFull() string {
	const layout = "%s at line %d, column %d"
	return fmt.Sprintf(layout, err, err.Line, err.Column)
}
// EmitError formats an error message (fmt.Sprintf style) and stores it in
// the parser API, together with the current cursor line and column. The
// error will eventually be returned by the Parser.Execute() method.
func (p *ParseAPI) EmitError(format string, args ...interface{}) {
	p.err = &Error{
		Message: fmt.Sprintf(format, args...),
		Line:    p.cursorLine,
		Column:  p.cursorColumn,
	}
}
// Stop is used by the parser implementation to tell the API that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first, the
// Parser.Execute() will assume that something went wrong and calls
// ParseAPI.UnexpectedInput() to report an error about this.
//
// The parser implementation can define what was being expected, by
// providing a description to ParseAPI.Expects().
func (p *ParseAPI) Stop() {
p.stopped = true
}
// UnexpectedInput sets an error that tells the user that some unexpected
// input was encountered.
//
// The error message is derived automatically from the upcoming input:
// 1) there is input, but it didn't match the expectation
// 2) the end of the input was reached
// 3) there is an invalid UTF8 character on the input.
//
// The parser implementation can add some feedback to the error by calling
// ParseAPI.Expects() to set the expectation. When set, the expectation is
// included in the error message.
func (p *ParseAPI) UnexpectedInput() {
	// When some previous parsing step yielded an error, or when the parser
	// was stopped, skip this operation.
	if p.stopped || p.err != nil {
		return
	}
	r, _, ok := p.peek(0)
	if ok {
		p.EmitError("unexpected character %q%s", r, fmtExpects(p))
		return
	}
	if r == eofRune {
		p.EmitError("unexpected end of file%s", fmtExpects(p))
		return
	}
	if r == invalidRune {
		p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
		return
	}
	panic("parsekit bug: Unhandled output from peek()")
}
// fmtExpects formats the expectation that was set in the ParseAPI as an
// error message suffix. It returns an empty string when no expectation
// is set.
func fmtExpects(p *ParseAPI) string {
	if expected := p.expecting; expected != "" {
		return fmt.Sprintf(" (expected %s)", expected)
	}
	return ""
}

33
parsehandler_error.go Normal file
View File

@ -0,0 +1,33 @@
package parsekit
import (
"fmt"
)
// Error is the error type that is used for reporting parsing errors.
// Besides the error message, it carries the line and column that go
// with the error, to allow for useful error messages to the user.
type Error struct {
	Message string // description of what went wrong
	Line    int    // line number that belongs to the error
	Column  int    // column number that belongs to the error
}

// Error returns the bare error message, without position information.
func (err *Error) Error() string {
	return err.Message
}

// Full returns the error message, extended with the line and column
// in the input where the error occurred.
func (err *Error) Full() string {
	const layout = "%s at line %d, column %d"
	return fmt.Sprintf(layout, err.Message, err.Line, err.Column)
}
// Error formats an error message (fmt.Sprintf style) and stores it in the
// parser API, together with the current cursor line and column. The error
// will eventually be returned by the Parser.Execute() method.
func (p *ParseAPI) Error(format string, args ...interface{}) {
	// Deliberately no call to p.panicWhenStoppedOrInError() here: a parser
	// must be able to replace an earlier error message when needed.
	p.err = &Error{
		Message: fmt.Sprintf(format, args...),
		Line:    p.cursorLine,
		Column:  p.cursorColumn,
	}
}

View File

@ -1,23 +0,0 @@
package parsekit
// Expects lets a ParseHandler function describe the input that it is
// expecting. The description is used in error messages, to make them
// more descriptive.
//
// When an expectation is defined inside a ParseHandler, unexpected input
// does not have to be handled by the handler itself. When the end of the
// function is reached without setting the next state, an error is emitted
// automatically. This error can differentiate between the following issues:
//
// 1) there is valid data on input, but it was not accepted by the function
//
// 2) there is an invalid UTF8 character on input
//
// 3) the end of the file was reached.
//
// The call is ignored when an error was already set or when the parser
// was stopped.
func (p *ParseAPI) Expects(description string) {
	// TODO make this into some debugging tool?
	// fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn)
	if p.err == nil && !p.stopped {
		p.expecting = description
	}
}

View File

@ -37,13 +37,7 @@ package parsekit
// p.Emit(SomeItemType, p.BufLiteral())
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
// When some previous parsing step yielded an error, skip this operation.
if p.err != nil || p.stopped {
return &MatchAction{
p: p,
ok: false,
}
}
p.panicWhenStoppedOrInError()
// Perform the matching operation.
m := &TokenAPI{p: p}

View File

@ -1,135 +0,0 @@
package parsekit
// Handle executes other ParseHandler functions from within your
// ParseHandler function. The handlers are invoked in the provided order.
// As soon as an error is set or the parser is stopped, the remaining
// handlers are skipped.
func (p *ParseAPI) Handle(handlers ...ParseHandler) {
	for i := 0; i < len(handlers); i++ {
		// When some previous parsing step yielded an error, or stopped
		// the parser, skip the rest of the handlers.
		if p.stopped || p.err != nil {
			return
		}
		handlers[i](p)
	}
}
// RouteTo registers the ParseHandler function that must be invoked on the
// next parse cycle. The returned RouteFollowupAction can be used to chain
// a follow-up route.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
	followup := &RouteFollowupAction{p: p}
	p.nextState = handler
	return followup
}
// RouteRepeat tells the parser that on the next parsing cycle, the current
// ParseHandler must be reinvoked. It does so by routing to the handler
// that is stored as the current state.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) RouteRepeat() {
p.RouteTo(p.state)
}
// RouteReturn tells the parser that on the next cycle the last ParseHandler
// that was pushed on the route stack must be invoked.
//
// Using this method is optional. When implementing a ParseHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) RouteReturn() {
p.nextState = p.popRoute()
}
// RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
//
// Deprecated: do not use in new code.
type RouteFollowupAction struct {
p *ParseAPI // the ParseAPI on which the follow-up route is registered
}
// ThenTo schedules a ParseHandler that must be invoked after the RouteTo
// ParseHandler has been completed. The handler is pushed onto the route
// stack of the ParseAPI.
// For example:
//
// p.RouteTo(handlerA).ThenTo(handlerB)
//
// Deprecated: do not use in new code.
func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
a.p.pushRoute(state)
}
// ThenReturnHere schedules the current ParseHandler to be invoked after
// the RouteTo ParseHandler has been completed. The current state of the
// ParseAPI is pushed onto the route stack for this.
// For example:
//
// p.RouteTo(handlerA).ThenReturnHere()
//
// Deprecated: do not use in new code.
func (a *RouteFollowupAction) ThenReturnHere() {
a.p.pushRoute(a.p.state)
}
// pushRoute adds the ParseHandler to the end of the route stack.
// This is used for implementing nested parsing.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) pushRoute(state ParseHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute removes the most recently pushed ParseHandler from the route
// stack and returns it. The route stack must not be empty.
//
// Deprecated: do not use in new code.
func (p *ParseAPI) popRoute() ParseHandler {
	top := len(p.routeStack) - 1
	remaining := p.routeStack[:top]
	handler := p.routeStack[top]
	p.routeStack = remaining
	return handler
}
// ExpectEndOfFile checks if the input is at end of file.
// Intended use:
//
// func yourParseHandler(p *parsekit.ParseAPI) {
//     ...
//     p.ExpectEndOfFile()
// }
//
// The end of file test is executed right away. When the end of the file
// was reached, the parser is stopped. Otherwise, unexpected input is
// reported with "end of file" as the expectation.
//
// If you want to use the end of file check as a ParseHandler instead,
// you can also make use of another form, for example:
//
// func yourParseHandler(p *parsekit.ParseAPI) {
//     p.RouteTo(yourHandler).ThenTo(parsekit.ExpectEndOfFile)
// }
func (p *ParseAPI) ExpectEndOfFile() {
	// When some previous parsing step yielded an error, or stopped the
	// parser, skip this operation.
	if p.stopped || p.err != nil {
		return
	}
	if !p.On(A.EndOfFile).Stay() {
		p.Expects("end of file")
		p.UnexpectedInput()
		return
	}
	p.Stop()
}
// ExpectEndOfFile can be scheduled as a ParseHandler function.
// It sets "end of file" as the expectation and stops the parser when the
// input is at the end of file.
// Intended use:
//
// func yourParseHandler(p *parsekit.ParseAPI) {
// ...
// p.RouteTo(parsekit.ExpectEndOfFile)
// }
//
// It is not mandatory to use this ParseHandler. You can take care of EOF
// yourself too. Simply stop the parser when the end of the input was
// reached:
//
// p.Stop()
//
// TODO meh, get rid of this one, once we don't use state scheduling anymore.
//
// Deprecated: do not use in new code.
func ExpectEndOfFile(p *ParseAPI) {
p.Expects("end of file")
if p.On(A.EndOfFile).Stay() {
p.Stop()
}
}

89
parsehandler_routing.go Normal file
View File

@ -0,0 +1,89 @@
package parsekit
import "fmt"
// Handle executes another ParseHandler function from within your
// ParseHandler function.
//
// The boolean return value tells whether the parser can still continue
// after the handler has returned. It is false when either an error was
// set (using ParseAPI.Error()) or the parser was stopped (using
// ParseAPI.Stop()).
//
// Calling Handle when the parser is already stopped or in error
// results in a panic.
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
	p.panicWhenStoppedOrInError()
	parseHandler(p)
	canContinue := !p.stopped && p.err == nil
	return canContinue
}
// Expects is used to let a ParseHandler function describe what input it is
// expecting. This expectation is used in error messages to provide some
// context to them.
//
// When defining an expectation inside a ParseHandler, you do not need to
// handle unexpected input yourself. When the end of the parser is reached
// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(),
// an automatic error will be emitted using ParseAPI.UnexpectedInput().
//
// Calling Expects after an error was set or after the parser was stopped
// results in a panic.
func (p *ParseAPI) Expects(description string) {
p.panicWhenStoppedOrInError()
p.expecting = description
}
// Stop is used by the parser implementation to tell the API that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first, the
// Parser.Execute() will assume that something went wrong and calls
// ParseAPI.UnexpectedInput() to report an error about this.
//
// The parser implementation can define what was being expected, by
// providing a description to ParseAPI.Expects().
func (p *ParseAPI) Stop() {
p.stopped = true
}
// ExpectEndOfFile checks if the input is at end of file.
//
// When the end of the file was indeed reached, the parser is stopped
// through ParseAPI.Stop(). Otherwise, unexpected input is reported through
// ParseAPI.UnexpectedInput(), with "end of file" as the expectation.
//
// Calling ExpectEndOfFile when the parser is already stopped or in error
// results in a panic.
func (p *ParseAPI) ExpectEndOfFile() {
	p.panicWhenStoppedOrInError()
	if !p.On(A.EndOfFile).Stay() {
		p.Expects("end of file")
		p.UnexpectedInput()
		return
	}
	p.Stop()
}
// UnexpectedInput sets an error that tells the user that some unexpected
// input was encountered.
//
// The error message is derived automatically from the upcoming input:
// 1) there is input, but it didn't match the expectation
// 2) the end of the input was reached
// 3) there is an invalid UTF8 character on the input.
//
// The parser implementation can add some feedback to the error by calling
// ParseAPI.Expects() to set the expectation. When set, the expectation is
// included in the error message.
//
// Calling UnexpectedInput when the parser is already stopped or in error
// results in a panic.
func (p *ParseAPI) UnexpectedInput() {
	p.panicWhenStoppedOrInError()
	r, _, ok := p.peek(0)
	expected := fmtExpects(p)
	if ok {
		p.Error("unexpected character %q%s", r, expected)
	} else if r == eofRune {
		p.Error("unexpected end of file%s", expected)
	} else if r == invalidRune {
		p.Error("invalid UTF8 character in input%s", expected)
	}
}
// fmtExpects formats the expectation that was set in the ParseAPI as an
// error message suffix. It returns an empty string when no expectation
// is set.
func fmtExpects(p *ParseAPI) string {
	if expected := p.expecting; expected != "" {
		return fmt.Sprintf(" (expected %s)", expected)
	}
	return ""
}

47
parsehandler_stringbuf.go Normal file
View File

@ -0,0 +1,47 @@
package parsekit
// BufLiteral retrieves the contents of the parser's string buffer (all the
// runes that were added to it using ParseAPI.Accept()) as a literal string.
//
// Literal means that if the input had for example the subsequent runes '\' and
// 'n' in it, then the literal string would have a backslash and an 'n' in it,
// not a linefeed (ASCII char 10).
//
// Retrieving the buffer contents will not affect the buffer itself. New runes
// can still be added to it. The buffer is only cleared when calling
// P.BufClear().
func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString()
}
// BufInterpreted returns the contents of the parser's string buffer (all
// the runes that were added to it using ParseAPI.Accept()) as an
// interpreted string.
//
// Interpreted means that the buffer is handled like a Go double quoted
// interpreted string, in which escape codes like \n, \t and \uXXXX are
// processed. For example, the subsequent runes '\' and 'n' in the input
// end up as an actual linefeed (ASCII char 10) in the returned string.
//
// The boolean return value tells whether the interpretation succeeded.
// On invalid string data, an error is set through ParseAPI.Error() and
// the return values are an empty string and false.
//
// Retrieving the buffer contents does not affect the buffer itself. New
// runes can still be added to it. The buffer is only cleared when calling
// P.BufClear().
func (p *ParseAPI) BufInterpreted() (string, bool) {
	interpreted, err := p.buffer.asInterpretedString()
	if err == nil {
		return interpreted, true
	}
	p.Error(
		"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
		p.buffer.asLiteralString(), err)
	return "", false
}
// BufClear clears the contents of the parser's string buffer by
// resetting it.
func (p *ParseAPI) BufClear() {
p.buffer.reset()
}

75
parsehandler_test.go Normal file
View File

@ -0,0 +1,75 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics verifies that
// passing a nil TokenHandler to ParseAPI.On() results in a panic with a
// descriptive message.
func TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics(t *testing.T) {
	handler := func(api *parsekit.ParseAPI) {
		api.On(nil)
	}
	parser := parsekit.NewParser(handler)
	execute := func() { parser.Execute("") }
	RunPanicTest(t, PanicTest{
		execute,
		"internal parser error: tokenHandler argument for On() is nil"})
}
// TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics verifies that
// calling ParseAPI.Handle() after ParseAPI.Stop() results in a panic, with
// a message that names the illegal call and the function that made it.
func TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics(t *testing.T) {
// This handler is not expected to run: its own panic message differs from
// the one that the test expects.
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
// NOTE: the expected panic message below refers to this closure as
// "func2". That name presumably follows from the order in which the
// closures appear in this test, so keep that order intact.
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Stop()
p.Handle(otherHandler)
})
RunPanicTest(t, PanicTest{
func() { p.Execute("") },
"Illegal call to ParseAPI.Handle() from git.makaay.nl/mauricem/go-parsekit_test." +
"TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics.func2: " +
"no calls allowed after ParseAPI.Stop()"})
}
// TestGivenParserWithError_WhenCallingHandle_ParsekitPanics verifies that
// calling ParseAPI.Handle() after ParseAPI.Error() results in a panic, with
// a message that names the illegal call and the function that made it.
func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
// This handler is not expected to run: its own panic message differs from
// the one that the test expects.
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
// NOTE: the expected panic message below refers to this closure as
// "func2". That name presumably follows from the order in which the
// closures appear in this test, so keep that order intact.
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Error("It ends here")
p.Handle(otherHandler)
})
RunPanicTest(t, PanicTest{
func() { p.Execute("") },
"Illegal call to ParseAPI.Handle() from git.makaay.nl/mauricem/go-parsekit_test." +
"TestGivenParserWithError_WhenCallingHandle_ParsekitPanics.func2: " +
"no calls allowed after ParseAPI.Error()"})
}
// TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString
// verifies that BufLiteral() returns the buffered input as-is, while
// BufInterpreted() returns it with the escape codes processed.
func TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString(t *testing.T) {
	var literal, interpreted string
	handler := func(api *parsekit.ParseAPI) {
		api.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
		literal = api.BufLiteral()
		interpreted, _ = api.BufInterpreted()
	}
	parsekit.NewParser(handler).Execute(`This\tis\ta\tcool\tstring`)

	if want := `This\tis\ta\tcool\tstring`; literal != want {
		t.Fatal("literal string is incorrect")
	}
	if want := "This\tis\ta\tcool\tstring"; interpreted != want {
		t.Fatal("interpreted string is incorrect")
	}
}
// TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError
// verifies that BufInterpreted() sets a descriptive error when the string
// buffer holds data that is not valid for string interpretation.
func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *testing.T) {
	p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
		p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
		p.BufInterpreted()
	})
	err := p.Execute(`This \is wrongly escaped`)
	// Fail cleanly when no error is returned at all. Without this check,
	// the err.Error() calls below would dereference a nil pointer and crash
	// the test run instead of reporting a failure.
	if err == nil {
		t.Fatal("Expected an error for wrongly escaped input, but got none")
	}
	const expected = `invalid string: This \is wrongly escaped (invalid syntax, forgot to escape a double quote or backslash maybe?)`
	if err.Error() != expected {
		t.Fatalf("Got unexpected error: %s", err.Error())
	}
}

View File

@ -1,11 +1,5 @@
package parsekit
import (
"fmt"
"reflect"
"runtime"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
@ -24,12 +18,6 @@ func NewParser(startHandler ParseHandler) *Parser {
return &Parser{startHandler: startHandler}
}
// ParseRun represents a single parse run for a Parser.
// Deprecated
type ParseRun struct {
p *ParseAPI // holds parser state and provides an API to ParseHandler functions
}
// Execute starts the parser for the provided input.
// When an error occurs during parsing, then this error is returned. Nil otherwise.
func (p *Parser) Execute(input string) *Error {
@ -38,125 +26,14 @@ func (p *Parser) Execute(input string) *Error {
len: len(input),
cursorLine: 1,
cursorColumn: 1,
nextState: p.startHandler,
}
p.startHandler(api)
if !api.stopped {
api.Handle(p.startHandler)
if !api.stopped && api.err == nil {
api.UnexpectedInput()
}
return api.err
}
// Parse used to start a parse run on the provided input data. It now
// panics unconditionally, to point callers to Execute().
//
// Deprecated: use Execute() instead.
func (p *Parser) Parse(input string) *ParseRun {
panic("Parse() is deprecated, use Execute()")
// The former implementation, kept for reference:
// return &ParseRun{
// p: &ParseAPI{
// input: input,
// len: len(input),
// cursorLine: 1,
// cursorColumn: 1,
// nextState: p.startHandler,
// },
// }
}
// Next retrieves the next parsed item for a parse run.
//
// When a valid item was found, the boolean return value is true.
// On error or when successfully reaching the end of the input, it is false.
// When an error occurred, the error return value is set as well
// (it is nil otherwise).
func (run *ParseRun) Next() (Item, *Error, bool) {
	// Keep invoking state handlers for as long as no parser Item is ready
	// to be returned.
	for len(run.p.items) == 0 {
		run.runNextParseHandler()
	}

	// One or more parser Items were emitted by a state handler. Take the
	// first one off the queue and return it to the caller.
	next := run.p.items[0]
	run.p.items = run.p.items[1:]
	return run.makeReturnValues(next)
}
// makeReturnValues translates an emitted Item into the return values for
// Next(). An ItemEOF ends the run without an error. An ItemError ends the
// run with an error that is built from the item value and the current
// cursor position. Any other item is stored as the current item and is
// returned as a valid result.
func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
	switch i.Type {
	case ItemEOF:
		return i, nil, false
	case ItemError:
		run.p.err = &Error{
			Message: i.Value,
			Line:    run.p.cursorLine,
			Column:  run.p.cursorColumn,
		}
		return i, run.p.err, false
	}
	run.p.item = i
	return i, nil, true
}
// runNextParseHandler moves the parser, which is basically a state
// machine, to its next status. It does so by invoking a function of the
// type ParseHandler. That function represents the current status and is
// responsible for moving the parser to its next status, depending on the
// parsed input data.
//
// When no next ParseHandler is available, nothing is invoked.
func (run *ParseRun) runNextParseHandler() {
	state, ok := run.getNextParseHandler()
	if !ok {
		return
	}
	run.invokeNextParseHandler(state)
}
// getNextParseHandler determines the next ParseHandler to invoke in order
// to move the parsing state machine one step further.
//
// When implementing a parser, the ParseHandler functions must provide
// a routing decision in every invocation. A routing decision is one
// of the following:
//
// * A route is specified explicitly, which means that the next ParseHandler
// function to invoke is registered during the ParseHandler function
// invocation. For example: p.RouteTo(nextStatus)
//
// * A route is specified implicitly, which means that a previous ParseHandler
// invocation has registered the followup route for the current state.
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
// In this example, the nextStatus ParseHandler will not have to specify
// a route explicitly, but otherStatus will be used implicitly after
// the nextStatus function has returned.
//
// * An expectation is registered by the ParseHandler.
// For example: p.Expects("a cool thing")
// When the ParseHandler returns without having specified a route, this
// expectation is used to generate an "unexpected input" error message.
//
// When no routing decision is provided by a ParseHandler, then this is
// considered a bug in the state handler, and the parser will panic.
func (run *ParseRun) getNextParseHandler() (ParseHandler, bool) {
	// 1. A handler that was registered explicitly takes precedence.
	if next := run.p.nextState; next != nil {
		return next, true
	}

	// 2. Otherwise, fall back to a route that is waiting on the route stack.
	if len(run.p.routeStack) != 0 {
		return run.p.popRoute(), true
	}

	// 3. Otherwise, a registered expectation is turned into an
	// "unexpected input" report, and no handler is returned.
	if run.p.expecting != "" {
		run.p.UnexpectedInput()
		return nil, false
	}

	// 4. No routing decision at all: report the offending handler by name.
	handlerName := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
	panic(fmt.Sprintf("internal parser error: ParseHandler %s did not provide a routing decision", handlerName))
}
// invokeNextParseHandler moves the parser state to the provided state
// and invokes the ParseHandler function.
func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
	// Clear the explicit route and the expectation, so only what the
	// handler registers during this invocation is seen afterwards.
	run.p.nextState, run.p.expecting = nil, ""

	// Register the handler as the current state and run it.
	run.p.state = state
	state(run.p)
}
// Matcher is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Matcher can be instantiated using the parsekit.NewMatcher()
@ -198,12 +75,3 @@ func (m *Matcher) Execute(input string) (string, *Error) {
err := m.parser.Execute(input)
return m.match, err
}
// Parse checks for a match on the provided input data.
// It returns the value of the first parsed item, or an empty string
// together with the error value when no valid item was produced.
func (m *Matcher) Parse(input string) (string, *Error) {
	item, err, ok := m.parser.Parse(input).Next()
	if ok {
		return item.Value, nil
	}
	return "", err
}

View File

@ -9,16 +9,14 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
const TestItem parsekit.ItemType = 1
// Easy access to the parsekit definitions.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
type TokenHandlerTest struct {
input string
tokenHandler parsekit.TokenHandler
mustMatch bool
expected string
Input string
TokenHandler parsekit.TokenHandler
MustMatch bool
Expected string
}
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
@ -28,17 +26,40 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
}
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
output, err := parsekit.NewMatcher(test.tokenHandler, "a match").Execute(test.input)
if test.mustMatch {
output, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.input, err)
} else if output != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.input)
t.Errorf("Test %q failed: should not match, but it did", test.Input)
}
}
}
// PanicTest describes a single panic expectation: a function that must
// panic when called, and the exact panic value that it must panic with.
type PanicTest struct {
	function func()
	expected string
}

// RunPanicTest calls the function from the provided PanicTest and reports
// a test error when the function does not panic, or when it panics with a
// value that differs from the expected panic message.
func RunPanicTest(t *testing.T, p PanicTest) {
	t.Helper()
	defer func() {
		// Mark the deferred closure as a helper too, so failures are
		// attributed to the calling test instead of to this file.
		t.Helper()
		r := recover()
		if r == nil {
			t.Errorf("Function did not panic (expected panic message: %s)", p.expected)
			return
		}
		// The recovered value is compared as-is: only a string panic value
		// can be equal to the expected message.
		if r != p.expected {
			t.Errorf("Function did panic, but unexpected panic message received:\nexpected: %q\nactual: %q\n", p.expected, r)
		}
	}()
	p.function()
}

// RunPanicTests runs RunPanicTest for every PanicTest in the test set.
func RunPanicTests(t *testing.T, testSet []PanicTest) {
	t.Helper()
	for _, test := range testSet {
		RunPanicTest(t, test)
	}
}

View File

@ -155,7 +155,7 @@ func (t *TokenAPI) checkAllowedCall(name string) {
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
}
if !t.currRune.OK {
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
}
}

75
tokenhandler_test.go Normal file
View File

@ -0,0 +1,75 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput checks that
// accepted runes end up in the Matcher output, while skipped runes do not.
func TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput(t *testing.T) {
	parser := parsekit.NewMatcher(func(api *parsekit.TokenAPI) bool {
		// The input consists of 33 pairs of runes: a rune from the expected
		// output (accepted), followed by a filler 'x' (skipped).
		for i := 0; i < 33; i++ {
			api.NextRune()
			api.Accept()
			api.NextRune()
			api.Skip()
		}
		return true
	}, "test")
	output, err := parser.Execute("Txhxixsx xsxhxoxuxlxdx xbxexcxoxmxex xqxuxixtxex xrxexaxdxaxbxlxex")
	// Do not silently ignore a failed execution: report it explicitly,
	// instead of only failing on the resulting output mismatch.
	if err != nil {
		t.Fatalf("Got unexpected error from TokenHandler: %s", err)
	}
	if output != "This should become quite readable" {
		t.Fatalf("Got unexpected output from TokenHandler: %s", output)
	}
}
// TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics checks
// that reading a next rune panics when the previously read rune was neither
// accepted nor skipped.
func TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics(t *testing.T) {
	readTwice := func(api *parsekit.TokenAPI) bool {
		api.NextRune()
		api.NextRune()
		return false
	}
	matcher := parsekit.NewMatcher(readTwice, "test")
	execute := func() { matcher.Execute("input string") }
	RunPanicTest(t, PanicTest{execute,
		"internal Matcher error: NextRune() was called without accepting or skipping the previously read rune"})
}
// TestGivenNextRuneNotCalled_CallToAcceptPanics checks that Accept() panics
// when no rune was read through NextRune() first.
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
	acceptOnly := func(api *parsekit.TokenAPI) bool {
		api.Accept()
		return false
	}
	matcher := parsekit.NewMatcher(acceptOnly, "test")
	execute := func() { matcher.Execute("input string") }
	RunPanicTest(t, PanicTest{execute,
		"internal Matcher error: Accept() was called without a prior call to NextRune()"})
}
// TestGivenNextRuneNotCalled_CallToSkipPanics checks that Skip() panics
// when no rune was read through NextRune() first.
func TestGivenNextRuneNotCalled_CallToSkipPanics(t *testing.T) {
	skipOnly := func(api *parsekit.TokenAPI) bool {
		api.Skip()
		return false
	}
	matcher := parsekit.NewMatcher(skipOnly, "test")
	execute := func() { matcher.Execute("input string") }
	RunPanicTest(t, PanicTest{execute,
		"internal Matcher error: Skip() was called without a prior call to NextRune()"})
}
// TestGivenNextRuneReturningNotOk_CallToAcceptPanics checks that Accept()
// panics when the preceding NextRune() call did not return an OK rune.
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
	readAndAccept := func(api *parsekit.TokenAPI) bool {
		api.NextRune()
		api.Accept()
		return false
	}
	matcher := parsekit.NewMatcher(readAndAccept, "test")
	// The input is a single byte that is not valid UTF-8 on its own.
	execute := func() { matcher.Execute("\xcd") }
	RunPanicTest(t, PanicTest{execute,
		"internal Matcher error: Accept() was called, but prior call to NextRune() did not return OK (EOF or invalid rune)"})
}
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
RunPanicTest(t, PanicTest{
func() {
a := parsekit.TokenAPI{}
a.Merge()
},
"internal parser error: Cannot call Merge a a non-forked MatchDialog",
})
}

View File

@ -93,10 +93,10 @@ func MatchRunes(expected ...rune) TokenHandler {
//
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRuneRange(start rune, end rune) TokenHandler {
if end < start {
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
}
return func(t *TokenAPI) bool {
if end < start {
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
}
input, ok := t.NextRune()
if ok && input >= start && input <= end {
t.Accept()
@ -202,14 +202,17 @@ func MatchNot(handler TokenHandler) TokenHandler {
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input.
func MatchRep(times int, handler TokenHandler) TokenHandler {
return matchMinMax(times, times, handler)
return matchMinMax(times, times, handler, "MatchRep")
}
// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
// applied at least the provided minimum number of times.
// When more matches are possible, these will be included in the output.
func MatchMin(min int, handler TokenHandler) TokenHandler {
return matchMinMax(min, -1, handler)
if min < 0 {
panic("internal parser error: MatchMin definition error: min must be >= 0")
}
return matchMinMax(min, -1, handler, "MatchMin")
}
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
@ -217,20 +220,23 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
// When more matches are possible, these will be included in the output.
// Zero matches are considered a successful match.
func MatchMax(max int, handler TokenHandler) TokenHandler {
return matchMinMax(0, max, handler)
if max < 0 {
panic("internal parser error: MatchMax definition error: max must be >= 0")
}
return matchMinMax(0, max, handler, "MatchMax")
}
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
// be applied zero or more times. All matches will be included in the output.
// Zero matches are considered a successful match.
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
	// min 0 and max -1: no required matches and no upper bound.
	// The last argument is the name of this function, which matchMinMax
	// uses in its definition error panic message.
	return matchMinMax(0, -1, handler, "MatchZeroOrMore")
}
// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
// be applied one or more times. All matches will be included in the output.
func MatchOneOrMore(handler TokenHandler) TokenHandler {
return matchMinMax(1, -1, handler)
return matchMinMax(1, -1, handler, "MatchOneOrMore")
}
// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
@ -238,20 +244,20 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler {
// inclusive. All matches will be included in the output.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
if max < 0 {
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
panic("internal parser error: MatchMinMax definition error: max must be >= 0")
}
if min < 0 {
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
panic("internal parser error: MatchMinMax definition error: min must be >= 0")
}
return matchMinMax(min, max, handler)
return matchMinMax(min, max, handler, "MatchMinMax")
}
func matchMinMax(min int, max int, handler TokenHandler) TokenHandler {
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
if max >= 0 && min > max {
panic(fmt.Sprintf("internal parser error: %s definition error: max %d must not be < min %d", name, max, min))
}
return func(t *TokenAPI) bool {
child := t.Fork()
if max >= 0 && min > max {
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
}
total := 0
// Check for the minimum required amount of matches.
for total < min {

View File

@ -78,6 +78,23 @@ func TestCombinators(t *testing.T) {
})
}
func TestCombinatorPanics(t *testing.T) {
RunPanicTests(t, []PanicTest{
{func() { parsekit.C.RuneRange('z', 'a') },
"internal parser error: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
{func() { parsekit.C.MinMax(-1, 1, parsekit.A.Space) },
"internal parser error: MatchMinMax definition error: min must be >= 0"},
{func() { parsekit.C.MinMax(1, -1, parsekit.A.Space) },
"internal parser error: MatchMinMax definition error: max must be >= 0"},
{func() { parsekit.C.MinMax(10, 5, parsekit.A.Space) },
"internal parser error: MatchMinMax definition error: max 5 must not be < min 10"},
{func() { parsekit.C.Min(-10, parsekit.A.Space) },
"internal parser error: MatchMin definition error: min must be >= 0"},
{func() { parsekit.C.Max(-42, parsekit.A.Space) },
"internal parser error: MatchMax definition error: max must be >= 0"},
})
}
func TestAtoms(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{
{"", a.EndOfFile, true, ""},