diff --git a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index 3d4ed1b..09daabc 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -19,8 +19,8 @@ func Example_helloWorldUsingTokenizer() { for i, input := range []string{ "Hello, world!", "HELLO ,Johnny!", - "hello , Bob123!", - "hello Pizza!", + "hello , Bob123!", + "hello Pizza!", "Oh no!", "Hello, world", "Hello,!", @@ -35,8 +35,8 @@ func Example_helloWorldUsingTokenizer() { // Output: // [0] Input: "Hello, world!" Output: world // [1] Input: "HELLO ,Johnny!" Output: Johnny - // [2] Input: "hello , Bob123!" Output: Bob123 - // [3] Input: "hello Pizza!" Output: Pizza + // [2] Input: "hello , Bob123!" Output: Bob123 + // [3] Input: "hello Pizza!" Output: Pizza // [4] Input: "Oh no!" Error: mismatch at start of file // [5] Input: "Hello, world" Error: mismatch at start of file // [6] Input: "Hello,!" Error: mismatch at start of file @@ -54,8 +54,8 @@ func createHelloTokenizer() tokenize.Func { // that does all the work. The 'greeting' Handler matches the whole input and // drops all but the name from it. hello := a.StrNoCase("hello") - comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank)) - separator := c.Any(comma, a.Blank) + comma := c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks)) + separator := c.Any(comma, a.Blanks) name := c.OneOrMore(c.Not(a.Excl)) greeting := m.Drop(hello). Then(m.Drop(separator)). 
diff --git a/examples/example_helloSingleStateParser_test.go b/examples/example_helloSingleStateParser_test.go index 722287c..5a8b218 100644 --- a/examples/example_helloSingleStateParser_test.go +++ b/examples/example_helloSingleStateParser_test.go @@ -85,11 +85,11 @@ func (h *helloparser2) start(p *parse.API) { p.Error("the greeting is not being friendly") return } - if !p.Accept(c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))) { + if !p.Accept(c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks))) { p.Error("the greeting is not properly separated") return } - if p.Accept(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))) { + if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) { h.greetee = p.Result().String() if h.greetee == "" { p.Error("the name cannot be empty") diff --git a/go.sum b/go.sum index 4347755..e69de29 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +0,0 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/parse/api.go b/parse/api.go index 9af8792..ab0a3f8 100644 --- a/parse/api.go +++ b/parse/api.go @@ -63,9 +63,9 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool { func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) { p.panicWhenStoppedOrInError(name) - p.checkForLoops() + p.checkForLoops(name) if tokenHandler == nil { - callerPanic(2, "parsekit.parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, 
name) + callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") } p.result = nil @@ -84,7 +84,7 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokeni // that clean routes are followed. You can consider this check a runtime // unit test. func (p *API) panicWhenStoppedOrInError(name string) { - if !p.isStoppedOrInError() { + if !p.IsStoppedOrInError() { return } @@ -92,12 +92,14 @@ func (p *API) panicWhenStoppedOrInError(name string) { if p.stopped { after = "Stop()" } - - callerPanic(2, "parsekit.parse.API.%s(): Illegal call to %s() at {caller}: "+ - "no calls allowed after API.%s", name, name, after) + callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+ + "no calls allowed after API.%s", after) } -func (p *API) isStoppedOrInError() bool { +// IsStoppedOrInError checks if the parser has stopped or if an error was set. +// When true, then the parser can no longer continue. If your parser tries to +// call parse.API methods when true is returned, this will result in a panic. +func (p *API) IsStoppedOrInError() bool { return p.stopped || p.err != nil } @@ -112,10 +114,10 @@ func (p *API) initLoopCheck() { // checkForLoops checks if the line of code from which Accept() or Peek() // was called has been seen before for the current read cursor position. // If yes, then the parser is in a loop and the method will panic. 
-func (p *API) checkForLoops() { +func (p *API) checkForLoops(name string) { filepos := callerFilepos(3) if _, ok := p.loopCheck[filepos]; ok { - callerPanic(3, "parsekit.parse.API: Loop detected in parser at {caller}") + callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}") } p.loopCheck[filepos] = true } @@ -128,33 +130,42 @@ func (p *API) checkForLoops() { func (p *API) Result() *tokenize.Result { result := p.result if p.result == nil { - callerPanic(1, "parsekit.parse.API.Result(): Result() called "+ + callerPanic("Result", "parsekit.parse.API.{name}(): {name}() called "+ "at {caller} without calling API.Peek() or API.Accept() on beforehand") } return result } -// Handle executes another parse.Handler function from within the active +// Handle executes other parse.Handler functions from within the active // parse.Handler function. // // The boolean return value is true when the parser can still continue. // It will be false when either an error was set using Error(), or the // parser was stopped using Stop(). // +// When multiple parse.Handler functions are provided as arguments, they +// will be executed in the provided order. When one of those handlers stops +// the parser or sets an error, then the following handlers will not be called. +// // Instead of calling another handler using this method, you can also call // that other handler directly. However, it is generally advised to make use // of this method, because it performs some sanity checks and it will return // an easy to use boolean indicating whether the parser can continue or not. 
-func (p *API) Handle(parseHandler Handler) bool { +func (p *API) Handle(parseHandler ...Handler) bool { p.panicWhenStoppedOrInError("Handle") - p.panicWhenHandlerNil(parseHandler) - parseHandler(p) - return !p.isStoppedOrInError() + for _, handler := range parseHandler { + p.panicWhenHandlerNil("Handle", handler) + handler(p) + if p.IsStoppedOrInError() { + return false + } + } + return true } -func (p *API) panicWhenHandlerNil(parseHandler Handler) { +func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) { if parseHandler == nil { - callerPanic(2, "parsekit.parse.API.Handle(): Handle() called with nil input at {caller}") + callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}") } } diff --git a/parse/callerinfo.go b/parse/callerinfo.go index a9a9d0f..bf06282 100644 --- a/parse/callerinfo.go +++ b/parse/callerinfo.go @@ -6,15 +6,34 @@ import ( "strings" ) +func callerBefore(name string) string { + found := false + for i := 1; ; i++ { + pc, file, line, ok := runtime.Caller(i) + if found { + return fmt.Sprintf("%s:%d", file, line) + } + if !ok { + return "unknown caller" + } + f := runtime.FuncForPC(pc) + + if strings.HasSuffix(f.Name(), "."+name) { + found = true + } + } +} + func callerFilepos(depth int) string { // No error handling, because we call this method ourselves with safe depth values. _, file, line, _ := runtime.Caller(depth + 1) return fmt.Sprintf("%s:%d", file, line) } -func callerPanic(depth int, f string, args ...interface{}) { - filepos := callerFilepos(depth + 1) +func callerPanic(name, f string, args ...interface{}) { + filepos := callerBefore(name) m := fmt.Sprintf(f, args...) 
- m = strings.Replace(m, "{caller}", filepos, 1) + m = strings.Replace(m, "{caller}", filepos, -1) + m = strings.Replace(m, "{name}", name, -1) panic(m) } diff --git a/parse/parse.go b/parse/parse.go index d5d05f8..3e157e6 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -27,7 +27,7 @@ type Func func(interface{}) error // look at the documentation for parsekit.read.New(). func New(startHandler Handler) Func { if startHandler == nil { - callerPanic(1, "parsekit.parse.New(): New() called with nil input at {caller}") + callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}") } return func(input interface{}) error { api := &API{ diff --git a/parse/parse_test.go b/parse/parse_test.go index 40b8a95..b28c23d 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -307,7 +307,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { parse.AssertPanic(t, parse.PanicT{ Function: func() { parser("Het houdt niet op, niet vanzelf") }, Regexp: true, - Expect: `parsekit\.parse\.API: Loop detected in parser at /.*/parse_test.go:\d+`}) + Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`}) } // This test incorporates an actual loop bug that I dropped on myself and @@ -333,5 +333,5 @@ func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { parse.AssertPanic(t, parse.PanicT{ Function: func() { parser("This will end soon") }, Regexp: true, - Expect: `parsekit\.parse\.API: Loop detected in parser at .*/parse_test.go:\d+`}) + Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`}) } diff --git a/read/read_test.go b/read/read_test.go index 06468ca..de143f2 100644 --- a/read/read_test.go +++ b/read/read_test.go @@ -12,14 +12,29 @@ import ( ) func ExampleNew() { - r := read.New(strings.NewReader("Hello, world!")) - at := func(i int) rune { r, _ := r.RuneAt(i); return r } + printFirstRuneOf := func(input interface{}) { + r := read.New(input) + c, 
_ := r.RuneAt(0) + fmt.Printf("%q\n", c) + } - fmt.Printf("%c", at(0)) - fmt.Printf("%c", at(12)) + simpleString := "Hello, world!" + printFirstRuneOf(simpleString) + + ioReaderImplementation := strings.NewReader("Good bye, world!") + printFirstRuneOf(ioReaderImplementation) + + bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?")) + printFirstRuneOf(bufioReaderPointer) + + bufioReaderValue := *(bufio.NewReader(strings.NewReader("Where do we go, world?"))) + printFirstRuneOf(bufioReaderValue) // Output: - // H! + // 'H' + // 'G' + // 'W' + // 'W' } func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { diff --git a/tokenize/api.go b/tokenize/api.go index 8b6ce9c..3ff37a3 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -98,7 +98,7 @@ func NewAPI(input interface{}) *API { // built-in unit test, enforcing correct serialization of API method calls. func (i *API) NextRune() (rune, error) { if i.result.lastRune != nil { - callerPanic(1, "tokenize.API.NextRune(): NextRune() called at {caller} "+ + callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+ "without a prior call to Accept()") } i.detachChild() @@ -115,9 +115,9 @@ func (i *API) NextRune() (rune, error) { // returned an error. Calling Accept() in such case will result in a panic. 
func (i *API) Accept() { if i.result.lastRune == nil { - callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller} without first calling NextRune()") + callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()") } else if i.result.lastRune.err != nil { - callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") + callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed") } i.result.runes = append(i.result.runes, i.result.lastRune.r) i.result.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r)) @@ -168,7 +168,7 @@ func (i *API) Fork() *API { // This allows a child to feed results in chunks to its parent. func (i *API) Merge() { if i.parent == nil { - callerPanic(1, "tokenize.API.Merge(): Merge() called at {caller} on a non-forked API") + callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on a non-forked API") } i.addResultsToParent() i.syncCursorTo(i.parent) diff --git a/tokenize/callerinfo.go b/tokenize/callerinfo.go index 604dfb2..adf650e 100644 --- a/tokenize/callerinfo.go +++ b/tokenize/callerinfo.go @@ -6,15 +6,28 @@ import ( "strings" ) -func callerPanic(depth int, f string, args ...interface{}) { - filepos := callerFilepos(depth + 1) +func callerPanic(name, f string, args ...interface{}) { + filepos := callerBefore(name) m := fmt.Sprintf(f, args...) - m = strings.Replace(m, "{caller}", filepos, 1) + m = strings.Replace(m, "{caller}", filepos, -1) + m = strings.Replace(m, "{name}", name, -1) panic(m) } -func callerFilepos(depth int) string { - // No error handling, because we call this method ourselves with safe depth values. 
- _, file, line, _ := runtime.Caller(depth + 1) - return fmt.Sprintf("%s:%d", file, line) +func callerBefore(name string) string { + found := false + for i := 1; ; i++ { + pc, file, line, ok := runtime.Caller(i) + if found { + return fmt.Sprintf("%s:%d", file, line) + } + if !ok { + return "unknown caller" + } + f := runtime.FuncForPC(pc) + + if strings.HasSuffix(f.Name(), "."+name) { + found = true + } + } } diff --git a/tokenize/handler.go b/tokenize/handler.go index 3a12708..f3e2b77 100644 --- a/tokenize/handler.go +++ b/tokenize/handler.go @@ -45,3 +45,9 @@ func (handler Handler) SeparatedBy(separatorHandler Handler) Handler { func (handler Handler) Optional() Handler { return MatchOpt(handler) } + +// Except is syntactic sugar that allows you to write a construction like +// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler). +func (handler Handler) Except(exceptHandler Handler) Handler { + return MatchExcept(handler, exceptHandler) +} diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go index ad22c3b..700ad64 100644 --- a/tokenize/handler_test.go +++ b/tokenize/handler_test.go @@ -74,7 +74,7 @@ func ExampleHandler_Optional() { spanish := c.Seq( a.Rune('¿').Optional(), - c.OneOrMore(c.Except(a.Question, a.AnyRune)), + c.OneOrMore(a.AnyRune.Except(a.Question)), a.Rune('?').Optional()) fmt.Println(spanish.Match("¿Habla español María?")) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index d01c136..cbf7ffc 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -70,6 +70,7 @@ var A = struct { EndOfFile Handler AnyRune Handler ValidRune Handler + InvalidRune Handler Space Handler Tab Handler CR Handler @@ -152,6 +153,7 @@ var A = struct { EndOfFile: MatchEndOfFile(), AnyRune: MatchAnyRune(), ValidRune: MatchValidRune(), + InvalidRune: MatchInvalidRune(), Space: MatchRune(' '), Tab: MatchRune('\t'), CR: MatchRune('\r'), @@ -332,7 +334,7 @@ func MatchRunes(expected ...rune) Handler { // creates a Handler that
will match any of 'g', 'h', 'i', 'j' or 'k'. func MatchRuneRange(start rune, end rune) Handler { if end < start { - callerPanic(1, "Handler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end) + callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) } return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end }) } @@ -485,7 +487,7 @@ func MatchRep(times int, handler Handler) Handler { // When more matches are possible, these will be included in the output. func MatchMin(min int, handler Handler) Handler { if min < 0 { - callerPanic(1, "Handler: MatchMin definition error at {caller}: min must be >= 0") + callerPanic("MatchMin", "Handler: {name} definition error at {caller}: min must be >= 0") } return matchMinMax(min, -1, handler, "MatchMin") } @@ -496,7 +498,7 @@ func MatchMin(min int, handler Handler) Handler { // Zero matches are considered a successful match. func MatchMax(max int, handler Handler) Handler { if max < 0 { - callerPanic(1, "Handler: MatchMax definition error at {caller}: max must be >= 0") + callerPanic("MatchMax", "Handler: {name} definition error at {caller}: max must be >= 0") } return matchMinMax(0, max, handler, "MatchMax") } @@ -519,17 +521,17 @@ func MatchOneOrMore(handler Handler) Handler { // inclusive. All matches will be included in the output. 
func MatchMinMax(min int, max int, handler Handler) Handler { if max < 0 { - callerPanic(1, "Handler: MatchMinMax definition error at {caller}: max must be >= 0") + callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: max must be >= 0") } if min < 0 { - callerPanic(1, "Handler: MatchMinMax definition error at {caller}: min must be >= 0") + callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: min must be >= 0") } return matchMinMax(min, max, handler, "MatchMinMax") } func matchMinMax(min int, max int, handler Handler, name string) Handler { if max >= 0 && min > max { - callerPanic(2, "Handler: %s definition error at {caller}: max %d must not be < min %d", name, max, min) + callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min) } return func(t *API) bool { total := 0 @@ -569,7 +571,7 @@ func MatchSeparated(separator Handler, separated Handler) Handler { // applied to the upcoming input. It also checks if the except Handler can be // applied. If the handler applies, but the except Handler as well, then the match // as a whole will be treated as a mismatch. -func MatchExcept(except Handler, handler Handler) Handler { +func MatchExcept(handler Handler, except Handler) Handler { return func(t *API) bool { if except(t.Fork()) { return false @@ -594,7 +596,7 @@ func MatchSigned(handler Handler) Handler { // ranging from -9223372036854775808 to 9223372036854775807. 
func MatchIntegerBetween(min int64, max int64) Handler { if max < min { - callerPanic(1, "Handler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min) + callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min) } digits := MatchSigned(MatchDigits()) return func(t *API) bool { @@ -647,6 +649,19 @@ func MatchValidRune() Handler { } } +// MatchInvalidRune creates a Handler function that checks if an invalid +// UTF8 rune can be read from the input. +func MatchInvalidRune() Handler { + return func(t *API) bool { + r, err := t.NextRune() + if err == nil && r == utf8.RuneError { + t.Accept() + return true + } + return false + } +} + // MatchDigit creates a Handler that checks if a single digit can be read // from the input. func MatchDigit() Handler { @@ -935,19 +950,25 @@ func MatchIPv6Net(normalize bool) Handler { // even though we would have dropped the output anyway. So if you would like // to drop optional blanks (spaces and tabs), then use something like: // -// M.Drop(C.Opt(A.Blank)) +// M.Drop(C.Opt(A.Blanks)) // // instead of: // -// M.Drop(A.Blank) +// M.Drop(A.Blanks) // // Since A.Blanks is defined as "1 or more spaces and/or tabs", the input // string "bork" would not match against the second form, but " bork" would. // In both cases, it would match the first form. func ModifyDrop(handler Handler) Handler { - return ModifyByCallback(handler, func(s string) string { - return "" - }) + return func(t *API) bool { + child := t.Fork() + if handler(child) { + child.Reset() + child.Merge() + return true + } + return false + } } // ModifyTrim creates a Handler that checks if the provided Handler applies. diff --git a/tokenize/result.go b/tokenize/result.go index 26f9476..a91ee0c 100644 --- a/tokenize/result.go +++ b/tokenize/result.go @@ -70,15 +70,15 @@ func (r *Result) ClearRunes() { // SetRunes replaces the Runes from the Result with the provided input. 
func (r *Result) SetRunes(s ...interface{}) { r.ClearRunes() - r.addRunes(s...) + r.addRunes("SetRunes", s...) } // AddRunes is used to add runes to the Result. func (r *Result) AddRunes(set ...interface{}) { - r.addRunes(set...) + r.addRunes("AddRunes", set...) } -func (r *Result) addRunes(set ...interface{}) { +func (r *Result) addRunes(name string, set ...interface{}) { for _, s := range set { switch s := s.(type) { case string: @@ -88,7 +88,7 @@ func (r *Result) addRunes(set ...interface{}) { case rune: r.runes = append(r.runes, s) default: - callerPanic(2, "tokenize.Result.AddRunes(): unsupported type '%T' used at {caller}", s) + callerPanic(name, "tokenize.Result.{name}(): unsupported type '%T' used at {caller}", s) } } } diff --git a/tokenize/result_test.go b/tokenize/result_test.go index c810d6e..77a371d 100644 --- a/tokenize/result_test.go +++ b/tokenize/result_test.go @@ -57,6 +57,6 @@ func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { i.Result().SetRunes(1234567) }, Regexp: true, - Expect: `tokenize\.Result\.AddRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`, + Expect: `tokenize\.Result\.SetRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`, }) } diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go index 7641b32..50b5104 100644 --- a/tokenize/tokenize.go +++ b/tokenize/tokenize.go @@ -1,5 +1,5 @@ -// Package tokenize provides tooling to build a tokenizer in a combinator/parser-style -// that is used to feed data to the parser. +// Package tokenize provides tooling to build a tokenizer in +// parser/combinator-style, used to feed data to the parser. package tokenize import (