Simplified some internal code, which also fixes a bug that prevented correct error reporting from within parsekit in various edge cases.

This commit is contained in:
Maurice Makaay 2019-06-17 13:59:31 +00:00
parent cdfc4ce52c
commit 99654c2f9e
16 changed files with 153 additions and 75 deletions

View File

@ -19,8 +19,8 @@ func Example_helloWorldUsingTokenizer() {
for i, input := range []string{
"Hello, world!",
"HELLO ,Johnny!",
"hello , Bob123!",
"hello Pizza!",
"hello , Bob123!",
"hello Pizza!",
"Oh no!",
"Hello, world",
"Hello,!",
@ -35,8 +35,8 @@ func Example_helloWorldUsingTokenizer() {
// Output:
// [0] Input: "Hello, world!" Output: world
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: mismatch at start of file
// [5] Input: "Hello, world" Error: mismatch at start of file
// [6] Input: "Hello,!" Error: mismatch at start of file
@ -54,8 +54,8 @@ func createHelloTokenizer() tokenize.Func {
// that does all the work. The 'greeting' Handler matches the whole input and
// drops all but the name from it.
hello := a.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
separator := c.Any(comma, a.Blank)
comma := c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks))
separator := c.Any(comma, a.Blanks)
name := c.OneOrMore(c.Not(a.Excl))
greeting := m.Drop(hello).
Then(m.Drop(separator)).

View File

@ -85,11 +85,11 @@ func (h *helloparser2) start(p *parse.API) {
p.Error("the greeting is not being friendly")
return
}
if !p.Accept(c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))) {
if !p.Accept(c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks))) {
p.Error("the greeting is not properly separated")
return
}
if p.Accept(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))) {
if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
h.greetee = p.Result().String()
if h.greetee == "" {
p.Error("the name cannot be empty")

7
go.sum
View File

@ -1,7 +0,0 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

View File

@ -63,9 +63,9 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) {
p.panicWhenStoppedOrInError(name)
p.checkForLoops()
p.checkForLoops(name)
if tokenHandler == nil {
callerPanic(2, "parsekit.parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
}
p.result = nil
@ -84,7 +84,7 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokeni
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *API) panicWhenStoppedOrInError(name string) {
if !p.isStoppedOrInError() {
if !p.IsStoppedOrInError() {
return
}
@ -92,12 +92,14 @@ func (p *API) panicWhenStoppedOrInError(name string) {
if p.stopped {
after = "Stop()"
}
callerPanic(2, "parsekit.parse.API.%s(): Illegal call to %s() at {caller}: "+
"no calls allowed after API.%s", name, name, after)
callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+
"no calls allowed after API.%s", after)
}
func (p *API) isStoppedOrInError() bool {
// IsStoppedOrInError reports whether the parser can no longer continue,
// which is the case once it has been stopped or once an error has been set.
// Calling further parse.API methods after this returns true results in a
// panic, so parsers should check this before continuing.
func (p *API) IsStoppedOrInError() bool {
	if p.stopped {
		return true
	}
	return p.err != nil
}
@ -112,10 +114,10 @@ func (p *API) initLoopCheck() {
// checkForLoops checks if the line of code from which Accept() or Peek()
// was called has been seen before for the current read cursor position.
// If yes, then the parser is in a loop and the method will panic.
func (p *API) checkForLoops() {
func (p *API) checkForLoops(name string) {
filepos := callerFilepos(3)
if _, ok := p.loopCheck[filepos]; ok {
callerPanic(3, "parsekit.parse.API: Loop detected in parser at {caller}")
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
@ -128,33 +130,42 @@ func (p *API) checkForLoops() {
func (p *API) Result() *tokenize.Result {
result := p.result
if p.result == nil {
callerPanic(1, "parsekit.parse.API.Result(): Result() called "+
callerPanic("Result", "parsekit.parse.API.{name}(): {name}() called "+
"at {caller} without calling API.Peek() or API.Accept() on beforehand")
}
return result
}
// Handle executes another parse.Handler function from within the active
// Handle executes other parse.Handler functions from within the active
// parse.Handler function.
//
// The boolean return value is true when the parser can still continue.
// It will be false when either an error was set using Error(), or the
// parser was stopped using Stop().
//
// When multiple parse.Handler functions are provided as arguments, they
// will be executed in the provided order. When one of those handlers stops
// the parser or sets an error, then the following handlers will not be called.
//
// Instead of calling another handler using this method, you can also call
// that other handler directly. However, it is generally advised to make use
// of this method, because it performs some sanity checks and it will return
// an easy to use boolean indicating whether the parser can continue or not.
func (p *API) Handle(parseHandler Handler) bool {
func (p *API) Handle(parseHandler ...Handler) bool {
p.panicWhenStoppedOrInError("Handle")
p.panicWhenHandlerNil(parseHandler)
parseHandler(p)
return !p.isStoppedOrInError()
for _, handler := range parseHandler {
p.panicWhenHandlerNil("Handle", handler)
handler(p)
if p.IsStoppedOrInError() {
return false
}
}
return true
}
func (p *API) panicWhenHandlerNil(parseHandler Handler) {
func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) {
if parseHandler == nil {
callerPanic(2, "parsekit.parse.API.Handle(): Handle() called with nil input at {caller}")
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}")
}
}

View File

@ -6,15 +6,34 @@ import (
"strings"
)
// callerBefore walks up the call stack looking for the first function whose
// qualified name ends in "."+name, and returns the "file:line" position of
// the frame directly above it, i.e. the code that called that function.
//
// When the stack is exhausted before a matching function is found, or when
// the matching function has no frame above it, "unknown caller" is returned.
func callerBefore(name string) string {
	suffix := "." + name
	found := false
	for i := 1; ; i++ {
		pc, file, line, ok := runtime.Caller(i)
		// Check for the end of the stack before looking at 'found'. When the
		// match was on the outermost frame, file and line are empty here and
		// would otherwise be reported as the meaningless position ":0".
		if !ok {
			return "unknown caller"
		}
		if found {
			return fmt.Sprintf("%s:%d", file, line)
		}
		f := runtime.FuncForPC(pc)
		if strings.HasSuffix(f.Name(), suffix) {
			found = true
		}
	}
}
// callerFilepos returns the "file:line" position of the stack frame that is
// depth levels above the function that calls callerFilepos.
func callerFilepos(depth int) string {
	// Skip one extra frame to account for callerFilepos itself.
	skip := depth + 1
	// The ok result is ignored on purpose: this helper is only called from
	// within the package, using depth values that are known to be safe.
	_, sourceFile, lineNumber, _ := runtime.Caller(skip)
	return fmt.Sprintf("%s:%d", sourceFile, lineNumber)
}
func callerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
func callerPanic(name, f string, args ...interface{}) {
filepos := callerBefore(name)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
m = strings.Replace(m, "{caller}", filepos, -1)
m = strings.Replace(m, "{name}", name, -1)
panic(m)
}

View File

@ -27,7 +27,7 @@ type Func func(interface{}) error
// look at the documentation for parsekit.read.New().
func New(startHandler Handler) Func {
if startHandler == nil {
callerPanic(1, "parsekit.parse.New(): New() called with nil input at {caller}")
callerPanic("New", "parsekit.parse.{name}(): {name}() called with nil input at {caller}")
}
return func(input interface{}) error {
api := &API{

View File

@ -307,7 +307,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("Het houdt niet op, niet vanzelf") },
Regexp: true,
Expect: `parsekit\.parse\.API: Loop detected in parser at /.*/parse_test.go:\d+`})
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
@ -333,5 +333,5 @@ func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("This will end soon") },
Regexp: true,
Expect: `parsekit\.parse\.API: Loop detected in parser at .*/parse_test.go:\d+`})
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`})
}

View File

@ -12,14 +12,29 @@ import (
)
func ExampleNew() {
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
printFirstRuneOf := func(input interface{}) {
r := read.New(input)
c, _ := r.RuneAt(0)
fmt.Printf("%q\n", c)
}
fmt.Printf("%c", at(0))
fmt.Printf("%c", at(12))
simpleString := "Hello, world!"
printFirstRuneOf(simpleString)
ioReaderImplementation := strings.NewReader("Good bye, world!")
printFirstRuneOf(ioReaderImplementation)
bufioReaderPointer := bufio.NewReader(strings.NewReader("Where do we go, world?"))
printFirstRuneOf(bufioReaderPointer)
bufioReaderValue := *(bufio.NewReader(strings.NewReader("Where do we go, world?")))
printFirstRuneOf(bufioReaderValue)
// Output:
// H!
// 'H'
// 'G'
// 'W'
// 'W'
}
func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {

View File

@ -98,7 +98,7 @@ func NewAPI(input interface{}) *API {
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.result.lastRune != nil {
callerPanic(1, "tokenize.API.NextRune(): NextRune() called at {caller} "+
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"without a prior call to Accept()")
}
i.detachChild()
@ -115,9 +115,9 @@ func (i *API) NextRune() (rune, error) {
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if i.result.lastRune == nil {
callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller} without first calling NextRune()")
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.result.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
@ -168,7 +168,7 @@ func (i *API) Fork() *API {
// This allows a child to feed results in chunks to its parent.
func (i *API) Merge() {
if i.parent == nil {
callerPanic(1, "tokenize.API.Merge(): Merge() called at {caller} on a non-forked API")
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on a non-forked API")
}
i.addResultsToParent()
i.syncCursorTo(i.parent)

View File

@ -6,15 +6,28 @@ import (
"strings"
)
func callerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
func callerPanic(name, f string, args ...interface{}) {
filepos := callerBefore(name)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
m = strings.Replace(m, "{caller}", filepos, -1)
m = strings.Replace(m, "{name}", name, -1)
panic(m)
}
func callerFilepos(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line)
// callerBefore walks up the call stack looking for the first function whose
// qualified name ends in "."+name, and returns the "file:line" position of
// the frame directly above it, i.e. the code that called that function.
//
// When the stack is exhausted before a matching function is found, or when
// the matching function has no frame above it, "unknown caller" is returned.
func callerBefore(name string) string {
	suffix := "." + name
	found := false
	for i := 1; ; i++ {
		pc, file, line, ok := runtime.Caller(i)
		// Check for the end of the stack before looking at 'found'. When the
		// match was on the outermost frame, file and line are empty here and
		// would otherwise be reported as the meaningless position ":0".
		if !ok {
			return "unknown caller"
		}
		if found {
			return fmt.Sprintf("%s:%d", file, line)
		}
		f := runtime.FuncForPC(pc)
		if strings.HasSuffix(f.Name(), suffix) {
			found = true
		}
	}
}

View File

@ -45,3 +45,9 @@ func (handler Handler) SeparatedBy(separatorHandler Handler) Handler {
func (handler Handler) Optional() Handler {
return MatchOpt(handler)
}
// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
return MatchExcept(handler, exceptHandler)
}

View File

@ -74,7 +74,7 @@ func ExampleHandler_Optional() {
spanish := c.Seq(
a.Rune('¿').Optional(),
c.OneOrMore(c.Except(a.Question, a.AnyRune)),
c.OneOrMore(a.AnyRune.Except(a.Question)),
a.Rune('?').Optional())
fmt.Println(spanish.Match("¿Habla español María?"))

View File

@ -70,6 +70,7 @@ var A = struct {
EndOfFile Handler
AnyRune Handler
ValidRune Handler
InvalidRune Handler
Space Handler
Tab Handler
CR Handler
@ -152,6 +153,7 @@ var A = struct {
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
InvalidRune: MatchInvalidRune(),
Space: MatchRune(' '),
Tab: MatchRune('\t'),
CR: MatchRune('\r'),
@ -332,7 +334,7 @@ func MatchRunes(expected ...rune) Handler {
// creates a Handler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRuneRange(start rune, end rune) Handler {
if end < start {
callerPanic(1, "Handler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
}
return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end })
}
@ -485,7 +487,7 @@ func MatchRep(times int, handler Handler) Handler {
// When more matches are possible, these will be included in the output.
func MatchMin(min int, handler Handler) Handler {
if min < 0 {
callerPanic(1, "Handler: MatchMin definition error at {caller}: min must be >= 0")
callerPanic("MatchMin", "Handler: {name} definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, -1, handler, "MatchMin")
}
@ -496,7 +498,7 @@ func MatchMin(min int, handler Handler) Handler {
// Zero matches are considered a successful match.
func MatchMax(max int, handler Handler) Handler {
if max < 0 {
callerPanic(1, "Handler: MatchMax definition error at {caller}: max must be >= 0")
callerPanic("MatchMax", "Handler: {name} definition error at {caller}: max must be >= 0")
}
return matchMinMax(0, max, handler, "MatchMax")
}
@ -519,17 +521,17 @@ func MatchOneOrMore(handler Handler) Handler {
// inclusive. All matches will be included in the output.
func MatchMinMax(min int, max int, handler Handler) Handler {
if max < 0 {
callerPanic(1, "Handler: MatchMinMax definition error at {caller}: max must be >= 0")
callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: max must be >= 0")
}
if min < 0 {
callerPanic(1, "Handler: MatchMinMax definition error at {caller}: min must be >= 0")
callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, max, handler, "MatchMinMax")
}
func matchMinMax(min int, max int, handler Handler, name string) Handler {
if max >= 0 && min > max {
callerPanic(2, "Handler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
return func(t *API) bool {
total := 0
@ -569,7 +571,7 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
// applied to the upcoming input. It also checks if the except Handler can be
// applied. If the handler applies, but the except Handler as well, then the match
// as a whole will be treated as a mismatch.
func MatchExcept(except Handler, handler Handler) Handler {
func MatchExcept(handler Handler, except Handler) Handler {
return func(t *API) bool {
if except(t.Fork()) {
return false
@ -594,7 +596,7 @@ func MatchSigned(handler Handler) Handler {
// ranging from -9223372036854775808 to 9223372036854775807.
func MatchIntegerBetween(min int64, max int64) Handler {
if max < min {
callerPanic(1, "Handler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
digits := MatchSigned(MatchDigits())
return func(t *API) bool {
@ -647,6 +649,19 @@ func MatchValidRune() Handler {
}
}
// MatchInvalidRune creates a Handler that matches when the next rune can be
// read from the input without an error and that rune is utf8.RuneError, the
// value used to represent invalid UTF8 input. On a match, the rune is
// accepted; in all other cases the Handler reports a mismatch.
func MatchInvalidRune() Handler {
	return func(t *API) bool {
		next, err := t.NextRune()
		// Bail out on a read error or on any rune that is not the
		// replacement rune; nothing is accepted in that case.
		if err != nil || next != utf8.RuneError {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchDigit creates a Handler that checks if a single digit can be read
// from the input.
func MatchDigit() Handler {
@ -935,19 +950,25 @@ func MatchIPv6Net(normalize bool) Handler {
// even though we would have dropped the output anyway. So if you would like
// to drop optional blanks (spaces and tabs), then use something like:
//
// M.Drop(C.Opt(A.Blank))
// M.Drop(C.Opt(A.Blanks))
//
// instead of:
//
// M.Drop(A.Blank)
// M.Drop(A.Blanks)
//
// Since A.Blanks is defined as "1 or more spaces and/or tabs", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler Handler) Handler {
return ModifyByCallback(handler, func(s string) string {
return ""
})
return func(t *API) bool {
child := t.Fork()
if handler(child) {
child.Reset()
child.Merge()
return true
}
return false
}
}
// ModifyTrim creates a Handler that checks if the provided Handler applies.

View File

@ -70,15 +70,15 @@ func (r *Result) ClearRunes() {
// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s ...interface{}) {
r.ClearRunes()
r.addRunes(s...)
r.addRunes("SetRunes", s...)
}
// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(set ...interface{}) {
r.addRunes(set...)
r.addRunes("AddRunes", set...)
}
func (r *Result) addRunes(set ...interface{}) {
func (r *Result) addRunes(name string, set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
@ -88,7 +88,7 @@ func (r *Result) addRunes(set ...interface{}) {
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(2, "tokenize.Result.AddRunes(): unsupported type '%T' used at {caller}", s)
callerPanic(name, "tokenize.Result.{name}(): unsupported type '%T' used at {caller}", s)
}
}
}

View File

@ -57,6 +57,6 @@ func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
i.Result().SetRunes(1234567)
},
Regexp: true,
Expect: `tokenize\.Result\.AddRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
Expect: `tokenize\.Result\.SetRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
})
}

View File

@ -1,5 +1,5 @@
// Package tokenize provides tooling to build a tokenizer in a combinator/parser-style
// that is used to feed data to the parser.
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize
import (