From 6d92e1dc6866f3b7d32f2f4874f9bf88d16139d9 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 7 Jun 2019 07:56:24 +0000 Subject: [PATCH] Merged functionality of p.Expects(string) and p.UnexpectedInput(). It is now simply p.UnexpectedInput(string). This makes the naming of unexpected input not as magical, but explicit (which is a GoodThing). In one of the earlier incarnations of parsekit the implicit p.Expects() approach did make sense, but the package has since evolved in a direction where being explicit is more idiomatic. --- assertions_test.go | 4 +- examples/example_basiccalculator1_test.go | 6 +-- examples/example_basiccalculator2_test.go | 5 +-- examples/example_dutchpostcode_test.go | 2 +- examples/example_helloManyStateParser_test.go | 24 ++++++----- .../example_helloParserCombinator_test.go | 2 +- parseapi.go | 40 +++++-------------- parser.go | 9 ++--- parser_test.go | 3 +- tokenapi.go | 6 +-- tokenapi_example_test.go | 14 +++---- tokenhandler_test.go | 14 +++---- tokenhandlerresult.go | 2 + tokenhandlers_builtin_test.go | 5 ++- tokenizer.go | 8 +--- tokenizer_test.go | 6 +-- 16 files changed, 65 insertions(+), 85 deletions(-) diff --git a/assertions_test.go b/assertions_test.go index d05ab0e..76a9bd1 100644 --- a/assertions_test.go +++ b/assertions_test.go @@ -75,7 +75,7 @@ func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { } func AssertTokenHandler(t *testing.T, test TokenHandlerT) { - result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input) + result, err := NewTokenizer(test.TokenHandler).Execute(test.Input) if test.MustMatch { if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) @@ -102,7 +102,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { } func AssertTokenMaker(t *testing.T, test TokenMakerT) { - result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input) + result, err := NewTokenizer(test.TokenHandler).Execute(test.Input) if err != nil { t.Errorf("Test %q failed with error: %s", 
test.Input, err) } else { diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index ab69152..a2eb35d 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -75,10 +75,11 @@ var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitesp var int64Token = parsekit.T.Int64(nil, bareInteger) func (c *simpleCalculator) number(p *parsekit.ParseAPI) { - p.Expects("integer number") if p.On(int64Token).Accept() { c.Result += c.op * p.Result().Value(0).(int64) p.Handle(c.operatorOrEndOfFile) + } else { + p.UnexpectedInput("integer number") } } @@ -92,8 +93,7 @@ func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { c.op = -1 p.Handle(c.number) case !p.On(A.EndOfFile).Stay(): - p.Expects("operator, '+' or '-'") - p.UnexpectedInput() + p.UnexpectedInput("operator, '+' or '-'") default: p.ExpectEndOfFile() } diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index c85f195..dfd7ff5 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -140,12 +140,11 @@ func (c *calculator) factor(p *parsekit.ParseAPI) { return } if !p.On(A.RightParen).Skip() { - p.Expects("')'") - p.UnexpectedInput() + p.UnexpectedInput("')'") return } default: - p.UnexpectedInput() + p.UnexpectedInput("factor or (expression)") return } p.On(A.Whitespace).Skip() diff --git a/examples/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go index b464be3..dbd6fa4 100644 --- a/examples/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -70,5 +70,5 @@ func createPostcodeTokenizer() *parsekit.Tokenizer { // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. 
- return parsekit.NewTokenizer(postcode, "a Dutch postcode") + return parsekit.NewTokenizer(postcode) } diff --git a/examples/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go index bab4982..2c0be6c 100644 --- a/examples/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -80,51 +80,56 @@ func (h *helloparser1) Parse(input string) (string, *parsekit.Error) { func (h *helloparser1) start(p *parsekit.ParseAPI) { a := parsekit.A - p.Expects("hello") if p.On(a.StrNoCase("hello")).Skip() { p.Handle(h.comma) + } else { + p.UnexpectedInput("hello") } } func (h *helloparser1) comma(p *parsekit.ParseAPI) { a := parsekit.A - p.Expects("comma") switch { case p.On(a.Whitespace).Skip(): p.Handle(h.comma) case p.On(a.Comma).Skip(): p.Handle(h.startName) + default: + p.UnexpectedInput("comma") } } func (h *helloparser1) startName(p *parsekit.ParseAPI) { c, a := parsekit.C, parsekit.A - p.Expects("name") switch { case p.On(a.Whitespace).Skip(): p.Handle(h.startName) case p.On(c.Not(a.Excl)).Stay(): p.Handle(h.name) + default: + p.UnexpectedInput("name") } } func (h *helloparser1) name(p *parsekit.ParseAPI) { - c, a := parsekit.C, parsekit.A - p.Expects("name") + a := parsekit.A switch { - case p.On(c.Not(a.Excl)).Accept(): + case p.On(a.Excl).Skip(): + p.Handle(h.exclamation) + case p.On(a.AnyRune).Accept(): h.greetee += p.Result().String() p.Handle(h.name) default: - p.Handle(h.exclamation) + p.UnexpectedInput("name") } } func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { a := parsekit.A - p.Expects("exclamation") if p.On(a.Excl).Accept() { p.Handle(h.end) + } else { + p.UnexpectedInput("exclamation") } } @@ -134,8 +139,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { func (h *helloparser1) end(p *parsekit.ParseAPI) { var a = parsekit.A if !p.On(a.EndOfFile).Stay() { - p.Expects("end of greeting") - p.UnexpectedInput() + p.UnexpectedInput("end of greeting") return } diff --git 
a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index 5b06df3..76faa16 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -61,5 +61,5 @@ func createHelloTokenizer() *parsekit.Tokenizer { // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows // us to match some input against that handler. - return parsekit.NewTokenizer(greeting, "a friendly greeting") + return parsekit.NewTokenizer(greeting) } diff --git a/parseapi.go b/parseapi.go index 67dd50c..ffe7169 100644 --- a/parseapi.go +++ b/parseapi.go @@ -10,7 +10,6 @@ import ( type ParseAPI struct { tokenAPI *TokenAPI // the input reader loopCheck map[string]bool // used for parser loop detection - expecting string // a description of what the current state expects to find (see Expects()) result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept() err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored @@ -145,8 +144,9 @@ func (a *ParseAPIOnAction) Accept() bool { // When no match was found, then no action is taken and false is returned. func (a *ParseAPIOnAction) Skip() bool { if a.ok { - a.tokenAPI.syncCursor(a.tokenAPI.root) + a.parseAPI.result = nil a.tokenAPI.clearResults() + a.tokenAPI.syncCursorTo(a.tokenAPI.root) a.tokenAPI.detachChilds() a.flushReader() } @@ -208,19 +208,6 @@ func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { } } -// Expects is used to let a ParseHandler function describe what input it is -// expecting. This expectation is used in error messages to provide some -// context to them. -// -// When defining an expectation inside a ParseHandler, you do not need to -// handle unexpected input yourself. 
When the end of the parser is reached -// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(), -// an automatic error will be emitted using ParseAPI.UnexpectedInput(). -func (p *ParseAPI) Expects(description string) { - p.panicWhenStoppedOrInError() - p.expecting = description -} - // Stop is used by the parser impementation to tell the ParseAPI that it has // completed the parsing process successfully. // @@ -259,38 +246,33 @@ func (p *ParseAPI) ExpectEndOfFile() { if p.On(A.EndOfFile).Stay() { p.Stop() } else { - p.Expects("end of file") - p.UnexpectedInput() + p.UnexpectedInput("end of file") } } // UnexpectedInput is used to set an error that tells the user that some // unexpected input was encountered. // -// It can automatically produce an error message for a couple of situations: +// It automatically produces an error message for a couple of situations: // 1) the input simply didn't match the expectation // 2) the end of the input was reached // 3) there was an error while reading the input. -// -// The parser implementation can provide some feedback for this error by -// calling ParseAPI.Expects() to set the expectation. When set, the -// expectation is included in the error message. 
-func (p *ParseAPI) UnexpectedInput() { +func (p *ParseAPI) UnexpectedInput(expected string) { p.panicWhenStoppedOrInError() _, err := p.tokenAPI.NextRune() switch { case err == nil: - p.Error("unexpected input%s", fmtExpects(p)) + p.Error("unexpected input%s", fmtExpects(expected)) case err == io.EOF: - p.Error("unexpected end of file%s", fmtExpects(p)) + p.Error("unexpected end of file%s", fmtExpects(expected)) default: - p.Error("unexpected error '%s'%s", err, fmtExpects(p)) + p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) } } -func fmtExpects(p *ParseAPI) string { - if p.expecting == "" { +func fmtExpects(expected string) string { + if expected == "" { return "" } - return fmt.Sprintf(" (expected %s)", p.expecting) + return fmt.Sprintf(" (expected %s)", expected) } diff --git a/parser.go b/parser.go index 290cef9..e10cdf3 100644 --- a/parser.go +++ b/parser.go @@ -43,12 +43,9 @@ func (p *Parser) Execute(input string) *Error { if api.Handle(p.startHandler) { // Handle returned true, indicating that parsing could still continue. // There was no error and that the parsing has not actively been Stop()-ed. - // Let's try to make the best of it. - if api.expecting != "" { - api.UnexpectedInput() - } else { - api.ExpectEndOfFile() - } + // Let's assume that we actually reached the end of the parsing successfully + // and try to make the best of it. 
+ api.ExpectEndOfFile() } return api.err } diff --git a/parser_test.go b/parser_test.go index dadf644..74edc89 100644 --- a/parser_test.go +++ b/parser_test.go @@ -46,8 +46,7 @@ func ExampleParser_usingTokens() { func ExampleParseAPI_UnexpectedInput() { parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Expects("a thing") - p.UnexpectedInput() + p.UnexpectedInput("a thing") }) err := parser.Execute("Whatever, this parser will never be happy...") fmt.Println(err.Full()) diff --git a/tokenapi.go b/tokenapi.go index 327a2fa..bc6b359 100644 --- a/tokenapi.go +++ b/tokenapi.go @@ -141,7 +141,7 @@ func (i *TokenAPI) Fork() *TokenAPI { parent: i, } child.result = newTokenHandlerResult() - i.syncCursor(child) + i.syncCursorTo(child) i.child = child return child } @@ -159,7 +159,7 @@ func (i *TokenAPI) Merge() { callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") } i.addResultsToParent() - i.syncCursor(i.parent) + i.syncCursorTo(i.parent) i.clearResults() i.detachChilds() } @@ -169,7 +169,7 @@ func (i *TokenAPI) addResultsToParent() { i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...) } -func (i *TokenAPI) syncCursor(to *TokenAPI) { +func (i *TokenAPI) syncCursorTo(to *TokenAPI) { to.result.offset = i.result.offset *to.result.cursor = *i.result.cursor } diff --git a/tokenapi_example_test.go b/tokenapi_example_test.go index 35d6fbf..9a46fed 100644 --- a/tokenapi_example_test.go +++ b/tokenapi_example_test.go @@ -28,20 +28,20 @@ func ExampleTokenAPI_Fork() { // a lot simpler. 
The handler from above can be replaced with: simpler := parsekit.A.Str("abcd") - result, err := parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcdefgh") + result, err := parsekit.NewTokenizer(abcdSequence).Execute("abcdefgh") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(simpler, "abcd").Execute("abcdefgh") + result, err = parsekit.NewTokenizer(simpler).Execute("abcdefgh") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcx") + result, err = parsekit.NewTokenizer(abcdSequence).Execute("abcx") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("xyz") + result, err = parsekit.NewTokenizer(abcdSequence).Execute("xyz") fmt.Println(result, err) // Output: // abcd // abcd - // unexpected input (expected abcd) - // unexpected input (expected abcd) + // unexpected input + // unexpected input } func ExampleTokenAPI_Merge() { @@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() { return true } - result, _ := parsekit.NewTokenizer(tokenHandler, "a match").Execute("Hi mister X!") + result, _ := parsekit.NewTokenizer(tokenHandler).Execute("Hi mister X!") fmt.Println(result) // Output: diff --git a/tokenhandler_test.go b/tokenhandler_test.go index 3d0af65..cd4e906 100644 --- a/tokenhandler_test.go +++ b/tokenhandler_test.go @@ -13,7 +13,7 @@ func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) { t.Accept() } return true - }, "test") + }) result, _ := parser.Execute("This is some random data to parse") if result.String() != "This is some random " { t.Fatalf("Got unexpected output from TokenHandler: %s", result.String()) @@ -33,7 +33,7 @@ func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { Value: true, }) return true - }, "test") + }) result, _ := parser.Execute("doesn't matter") if len(result.Tokens()) != 2 { t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens())) @@ -61,7 +61,7 @@ func 
TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) { var tok, c, a = parsekit.T, parsekit.C, parsekit.A fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII)) - parser := parsekit.NewTokenizer(fooToken, "something") + parser := parsekit.NewTokenizer(fooToken) input := "This is fine ASCII Åltho hère öt endĩt!" result, err := parser.Execute(input) @@ -83,7 +83,7 @@ func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { )), m.Drop(c.ZeroOrMore(a.Asterisk)), ) - parser := parsekit.NewTokenizer(fooToken, "something") + parser := parsekit.NewTokenizer(fooToken) input := "*** This is fine ASCII Åltho hère öt endĩt! ***" output := "This is fine ASCIIÅltho hère öt endĩt!" result, err := parser.Execute(input) @@ -112,7 +112,7 @@ func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { t.Accept() return false - }, "test") + }) parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("input string") }, Regexp: true, @@ -125,7 +125,7 @@ func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) { t.NextRune() t.NextRune() return false - }, "test") + }) parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("input string") }, Regexp: true, @@ -138,7 +138,7 @@ func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) { t.NextRune() t.Accept() return false - }, "test") + }) parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("") }, Regexp: true, diff --git a/tokenhandlerresult.go b/tokenhandlerresult.go index 9c6a9d4..c556b25 100644 --- a/tokenhandlerresult.go +++ b/tokenhandlerresult.go @@ -164,6 +164,8 @@ func (r *TokenHandlerResult) Value(idx int) interface{} { return r.tokens[idx].Value } +// Cursor retrieves the read cursor from the TokenHandlerResult. 
This is the +// first cursor position after the runes that were read by the TokenHandler. func (r *TokenHandlerResult) Cursor() *Cursor { return r.cursor } diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 07314aa..46be6ad 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -298,7 +298,7 @@ func TestModifiers(t *testing.T) { func TestTokenMakerErrorHandling(t *testing.T) { var a, tok = parsekit.A, parsekit.T invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() - parser := parsekit.NewTokenizer(invalid, "boolean") + parser := parsekit.NewTokenizer(invalid) parsekit.AssertPanic(t, parsekit.PanicT{ func() { parser.Execute("no") }, false, `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + @@ -380,10 +380,11 @@ func TestSequenceOfRunes(t *testing.T) { input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" output := "" parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Expects("Sequence of runes") if p.On(sequence).Accept() { output = p.Result().String() p.Stop() + } else { + p.UnexpectedInput("sequence of runes") } }) err := parser.Execute(input) diff --git a/tokenizer.go b/tokenizer.go index 18f2da6..9225aaf 100644 --- a/tokenizer.go +++ b/tokenizer.go @@ -23,18 +23,14 @@ type TokenHandler func(t *TokenAPI) bool // This is a simple wrapper around a TokenHandler function. It can be used to // match an input string against that TokenHandler function and retrieve the // results in a straight forward way. -// -// The 'expects' parameter is used for creating an error message in case parsed -// input does not match the TokenHandler. 
-func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer { +func NewTokenizer(tokenHandler TokenHandler) *Tokenizer { tokenizer := &Tokenizer{} tokenizer.parser = NewParser(func(p *ParseAPI) { if p.On(tokenHandler).Accept() { tokenizer.result = p.Result() p.Stop() } else { - p.Expects(expects) - p.UnexpectedInput() + p.UnexpectedInput("") } }) return tokenizer diff --git a/tokenizer_test.go b/tokenizer_test.go index 7e8f6ed..10186cd 100644 --- a/tokenizer_test.go +++ b/tokenizer_test.go @@ -24,7 +24,7 @@ func ExampleTokenizer_Execute() { ip := T.Str("ip", A.IPv4) mask := T.Int8("mask", A.IPv4CIDRMask) cidr := C.Seq(ip, A.Slash, mask) - tokenizer := NewTokenizer(cidr, "cidr") + tokenizer := NewTokenizer(cidr) for _, input := range []string{ "000.000.000.000/000", @@ -46,8 +46,8 @@ func ExampleTokenizer_Execute() { // Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0) // Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24) // Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32) - // Error: unexpected input (expected cidr) - // Error: unexpected input (expected cidr) + // Error: unexpected input + // Error: unexpected input } func TestCallingNextRune_ReturnsNextRune(t *testing.T) {