From 05585db34138edd4fc3b7eb2ece96ef403609a73 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Wed, 5 Jun 2019 10:07:50 +0000 Subject: [PATCH] Normalizing error handling, to always include the caller location in errors. This makes debugging a lot easier for users of the package, because it doesn't say stuff like 'Method() was called incorrectly', but instead something like 'Method() was called incorrectly at /path/to/file.go:1234'. --- error.go | 30 ++- examples/example_basiccalculator1_test.go | 8 +- examples/example_basiccalculator2_test.go | 2 +- examples/example_dutchpostcode_test.go | 8 +- examples/example_helloManyStateParser_test.go | 9 +- .../example_helloParserCombinator_test.go | 6 +- parseapi.go | 54 ++--- parser.go | 28 +-- parser_test.go | 26 +-- tokenapi.go | 21 +- tokenapi_example_test.go | 4 +- tokenhandlers_builtin.go | 198 +++++++++++------- tokenhandlers_builtin_test.go | 24 +-- tokenizer_test.go | 61 ++++-- tokenresult.go | 26 ++- tokenresult_test.go | 8 +- 16 files changed, 301 insertions(+), 212 deletions(-) diff --git a/error.go b/error.go index 5cc6e4b..ec0909b 100644 --- a/error.go +++ b/error.go @@ -2,6 +2,8 @@ package parsekit import ( "fmt" + "runtime" + "strings" ) // Error is used as the error type when parsing errors occur. @@ -14,8 +16,7 @@ type Error struct { func (err *Error) Error() string { if err == nil { - _, linepos := getCaller(1) - panic(fmt.Sprintf("parsekit.Error.Error(): method called with nil error at %s", linepos)) + callerPanic(1, "parsekit.Error.Error(): method called with nil error at {caller}") } return err.Message } @@ -24,8 +25,29 @@ func (err *Error) Error() string { // the position in the input where the error occurred. 
func (err *Error) Full() string { if err == nil { - _, linepos := getCaller(1) - panic(fmt.Sprintf("parsekit.Error.Full(): method called with nil error at %s", linepos)) + callerPanic(1, "parsekit.Error.Full(): method called with nil error at {caller}") } return fmt.Sprintf("%s at %s", err, err.Cursor) } + +func callerFunc(depth int) string { + // No error handling, because we call this method ourselves with safe depth values. + pc, _, _, _ := runtime.Caller(depth + 1) + caller := runtime.FuncForPC(pc) + parts := strings.Split(caller.Name(), ".") + funcName := parts[len(parts)-1] + return funcName +} + +func callerFilepos(depth int) string { + // No error handling, because we call this method ourselves with safe depth values. + _, file, line, _ := runtime.Caller(depth + 1) + return fmt.Sprintf("%s:%d", file, line) +} + +func callerPanic(depth int, f string, args ...interface{}) { + filepos := callerFilepos(depth + 1) + m := fmt.Sprintf(f, args...) + m = strings.Replace(m, "{caller}", filepos, 1) + panic(m) +} diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index 0686acd..ab69152 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -41,10 +41,10 @@ func Example_basicCalculator1() { // Input: "1+2+3", got outcome: 6, correct = true // Input: " 10 + \t20 - 3 + 7 -10 ", got outcome: 24, correct = true // Input: "", got error: unexpected end of file (expected integer number) - // Input: " \t ", got error: unexpected character ' ' (expected integer number) - // Input: "+", got error: unexpected character '+' (expected integer number) - // Input: "10.8 + 12", got error: unexpected character '.' 
(expected operator, '+' or '-') - // Input: "42+ ", got error: unexpected character ' ' (expected integer number) + // Input: " \t ", got error: unexpected input (expected integer number) + // Input: "+", got error: unexpected input (expected integer number) + // Input: "10.8 + 12", got error: unexpected input (expected operator, '+' or '-') + // Input: "42+ ", got error: unexpected input (expected integer number) } // --------------------------------------------------------------------------- diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index fda7184..c85f195 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -56,7 +56,7 @@ func Example_basicCalculator2() { // Input: "", got error: unexpected end of file at start of file // Input: "(", got error: unexpected end of file at line 1, column 2 // Input: "10+20-", got error: unexpected end of file at line 1, column 7 - // Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) at line 1, column 13 + // Input: "10+20-(4*10))", got error: unexpected input (expected end of file) at line 1, column 13 // Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19 } diff --git a/examples/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go index 73a07e4..b464be3 100644 --- a/examples/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -40,11 +40,11 @@ func Example_dutchPostcodeUsingTokenizer() { // [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB) // [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB) // [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB) - // [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file - // [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file - // [6] Input: "huh" 
Error: unexpected character 'h' (expected a Dutch postcode) at start of file + // [4] Input: "1212abc" Error: unexpected input (expected a Dutch postcode) at start of file + // [5] Input: "1234" Error: unexpected input (expected a Dutch postcode) at start of file + // [6] Input: "huh" Error: unexpected input (expected a Dutch postcode) at start of file // [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file - // [8] Input: "\xcd2222AB" Error: unexpected character '�' (expected a Dutch postcode) at start of file + // [8] Input: "\xcd2222AB" Error: unexpected input (expected a Dutch postcode) at start of file } // --------------------------------------------------------------------------- diff --git a/examples/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go index 2308557..bab4982 100644 --- a/examples/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -24,6 +24,7 @@ import ( func Example_helloWorldUsingParser1() { for i, input := range []string{ + "Oh!", "Hello, world!", "HELLO ,Johnny!", "hello , Bob123!", @@ -50,17 +51,17 @@ func Example_helloWorldUsingParser1() { // [0] Input: "Hello, world!" Output: world // [1] Input: "HELLO ,Johnny!" Output: Johnny // [2] Input: "hello , Bob123!" Output: Bob123 - // [3] Input: "hello Pizza!" Error: unexpected character 'P' (expected comma) + // [3] Input: "hello Pizza!" 
Error: unexpected input (expected comma) // [4] Input: "" Error: unexpected end of file (expected hello) - // [5] Input: " " Error: unexpected character ' ' (expected hello) + // [5] Input: " " Error: unexpected input (expected hello) // [6] Input: "hello" Error: unexpected end of file (expected comma) // [7] Input: "hello," Error: unexpected end of file (expected name) // [8] Input: "hello , " Error: unexpected end of file (expected name) // [9] Input: "hello , Droopy" Error: unexpected end of file (expected exclamation) // [10] Input: "hello , Droopy!" Output: Droopy // [11] Input: "hello , \t \t Droopy \t !" Output: Droopy - // [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello) - // [13] Input: "hello,!" Error: unexpected character '!' (expected name) + // [12] Input: "Oh no!" Error: unexpected input (expected hello) + // [13] Input: "hello,!" Error: unexpected input (expected name) } // --------------------------------------------------------------------------- diff --git a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index 11e714e..5b06df3 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -37,9 +37,9 @@ func Example_helloWorldUsingTokenizer() { // [1] Input: "HELLO ,Johnny!" Output: Johnny // [2] Input: "hello , Bob123!" Output: Bob123 // [3] Input: "hello Pizza!" Output: Pizza - // [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at start of file - // [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at start of file - // [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at start of file + // [4] Input: "Oh no!" Error: unexpected input (expected a friendly greeting) at start of file + // [5] Input: "Hello, world" Error: unexpected input (expected a friendly greeting) at start of file + // [6] Input: "Hello,!" 
Error: unexpected input (expected a friendly greeting) at start of file } // --------------------------------------------------------------------------- diff --git a/parseapi.go b/parseapi.go index c6f4ffe..64533a8 100644 --- a/parseapi.go +++ b/parseapi.go @@ -3,7 +3,6 @@ package parsekit import ( "fmt" "io" - "strings" ) // ParseAPI holds the internal state of a parse run and provides an API to @@ -29,20 +28,15 @@ func (p *ParseAPI) panicWhenStoppedOrInError() { return } - called, _ := getCaller(1) - parts := strings.Split(called, ".") - calledShort := parts[len(parts)-1] - _, filepos := getCaller(2) + called := callerFunc(1) after := "Error()" if p.stopped { after = "Stop()" } - panic(fmt.Sprintf( - "parsekit.ParseAPI.%s(): Illegal call to %s() at %s: "+ - "no calls allowed after ParseAPI.%s", - calledShort, calledShort, filepos, after)) + callerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+ + "no calls allowed after ParseAPI.%s", called, called, after) } func (p *ParseAPI) isStoppedOrInError() bool { @@ -54,9 +48,9 @@ func (p *ParseAPI) initLoopCheck() { } func (p *ParseAPI) checkForLoops() { - _, filepos := getCaller(2) + filepos := callerFilepos(2) if _, ok := p.loopCheck[filepos]; ok { - panic(fmt.Sprintf("parsekit.ParseAPI: Loop detected in parser at %s", filepos)) + callerPanic(2, "parsekit.ParseAPI: Loop detected in parser at {caller}") } p.loopCheck[filepos] = true } @@ -65,9 +59,9 @@ func (p *ParseAPI) checkForLoops() { // TokenHandler. On must be chained with another method that tells the parser // what action to perform when a match was found: // -// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes. +// 1) On(...).Skip() - Move read cursor forward, ignoring the match results. // -// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer. 
+// 2) On(...).Accept() - Move cursor, making results available through Result() // // 3) On(...).Stay() - Do nothing, the cursor stays at the same position. // @@ -93,18 +87,15 @@ func (p *ParseAPI) checkForLoops() { // p.RouteTo(stateHandlerC) // } // -// // When there's a "hi" on input, then say hello. -// if p.On(parsekit.C.Str("hi")).Accept() { -// fmt.Println("Hello!") +// // Echo back a sequence of digits on the input. +// if p.On(parsekit.A.Digits).Accept() { +// fmt.Println(p.Result().String()) // } func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction { p.panicWhenStoppedOrInError() p.checkForLoops() if tokenHandler == nil { - _, filepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.ParseAPI.On(): On() called with nil "+ - "tokenHandler argument at %s", filepos)) + callerPanic(1, "parsekit.ParseAPI.On(): On() called with nil tokenHandler argument at {caller}") } p.result = nil @@ -127,9 +118,9 @@ type ParseAPIOnAction struct { ok bool } -// Accept tells the parser to move the cursor past a match that was found, -// and to make the TokenResult from the TokenAPI available in the ParseAPI -// through the Result() method. +// Accept tells the parser to move the read cursor past a match that was +// found, and to make the TokenResult from the TokenAPI available in the +// ParseAPI through the ParseAPI.Result() method. // // Returns true in case a match was found. // When no match was found, then no action is taken and false is returned. 
@@ -198,10 +189,8 @@ func (a *ParseAPIOnAction) flushReader() { func (p *ParseAPI) Result() *TokenResult { result := p.result if p.result == nil { - _, filepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.ParseAPI.TokenResult(): TokenResult() called at %s without "+ - "calling ParseAPI.Accept() on beforehand", filepos)) + callerPanic(1, "parsekit.ParseAPI.TokenResult(): TokenResult() called "+ + "at {caller} without calling ParseAPI.Accept() on beforehand") } return result } @@ -221,8 +210,7 @@ func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { if parseHandler == nil { - _, filepos := getCaller(2) - panic(fmt.Sprintf("parsekit.ParseAPI.Handle(): Handle() called with nil input at %s", filepos)) + callerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}") } } @@ -286,19 +274,19 @@ func (p *ParseAPI) ExpectEndOfFile() { // unexpected input was encountered. // // It can automatically produce an error message for a couple of situations: -// 1) input simply didn't match the expectation +// 1) the input simply didn't match the expectation // 2) the end of the input was reached -// 3) there was an invalid UTF8 character on the input. +// 3) there was an error while reading the input. // // The parser implementation can provide some feedback for this error by // calling ParseAPI.Expects() to set the expectation. When set, the // expectation is included in the error message. 
func (p *ParseAPI) UnexpectedInput() { p.panicWhenStoppedOrInError() - r, err := p.tokenAPI.NextRune() + _, err := p.tokenAPI.NextRune() switch { case err == nil: - p.Error("unexpected character %q%s", r, fmtExpects(p)) + p.Error("unexpected input%s", fmtExpects(p)) case err == io.EOF: p.Error("unexpected end of file%s", fmtExpects(p)) default: diff --git a/parser.go b/parser.go index 7c4195a..290cef9 100644 --- a/parser.go +++ b/parser.go @@ -1,8 +1,6 @@ package parsekit import ( - "fmt" - "runtime" "strings" ) @@ -30,8 +28,7 @@ type ParseHandler func(*ParseAPI) // To parse input data, use the method Parser.Execute(). func NewParser(startHandler ParseHandler) *Parser { if startHandler == nil { - _, filepos := getCaller(1) - panic(fmt.Sprintf("parsekit.NewParser(): NewParser() called with nil input at %s", filepos)) + callerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}") } return &Parser{startHandler: startHandler} } @@ -44,21 +41,14 @@ func (p *Parser) Execute(input string) *Error { loopCheck: map[string]bool{}, } if api.Handle(p.startHandler) { - // Handle indicated that parsing could still continue, meaning that there - // was no error and that the parsing has not actively been Stop()-ed. - // However, at this point, the parsing really should have stopped. - // We'll see what happens when we tell the parser that EOF was expected. - // This might work if we're indeed at EOF. Otherwise, an error will be - // generated. - api.ExpectEndOfFile() + // Handle returned true, indicating that parsing could still continue. + // There was no error and that the parsing has not actively been Stop()-ed. + // Let's try to make the best of it. + if api.expecting != "" { + api.UnexpectedInput() + } else { + api.ExpectEndOfFile() + } } return api.err } - -func getCaller(depth int) (string, string) { - // No error handling, because we call this method ourselves with safe depth values. 
- pc, file, line, _ := runtime.Caller(depth + 1) - filepos := fmt.Sprintf("%s:%d", file, line) - caller := runtime.FuncForPC(pc) - return caller.Name(), filepos -} diff --git a/parser_test.go b/parser_test.go index 5df49f8..1069e41 100644 --- a/parser_test.go +++ b/parser_test.go @@ -30,26 +30,18 @@ func ExampleParser_usingTokens() { // Easy access to the parsekit definitions. c, a, tok := parsekit.C, parsekit.A, parsekit.T - var tokens []*parsekit.Token - var accepted string - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() { - tokens = p.Result().Tokens() - accepted = p.Result().String() + if p.On(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))).Accept() { + fmt.Printf("Runes accepted: %q\n", p.Result().String()) + fmt.Printf("Token values: %s\n", p.Result().Tokens()) } p.ExpectEndOfFile() }) - parser.Execute("¡Any will dö!") + parser.Execute("¡ök!") - fmt.Printf("Runes accepted: %q\n", accepted) - fmt.Printf("Token values: ") - for _, t := range tokens { - fmt.Printf("%c ", t.Value) - } // Output: - // Runes accepted: "¡Any will dö!" - // Token values: ¡ A n y w i l l d ö ! + // Runes accepted: "¡ök!" 
+ // Token values: RUNE(int32:161) RUNE(int32:246) RUNE(int32:107) RUNE(int32:33) } func ExampleParseAPI_UnexpectedInput() { @@ -61,7 +53,7 @@ func ExampleParseAPI_UnexpectedInput() { fmt.Println(err.Full()) // Output: - // unexpected character 'W' (expected a thing) at start of file + // unexpected input (expected a thing) at start of file } func ExampleParseAPIOnAction_Accept() { @@ -151,7 +143,7 @@ func ExampleParseAPI_Stop_notCalledButInputPending() { // Output: // First word: Input - // Error: unexpected character ' ' (expected end of file) at line 1, column 6 + // Error: unexpected input (expected end of file) at line 1, column 6 } func ExampleParseAPIOnAction_Stay() { @@ -265,7 +257,7 @@ func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKic func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) { p := parsekit.NewParser(func(p *parsekit.ParseAPI) {}) err := p.Execute("x") - parsekit.AssertEqual(t, "unexpected character 'x' (expected end of file) at start of file", err.Full(), "err") + parsekit.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Full(), "err") } type parserWithLoop struct { diff --git a/tokenapi.go b/tokenapi.go index 1508861..a0d258e 100644 --- a/tokenapi.go +++ b/tokenapi.go @@ -88,10 +88,8 @@ func NewTokenAPI(r io.Reader) *TokenAPI { // without explicitly accepting, this method will panic. func (i *TokenAPI) NextRune() (rune, error) { if i.result.lastRune != nil { - _, linepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+ - "prior call to Accept()", linepos)) + callerPanic(1, "parsekit.TokenAPI.NextRune(): NextRune() called at {caller} "+ + "without a prior call to Accept()") } i.detachChilds() @@ -107,15 +105,9 @@ func (i *TokenAPI) NextRune() (rune, error) { // returned an error. Calling Accept() in such case will result in a panic. 
func (i *TokenAPI) Accept() { if i.result.lastRune == nil { - _, linepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.TokenAPI.Accept(): Accept() called at %s without "+ - "first calling NextRune()", linepos)) + callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()") } else if i.result.lastRune.err != nil { - _, linepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+ - "prior call to NextRune() failed", linepos)) + callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") } i.result.runes = append(i.result.runes, i.result.lastRune.r) i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r)) @@ -167,10 +159,7 @@ func (i *TokenAPI) Fork() *TokenAPI { // This allows a child to feed results in chunks to its parent. func (i *TokenAPI) Merge() { if i.parent == nil { - _, filepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.TokenAPI.Merge(): Merge() called at %s "+ - "on a non-forked TokenAPI", filepos)) + callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") } i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) 
diff --git a/tokenapi_example_test.go b/tokenapi_example_test.go index 888cfd8..35d6fbf 100644 --- a/tokenapi_example_test.go +++ b/tokenapi_example_test.go @@ -40,8 +40,8 @@ func ExampleTokenAPI_Fork() { // Output: // abcd // abcd - // unexpected character 'a' (expected abcd) - // unexpected character 'x' (expected abcd) + // unexpected input (expected abcd) + // unexpected input (expected abcd) } func ExampleTokenAPI_Merge() { diff --git a/tokenhandlers_builtin.go b/tokenhandlers_builtin.go index e348450..88aba77 100644 --- a/tokenhandlers_builtin.go +++ b/tokenhandlers_builtin.go @@ -184,9 +184,9 @@ var A = struct { Pipe: MatchRune('|'), CurlyClose: MatchRune('}'), Tilde: MatchRune('~'), - Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))), - WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))), - EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()), + Whitespace: MatchWhitespace(), + WhitespaceAndNewlines: MatchWhitespaceAndNewlines(), + EndOfLine: MatchEndOfLine(), Digit: MatchDigit(), DigitNotZero: MatchDigitNotZero(), Digits: MatchDigits(), @@ -195,15 +195,50 @@ var A = struct { IntegerBetween: MatchIntegerBetween, Float: MatchFloat(), Boolean: MatchBoolean(), - ASCII: MatchRuneRange('\x00', '\x7F'), - ASCIILower: MatchRuneRange('a', 'z'), - ASCIIUpper: MatchRuneRange('A', 'Z'), - HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')), + ASCII: MatchASCII(), + ASCIILower: MatchASCIILower(), + ASCIIUpper: MatchASCIIUpper(), + HexDigit: MatchHexDigit(), Octet: MatchOctet(false), IPv4: MatchIPv4(), IPv4MaskBits: MatchIntegerBetween(0, 32), } +// M provides convenient access to a range of modifiers (which in their nature are +// parser/combinators) that can be used when creating TokenHandler functions. 
+// +// In parsekit, a modifier is defined as a TokenHandler function that modifies the +// resulting output of another TokenHandler in some way. It does not do any matching +// against input of its own. +// +// When using M in your own parser, then it is advised to create a variable +// to reference it: +// +// var m = parsekit.M +// +// Doing so saves you a lot of typing, and it makes your code a lot cleaner. +var M = struct { + Drop func(TokenHandler) TokenHandler + Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? + TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? + TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? + TrimSpace func(handler TokenHandler) TokenHandler + ToLower func(TokenHandler) TokenHandler + ToUpper func(TokenHandler) TokenHandler + Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? + ByCallback func(TokenHandler, func(string) string) TokenHandler +}{ + Drop: ModifyDrop, + Trim: ModifyTrim, + TrimLeft: ModifyTrimLeft, + TrimRight: ModifyTrimRight, + TrimSpace: ModifyTrimSpace, + ToLower: ModifyToLower, + ToUpper: ModifyToUpper, + Replace: ModifyReplace, + ByCallback: ModifyByCallback, +} + // T provides convenient access to a range of Token producers (which in their // nature are parser/combinators) that can be used when creating TokenHandler // functions. @@ -254,8 +289,7 @@ var T = struct { ByCallback: MakeTokenByCallback, } -// MatchRune creates a TokenHandler function that checks if the next rune from -// the input matches the provided rune. +// MatchRune creates a TokenHandler function that matches against the provided rune. 
func MatchRune(expected rune) TokenHandler { return func(t *TokenAPI) bool { input, err := t.NextRune() @@ -267,8 +301,8 @@ func MatchRune(expected rune) TokenHandler { } } -// MatchRunes creates a TokenHandler function that that checks if the next rune -// from the input is one of the provided runes. +// MatchRunes creates a TokenHandler function that checks if the input matches +// one of the provided runes. func MatchRunes(expected ...rune) TokenHandler { s := string(expected) return func(t *TokenAPI) bool { @@ -283,17 +317,16 @@ func MatchRunes(expected ...rune) TokenHandler { } } -// MatchRuneRange creates a TokenHandler function that that checks if the next rune -// from the input is contained by the provided rune range. -// -// The rune range is defined by a start and an end rune, inclusive, so: +// MatchRuneRange creates a TokenHandler function that checks if the input +// matches the provided rune range. The rune range is defined by a start and +// an end rune, inclusive, so: // // MatchRuneRange('g', 'k') // // creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'. func MatchRuneRange(start rune, end rune) TokenHandler { if end < start { - panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end)) + callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end) } return func(t *TokenAPI) bool { input, err := t.NextRune() @@ -305,8 +338,28 @@ func MatchRuneRange(start rune, end rune) TokenHandler { } } -// MatchStr creates a TokenHandler that will check if the upcoming runes on the -// input match the provided string. +// MatchWhitespace creates a TokenHandler that matches the input against one +// or more whitespace characters, meaning tabs and spaces. +// +// When you need whitespace matching to also include newlines, then make use +// of MatchWhitespaceAndNewlines(). 
+func MatchWhitespace() TokenHandler { + return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))) +} + +// MatchWhitespaceAndNewlines creates a TokenHandler that matches the input +// against one or more whitespace and/or newline characters, meaning tabs, +// spaces and newlines ("\r\n" and "\n"). +func MatchWhitespaceAndNewlines() TokenHandler { + return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))) +} + +// MatchEndOfLine creates a TokenHandler that matches a newline ("\r\n" or "\n") or EOF. +func MatchEndOfLine() TokenHandler { + return MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()) +} + +// MatchStr creates a TokenHandler that matches the input against the provided string. // TODO make this a more efficient string-level match? func MatchStr(expected string) TokenHandler { var handlers = []TokenHandler{} @@ -316,8 +369,8 @@ func MatchStr(expected string) TokenHandler { return MatchSeq(handlers...) } -// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes -// on the input match the provided string in a case-insensitive manner. +// MatchStrNoCase creates a TokenHandler that matches the input against the +// provided string in a case-insensitive manner. // TODO make this a more efficient string-level match? func MatchStrNoCase(expected string) TokenHandler { var handlers = []TokenHandler{} @@ -331,7 +384,8 @@ func MatchStrNoCase(expected string) TokenHandler { // MatchOpt creates a TokenHandler that makes the provided TokenHandler optional. // When the provided TokenHandler applies, then its output is used, otherwise -// no output is generated but still a successful match is reported. +// no output is generated but still a successful match is reported (but the +// result will be empty). 
func MatchOpt(handler TokenHandler) TokenHandler { return func(t *TokenAPI) bool { child := t.Fork() @@ -410,7 +464,7 @@ func MatchRep(times int, handler TokenHandler) TokenHandler { // When more matches are possible, these will be included in the output. func MatchMin(min int, handler TokenHandler) TokenHandler { if min < 0 { - panic("TokenHandler bug: MatchMin definition error: min must be >= 0") + callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0") } return matchMinMax(min, -1, handler, "MatchMin") } @@ -421,7 +475,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler { // Zero matches are considered a successful match. func MatchMax(max int, handler TokenHandler) TokenHandler { if max < 0 { - panic("TokenHandler bug: MatchMax definition error: max must be >= 0") + callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0") } return matchMinMax(0, max, handler, "MatchMax") } @@ -444,17 +498,17 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler { // inclusive. All matches will be included in the output. 
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler { if max < 0 { - panic("TokenHandler bug: MatchMinMax definition error: max must be >= 0") + callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0") } if min < 0 { - panic("TokenHandler bug: MatchMinMax definition error: min must be >= 0") + callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0") } return matchMinMax(min, max, handler, "MatchMinMax") } func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler { if max >= 0 && min > max { - panic(fmt.Sprintf("TokenHandler bug: %s definition error: max %d must not be < min %d", name, max, min)) + callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min) } return func(t *TokenAPI) bool { child := t.Fork() @@ -592,15 +646,43 @@ func MatchFloat() TokenHandler { return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits))) } -// MatchBoolean creates a TokenHandler function that checks if a valid boolean +// MatchBoolean creates a TokenHandler function that checks if a boolean // value can be read from the input. It supports the boolean values as understood // by Go's strconv.ParseBool() function. +// +// True values: true, TRUE, True, 1, t, T +// +// False values: false, FALSE, False, 0, f, F func MatchBoolean() TokenHandler { trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T')) falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F')) return MatchAny(trues, falses) } +// MatchASCII creates a TokenHandler function that matches against any +// ASCII value on the input. +func MatchASCII() TokenHandler { + return MatchRuneRange('\x00', '\x7F') +} + +// MatchASCIILower creates a TokenHandler function that matches against any +// lower case ASCII letter on the input (a - z). 
+func MatchASCIILower() TokenHandler { + return MatchRuneRange('a', 'z') +} + +// MatchASCIIUpper creates a TokenHandler function that matches against any +// upper case ASCII letter on the input (A - Z). +func MatchASCIIUpper() TokenHandler { + return MatchRuneRange('A', 'Z') +} + +// MatchHexDigit creates a TokenHandler function that checks if a single hexadecimal +// digit can be read from the input. +func MatchHexDigit() TokenHandler { + return MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')) +} + // MatchOctet creates a TokenHandler function that checks if a valid octet value // can be read from the input (octet = byte value representation, with a value // between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming @@ -610,25 +692,25 @@ func MatchBoolean() TokenHandler { // When the normalize parameter is set to true, then leading zeroes will be // stripped from the octet. func MatchOctet(normalize bool) TokenHandler { - digits := MatchMinMax(1, 3, MatchDigit()) + max3Digits := MatchMinMax(1, 3, MatchDigit()) return func(t *TokenAPI) bool { fork := t.Fork() - if !digits(fork) { + if !max3Digits(fork) { return false } value, _ := strconv.ParseInt(fork.Result().String(), 10, 16) - if value <= 255 { - if normalize { - runes := fork.Result().Runes() - for len(runes) > 1 && runes[0] == '0' { - runes = runes[1:] - } - fork.Result().SetRunes(runes) - } - fork.Merge() - return true + if value > 255 { + return false } - return false + if normalize { + runes := fork.Result().Runes() + for len(runes) > 1 && runes[0] == '0' { + runes = runes[1:] + } + fork.Result().SetRunes(runes) + } + fork.Merge() + return true } } @@ -642,41 +724,6 @@ func MatchIPv4() TokenHandler { return MatchSeq(octet, dot, octet, dot, octet, dot, octet) } -// M provides convenient access to a range of modifiers (which in their nature are -// parser/combinators) that can be used when creating TokenHandler functions. 
-// -// In parsekit, a modifier is defined as a TokenHandler function that modifies the -// resulting output of another TokenHandler in some way. It does not do any matching -// against input of its own. -// -// When using M in your own parser, then it is advised to create a variable -// to reference it: -// -// var m = parsekit.M -// -// Doing so saves you a lot of typing, and it makes your code a lot cleaner. -var M = struct { - Drop func(TokenHandler) TokenHandler - Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? - TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? - TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? - TrimSpace func(handler TokenHandler) TokenHandler - ToLower func(TokenHandler) TokenHandler - ToUpper func(TokenHandler) TokenHandler - Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? - ByCallback func(TokenHandler, func(string) string) TokenHandler -}{ - Drop: ModifyDrop, - Trim: ModifyTrim, - TrimLeft: ModifyTrimLeft, - TrimRight: ModifyTrimRight, - TrimSpace: ModifyTrimSpace, - ToLower: ModifyToLower, - ToUpper: ModifyToUpper, - Replace: ModifyReplace, - ByCallback: ModifyByCallback, -} - // ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies. // If it does, then its output is discarded completely. // @@ -970,6 +1017,7 @@ func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { value, err := convert(t.Result().String()) if err != nil { + // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? 
panic(fmt.Sprintf( "TokenHandler error: %s cannot handle input %q: %s "+ "(only use a type conversion token maker, when the input has been "+ diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 132cc3a..444e930 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -70,18 +70,18 @@ func TestCombinators(t *testing.T) { func TestCombinatorPanics(t *testing.T) { var c, a = parsekit.C, parsekit.A parsekit.AssertPanics(t, []parsekit.PanicT{ - {func() { a.RuneRange('z', 'a') }, false, - "TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"}, - {func() { c.MinMax(-1, 1, parsekit.A.Space) }, false, - "TokenHandler bug: MatchMinMax definition error: min must be >= 0"}, - {func() { c.MinMax(1, -1, parsekit.A.Space) }, false, - "TokenHandler bug: MatchMinMax definition error: max must be >= 0"}, - {func() { c.MinMax(10, 5, parsekit.A.Space) }, false, - "TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"}, - {func() { c.Min(-10, parsekit.A.Space) }, false, - "TokenHandler bug: MatchMin definition error: min must be >= 0"}, - {func() { c.Max(-42, parsekit.A.Space) }, false, - "TokenHandler bug: MatchMax definition error: max must be >= 0"}, + {func() { a.RuneRange('z', 'a') }, true, + `TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, + {func() { c.MinMax(-1, 1, parsekit.A.Space) }, true, + `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, + {func() { c.MinMax(1, -1, parsekit.A.Space) }, true, + `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, + {func() { c.MinMax(10, 5, parsekit.A.Space) }, true, + `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`}, + {func() { c.Min(-10, parsekit.A.Space) }, true, + `TokenHandler: 
MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, + {func() { c.Max(-42, parsekit.A.Space) }, true, + `TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, }) } diff --git a/tokenizer_test.go b/tokenizer_test.go index 052891d..1b740fb 100644 --- a/tokenizer_test.go +++ b/tokenizer_test.go @@ -1,12 +1,55 @@ package parsekit import ( + "fmt" "io" "strings" "testing" "unicode/utf8" ) +// TODO For error handling, it would be really cool if for example the +// 10.0.300.1/24 case would return an actual error stating that +// 300 is not a valid octet for an IPv4 address. +// Biggest thing to take care of here, is that errors should not stop +// a Parser flow (since we might be trying to match different cases in +// sequence), but a Parser flow should optionally be able to make use +// of the actual error. +// The same goes for a Tokenizer, since those can also make use of +// optional matching using parsekit.C.Any(...) for example. If matching +// for Any(IPv4, Digits), the example case should simply end up with 10 +// after the IPv4 mismatch. +func ExampleTokenizer_Execute() { + // Build the tokenizer for ip/mask. + ip := T.Str("ip", A.IPv4) + mask := T.Int8("mask", A.IPv4MaskBits) + cidr := C.Seq(ip, A.Slash, mask) + tokenizer := NewTokenizer(cidr, "cidr") + + for _, input := range []string{ + "000.000.000.000/000", + "192.168.0.1/24", + "255.255.255.255/32", + "10.0.300.1/24", + "not an IPv4 CIDR", + } { + // Execute returns a TokenResult and an error, which is nil on success. 
+ result, err := tokenizer.Execute(input) + + if err == nil { + fmt.Printf("Result: %s\n", result.Tokens()) + } else { + fmt.Printf("Error: %s\n", err) + } + } + // Output: + // Result: ip(string:0.0.0.0) mask(int8:0) + // Result: ip(string:192.168.0.1) mask(int8:24) + // Result: ip(string:255.255.255.255) mask(int8:32) + // Error: unexpected input (expected cidr) + // Error: unexpected input (expected cidr) +} + func TestCallingNextRune_ReturnsNextRune(t *testing.T) { r, _ := mkInput().NextRune() AssertEqual(t, 'T', r, "first rune") @@ -31,8 +74,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) { i.NextRune() }, Regexp: true, - Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` + - `/.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, + Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, }) } @@ -40,8 +82,7 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: mkInput().Accept, Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` + - `at /.*/assertions_test\.go:\d+ without first calling NextRune()`, + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`, }) } @@ -52,8 +93,7 @@ func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { i.Merge() }, Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + - `/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) } func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { @@ -65,8 +105,7 @@ func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { f.Merge() }, Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + - `/.*/tokenizer_test\.go:\d+ on a non-forked 
TokenAPI`}) + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) } func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { @@ -78,8 +117,7 @@ func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { f.Merge() }, Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + - `/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) } func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) { @@ -127,8 +165,7 @@ func TestForkingInput_ClearsLastRune(t *testing.T) { i.Accept() }, Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` + - `at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, }) } diff --git a/tokenresult.go b/tokenresult.go index 99f2158..a66f93b 100644 --- a/tokenresult.go +++ b/tokenresult.go @@ -2,6 +2,7 @@ package parsekit import ( "fmt" + "strings" ) // Result holds results as produced by a TokenHandler. @@ -39,11 +40,16 @@ func (r *TokenResult) ClearRunes() { // SetRunes replaces the Runes from the TokenResult with the provided input. func (r *TokenResult) SetRunes(s interface{}) { r.ClearRunes() - r.AddRunes(s) + r.addRunes(s) } // AddRunes is used to add runes to the TokenResult. func (r *TokenResult) AddRunes(set ...interface{}) { + r.addRunes(set...) +} + +// addRunes appends runes to the TokenResult; SetRunes and AddRunes both delegate to it. 
+func (r *TokenResult) addRunes(set ...interface{}) { for _, s := range set { switch s := s.(type) { case string: @@ -53,7 +59,7 @@ func (r *TokenResult) AddRunes(set ...interface{}) { case rune: r.runes = append(r.runes, s) default: - panic(fmt.Sprintf("parsekit.TokenResult.SetRunes(): unsupported type '%T' used", s)) + callerPanic(2, "parsekit.TokenResult.AddRunes(): unsupported type '%T' used at {caller}", s) } } } @@ -91,8 +97,22 @@ func (r *TokenResult) AddToken(t *Token) { r.tokens = append(r.tokens, t) } +// SliceOfTokens is a named type for []*Token. The method Tokens() returns +// this type. A String() method is defined for it, to make it easy to +// format the tokens as a string for testing / debugging purposes. +type SliceOfTokens []*Token + +func (ts SliceOfTokens) String() string { + parts := make([]string, len(ts)) + for i, t := range ts { + str := fmt.Sprintf("%v(%T:%v)", t.Type, t.Value, t.Value) + parts[i] = str + } + return strings.Join(parts, " ") +} + // Tokens retrieves the Tokens from the TokenResult. 
-func (r *TokenResult) Tokens() []*Token { +func (r *TokenResult) Tokens() SliceOfTokens { return r.tokens } diff --git a/tokenresult_test.go b/tokenresult_test.go index fc94cef..5f288fc 100644 --- a/tokenresult_test.go +++ b/tokenresult_test.go @@ -1,11 +1,12 @@ package parsekit import ( + "strings" "testing" ) func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { - i := mkInput() + i := NewTokenAPI(strings.NewReader("Testing")) i.Result().SetRunes("string") AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input") i.Result().SetRunes([]rune("rune slice")) @@ -17,9 +18,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := mkInput() + i := NewTokenAPI(strings.NewReader("Testing")) i.Result().SetRunes(1234567) }, - Expect: "parsekit.TokenResult.SetRunes(): unsupported type 'int' used", + Regexp: true, + Expect: `parsekit\.TokenResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`, }) }