Normalizing error handling, to always include the caller location in errors. This makes debugging a lot easier for users of the package, because it doesn't say stuff like 'Method() was called incorrectly', but instead something like 'Method() was called incorrectly at /path/to/file.go:1234'.

This commit is contained in:
Maurice Makaay 2019-06-05 10:07:50 +00:00
parent 75373e5ed5
commit 05585db341
16 changed files with 301 additions and 212 deletions

View File

@ -2,6 +2,8 @@ package parsekit
import (
"fmt"
"runtime"
"strings"
)
// Error is used as the error type when parsing errors occur.
@ -14,8 +16,7 @@ type Error struct {
func (err *Error) Error() string {
if err == nil {
_, linepos := getCaller(1)
panic(fmt.Sprintf("parsekit.Error.Error(): method called with nil error at %s", linepos))
callerPanic(1, "parsekit.Error.Error(): method called with nil error at {caller}")
}
return err.Message
}
@ -24,8 +25,29 @@ func (err *Error) Error() string {
// the position in the input where the error occurred.
func (err *Error) Full() string {
if err == nil {
_, linepos := getCaller(1)
panic(fmt.Sprintf("parsekit.Error.Full(): method called with nil error at %s", linepos))
callerPanic(1, "parsekit.Error.Full(): method called with nil error at {caller}")
}
return fmt.Sprintf("%s at %s", err, err.Cursor)
}
// callerFunc returns the short name of a function on the current call stack.
//
// The depth argument selects the stack frame: 0 is the function that called
// callerFunc, 1 is the caller of that function, and so on. The returned name
// is the fully qualified function name as reported by the runtime, stripped
// of everything up to and including its last dot (so a method is reported
// as just its method name).
//
// The frame is resolved through runtime.CallersFrames instead of feeding the
// program counter to runtime.FuncForPC, because the latter cannot account
// for inlining and can therefore report the wrong function.
//
// When depth points beyond the top of the stack, an empty string is returned.
func callerFunc(depth int) string {
	// Skip two frames: runtime.Callers itself and callerFunc.
	var pcs [1]uintptr
	if runtime.Callers(depth+2, pcs[:]) == 0 {
		return ""
	}
	frame, _ := runtime.CallersFrames(pcs[:]).Next()
	name := frame.Function
	// LastIndex returns -1 when there is no dot, which makes the slice
	// expression below return the full name unchanged.
	return name[strings.LastIndex(name, ".")+1:]
}
// callerFilepos returns a "file:line" description of a position on the
// current call stack. With depth 0 this is the position in the function
// that called callerFilepos, with depth 1 the position in the caller of
// that function, and so on.
//
// The ok result of runtime.Caller is deliberately ignored, because this
// helper is only called from within the package, using safe depth values.
func callerFilepos(depth int) string {
	// The +1 skips the stack frame of callerFilepos itself.
	_, path, lineno, _ := runtime.Caller(depth + 1)
	return path + ":" + fmt.Sprint(lineno)
}
// callerPanic panics with a formatted message in which the first occurrence
// of the placeholder "{caller}" is replaced by a "file:line" position from
// the call stack.
//
// The format string f and its args are processed by fmt.Sprintf first, and
// the placeholder is substituted afterwards. With depth 0, the reported
// position lies in the function that called callerPanic, with depth 1 in
// the caller of that function, and so on.
func callerPanic(depth int, f string, args ...interface{}) {
	// The +1 skips the stack frame of callerPanic itself.
	location := callerFilepos(depth + 1)
	panic(strings.Replace(fmt.Sprintf(f, args...), "{caller}", location, 1))
}

View File

@ -41,10 +41,10 @@ func Example_basicCalculator1() {
// Input: "1+2+3", got outcome: 6, correct = true
// Input: " 10 + \t20 - 3 + 7 -10 ", got outcome: 24, correct = true
// Input: "", got error: unexpected end of file (expected integer number)
// Input: " \t ", got error: unexpected character ' ' (expected integer number)
// Input: "+", got error: unexpected character '+' (expected integer number)
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
// Input: " \t ", got error: unexpected input (expected integer number)
// Input: "+", got error: unexpected input (expected integer number)
// Input: "10.8 + 12", got error: unexpected input (expected operator, '+' or '-')
// Input: "42+ ", got error: unexpected input (expected integer number)
}
// ---------------------------------------------------------------------------

View File

@ -56,7 +56,7 @@ func Example_basicCalculator2() {
// Input: "", got error: unexpected end of file at start of file
// Input: "(", got error: unexpected end of file at line 1, column 2
// Input: "10+20-", got error: unexpected end of file at line 1, column 7
// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) at line 1, column 13
// Input: "10+20-(4*10))", got error: unexpected input (expected end of file) at line 1, column 13
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19
}

View File

@ -40,11 +40,11 @@ func Example_dutchPostcodeUsingTokenizer() {
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
// [4] Input: "1212abc" Error: unexpected input (expected a Dutch postcode) at start of file
// [5] Input: "1234" Error: unexpected input (expected a Dutch postcode) at start of file
// [6] Input: "huh" Error: unexpected input (expected a Dutch postcode) at start of file
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file
// [8] Input: "\xcd2222AB" Error: unexpected character '<27>' (expected a Dutch postcode) at start of file
// [8] Input: "\xcd2222AB" Error: unexpected input (expected a Dutch postcode) at start of file
}
// ---------------------------------------------------------------------------

View File

@ -24,6 +24,7 @@ import (
func Example_helloWorldUsingParser1() {
for i, input := range []string{
"Oh!",
"Hello, world!",
"HELLO ,Johnny!",
"hello , Bob123!",
@ -50,17 +51,17 @@ func Example_helloWorldUsingParser1() {
// [0] Input: "Hello, world!" Output: world
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Error: unexpected character 'P' (expected comma)
// [3] Input: "hello Pizza!" Error: unexpected input (expected comma)
// [4] Input: "" Error: unexpected end of file (expected hello)
// [5] Input: " " Error: unexpected character ' ' (expected hello)
// [5] Input: " " Error: unexpected input (expected hello)
// [6] Input: "hello" Error: unexpected end of file (expected comma)
// [7] Input: "hello," Error: unexpected end of file (expected name)
// [8] Input: "hello , " Error: unexpected end of file (expected name)
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected exclamation)
// [10] Input: "hello , Droopy!" Output: Droopy
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
// [13] Input: "hello,!" Error: unexpected character '!' (expected name)
// [12] Input: "Oh no!" Error: unexpected input (expected hello)
// [13] Input: "hello,!" Error: unexpected input (expected name)
}
// ---------------------------------------------------------------------------

View File

@ -37,9 +37,9 @@ func Example_helloWorldUsingTokenizer() {
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at start of file
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at start of file
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at start of file
// [4] Input: "Oh no!" Error: unexpected input (expected a friendly greeting) at start of file
// [5] Input: "Hello, world" Error: unexpected input (expected a friendly greeting) at start of file
// [6] Input: "Hello,!" Error: unexpected input (expected a friendly greeting) at start of file
}
// ---------------------------------------------------------------------------

View File

@ -3,7 +3,6 @@ package parsekit
import (
"fmt"
"io"
"strings"
)
// ParseAPI holds the internal state of a parse run and provides an API to
@ -29,20 +28,15 @@ func (p *ParseAPI) panicWhenStoppedOrInError() {
return
}
called, _ := getCaller(1)
parts := strings.Split(called, ".")
calledShort := parts[len(parts)-1]
_, filepos := getCaller(2)
called := callerFunc(1)
after := "Error()"
if p.stopped {
after = "Stop()"
}
panic(fmt.Sprintf(
"parsekit.ParseAPI.%s(): Illegal call to %s() at %s: "+
"no calls allowed after ParseAPI.%s",
calledShort, calledShort, filepos, after))
callerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
"no calls allowed after ParseAPI.%s", called, called, after)
}
func (p *ParseAPI) isStoppedOrInError() bool {
@ -54,9 +48,9 @@ func (p *ParseAPI) initLoopCheck() {
}
func (p *ParseAPI) checkForLoops() {
_, filepos := getCaller(2)
filepos := callerFilepos(2)
if _, ok := p.loopCheck[filepos]; ok {
panic(fmt.Sprintf("parsekit.ParseAPI: Loop detected in parser at %s", filepos))
callerPanic(2, "parsekit.ParseAPI: Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
@ -65,9 +59,9 @@ func (p *ParseAPI) checkForLoops() {
// TokenHandler. On must be chained with another method that tells the parser
// what action to perform when a match was found:
//
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
// 1) On(...).Skip() - Move read cursor forward, ignoring the match results.
//
// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer.
// 2) On(...).Accept() - Move cursor, making results available through Result()
//
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
//
@ -93,18 +87,15 @@ func (p *ParseAPI) checkForLoops() {
// p.RouteTo(stateHandlerC)
// }
//
// // When there's a "hi" on input, then say hello.
// if p.On(parsekit.C.Str("hi")).Accept() {
// fmt.Println("Hello!")
// // Echo back a sequence of digits on the input.
// if p.On(parsekit.A.Digits).Accept() {
// fmt.Println(p.Result().String())
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
p.panicWhenStoppedOrInError()
p.checkForLoops()
if tokenHandler == nil {
_, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.ParseAPI.On(): On() called with nil "+
"tokenHandler argument at %s", filepos))
callerPanic(1, "parsekit.ParseAPI.On(): On() called with nil tokenHandler argument at {caller}")
}
p.result = nil
@ -127,9 +118,9 @@ type ParseAPIOnAction struct {
ok bool
}
// Accept tells the parser to move the cursor past a match that was found,
// and to make the TokenResult from the TokenAPI available in the ParseAPI
// through the Result() method.
// Accept tells the parser to move the read cursor past a match that was
// found, and to make the TokenResult from the TokenAPI available in the
// ParseAPI through the ParseAPI.Result() method.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
@ -198,10 +189,8 @@ func (a *ParseAPIOnAction) flushReader() {
func (p *ParseAPI) Result() *TokenResult {
result := p.result
if p.result == nil {
_, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.ParseAPI.TokenResult(): TokenResult() called at %s without "+
"calling ParseAPI.Accept() on beforehand", filepos))
callerPanic(1, "parsekit.ParseAPI.TokenResult(): TokenResult() called "+
"at {caller} without calling ParseAPI.Accept() on beforehand")
}
return result
}
@ -221,8 +210,7 @@ func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
if parseHandler == nil {
_, filepos := getCaller(2)
panic(fmt.Sprintf("parsekit.ParseAPI.Handle(): Handle() called with nil input at %s", filepos))
callerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
}
}
@ -286,19 +274,19 @@ func (p *ParseAPI) ExpectEndOfFile() {
// unexpected input was encountered.
//
// It can automatically produce an error message for a couple of situations:
// 1) input simply didn't match the expectation
// 1) the input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an invalid UTF8 character on the input.
// 3) there was an error while reading the input.
//
// The parser implementation can provide some feedback for this error by
// calling ParseAPI.Expects() to set the expectation. When set, the
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() {
p.panicWhenStoppedOrInError()
r, err := p.tokenAPI.NextRune()
_, err := p.tokenAPI.NextRune()
switch {
case err == nil:
p.Error("unexpected character %q%s", r, fmtExpects(p))
p.Error("unexpected input%s", fmtExpects(p))
case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(p))
default:

View File

@ -1,8 +1,6 @@
package parsekit
import (
"fmt"
"runtime"
"strings"
)
@ -30,8 +28,7 @@ type ParseHandler func(*ParseAPI)
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
if startHandler == nil {
_, filepos := getCaller(1)
panic(fmt.Sprintf("parsekit.NewParser(): NewParser() called with nil input at %s", filepos))
callerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
}
return &Parser{startHandler: startHandler}
}
@ -44,21 +41,14 @@ func (p *Parser) Execute(input string) *Error {
loopCheck: map[string]bool{},
}
if api.Handle(p.startHandler) {
// Handle indicated that parsing could still continue, meaning that there
// was no error and that the parsing has not actively been Stop()-ed.
// However, at this point, the parsing really should have stopped.
// We'll see what happens when we tell the parser that EOF was expected.
// This might work if we're indeed at EOF. Otherwise, an error will be
// generated.
api.ExpectEndOfFile()
// Handle returned true, indicating that parsing could still continue.
// There was no error and the parsing has not actively been Stop()-ed.
// Let's try to make the best of it.
if api.expecting != "" {
api.UnexpectedInput()
} else {
api.ExpectEndOfFile()
}
}
return api.err
}
// getCaller returns the fully qualified function name and the "file:line"
// position for a frame on the current call stack. With depth 0 this is the
// function that called getCaller, with depth 1 the caller of that function,
// and so on.
//
// The ok result of runtime.Caller is deliberately ignored, because this
// helper is only called from within the package, using safe depth values.
func getCaller(depth int) (string, string) {
	// The +1 skips the stack frame of getCaller itself.
	pc, path, lineno, _ := runtime.Caller(depth + 1)
	return runtime.FuncForPC(pc).Name(), path + ":" + fmt.Sprint(lineno)
}

View File

@ -30,26 +30,18 @@ func ExampleParser_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := parsekit.C, parsekit.A, parsekit.T
var tokens []*parsekit.Token
var accepted string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
tokens = p.Result().Tokens()
accepted = p.Result().String()
if p.On(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))).Accept() {
fmt.Printf("Runes accepted: %q\n", p.Result().String())
fmt.Printf("Token values: %s\n", p.Result().Tokens())
}
p.ExpectEndOfFile()
})
parser.Execute(Any will dö!")
parser.Execute(ök!")
fmt.Printf("Runes accepted: %q\n", accepted)
fmt.Printf("Token values: ")
for _, t := range tokens {
fmt.Printf("%c ", t.Value)
}
// Output:
// Runes accepted: "¡Any will dö!"
// Token values: ¡ A n y w i l l d ö !
// Runes accepted: "¡ök!"
// Token values: RUNE(int32:161) RUNE(int32:246) RUNE(int32:107) RUNE(int32:33)
}
func ExampleParseAPI_UnexpectedInput() {
@ -61,7 +53,7 @@ func ExampleParseAPI_UnexpectedInput() {
fmt.Println(err.Full())
// Output:
// unexpected character 'W' (expected a thing) at start of file
// unexpected input (expected a thing) at start of file
}
func ExampleParseAPIOnAction_Accept() {
@ -151,7 +143,7 @@ func ExampleParseAPI_Stop_notCalledButInputPending() {
// Output:
// First word: Input
// Error: unexpected character ' ' (expected end of file) at line 1, column 6
// Error: unexpected input (expected end of file) at line 1, column 6
}
func ExampleParseAPIOnAction_Stay() {
@ -265,7 +257,7 @@ func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKic
func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
err := p.Execute("x")
parsekit.AssertEqual(t, "unexpected character 'x' (expected end of file) at start of file", err.Full(), "err")
parsekit.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Full(), "err")
}
type parserWithLoop struct {

View File

@ -88,10 +88,8 @@ func NewTokenAPI(r io.Reader) *TokenAPI {
// without explicitly accepting, this method will panic.
func (i *TokenAPI) NextRune() (rune, error) {
if i.result.lastRune != nil {
_, linepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+
"prior call to Accept()", linepos))
callerPanic(1, "parsekit.TokenAPI.NextRune(): NextRune() called at {caller} "+
"without a prior call to Accept()")
}
i.detachChilds()
@ -107,15 +105,9 @@ func (i *TokenAPI) NextRune() (rune, error) {
// returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() {
if i.result.lastRune == nil {
_, linepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called at %s without "+
"first calling NextRune()", linepos))
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
_, linepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+
"prior call to NextRune() failed", linepos))
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
@ -167,10 +159,7 @@ func (i *TokenAPI) Fork() *TokenAPI {
// This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() {
if i.parent == nil {
_, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Merge(): Merge() called at %s "+
"on a non-forked TokenAPI", filepos))
callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
}
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)

View File

@ -40,8 +40,8 @@ func ExampleTokenAPI_Fork() {
// Output:
// abcd <nil>
// abcd <nil>
// <nil> unexpected character 'a' (expected abcd)
// <nil> unexpected character 'x' (expected abcd)
// <nil> unexpected input (expected abcd)
// <nil> unexpected input (expected abcd)
}
func ExampleTokenAPI_Merge() {

View File

@ -184,9 +184,9 @@ var A = struct {
Pipe: MatchRune('|'),
CurlyClose: MatchRune('}'),
Tilde: MatchRune('~'),
Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
Whitespace: MatchWhitespace(),
WhitespaceAndNewlines: MatchWhitespaceAndNewlines(),
EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
@ -195,15 +195,50 @@ var A = struct {
IntegerBetween: MatchIntegerBetween,
Float: MatchFloat(),
Boolean: MatchBoolean(),
ASCII: MatchRuneRange('\x00', '\x7F'),
ASCIILower: MatchRuneRange('a', 'z'),
ASCIIUpper: MatchRuneRange('A', 'Z'),
HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
ASCII: MatchASCII(),
ASCIILower: MatchASCIILower(),
ASCIIUpper: MatchASCIIUpper(),
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
IPv4: MatchIPv4(),
IPv4MaskBits: MatchIntegerBetween(0, 32),
}
// M provides convenient access to a range of modifiers (which in their nature are
// parser/combinators) that can be used when creating TokenHandler functions.
//
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
// resulting output of another TokenHandler in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
// var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field of M holds the package-level function that carries the same
// name with a "Modify" prefix: M.Drop is ModifyDrop, M.Trim is ModifyTrim,
// and so on. Refer to the documentation of those functions for details.
var M = struct {
// The field types below spell out the signature of each Modify* function.
Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
ByCallback func(TokenHandler, func(string) string) TokenHandler
}{
// Bind every field to its Modify* implementation.
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ByCallback: ModifyByCallback,
}
// T provides convenient access to a range of Token producers (which in their
// nature are parser/combinators) that can be used when creating TokenHandler
// functions.
@ -254,8 +289,7 @@ var T = struct {
ByCallback: MakeTokenByCallback,
}
// MatchRune creates a TokenHandler function that checks if the next rune from
// the input matches the provided rune.
// MatchRune creates a TokenHandler function that matches against the provided rune.
func MatchRune(expected rune) TokenHandler {
return func(t *TokenAPI) bool {
input, err := t.NextRune()
@ -267,8 +301,8 @@ func MatchRune(expected rune) TokenHandler {
}
}
// MatchRunes creates a TokenHandler function that that checks if the next rune
// from the input is one of the provided runes.
// MatchRunes creates a TokenHandler function that checks if the input matches
// one of the provided runes.
func MatchRunes(expected ...rune) TokenHandler {
s := string(expected)
return func(t *TokenAPI) bool {
@ -283,17 +317,16 @@ func MatchRunes(expected ...rune) TokenHandler {
}
}
// MatchRuneRange creates a TokenHandler function that that checks if the next rune
// from the input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
// MatchRuneRange creates a TokenHandler function that checks if the input
// matches the provided rune range. The rune range is defined by a start and
// an end rune, inclusive, so:
//
// MatchRuneRange('g', 'k')
//
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRuneRange(start rune, end rune) TokenHandler {
if end < start {
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
}
return func(t *TokenAPI) bool {
input, err := t.NextRune()
@ -305,8 +338,28 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
}
}
// MatchStr creates a TokenHandler that will check if the upcoming runes on the
// input match the provided string.
// MatchWhitespace creates a TokenHandler that matches a run of one or more
// whitespace characters on the input, whitespace meaning spaces and tabs.
//
// Newlines are not matched by this handler. When whitespace matching must
// include newlines as well, use MatchWhitespaceAndNewlines() instead.
func MatchWhitespace() TokenHandler {
	space, tab := MatchRune(' '), MatchRune('\t')
	return MatchOneOrMore(MatchAny(space, tab))
}
// MatchWhitespaceAndNewlines creates a TokenHandler that matches a run of
// one or more whitespace and/or newline characters on the input: spaces,
// tabs and newlines (both "\r\n" and "\n").
func MatchWhitespaceAndNewlines() TokenHandler {
	space := MatchRune(' ')
	tab := MatchRune('\t')
	crlf := MatchStr("\r\n")
	lf := MatchRune('\n')
	return MatchOneOrMore(MatchAny(space, tab, crlf, lf))
}
// MatchEndOfLine creates a TokenHandler that matches the end of a line,
// which is either a newline ("\r\n" or "\n") or the end of the file.
func MatchEndOfLine() TokenHandler {
	crlf, lf, eof := MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()
	return MatchAny(crlf, lf, eof)
}
// MatchStr creates a TokenHandler that matches the input against the provided string.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) TokenHandler {
var handlers = []TokenHandler{}
@ -316,8 +369,8 @@ func MatchStr(expected string) TokenHandler {
return MatchSeq(handlers...)
}
// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes
// on the input match the provided string in a case-insensitive manner.
// MatchStrNoCase creates a TokenHandler that matches the input against the
// provided string in a case-insensitive manner.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) TokenHandler {
var handlers = []TokenHandler{}
@ -331,7 +384,8 @@ func MatchStrNoCase(expected string) TokenHandler {
// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
// When the provided TokenHandler applies, then its output is used, otherwise
// no output is generated but still a successful match is reported.
// no output is generated but still a successful match is reported (but the
// result will be empty).
func MatchOpt(handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool {
child := t.Fork()
@ -410,7 +464,7 @@ func MatchRep(times int, handler TokenHandler) TokenHandler {
// When more matches are possible, these will be included in the output.
func MatchMin(min int, handler TokenHandler) TokenHandler {
if min < 0 {
panic("TokenHandler bug: MatchMin definition error: min must be >= 0")
callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, -1, handler, "MatchMin")
}
@ -421,7 +475,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
// Zero matches are considered a successful match.
func MatchMax(max int, handler TokenHandler) TokenHandler {
if max < 0 {
panic("TokenHandler bug: MatchMax definition error: max must be >= 0")
callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
}
return matchMinMax(0, max, handler, "MatchMax")
}
@ -444,17 +498,17 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler {
// inclusive. All matches will be included in the output.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
if max < 0 {
panic("TokenHandler bug: MatchMinMax definition error: max must be >= 0")
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
}
if min < 0 {
panic("TokenHandler bug: MatchMinMax definition error: min must be >= 0")
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, max, handler, "MatchMinMax")
}
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
if max >= 0 && min > max {
panic(fmt.Sprintf("TokenHandler bug: %s definition error: max %d must not be < min %d", name, max, min))
callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
}
return func(t *TokenAPI) bool {
child := t.Fork()
@ -592,15 +646,43 @@ func MatchFloat() TokenHandler {
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
}
// MatchBoolean creates a TokenHandler function that checks if a valid boolean
// MatchBoolean creates a TokenHandler function that checks if a boolean
// value can be read from the input. The accepted values are the ones that
// are understood by Go's strconv.ParseBool() function.
//
// True values: true, TRUE, True, 1, t, T
//
// False values: false, FALSE, False, 0, f, F
func MatchBoolean() TokenHandler {
	// oneOf builds a handler that tries the words first, followed by the
	// single runes. The full words must come before the single letters,
	// so that "true" is not cut short by a match on just 't'.
	oneOf := func(words []string, runes []rune) TokenHandler {
		options := make([]TokenHandler, 0, len(words)+len(runes))
		for _, word := range words {
			options = append(options, MatchStr(word))
		}
		for _, r := range runes {
			options = append(options, MatchRune(r))
		}
		return MatchAny(options...)
	}
	truthy := oneOf([]string{"true", "TRUE", "True"}, []rune{'1', 't', 'T'})
	falsy := oneOf([]string{"false", "FALSE", "False"}, []rune{'0', 'f', 'F'})
	return MatchAny(truthy, falsy)
}
// MatchASCII creates a TokenHandler function that matches a single ASCII
// character on the input, being any rune from '\x00' up to and including
// '\x7F'.
func MatchASCII() TokenHandler {
	const first, last = '\x00', '\x7F'
	return MatchRuneRange(first, last)
}
// MatchASCIILower creates a TokenHandler function that matches a single
// lower case ASCII letter on the input (a - z).
func MatchASCIILower() TokenHandler {
	const first, last = 'a', 'z'
	return MatchRuneRange(first, last)
}
// MatchASCIIUpper creates a TokenHandler function that matches a single
// upper case ASCII letter on the input (A - Z).
func MatchASCIIUpper() TokenHandler {
	const first, last = 'A', 'Z'
	return MatchRuneRange(first, last)
}
// MatchHexDigit creates a TokenHandler function that checks if a single
// hexadecimal digit can be read from the input. Both lower case and upper
// case letters are accepted (0 - 9, a - f, A - F).
func MatchHexDigit() TokenHandler {
	decimal := MatchRuneRange('0', '9')
	lower := MatchRuneRange('a', 'f')
	upper := MatchRuneRange('A', 'F')
	return MatchAny(decimal, lower, upper)
}
// MatchOctet creates a TokenHandler function that checks if a valid octet value
// can be read from the input (octet = byte value representation, with a value
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
@ -610,25 +692,25 @@ func MatchBoolean() TokenHandler {
// When the normalize parameter is set to true, then leading zeroes will be
// stripped from the octet.
func MatchOctet(normalize bool) TokenHandler {
digits := MatchMinMax(1, 3, MatchDigit())
max3Digits := MatchMinMax(1, 3, MatchDigit())
return func(t *TokenAPI) bool {
fork := t.Fork()
if !digits(fork) {
if !max3Digits(fork) {
return false
}
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
if value <= 255 {
if normalize {
runes := fork.Result().Runes()
for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:]
}
fork.Result().SetRunes(runes)
}
fork.Merge()
return true
if value > 255 {
return false
}
return false
if normalize {
runes := fork.Result().Runes()
for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:]
}
fork.Result().SetRunes(runes)
}
fork.Merge()
return true
}
}
@ -642,41 +724,6 @@ func MatchIPv4() TokenHandler {
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
}
// M provides convenient access to a range of modifiers (which in their nature are
// parser/combinators) that can be used when creating TokenHandler functions.
//
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
// resulting output of another TokenHandler in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
// var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field of M holds the package-level function that carries the same
// name with a "Modify" prefix: M.Drop is ModifyDrop, M.Trim is ModifyTrim,
// and so on. Refer to the documentation of those functions for details.
var M = struct {
// The field types below spell out the signature of each Modify* function.
Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
ByCallback func(TokenHandler, func(string) string) TokenHandler
}{
// Bind every field to its Modify* implementation.
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ByCallback: ModifyByCallback,
}
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is discarded completely.
//
@ -970,6 +1017,7 @@ func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
value, err := convert(t.Result().String())
if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
panic(fmt.Sprintf(
"TokenHandler error: %s cannot handle input %q: %s "+
"(only use a type conversion token maker, when the input has been "+

View File

@ -70,18 +70,18 @@ func TestCombinators(t *testing.T) {
func TestCombinatorPanics(t *testing.T) {
var c, a = parsekit.C, parsekit.A
parsekit.AssertPanics(t, []parsekit.PanicT{
{func() { a.RuneRange('z', 'a') }, false,
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
{func() { c.MinMax(1, -1, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
{func() { c.MinMax(10, 5, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
{func() { c.Min(-10, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
{func() { c.Max(-42, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
{func() { a.RuneRange('z', 'a') }, true,
`TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.MinMax(1, -1, parsekit.A.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { c.MinMax(10, 5, parsekit.A.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
{func() { c.Min(-10, parsekit.A.Space) }, true,
`TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.Max(-42, parsekit.A.Space) }, true,
`TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
})
}

View File

@ -1,12 +1,55 @@
package parsekit
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
)
// TODO For error handling, it would be really cool if for example the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// Biggest thing to take care of here, is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using parsekit.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
// ExampleTokenizer_Execute shows how a Tokenizer built from a TokenHandler
// is run against a series of inputs, and how its result or error is used.
func ExampleTokenizer_Execute() {
	// A CIDR is an IPv4 address, a slash and a number of mask bits. The
	// address and the mask bits are each turned into a named token.
	ipToken := T.Str("ip", A.IPv4)
	maskToken := T.Int8("mask", A.IPv4MaskBits)
	tokenizer := NewTokenizer(C.Seq(ipToken, A.Slash, maskToken), "cidr")

	inputs := []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	}
	for _, input := range inputs {
		// A non-nil error means that the input did not match the handler.
		result, err := tokenizer.Execute(input)
		if err != nil {
			fmt.Printf("Error: %s\n", err)
			continue
		}
		fmt.Printf("Result: %s\n", result.Tokens())
	}
	// Output:
	// Result: ip(string:0.0.0.0) mask(int8:0)
	// Result: ip(string:192.168.0.1) mask(int8:24)
	// Result: ip(string:255.255.255.255) mask(int8:32)
	// Error: unexpected input (expected cidr)
	// Error: unexpected input (expected cidr)
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
AssertEqual(t, 'T', r, "first rune")
@ -31,8 +74,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
i.NextRune()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
`/.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
})
}
@ -40,8 +82,7 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
`at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
})
}
@ -52,8 +93,7 @@ func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
i.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
@ -65,8 +105,7 @@ func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
f.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
@ -78,8 +117,7 @@ func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
f.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
@ -127,8 +165,7 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
i.Accept()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
`at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
})
}

View File

@ -2,6 +2,7 @@ package parsekit
import (
"fmt"
"strings"
)
// Result holds results as produced by a TokenHandler.
@ -39,11 +40,16 @@ func (r *TokenResult) ClearRunes() {
// SetRunes replaces the Runes from the TokenResult with the provided input.
func (r *TokenResult) SetRunes(s interface{}) {
r.ClearRunes()
r.AddRunes(s)
r.addRunes(s)
}
// AddRunes is used to add runes to the TokenResult.
//
// The actual work is done by addRunes. This method must remain a single,
// direct call to it: addRunes reports unsupported input types through
// callerPanic(2, ...), which counts stack frames to find the code that
// called this exported method. An extra frame in between would make the
// panic message point at the wrong location.
func (r *TokenResult) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
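// addRunes holds the shared implementation behind AddRunes and SetRunes.
// It must be called directly from an exported method: the caller depth
// passed to callerPanic below assumes exactly one stack frame between
// addRunes and the user code that is reported in the panic message.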
func (r *TokenResult) addRunes(set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
@ -53,7 +59,7 @@ func (r *TokenResult) AddRunes(set ...interface{}) {
case rune:
r.runes = append(r.runes, s)
default:
panic(fmt.Sprintf("parsekit.TokenResult.SetRunes(): unsupported type '%T' used", s))
callerPanic(2, "parsekit.TokenResult.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
@ -91,8 +97,22 @@ func (r *TokenResult) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// SliceOfTokens is the []*Token type that is returned by the method Tokens().
// It exists to carry a String() method, which makes it easy to format the
// tokens as a string for testing / debugging purposes.
type SliceOfTokens []*Token

// String formats every token as "<type>(<Go type of value>:<value>)" and
// returns these, separated by a single space. An empty or nil slice yields
// an empty string.
func (ts SliceOfTokens) String() string {
	var out strings.Builder
	for idx, token := range ts {
		if idx > 0 {
			out.WriteByte(' ')
		}
		fmt.Fprintf(&out, "%v(%T:%v)", token.Type, token.Value, token.Value)
	}
	return out.String()
}
// Tokens retrieves the Tokens from the TokenResult.
func (r *TokenResult) Tokens() []*Token {
func (r *TokenResult) Tokens() SliceOfTokens {
return r.tokens
}

View File

@ -1,11 +1,12 @@
package parsekit
import (
"strings"
"testing"
)
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := mkInput()
i := NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string")
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
@ -17,9 +18,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i := NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes(1234567)
},
Expect: "parsekit.TokenResult.SetRunes(): unsupported type 'int' used",
Regexp: true,
Expect: `parsekit\.TokenResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
})
}