diff --git a/examples_test.go b/examples_test.go
new file mode 100644
index 0000000..1e13399
--- /dev/null
+++ b/examples_test.go
@@ -0,0 +1,91 @@
+package parsekit_test
+
+import (
+	"fmt"
+
+	"git.makaay.nl/mauricem/go-parsekit"
+)
+
+func ExampleItemType() {
+	// Make use of non-negative values. Ideally, define your ItemTypes using
+	// iota for easy automatic value management like this:
+	const (
+		ItemWord parsekit.ItemType = iota
+		ItemNumber
+		ItemBlob
+		// ...
+	)
+}
+
+func ExampleError() {
+	err := parsekit.Error{
+		Message: "it broke down",
+		Line:    10,
+		Column:  42}
+
+	fmt.Println(err.Error())
+	fmt.Println(err.ErrorFull())
+	// Output:
+	// it broke down
+	// it broke down after line 10, column 42
+}
+
+func ExampleMatchAnyRune() {
+	// Easy access to the parsekit definitions.
+	var a = parsekit.A
+
+	handler := func(p *parsekit.P) {
+		p.Expects("Any valid rune")
+		if p.On(a.AnyRune).Accept().End() {
+			p.EmitLiteral(TestItem)
+			p.RouteRepeat()
+		}
+	}
+	parser := parsekit.NewParser(handler)
+	run := parser.Parse("¡Any / valid / character will dö!")
+
+	for i := 0; i < 5; i++ {
+		match, _, _ := run.Next()
+		fmt.Printf("Match = %q\n", match.Value)
+	}
+	// Output:
+	// Match = "¡"
+	// Match = "A"
+	// Match = "n"
+	// Match = "y"
+	// Match = " "
+}
+
+func ExampleModifyToUpper() {
+	// Easy access to the parsekit definitions.
+	var c, a, m = parsekit.C, parsekit.A, parsekit.M
+
+	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
+	// The numbers never start with a zero.
+	digitNotZero := c.Except(c.Rune('0'), a.Digit)
+	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
+
+	// It is good form to write the letters in upper case.
+	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
+	letters := m.ToUpper(c.Seq(letter, letter))
+
+	// It is good form to use a single space between letters and numbers,
+	// but it is not mandatory.
+	space := m.Replace(c.Opt(a.Whitespace), " ")
+
+	// With all the building blocks, we can now build the postcode parser.
+	postcode := c.Seq(numbers, space, letters)
+
+	// Create a parser and let it parse some postcode inputs.
+	// This will print "1234 AB" for every input, because of the built-in normalization.
+	p := parsekit.NewMatcherWrapper(postcode)
+	for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} {
+		output, _, _ := p.Match(input)
+		fmt.Printf("Input: %q, output: %q\n", input, output)
+	}
+	// Output:
+	// Input: "1234 AB", output: "1234 AB"
+	// Input: "1234Ab", output: "1234 AB"
+	// Input: "1234 ab", output: "1234 AB"
+	// Input: "1234ab", output: "1234 AB"
+}
diff --git a/matcher_builtin.go b/matcher_builtin.go
index da22837..107a755 100644
--- a/matcher_builtin.go
+++ b/matcher_builtin.go
@@ -31,7 +31,8 @@ var C = struct {
 	ZeroOrMore func(Matcher) Matcher
 	OneOrMore  func(Matcher) Matcher
 	MinMax     func(int, int, Matcher) Matcher
-	Separated  func(Matcher, Matcher) Matcher
+	Separated  func(separator Matcher, separated Matcher) Matcher
+	Except     func(except Matcher, matcher Matcher) Matcher
 }{
 	Rune:       MatchRune,
 	Runes:      MatchRunes,
@@ -49,6 +50,7 @@ var C = struct {
 	OneOrMore:  MatchOneOrMore,
 	MinMax:     MatchMinMax,
 	Separated:  MatchSeparated,
+	Except:     MatchExcept,
 }
 
 // MatchRune creates a Matcher function that checks if the next rune from
@@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
 // Matchers of one type (the separated), separated by Matches of another type
 // (the separator). All matches (separated + separator) are included in the
 // output.
-func MatchSeparated(separated Matcher, separator Matcher) Matcher {
+func MatchSeparated(separator Matcher, separated Matcher) Matcher {
 	return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
 }
 
+// MatchExcept creates a Matcher that checks if the provided matcher can be
+// applied to the upcoming input. It also checks if the except Matcher can be
+// applied. If the matcher applies, but the except Matcher too, then the match
+// as a whole will be treated as a mismatch.
+func MatchExcept(except Matcher, matcher Matcher) Matcher {
+	return func(m *MatchDialog) bool {
+		if except(m.Fork()) {
+			return false
+		}
+		return matcher(m)
+	}
+}
+
 // A provides convenient access to a range of atoms that can be used to
 // build combinators or parsing rules.
 //
diff --git a/matcher_builtin_test.go b/matcher_builtin_test.go
index b12c123..59878f0 100644
--- a/matcher_builtin_test.go
+++ b/matcher_builtin_test.go
@@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
 		{"ghijkl", c.Opt(c.Rune('h')), true, ""},
 		{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
 		{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
-		{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
+		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
 		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
 		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
 		{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
@@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
 		m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
 			return fmt.Sprintf("%d", len(s))
 		}),
-		m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
+		m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
 		m.ToUpper(c.Min(1, a.ASCIILower)),
 		m.Drop(a.Excl),
 		c.Rep(3, a.AngleOpen),
@@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
 		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
 	)
 	input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-	parser := parsekit.New(func(p *parsekit.P) {
+	parser := parsekit.NewParser(func(p *parsekit.P) {
 		p.Expects("Sequence of runes")
 		if p.On(sequence).Accept().End() {
 			p.EmitLiteral(TestItem)
@@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
 		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
 	}
 }
-
-func ExampleMatchAnyRune() {
-	handler := func(p *parsekit.P) {
-		p.Expects("Any valid rune")
-		if p.On(a.AnyRune).Accept().End() {
-			p.EmitLiteral(TestItem)
-		}
-	}
-	parser := parsekit.New(handler)
-	run := parser.Parse("¡Any / valid / character will dö!")
-	match, _, ok := run.Next()
-
-	// This will output '¡', since a.AnyRune matches exactly 1 rune.
-	if ok {
-		fmt.Printf("Match = %q\n", match)
-	}
-}
-
-func ExampleModifyToUpper() {
-	// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
-	// The numbers never start with a zero.
-	digitNotZero := c.RuneRange('1', '9')
-	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
-
-	// It is good form to write the letters in upper case.
-	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
-	letters := m.ToUpper(c.Seq(letter, letter))
-
-	// It is good form to use a single space between letters and numbers,
-	// but it is not mandatory.
-	space := m.Replace(c.Opt(a.Whitespace), " ")
-
-	// With all the building blocks, we can now build the postcode parser.
-	postcode := c.Seq(numbers, space, letters)
-
-	// Create a parser and let is parse some postcode inputs.
-	// This will print "1234 AB" for every input, because of the built-in normalization.
-	p := parsekit.New(postcode)
-	for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
-		r, _, _ := p.Parse("1234 AB").Next()
-		fmt.Printf("Input: %q, output: %q", input, r.Value)
-	}
-}
diff --git a/parsekit.go b/parsekit.go
index 8990542..0883e23 100644
--- a/parsekit.go
+++ b/parsekit.go
@@ -7,63 +7,25 @@ import (
 )
 
 // Parser is the top-level struct that holds the configuration for a parser.
-// The Parser can be instantiated using the parsekit.New() method.
-//
-// To start parsing input data, use the method Parser.Parse().
+// The Parser can be instantiated using the parsekit.NewParser() method.
 type Parser struct {
 	startState StateHandler // the function that handles the very first state
 }
 
-// New instantiates a new Parser.
-// The logic parameter provides the parsing logic to apply. This can be:
+// NewParser instantiates a new Parser.
 //
-// 1) A StateHandler function: in this case, a state machine-style
-// recursive descent parser is created, in which StateHandler functions
-// are used to move the state machine forward during parsing.
-// This type of parser offers a lot of flexibility and it is possible to
-// emit multiple items from the parse flow.
-//
-// This style of parser is typically used for parsing languages and
+// The Parser is a state machine-style recursive descent parser, in which
+// StateHandler functions are used to move the state machine forward during
+// parsing. This style of parser is typically used for parsing languages and
 // structured data formats (like json, toml, etc.)
 //
-// 2) A Matcher function: in this case, a parser/combinator-style parser
-// is created, which can be used to match against the provided logic.
-// The parser can only check input against the Matcher function, and
-// reports back a successful match or a failure.
-//
-// This style of parser can typically be used for validation and normalization
-// of input data. However, when you are about to use parsekit for that
-// task, consider using regular expressions instead. They might serve
-// you better.
-func New(logic interface{}) *Parser {
-	switch logic := logic.(type) {
-	case func(*P):
-		return makeParserForStateHandler(logic)
-	case StateHandler:
-		return makeParserForStateHandler(logic)
-	case func(m *MatchDialog) bool:
-		return makeParserForMatcher(logic)
-	case Matcher:
-		return makeParserForMatcher(logic)
-	default:
-		panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
-	}
-}
-
-func makeParserForStateHandler(handler StateHandler) *Parser {
-	return &Parser{startState: handler}
-}
-
-func makeParserForMatcher(matcher Matcher) *Parser {
-	return New(StateHandler(func(p *P) {
-		p.Expects("match")
-		if p.On(matcher).Accept().RouteRepeat().End() {
-			p.EmitLiteral(MatchedItem)
-		}
-	}))
+// To start parsing input data, use the method Parser.Parse().
+func NewParser(startState StateHandler) *Parser {
+	return &Parser{startState: startState}
 }
 
 // Run represents a single parse run for a Parser.
+// TODO rename to ParseRun
 type Run struct {
 	p *P // a struct holding the internal state of a parse run
 }
@@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
 
 func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
 	switch {
-	case i.Type == ItemEOF:
+	case i.Type == itemEOF:
 		return i, nil, false
-	case i.Type == ItemError:
+	case i.Type == itemError:
 		run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
 		return i, run.p.err, false
 	default:
@@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
 	run.p.expecting = ""
 	run.p.state(run.p)
 }
+
+// MatcherWrapper is the top-level struct that holds the configuration for
+// a parser that is based solely on a Matcher function.
+// The MatcherWrapper can be instantiated using the
+// parsekit.NewMatcherWrapper() method.
+//
+// To match input data against the wrapped Matcher function, use the method
+// MatcherWrapper.Match().
+type MatcherWrapper struct {
+	parser *Parser
+}
+
+// NewMatcherWrapper instantiates a new MatcherWrapper.
+//
+// This is a simple wrapper around a Matcher function. It can be used to
+// match an input string against that Matcher function and retrieve the
+// results in a straightforward way.
+func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
+	handler := func(p *P) {
+		p.Expects("match")
+		if p.On(matcher).Accept().End() {
+			p.EmitLiteral(0) // ItemType is irrelevant
+		}
+	}
+	return &MatcherWrapper{parser: NewParser(handler)}
+}
+
+// Match runs the wrapped Matcher function against the provided input data.
+func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
+	item, err, ok := w.parser.Parse(input).Next()
+	if !ok {
+		return "", err, false
+	}
+	return item.Value, nil, true
+}
diff --git a/parsekit_test.go b/parsekit_test.go
index 19887e2..669efeb 100644
--- a/parsekit_test.go
+++ b/parsekit_test.go
@@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
 }
 
 func RunMatcherTest(t *testing.T, test MatcherTest) {
-	parser := parsekit.New(test.matcher).Parse(test.input)
-	item, err, ok := parser.Next()
+	output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
 
 	if test.mustMatch {
 		if !ok {
 			t.Errorf("Test %q failed with error: %s", test.input, err)
-		} else if item.Type != parsekit.MatchedItem {
-			t.Errorf("Test %q failed: should match, but it didn't", test.input)
-		} else if item.Value != test.expected {
-			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
+		} else if output != test.expected {
+			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
 		}
 	} else {
 		if ok {
diff --git a/statehandler.go b/statehandler.go
index d8b91bf..7b84fe8 100644
--- a/statehandler.go
+++ b/statehandler.go
@@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
 	p.RouteTo(func(p *P) {
 		p.Expects("end of file")
 		if p.On(A.EndOfFile).Stay().End() {
-			p.Emit(ItemEOF, "EOF")
+			p.Emit(itemEOF, "EOF")
 		}
 	})
 }
diff --git a/statehandler_emit.go b/statehandler_emit.go
index 8d5ff28..e598bec 100644
--- a/statehandler_emit.go
+++ b/statehandler_emit.go
@@ -5,19 +5,19 @@ import (
 )
 
 // ItemType represents the type of a parser Item.
+//
+// When creating your own ItemType values, make use of non-negative integer
+// values. Negative values are possible, but they are reserved for internal
+// use by parsekit.
 type ItemType int
 
-// ItemEOF is a built-in parser item type that is used for flagging that the
+// itemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
-const ItemEOF ItemType = -1
+const itemEOF ItemType = -1
 
-// ItemError is a built-in parser item type that is used for flagging that
+// itemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
-const ItemError ItemType = -2
-
-// MatchedItem is a built-in parser item type that is used for indicating a
-// successful match when using a parser that is based on a Matcher.
-const MatchedItem ItemType = -3
+const itemError ItemType = -2
 
 // Item represents an item that can be emitted from the parser.
 type Item struct {
@@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
 // EmitError emits a Parser error item to the client.
 func (p *P) EmitError(format string, args ...interface{}) {
 	message := fmt.Sprintf(format, args...)
-	p.Emit(ItemError, message)
+	p.Emit(itemError, message)
 }
 
 // UnexpectedInput is used by a StateHandler function to emit an error item