diff --git a/example_dutchpostcode_test.go b/example_dutchpostcode_test.go new file mode 100644 index 0000000..afe9348 --- /dev/null +++ b/example_dutchpostcode_test.go @@ -0,0 +1,55 @@ +// In this example, a parser is created which can parse and normalize Dutch postcodes. +package parsekit_test + +import ( + "fmt" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func createPostcodeMatcher() *parsekit.MatcherWrapper { + // Easy access to the parsekit definitions. + var c, a, m = parsekit.C, parsekit.A, parsekit.M + + // Matcher functions are created and combined to satisfy these rules: + // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). + // - The first digit is never a zero. + // - A space between letters and digits is optional. + // - It is good form to write the letters in upper case. + // - It is good form to use a single space between digits and letters. + var digitNotZero = c.Except(c.Rune('0'), a.Digit) + var pcDigits = c.Seq(digitNotZero, c.Rep(3, a.Digit)) + var pcLetter = c.Any(a.ASCIILower, a.ASCIIUpper) + var pcLetters = m.ToUpper(c.Seq(pcLetter, pcLetter)) + var space = m.Replace(c.Opt(a.Whitespace), " ") + var postcode = c.Seq(pcDigits, space, pcLetters) + + return parsekit.NewMatcher(postcode, "a Dutch postcode") +} + +func Example_dutchPostcodeUsingMatcher() { + pcParser := createPostcodeMatcher() + + for i, input := range []string{ + "1234 AB", + "2233Ab", + "1001\t\tab", + "1818ab", + "1234", + "huh", + } { + output, err, ok := pcParser.Parse(input) + if !ok { + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) + } else { + fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) + } + } + // Output: + // [0] Input: "1234 AB" Output: 1234 AB + // [1] Input: "2233Ab" Output: 2233 AB + // [2] Input: "1001\t\tab" Output: 1001 AB + // [3] Input: "1818ab" Output: 1818 AB + // [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) + // [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch 
postcode) +} diff --git a/examples_test.go b/examples_test.go index b2140bc..d579661 100644 --- a/examples_test.go +++ b/examples_test.go @@ -7,32 +7,84 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func Example_minimalAnnotated() { +func Example_helloWorldUsingParser() { +} + +func Example_helloWorldUsingMatcher() { + // In this example, a parser is created that is able to parse input that looks + // like "Hello, <name>!", and that extracts the name from it. + // The implementation uses only a Matcher function and does not implement a + // full-fledged state-based Parser for it. + + // Easy access to parsekit parser/combinators, atoms and modifiers. + var c, a, m = parsekit.C, parsekit.A, parsekit.M + + // Using the parser/combinator support of parsekit, we create a Matcher function + // that does all the work. The 'greeting' Matcher matches the whole input and + // drops all but the name from it. + var hello = c.StrNoCase("hello") + var comma = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace)) + var separator = c.Any(comma, a.Whitespace) + var name = c.OneOrMore(c.Not(a.Excl)) + var greeting = c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl)) + + // Now we can already do some parsing, by using a Matcher. + var parser = parsekit.NewMatcher(greeting, "a friendly greeting") + for i, input := range []string{ + "Hello, world!", + "HELLO ,Johnny!", + "hello , Bob123!", + "hello Pizza!", + "Oh no!", + "Hello, world", + } { + output, err, ok := parser.Parse(input) + if !ok { + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) + } else { + fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) + } + } + // Output: + // [0] Input: "Hello, world!" Output: world + // [1] Input: "HELLO ,Johnny!" Output: Johnny + // [2] Input: "hello , Bob123!" Output: Bob123 + // [3] Input: "hello Pizza!" Output: Pizza + // [4] Input: "Oh no!" 
Error: unexpected character 'O' (expected a friendly greeting) + // [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) +} + +func Example_basicCalculator() { // Let's write a small example for parsing a really basic calculator. // The calculator understands input that looks like: // // 10 + 20 - 8+4 // - // So positive numbers that can be either added or substracted, with - // optional whitespace around the operators. + // So positive numbers that can be either added or subtracted, and whitespace + // is ignored. - // Easy access to parser/combinators, atoms and modifiers. + // Easy access to parsekit parser/combinators, atoms and modifiers. var c, a, m = parsekit.C, parsekit.A, parsekit.M // When writing a parser, it's a good start to use the parser/combinator // functionality of parsekit to create some Matcher functions. These // functions can later be used in the parser state machine to find the // matching tokens on the input data. - var number = c.OneOrMore(a.Digit) + // + // In our case, we only need a definition of "number, surrounded by + // optional whitespace". Skipping whitespace could be a part of the + // StateHandler functions below too, but including it in a Matcher makes + // things really practical here. var whitespace = m.Drop(c.Opt(a.Whitespace)) - var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace) + var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace) // We also must define the types of items that the parser will emit. - // We only need two: for numbers and for operators. + // We only need three of them here, for numbers, plus and minus. // The recommended way to define these, is using 'iota' for auto numbering. const ( numberType parsekit.ItemType = iota - operatorType + addType + subtractType ) // Now it is time to define the state machine for parsing the input. 
@@ -41,12 +93,18 @@ func Example_minimalAnnotated() { // The P struct holds the internal state for the parser and it provides // some methods that form the API for your StateHandler implementation. // - // Note that normally you'd write normal functions and not anonymous + // (note that normally you'd write normal functions and not anonymous // functions like I did here. I had to use these to be able to write the - // example code. + // example code) var operatorHandler parsekit.StateHandler + // In this state, we expect a number. When a number is found on the input, + // it is accepted in the output buffer, after which the output buffer is + // emitted as a numberType item. Then we tell the state machine to continue + // with the operatorHandler state. + // When no number is found, the parser will emit an error, explaining that + // "a number" was expected. numberHandler := func(p *parsekit.P) { p.Expects("a number") if p.On(number).Accept().End() { @@ -55,11 +113,21 @@ func Example_minimalAnnotated() { } } + // In this state, we expect a plus or minus operator. When one of those + // is found, the appropriate Item is emitted and the parser is sent back + // to the numberHandler to find the next number on the input. + // When no operator is found, then the parser is told to expect the end of + // the input. When more input data is available (which is obviously wrong + // data since it does not match our syntax), the parser will emit an error. operatorHandler = func(p *parsekit.P) { - if p.On(operator).Accept().End() { - p.EmitLiteral(operatorType) + switch { + case p.On(a.Plus).Accept().End(): + p.EmitLiteral(addType) p.RouteTo(numberHandler) - } else { + case p.On(a.Minus).Accept().End(): + p.EmitLiteral(subtractType) + p.RouteTo(numberHandler) + default: p.ExpectEndOfFile() } } @@ -70,7 +138,7 @@ func Example_minimalAnnotated() { parser := parsekit.NewParser(numberHandler) // Let's feed the parser some input to work with. 
- run := parser.Parse("153+ 22+31 - 4- 6+42") + run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ") // We can now step through the results of the parsing process by repeated // calls to run.Next(). Next() returns either the next parse item, a parse @@ -91,12 +159,16 @@ func Example_minimalAnnotated() { default: fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value) switch { - case item.Type == operatorType && item.Value == "+": + case item.Type == addType: op = +1 - case item.Type == operatorType && item.Value == "-": + case item.Type == subtractType: op = -1 - default: - nr, _ := strconv.Atoi(item.Value) + case item.Type == numberType: + nr, err := strconv.Atoi(item.Value) + if err != nil { + fmt.Printf("Error: invalid number %s: %s\n", item.Value, err) + return + } sum += op * nr } } @@ -108,9 +180,9 @@ func Example_minimalAnnotated() { // Type: 0, Value: "22" // Type: 1, Value: "+" // Type: 0, Value: "31" - // Type: 1, Value: "-" + // Type: 2, Value: "-" // Type: 0, Value: "4" - // Type: 1, Value: "-" + // Type: 2, Value: "-" // Type: 0, Value: "6" // Type: 1, Value: "+" // Type: 0, Value: "42" @@ -118,76 +190,6 @@ func Example_minimalAnnotated() { // Outcome of computation: 238 } -func Example_minimal() { - // Let's write a small example for parsing a really basic calculator. - // The calculator understands input that looks like: - // - // 10 + 20 - 8+4 - // - // So positive numbers that can be either added or substracted, with - // optional whitespace around the operators. 
- - var c, a, m = parsekit.C, parsekit.A, parsekit.M - - var number = c.OneOrMore(a.Digit) - var whitespace = m.Drop(c.Opt(a.Whitespace)) - var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace) - - const ( - numberType parsekit.ItemType = iota - operatorType - ) - - var operatorHandler parsekit.StateHandler - - numberHandler := func(p *parsekit.P) { - p.Expects("a number") - if p.On(number).Accept().End() { - p.EmitLiteral(numberType) - p.RouteTo(operatorHandler) - } - } - - operatorHandler = func(p *parsekit.P) { - if p.On(operator).Accept().End() { - p.EmitLiteral(operatorType) - p.RouteTo(numberHandler) - } else { - p.ExpectEndOfFile() - } - } - - parser := parsekit.NewParser(numberHandler) - run := parser.Parse("153+ 22+31 - 4- 6+42") - - sum := 0 - op := +1 - for { - item, err, ok := run.Next() - switch { - case !ok && err == nil: - fmt.Println("Outcome of computation:", sum) - return - case !ok: - fmt.Printf("Error: %s\n", err) - return - default: - switch { - case item.Type == operatorType && item.Value == "+": - op = +1 - case item.Type == operatorType && item.Value == "-": - op = -1 - default: - nr, _ := strconv.Atoi(item.Value) - sum += op * nr - } - } - } - - // Output: - // Outcome of computation: 238 -} - func ExampleItemType() { // Make use of positive values. Ideally, define your ItemTypes using // iota for easy automatic value management like this: @@ -301,37 +303,3 @@ func ExampleMatchAnyRune() { // Match = "y" // Match = " " } - -func ExampleModifyToUpper() { - // Easy access to the parsekit definitions. - var c, a, m = parsekit.C, parsekit.A, parsekit.M - - // A Dutch postcode consists of 4 digits and 2 letters (1234XX). - // The first digit is never a zero. - digitNotZero := c.Except(c.Rune('0'), a.Digit) - pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) - - // It is good form to write the letters in upper case. 
- pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) - pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) - - // It is good form to use a single space between letters and numbers, - // but it is not mandatory. - space := m.Replace(c.Opt(a.Whitespace), " ") - - // With all the building blocks, we can now build the postcode parser. - postcode := c.Seq(pcDigits, space, pcLetters) - - // Create a parser and let is parse some postcode inputs. - // This will print "1234 AB" for every input, because of the built-in normalization. - p := parsekit.NewMatcherWrapper(postcode) - for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} { - output, _, _ := p.Match(input) - fmt.Printf("Input: %q, output: %q\n", input, output) - } - // Output: - // Input: "1234 AB", output: "1234 AB" - // Input: "1234Ab", output: "1234 AB" - // Input: "1234\t\tab", output: "1234 AB" - // Input: "1234ab", output: "1234 AB" -} diff --git a/matcher_builtin.go b/matcher_builtin.go index 107a755..fe96dcd 100644 --- a/matcher_builtin.go +++ b/matcher_builtin.go @@ -486,6 +486,7 @@ func ModifyDrop(matcher Matcher) Matcher { // ModifyTrim creates a Matcher that checks if the provided Matcher applies. // If it does, then its output is taken and characters from the provided // cutset are trimmed from both the left and the right of the output. +// TODO move cutset to the left arg func ModifyTrim(matcher Matcher, cutset string) Matcher { return modifyTrim(matcher, cutset, true, true) } diff --git a/parsekit.go b/parsekit.go index 569ebc4..ab3f050 100644 --- a/parsekit.go +++ b/parsekit.go @@ -145,19 +145,19 @@ func (run *Run) invokeNextStateHandler(state StateHandler) { // method. // // To match input data against the wrapped Matcher function, use the method -// MatcherWrapper.Match(). +// MatcherWrapper.Parse(). type MatcherWrapper struct { parser *Parser } -// NewMatcherWrapper instantiates a new MatcherWrapper. +// NewMatcher instantiates a new MatcherWrapper. 
// // This is a simple wrapper around a Matcher function. It can be used to // match an input string against that Matcher function and retrieve the // results in a straight forward way. -func NewMatcherWrapper(matcher Matcher) *MatcherWrapper { +func NewMatcher(matcher Matcher, expects string) *MatcherWrapper { handler := func(p *P) { - p.Expects("match") + p.Expects(expects) if p.On(matcher).Accept().End() { p.EmitLiteral(0) // ItemType is irrelevant } @@ -165,8 +165,8 @@ func NewMatcherWrapper(matcher Matcher) *MatcherWrapper { return &MatcherWrapper{parser: NewParser(handler)} } -// Match runs the wrapped Matcher function against the provided input data. -func (w *MatcherWrapper) Match(input string) (string, *Error, bool) { +// Parse runs the wrapped Matcher function against the provided input data. +func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) { item, err, ok := w.parser.Parse(input).Next() if !ok { return "", err, false diff --git a/parsekit_test.go b/parsekit_test.go index 669efeb..5e43288 100644 --- a/parsekit_test.go +++ b/parsekit_test.go @@ -11,6 +11,7 @@ import ( const TestItem parsekit.ItemType = 1 +// Easy access to the parsekit definitions. var c, a, m = parsekit.C, parsekit.A, parsekit.M type MatcherTest struct { @@ -27,7 +28,7 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) { } func RunMatcherTest(t *testing.T, test MatcherTest) { - output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input) + output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input) if test.mustMatch { if !ok {