package parsekit_test import ( "fmt" "strconv" "git.makaay.nl/mauricem/go-parsekit" ) func Example_minimal() { // Let's write a small example for parsing a really basic calculator. // The calculator understands input that looks like: // // 10 + 20 - 8+4 // // So positive numbers that can be either added or substracted, with // optional whitespace around the operators. // Easy access to parser/combinators, atoms and modifiers. var c, a, m = parsekit.C, parsekit.A, parsekit.M // When writing a parser, it's a good start to use the parser/combinator // functionality of parsekit to create some Matcher functions. These // functions can later be used in the parser state machine to find the // matching tokens on the input data. var number = c.OneOrMore(a.Digit) var whitespace = m.Drop(c.Opt(a.Whitespace)) var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace) // We also must define the types of items that the parser will emit. // We only need two: for numbers and for operators. // The recommended way to define these, is using 'iota' for auto numbering. const ( numberType parsekit.ItemType = iota operatorType ) // Now it is time to define the state machine for parsing the input. // The state machine is built up from functions that match the StateHandler // signature: func(*parsekit.P) // The P struct holds the internal state for the parser and it provides // some methods that form the API for your StateHandler implementation. // // Note that normally you'd write normal functions and not anonymous // functions like I did here. I had to use these to be able to write the // example code. var operatorHandler parsekit.StateHandler numberHandler := func(p *parsekit.P) { p.Expects("a number") if p.On(number).Accept().End() { p.EmitLiteral(numberType) p.RouteTo(operatorHandler) } } operatorHandler = func(p *parsekit.P) { if p.On(operator).Accept().End() { p.EmitLiteral(operatorType) p.RouteTo(numberHandler) } else { p.ExpectEndOfFile() } } // All is ready for our parser. We now can create a new Parser struct. // We need to tell it what the start state is. In our case, it's the // of course the number state. parser := parsekit.NewParser(numberHandler) // Let's fee the parser some input to work with. run := parser.Parse("153+ 22+31 - 4- 6+42") // We can step through the results of the parsing process by repeated // calls to run.Next(). Next() returns the next parse item, a parse // error or an end of file. Let's dump the parse results and handle the // computation while we're at it. sum := 0 op := +1 for { item, err, ok := run.Next() switch { case !ok && err == nil: fmt.Println("End of file reached") fmt.Println("Outcome of computation:", sum) return case !ok: fmt.Printf("Error: %s\n", err) return default: fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value) switch { case item.Type == operatorType && item.Value == "+": op = +1 case item.Type == operatorType && item.Value == "-": op = -1 default: nr, _ := strconv.Atoi(item.Value) sum += op * nr } } } // Output: // Type: 0, Value: "153" // Type: 1, Value: "+" // Type: 0, Value: "22" // Type: 1, Value: "+" // Type: 0, Value: "31" // Type: 1, Value: "-" // Type: 0, Value: "4" // Type: 1, Value: "-" // Type: 0, Value: "6" // Type: 1, Value: "+" // Type: 0, Value: "42" // End of file reached // Outcome of computation: 238 } func ExampleItemType() { // Make use of positive values. Ideally, define your ItemTypes using // iota for easy automatic value management like this: const ( ItemWord parsekit.ItemType = iota ItemNumber ItemBlob // ... ) } func ExampleItem() { var c = parsekit.C // You define your own item types for your specific parser. var QuestionItem parsekit.ItemType = 42 // A StateHandler function can use the defined item type by means of // the p.Emit* methods on parsekit.P. // When errors occur, or the end of the file is reached, then the built-in // types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit. stateHandler := func(p *parsekit.P) { if p.On(c.Str("question")).Accept().End() { p.EmitLiteral(QuestionItem) } p.ExpectEndOfFile() } // Successful match item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next() fmt.Println(ok, item.Type == QuestionItem, item.Value) // End of file reached item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next() fmt.Println(ok, item.Type == parsekit.ItemEOF) // An error occurred item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next() fmt.Println(ok, item.Type == parsekit.ItemError, err) // Output: // true true question // false true // false true unexpected character 'a' (expected end of file) } func ExampleError() { err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.Error()) fmt.Printf("%s\n", err) fmt.Println(err.ErrorFull()) // Output: // it broke down // it broke down // it broke down after line 10, column 42 } func ExampleError_Error() { err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.Error()) fmt.Printf("%s\n", err) // Output: // it broke down // it broke down } func ExampleError_ErrorFull() { err := &parsekit.Error{ Message: "it broke down", Line: 10, Column: 42, } fmt.Println(err.ErrorFull()) // Output: // it broke down after line 10, column 42 } func ExampleMatchAnyRune() { // Easy access to the parsekit definitions. var a = parsekit.A handler := func(p *parsekit.P) { p.Expects("Any valid rune") if p.On(a.AnyRune).Accept().End() { p.EmitLiteral(TestItem) p.RouteRepeat() } } parser := parsekit.NewParser(handler) run := parser.Parse("¡Any / valid / character will dö!") for i := 0; i < 5; i++ { match, _, _ := run.Next() fmt.Printf("Match = %q\n", match.Value) } // Output: // Match = "¡" // Match = "A" // Match = "n" // Match = "y" // Match = " " } func ExampleModifyToUpper() { // Easy access to the parsekit definitions. var c, a, m = parsekit.C, parsekit.A, parsekit.M // A Dutch postcode consists of 4 digits and 2 letters (1234XX). // The first digit is never a zero. digitNotZero := c.Except(c.Rune('0'), a.Digit) pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) // It is good form to write the letters in upper case. pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) // It is good form to use a single space between letters and numbers, // but it is not mandatory. space := m.Replace(c.Opt(a.Whitespace), " ") // With all the building blocks, we can now build the postcode parser. postcode := c.Seq(pcDigits, space, pcLetters) // Create a parser and let is parse some postcode inputs. // This will print "1234 AB" for every input, because of the built-in normalization. p := parsekit.NewMatcherWrapper(postcode) for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} { output, _, _ := p.Match(input) fmt.Printf("Input: %q, output: %q\n", input, output) } // Output: // Input: "1234 AB", output: "1234 AB" // Input: "1234Ab", output: "1234 AB" // Input: "1234\t\tab", output: "1234 AB" // Input: "1234ab", output: "1234 AB" }