diff --git a/examples_test.go b/examples_test.go index ca64a1f..6cdadb2 100644 --- a/examples_test.go +++ b/examples_test.go @@ -2,10 +2,122 @@ package parsekit_test import ( "fmt" + "strconv" "git.makaay.nl/mauricem/go-parsekit" ) +func Example_minimal() { + // Let's write a small example for parsing a really basic calculator. + // The calculator understands input that looks like: + // + // 10 + 20 - 8+4 + // + // So positive numbers that can be either added or subtracted, with + // optional whitespace around the operators. + + // Easy access to parser/combinators, atoms and modifiers. + var c, a, m = parsekit.C, parsekit.A, parsekit.M + + // When writing a parser, it's a good start to use the parser/combinator + // functionality of parsekit to create some Matcher functions. These + // functions can later be used in the parser state machine to find the + // matching tokens on the input data. + var number = c.OneOrMore(a.Digit) + var whitespace = m.Drop(c.Opt(a.Whitespace)) + var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace) + + // We also must define the types of items that the parser will emit. + // We only need two: for numbers and for operators. + // The recommended way to define these is using 'iota' for auto numbering. + const ( + numberType parsekit.ItemType = iota + operatorType + ) + + // Now it is time to define the state machine for parsing the input. + // The state machine is built up from functions that match the StateHandler + // signature: func(*parsekit.P) + // The P struct holds the internal state for the parser and it provides + // some methods that form the API for your StateHandler implementation. + // + // Note that normally you'd write normal functions and not anonymous + // functions like I did here. I had to use these to be able to write the + // example code. 
+ + var operatorHandler parsekit.StateHandler + + numberHandler := func(p *parsekit.P) { + p.Expects("a number") + if p.On(number).Accept().End() { + p.EmitLiteral(numberType) + p.RouteTo(operatorHandler) + } + } + + operatorHandler = func(p *parsekit.P) { + if p.On(operator).Accept().End() { + p.EmitLiteral(operatorType) + p.RouteTo(numberHandler) + } else { + p.ExpectEndOfFile() + } + } + + // All is ready for our parser. We now can create a new Parser struct. + // We need to tell it what the start state is. In our case, it's + // of course the number state. + parser := parsekit.NewParser(numberHandler) + + // Let's feed the parser some input to work with. + run := parser.Parse("153+ 22+31 - 4- 6+42") + + // We can step through the results of the parsing process by repeated + // calls to run.Next(). Next() returns the next parse item, a parse + // error or an end of file. Let's dump the parse results and handle the + // computation while we're at it. + sum := 0 + op := +1 + for { + item, err, ok := run.Next() + switch { + case !ok && err == nil: + fmt.Println("End of file reached") + fmt.Println("Outcome of computation:", sum) + return + case !ok: + fmt.Printf("Error: %s\n", err) + return + default: + fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value) + switch { + case item.Type == operatorType && item.Value == "+": + op = +1 + case item.Type == operatorType && item.Value == "-": + op = -1 + default: + nr, _ := strconv.Atoi(item.Value) + sum += op * nr + } + } + } + + // Output: + // Type: 0, Value: "153" + // Type: 1, Value: "+" + // Type: 0, Value: "22" + // Type: 1, Value: "+" + // Type: 0, Value: "31" + // Type: 1, Value: "-" + // Type: 0, Value: "4" + // Type: 1, Value: "-" + // Type: 0, Value: "6" + // Type: 1, Value: "+" + // Type: 0, Value: "42" + // End of file reached + // Outcome of computation: 238 +} + func ExampleItemType() { // Make use of positive values. 
Ideally, define your ItemTypes using // iota for easy automatic value management like this: @@ -17,6 +129,41 @@ func ExampleItemType() { ) } +func ExampleItem() { + var c = parsekit.C + + // You define your own item types for your specific parser. + var QuestionItem parsekit.ItemType = 42 + + // A StateHandler function can use the defined item type by means of + // the p.Emit* methods on parsekit.P. + // When errors occur, or the end of the file is reached, then the built-in + // types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit. + stateHandler := func(p *parsekit.P) { + if p.On(c.Str("question")).Accept().End() { + p.EmitLiteral(QuestionItem) + } + p.ExpectEndOfFile() + } + + // Successful match + item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next() + fmt.Println(ok, item.Type == QuestionItem, item.Value) + + // End of file reached + item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next() + fmt.Println(ok, item.Type == parsekit.ItemEOF) + + // An error occurred + item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next() + fmt.Println(ok, item.Type == parsekit.ItemError, err) + + // Output: + // true true question + // false true + // false true unexpected character 'a' (expected end of file) +} + func ExampleError() { err := &parsekit.Error{ Message: "it broke down", @@ -89,32 +236,32 @@ func ExampleModifyToUpper() { // Easy access to the parsekit definitions. var c, a, m = parsekit.C, parsekit.A, parsekit.M - // A Dutch poscode consists of 4 numbers and 2 letters (1234XX). - // The numbers never start with a zero. + // A Dutch postcode consists of 4 digits and 2 letters (1234XX). + // The first digit is never a zero. digitNotZero := c.Except(c.Rune('0'), a.Digit) - numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit)) + pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) // It is good form to write the letters in upper case. 
- letter := c.Any(a.ASCIILower, a.ASCIIUpper) - letters := m.ToUpper(c.Seq(letter, letter)) + pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) + pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) // It is good form to use a single space between letters and numbers, // but it is not mandatory. space := m.Replace(c.Opt(a.Whitespace), " ") // With all the building blocks, we can now build the postcode parser. - postcode := c.Seq(numbers, space, letters) + postcode := c.Seq(pcDigits, space, pcLetters) // Create a parser and let is parse some postcode inputs. // This will print "1234 AB" for every input, because of the built-in normalization. p := parsekit.NewMatcherWrapper(postcode) - for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} { - output, _, _ := p.Match("1234 AB") + for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} { + output, _, _ := p.Match(input) fmt.Printf("Input: %q, output: %q\n", input, output) } // Output: // Input: "1234 AB", output: "1234 AB" // Input: "1234Ab", output: "1234 AB" - // Input: "1234 ab", output: "1234 AB" + // Input: "1234\t\tab", output: "1234 AB" // Input: "1234ab", output: "1234 AB" } diff --git a/parsekit.go b/parsekit.go index 0883e23..569ebc4 100644 --- a/parsekit.go +++ b/parsekit.go @@ -68,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) { func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) { switch { - case i.Type == itemEOF: + case i.Type == ItemEOF: return i, nil, false - case i.Type == itemError: + case i.Type == ItemError: run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} return i, run.p.err, false default: diff --git a/statehandler.go b/statehandler.go index 7b84fe8..d8b91bf 100644 --- a/statehandler.go +++ b/statehandler.go @@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() { p.RouteTo(func(p *P) { p.Expects("end of file") if p.On(A.EndOfFile).Stay().End() { - p.Emit(itemEOF, "EOF") + p.Emit(ItemEOF, "EOF") } }) } diff --git a/statehandler_emit.go 
b/statehandler_emit.go index d34e0d5..ce6778c 100644 --- a/statehandler_emit.go +++ b/statehandler_emit.go @@ -11,13 +11,13 @@ import ( // use by parsekit. type ItemType int -// itemEOF is a built-in parser item type that is used for flagging that the +// ItemEOF is a built-in parser item type that is used for flagging that the // end of the input was reached. -const itemEOF ItemType = -1 +const ItemEOF ItemType = -1 -// itemError is a built-in parser item type that is used for flagging that +// ItemError is a built-in parser item type that is used for flagging that // an error has occurred during parsing. -const itemError ItemType = -2 +const ItemError ItemType = -2 // Item represents an item that can be emitted from the parser. type Item struct { @@ -80,7 +80,7 @@ func (err *Error) ErrorFull() string { // EmitError emits a Parser error item to the client. func (p *P) EmitError(format string, args ...interface{}) { message := fmt.Sprintf(format, args...) - p.Emit(itemError, message) + p.Emit(ItemError, message) } // UnexpectedInput is used by a StateHandler function to emit an error item