Added complete example for a parser.

2019-05-24 23:41:47 +00:00 · 2019-05-24 23:41:47 +00:00 · 723e2a0c38
parent 070a215ac3
commit 723e2a0c38
4 changed files with 164 additions and 17 deletions
--- a/examples_test.go
+++ b/examples_test.go
@ -2,10 +2,122 @@ package parsekit_test

 import (
 	"fmt"
+	"strconv"

 	"git.makaay.nl/mauricem/go-parsekit"
 )

+func Example_minimal() {
+	// Let's write a small example for parsing a really basic calculator.
+	// The calculator understands input that looks like:
+	//
+	//     10 + 20 - 8+4
+	//
+	// So positive numbers that can be either added or subtracted, with
+	// optional whitespace around the operators.
+
+	// Easy access to parser/combinators, atoms and modifiers.
+	var c, a, m = parsekit.C, parsekit.A, parsekit.M
+
+	// When writing a parser, it's a good start to use the parser/combinator
+	// functionality of parsekit to create some Matcher functions. These
+	// functions can later be used in the parser state machine to find the
+	// matching tokens on the input data.
+	var number = c.OneOrMore(a.Digit)
+	var whitespace = m.Drop(c.Opt(a.Whitespace))
+	var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
+
+	// We also must define the types of items that the parser will emit.
+	// We only need two: one for numbers and one for operators.
+	// The recommended way to define these is to use 'iota' for auto numbering.
+	const (
+		numberType parsekit.ItemType = iota
+		operatorType
+	)
+
+	// Now it is time to define the state machine for parsing the input.
+	// The state machine is built up from functions that match the StateHandler
+	// signature: func(*parsekit.P)
+	// The P struct holds the internal state for the parser and it provides
+	// some methods that form the API for your StateHandler implementation.
+	//
+	// Note that normally you'd write named functions and not anonymous
+	// functions like I did here. I had to use these to be able to write the
+	// example code.
+
+	var operatorHandler parsekit.StateHandler // declared up front: the two handlers refer to each other
+
+	numberHandler := func(p *parsekit.P) {
+		p.Expects("a number")
+		if p.On(number).Accept().End() {
+			p.EmitLiteral(numberType)
+			p.RouteTo(operatorHandler)
+		}
+	}
+
+	operatorHandler = func(p *parsekit.P) {
+		if p.On(operator).Accept().End() {
+			p.EmitLiteral(operatorType)
+			p.RouteTo(numberHandler)
+		} else {
+			p.ExpectEndOfFile()
+		}
+	}
+
+	// All is ready for our parser. We now can create a new Parser struct.
+	// We need to tell it what the start state is. In our case, that is
+	// of course the number state.
+	parser := parsekit.NewParser(numberHandler)
+
+	// Let's feed the parser some input to work with.
+	run := parser.Parse("153+ 22+31 - 4- 6+42")
+
+	// We can step through the results of the parsing process by repeated
+	// calls to run.Next(). Next() returns the next parse item, a parse
+	// error or an end of file. Let's dump the parse results and handle the
+	// computation while we're at it.
+	sum := 0
+	op := +1 // sign applied to the next number: +1 after "+", -1 after "-"
+	for {
+		item, err, ok := run.Next()
+		switch {
+		case !ok && err == nil:
+			fmt.Println("End of file reached")
+			fmt.Println("Outcome of computation:", sum)
+			return
+		case !ok:
+			fmt.Printf("Error: %s\n", err)
+			return
+		default:
+			fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
+			switch {
+			case item.Type == operatorType && item.Value == "+":
+				op = +1
+			case item.Type == operatorType && item.Value == "-":
+				op = -1
+			default:
+				nr, _ := strconv.Atoi(item.Value) // error ignored: value presumably holds only digits matched by `number` (TODO confirm; overflow is not handled)
+				sum += op * nr
+			}
+		}
+	}
+
+	// Output:
+	// Type: 0, Value: "153"
+	// Type: 1, Value: "+"
+	// Type: 0, Value: "22"
+	// Type: 1, Value: "+"
+	// Type: 0, Value: "31"
+	// Type: 1, Value: "-"
+	// Type: 0, Value: "4"
+	// Type: 1, Value: "-"
+	// Type: 0, Value: "6"
+	// Type: 1, Value: "+"
+	// Type: 0, Value: "42"
+	// End of file reached
+	// Outcome of computation: 238
+}
+
 func ExampleItemType() {
 	// Make use of positive values. Ideally, define your ItemTypes using
 	// iota for easy automatic value management like this:
@ -17,6 +129,41 @@ func ExampleItemType() {
 	)
 }

+func ExampleItem() {
+	var c = parsekit.C // easy access to the parser/combinators
+
+	// You define your own item types for your specific parser.
+	var QuestionItem parsekit.ItemType = 42
+
+	// A StateHandler function can use the defined item type by means of
+	// the p.Emit* methods on parsekit.P.
+	// When the end of the file is reached or an error occurs, the built-in
+	// types parsekit.ItemEOF or parsekit.ItemError will be emitted by parsekit.
+	stateHandler := func(p *parsekit.P) {
+		if p.On(c.Str("question")).Accept().End() {
+			p.EmitLiteral(QuestionItem)
+		}
+		p.ExpectEndOfFile()
+	}
+
+	// Successful match: the first item is our own QuestionItem.
+	item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next()
+	fmt.Println(ok, item.Type == QuestionItem, item.Value)
+
+	// End of file reached: ok is false and the item type is ItemEOF.
+	item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next()
+	fmt.Println(ok, item.Type == parsekit.ItemEOF)
+
+	// An error occurred: ok is false and err describes what went wrong.
+	item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next()
+	fmt.Println(ok, item.Type == parsekit.ItemError, err)
+
+	// Output:
+	// true true question
+	// false true
+	// false true unexpected character 'a' (expected end of file)
+}
+
 func ExampleError() {
 	err := &parsekit.Error{
 		Message: "it broke down",
@ -89,32 +236,32 @@ func ExampleModifyToUpper() {
 	// Easy access to the parsekit definitions.
 	var c, a, m = parsekit.C, parsekit.A, parsekit.M

-	// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
-	// The numbers never start with a zero.
+	// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
+	// The first digit is never a zero.
 	digitNotZero := c.Except(c.Rune('0'), a.Digit)
-	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
+	pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))

 	// It is good form to write the letters in upper case.
-	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
-	letters := m.ToUpper(c.Seq(letter, letter))
+	pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
+	pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))

 	// It is good form to use a single space between letters and numbers,
 	// but it is not mandatory.
 	space := m.Replace(c.Opt(a.Whitespace), " ")

 	// With all the building blocks, we can now build the postcode parser.
-	postcode := c.Seq(numbers, space, letters)
+	postcode := c.Seq(pcDigits, space, pcLetters)

 	// Create a parser and let is parse some postcode inputs.
 	// This will print "1234 AB" for every input, because of the built-in normalization.
 	p := parsekit.NewMatcherWrapper(postcode)
-	for _, input := range []string{"1234  AB", "1234Ab", "1234 ab", "1234ab"} {
-		output, _, _ := p.Match("1234 AB")
+	for _, input := range []string{"1234  AB", "1234Ab", "1234\t\tab", "1234ab"} {
+		output, _, _ := p.Match(input)
 		fmt.Printf("Input: %q, output: %q\n", input, output)
 	}
 	// Output:
 	// Input: "1234  AB", output: "1234 AB"
 	// Input: "1234Ab", output: "1234 AB"
-	// Input: "1234 ab", output: "1234 AB"
+	// Input: "1234\t\tab", output: "1234 AB"
 	// Input: "1234ab", output: "1234 AB"
 }
--- a/parsekit.go
+++ b/parsekit.go
@ -68,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {

 func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
 	switch {
-	case i.Type == itemEOF:
+	case i.Type == ItemEOF:
 		return i, nil, false
-	case i.Type == itemError:
+	case i.Type == ItemError:
 		run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
 		return i, run.p.err, false
 	default:
--- a/statehandler.go
+++ b/statehandler.go
@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
 	p.RouteTo(func(p *P) {
 		p.Expects("end of file")
 		if p.On(A.EndOfFile).Stay().End() {
-			p.Emit(itemEOF, "EOF")
+			p.Emit(ItemEOF, "EOF")
 		}
 	})
 }
--- a/statehandler_emit.go
+++ b/statehandler_emit.go
@ -11,13 +11,13 @@ import (
 // use by parsekit.
 type ItemType int

-// itemEOF is a built-in parser item type that is used for flagging that the
+// ItemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
-const itemEOF ItemType = -1
+const ItemEOF ItemType = -1

-// itemError is a built-in parser item type that is used for flagging that
+// ItemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
-const itemError ItemType = -2
+const ItemError ItemType = -2

 // Item represents an item that can be emitted from the parser.
 type Item struct {
@ -80,7 +80,7 @@ func (err *Error) ErrorFull() string {
 // EmitError emits a Parser error item to the client.
 func (p *P) EmitError(format string, args ...interface{}) {
 	message := fmt.Sprintf(format, args...)
-	p.Emit(itemError, message)
+	p.Emit(ItemError, message)
 }

 // UnexpectedInput is used by a StateHandler function to emit an error item