// go-parsekit/examples_test.go

package parsekit_test

import (
	"fmt"
	"strconv"

	"git.makaay.nl/mauricem/go-parsekit"
)

// Example_minimal walks through a complete, minimal parser: it tokenizes a
// chain of additions and subtractions and computes the outcome while
// iterating over the emitted items.
func Example_minimal() {
	// Let's write a small example for parsing a really basic calculator.
	// The calculator understands input that looks like:
	//
	//     10 + 20 - 8+4
	//
	// So positive numbers that can be either added or subtracted, with
	// optional whitespace around the operators.

	// Easy access to parser/combinators, atoms and modifiers.
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// When writing a parser, it's a good start to use the parser/combinator
	// functionality of parsekit to create some Matcher functions. These
	// functions can later be used in the parser state machine to find the
	// matching tokens on the input data.
	var number = c.OneOrMore(a.Digit)
	var whitespace = m.Drop(c.Opt(a.Whitespace))
	var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)

	// We also must define the types of items that the parser will emit.
	// We only need two: for numbers and for operators.
	// The recommended way to define these, is using 'iota' for auto numbering.
	const (
		numberType parsekit.ItemType = iota
		operatorType
	)

	// Now it is time to define the state machine for parsing the input.
	// The state machine is built up from functions that match the StateHandler
	// signature: func(*parsekit.P)
	// The P struct holds the internal state for the parser and it provides
	// some methods that form the API for your StateHandler implementation.
	//
	// Note that normally you'd write normal functions and not anonymous
	// functions like I did here. I had to use these to be able to write the
	// example code.

	// The two handlers route to each other, so one of them has to be
	// declared before it can be assigned.
	var operatorHandler parsekit.StateHandler

	numberHandler := func(p *parsekit.P) {
		p.Expects("a number")
		if p.On(number).Accept().End() {
			p.EmitLiteral(numberType)
			p.RouteTo(operatorHandler)
		}
	}

	operatorHandler = func(p *parsekit.P) {
		if p.On(operator).Accept().End() {
			p.EmitLiteral(operatorType)
			p.RouteTo(numberHandler)
		} else {
			p.ExpectEndOfFile()
		}
	}

	// All is ready for our parser. We now can create a new Parser struct.
	// We need to tell it what the start state is. In our case, it is of
	// course the number state.
	parser := parsekit.NewParser(numberHandler)

	// Let's feed the parser some input to work with.
	run := parser.Parse("153+ 22+31 - 4- 6+42")

	// We can step through the results of the parsing process by repeated
	// calls to run.Next(). Next() returns the next parse item, a parse
	// error or an end of file. Let's dump the parse results and handle the
	// computation while we're at it.
	sum := 0
	op := +1
	for {
		item, err, ok := run.Next()
		switch {
		case !ok && err == nil:
			fmt.Println("End of file reached")
			fmt.Println("Outcome of computation:", sum)
			return
		case !ok:
			fmt.Printf("Error: %s\n", err)
			return
		default:
			fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
			switch {
			case item.Type == operatorType && item.Value == "+":
				op = +1
			case item.Type == operatorType && item.Value == "-":
				op = -1
			default:
				// Do not ignore the conversion error: a literal that does
				// not fit in an int would otherwise be added as a clamped
				// value and silently corrupt the outcome.
				nr, convErr := strconv.Atoi(item.Value)
				if convErr != nil {
					fmt.Printf("Error: %s\n", convErr)
					return
				}
				sum += op * nr
			}
		}
	}

	// Output:
	// Type: 0, Value: "153"
	// Type: 1, Value: "+"
	// Type: 0, Value: "22"
	// Type: 1, Value: "+"
	// Type: 0, Value: "31"
	// Type: 1, Value: "-"
	// Type: 0, Value: "4"
	// Type: 1, Value: "-"
	// Type: 0, Value: "6"
	// Type: 1, Value: "+"
	// Type: 0, Value: "42"
	// End of file reached
	// Outcome of computation: 238
}

// ExampleItemType shows the suggested way of declaring the item types for
// your own parser.
func ExampleItemType() {
	// Make use of positive values for your item types. Letting iota number
	// the constants saves you from managing the values by hand:
	const (
		ItemWord = parsekit.ItemType(iota)
		ItemNumber
		ItemBlob
		// ...
	)
}

// ExampleItem shows the three outcomes that a parse run can hand back for a
// single call to Next(): a custom item, an end of file and an error.
func ExampleItem() {
	// Item types are specific to your parser, so you define them yourself.
	var QuestionItem parsekit.ItemType = 42

	// The p.Emit* methods on parsekit.P let a StateHandler emit items of
	// such a custom type. The built-in types parsekit.ItemEOF and
	// parsekit.ItemError are emitted by parsekit itself, on reaching the
	// end of the file or on running into an error.
	handleQuestion := func(p *parsekit.P) {
		question := parsekit.C.Str("question")
		if matched := p.On(question).Accept().End(); matched {
			p.EmitLiteral(QuestionItem)
		}
		p.ExpectEndOfFile()
	}

	// Input that matches: the custom item comes out.
	found, _, foundOk := parsekit.NewParser(handleQuestion).Parse("question").Next()
	fmt.Println(foundOk, found.Type == QuestionItem, found.Value)

	// Empty input: the end of the file is reported.
	atEOF, _, eofOk := parsekit.NewParser(handleQuestion).Parse("").Next()
	fmt.Println(eofOk, atEOF.Type == parsekit.ItemEOF)

	// Input that does not match: an error is reported.
	failed, parseErr, failedOk := parsekit.NewParser(handleQuestion).Parse("answer").Next()
	fmt.Println(failedOk, failed.Type == parsekit.ItemError, parseErr)

	// Output:
	// true true question
	// false true
	// false true unexpected character 'a' (expected end of file)
}

// ExampleError prints a parsekit.Error in three ways: through Error(),
// through the %s formatting verb and through ErrorFull().
func ExampleError() {
	err := new(parsekit.Error)
	err.Message = "it broke down"
	err.Line = 10
	err.Column = 42

	// The short form, which only holds the message.
	fmt.Println(err.Error())
	fmt.Printf("%s\n", err)

	// The full form, which also holds the line and column.
	fmt.Println(err.ErrorFull())

	// Output:
	// it broke down
	// it broke down
	// it broke down after line 10, column 42
}

// ExampleError_Error shows that Error() yields just the message, both when
// called directly and when the error is formatted using the %s verb.
func ExampleError_Error() {
	failure := parsekit.Error{Message: "it broke down", Line: 10, Column: 42}
	err := &failure

	fmt.Println(err.Error())
	fmt.Printf("%s\n", err)

	// Output:
	// it broke down
	// it broke down
}

// ExampleError_ErrorFull shows that ErrorFull() extends the message with the
// line and column that are stored in the error.
func ExampleError_ErrorFull() {
	var err = &parsekit.Error{Message: "it broke down", Line: 10, Column: 42}

	full := err.ErrorFull()
	fmt.Println(full)

	// Output:
	// it broke down after line 10, column 42
}

// ExampleMatchAnyRune uses the AnyRune atom in a state handler that emits one
// item per accepted rune, and prints the first five items of the run.
func ExampleMatchAnyRune() {
	// The atom that is used for matching the input.
	anyRune := parsekit.A.AnyRune

	// Accept a single rune, emit it and route back to this same handler.
	emitRune := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if !p.On(anyRune).Accept().End() {
			return
		}
		p.EmitLiteral(TestItem)
		p.RouteRepeat()
	}

	run := parsekit.NewParser(emitRune).Parse("¡Any / valid / character will dö!")

	// Only the first five items are retrieved from the run.
	for remaining := 5; remaining > 0; remaining-- {
		item, _, _ := run.Next()
		fmt.Printf("Match = %q\n", item.Value)
	}

	// Output:
	// Match = "¡"
	// Match = "A"
	// Match = "n"
	// Match = "y"
	// Match = " "
}

// ExampleModifyToUpper builds a matcher for Dutch postcodes that normalizes
// its output, using the ToUpper and Replace modifiers.
func ExampleModifyToUpper() {
	// Shorthands for the parsekit combinators, atoms and modifiers.
	c := parsekit.C
	a := parsekit.A
	m := parsekit.M

	// A Dutch postcode is written as 4 digits followed by 2 letters
	// (1234XX), and the first of those digits is never a zero.
	nonZeroDigit := c.Except(c.Rune('0'), a.Digit)
	digits := c.Seq(nonZeroDigit, c.Rep(3, a.Digit))

	// Letters are accepted in both cases. Since upper case is good form,
	// the matched letters are converted to upper case.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// A single space between the digits and the letters is good form, but
	// not mandatory. Whatever optional whitespace is matched here, gets
	// replaced with one space.
	separator := m.Replace(c.Opt(a.Whitespace), " ")

	// Combine the building blocks into the postcode matcher.
	postcode := c.Seq(digits, separator, letters)

	// Wrap the matcher and let it parse some postcode inputs. Because of
	// the built-in normalization, "1234 AB" is printed for every input.
	wrapper := parsekit.NewMatcherWrapper(postcode)
	inputs := []string{"1234  AB", "1234Ab", "1234\t\tab", "1234ab"}
	for i := range inputs {
		output, _, _ := wrapper.Match(inputs[i])
		fmt.Printf("Input: %q, output: %q\n", inputs[i], output)
	}

	// Output:
	// Input: "1234  AB", output: "1234 AB"
	// Input: "1234Ab", output: "1234 AB"
	// Input: "1234\t\tab", output: "1234 AB"
	// Input: "1234ab", output: "1234 AB"
}