go-parsekit/example_dutchpostcode_test.go

// In this example, a parser is created that can parse and normalize Dutch postcodes.
// The implementation uses only TokenHandler functions, wrapped in a Matcher, and
// does not implement a full-fledged state-based Parser for it.
package parsekit_test

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit"
)

// Example_dutchPostcodeUsingMatcher feeds a mix of valid and invalid inputs
// to the postcode matcher. For every input it prints either the normalized
// postcode or the full error that the matcher reported.
func Example_dutchPostcodeUsingMatcher() {
	inputs := []string{
		"1234  AB",
		"2233Ab",
		"1001\t\tab",
		"1818ab",
		"1212abc",
		"1234",
		"huh",
		"",
		"\xcd2222AB",
	}

	matcher := createPostcodeMatcher()
	for idx, in := range inputs {
		normalized, err := matcher.Execute(in)
		if err != nil {
			// No match: report the full error, including the position.
			fmt.Printf("[%d] Input: %q Error: %s\n", idx, in, err.ErrorFull())
			continue
		}
		fmt.Printf("[%d] Input: %q Output: %s\n", idx, in, normalized)
	}
	// Output:
	// [0] Input: "1234  AB" Output: 1234 AB
	// [1] Input: "2233Ab" Output: 2233 AB
	// [2] Input: "1001\t\tab" Output: 1001 AB
	// [3] Input: "1818ab" Output: 1818 AB
	// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
	// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
	// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
	// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
	// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
}

// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------

// createPostcodeMatcher builds a Matcher for Dutch postcodes out of
// TokenHandler functions. The handlers encode the following rules:
//
//   - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
//   - The first digit is never a zero.
//   - A space between the digits and the letters is optional.
//   - The letters are normalized to upper case.
//   - The separator is normalized to a single space.
func createPostcodeMatcher() *parsekit.Matcher {
	// Short aliases for the parsekit handler collections.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// Two ASCII letters in either case, rewritten to upper case.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// Optional whitespace, rewritten to exactly one space.
	separator := m.Replace(c.Opt(a.Whitespace), " ")

	// Four digits, of which the first one must not be '0'.
	firstDigit := c.Except(c.Rune('0'), a.Digit)
	digits := c.Seq(firstDigit, c.Rep(3, a.Digit))

	// The full postcode must be followed by the end of the input. The
	// resulting handler is wrapped in a Matcher, so input can be matched
	// against it.
	return parsekit.NewMatcher(
		c.Seq(digits, separator, letters, a.EndOfFile),
		"a Dutch postcode",
	)
}