// go-parsekit/examples/example_dutchpostcode_test.go

// In this example, a tokenizer is created that can parse and normalize Dutch
// postcodes. The implementation uses only Handler functions and does not
// implement a full-fledged state-based Parser for it.

package examples

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// Example_dutchPostcodeUsingTokenizer feeds a list of candidate postcodes
// through the tokenizer built by createPostcodeTokenizer. For every input it
// prints either the reported error or the normalized output followed by the
// type and value of each produced token.
func Example_dutchPostcodeUsingTokenizer() {
	// A mix of inputs: the first four are accepted, the remaining ones are
	// rejected (see the expected output below).
	samples := []string{
		"1234  AB",
		"2233Ab",
		"1001\t\tab",
		"1818ab",
		"1212abc",
		"1234",
		"huh",
		"",
		"\xcd2222AB",
	}

	tokenizePostcode := createPostcodeTokenizer()

	for n, sample := range samples {
		result, err := tokenizePostcode(sample)
		if err != nil {
			fmt.Printf("[%d] Input: %q Error: %s\n", n, sample, err)
			continue
		}

		// Success: print the normalized result, then every token on the same line.
		fmt.Printf("[%d] Input: %q Output: %s Tokens:", n, sample, result)
		for _, token := range result.Tokens() {
			fmt.Printf(" %s(%s)", token.Type, token.Value)
		}
		fmt.Printf("\n")
	}
	// Output:
	// [0] Input: "1234  AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB)
	// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
	// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
	// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
	// [4] Input: "1212abc" Error: mismatch at start of file
	// [5] Input: "1234" Error: mismatch at start of file
	// [6] Input: "huh" Error: mismatch at start of file
	// [7] Input: "" Error: mismatch at start of file
	// [8] Input: "\xcd2222AB" Error: mismatch at start of file
}

// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――

// createPostcodeTokenizer builds a tokenize.Func that matches a complete
// Dutch postcode and normalizes it. The rules encoded by the handlers are:
//
//   - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
//   - The first digit is never a zero.
//   - A space between letters and digits is optional.
//   - It is good form to write the letters in upper case.
//   - It is good form to use a single space between digits and letters.
func createPostcodeTokenizer() tokenize.Func {
	// Short aliases for the atom, modifier and token-maker definitions.
	a, m, t := tokenize.A, tokenize.M, tokenize.T

	// Letter part: exactly two ASCII letters, upper-cased, emitted as a
	// "PCL" token.
	anyCaseLetter := a.ASCIILower.Or(a.ASCIIUpper)
	letterToken := t.Str("PCL", m.ToUpper(anyCaseLetter.Times(2)))

	// Digit part: one non-zero digit followed by three more digits, emitted
	// as a "PCD" token.
	digitToken := t.Str("PCD", a.DigitNotZero.Then(a.Digit.Times(3)))

	// Separator: optional blanks, always rewritten to a single space.
	separator := m.Replace(a.Blanks.Optional(), " ")

	// The full postcode must be followed directly by the end of the input.
	fullPostcode := digitToken.Then(separator).Then(letterToken).Then(a.EndOfFile)

	// Wrap the combined handler in a Tokenizer so input can be matched
	// against it.
	return tokenize.New(fullPostcode)
}