go-parsekit/examples/example_dutchpostcode_test.go

75 lines
2.8 KiB
Go
Raw Blame History

// In this example, a Parser is created that can parse and normalize Dutch postcodes.
// The implementation uses only TokenHandler functions and does not implement a
// full-fledged state-based Parser for it.
package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
func Example_dutchPostcodeUsingTokenizer() {
parser := createPostcodeTokenizer()
for i, input := range []string{
"1234 AB",
"2233Ab",
"1001\t\tab",
"1818ab",
"1212abc",
"1234",
"huh",
"",
"\xcd2222AB",
} {
result, err := parser.Execute(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full())
} else {
fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result)
for _, t := range result.Tokens() {
fmt.Printf(" %s(%s)", t.Type, t.Value)
}
fmt.Printf("\n")
}
}
// Output:
// [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB)
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file
// [8] Input: "\xcd2222AB" Error: unexpected character '<27>' (expected a Dutch postcode) at start of file
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// createPostcodeTokenizer builds a parsekit.Tokenizer for Dutch postcodes
// purely by combining TokenHandler functions.
//
// The combined handler encodes these rules:
//   - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
//   - The first digit is never a zero.
//   - A space between the digits and the letters is optional.
//   - The letters are normalized to upper case.
//   - The separator is normalized to a single space.
//
// The digits are emitted as a "PCD" token and the letters as a "PCL" token.
func createPostcodeTokenizer() *parsekit.Tokenizer {
	// Short aliases for the parsekit handler collections.
	c, a, m, t := parsekit.C, parsekit.A, parsekit.M, parsekit.T

	// Four digits, the first of which may be any digit except '0'.
	leadingDigit := c.Except(a.Rune('0'), a.Digit)
	fourDigits := c.Seq(leadingDigit, c.Rep(3, a.Digit))

	// Two ASCII letters in either case, rewritten to upper case.
	oneLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
	twoLetters := m.ToUpper(c.Seq(oneLetter, oneLetter))

	// Optional whitespace, always replaced by a single space.
	separator := m.Replace(c.Opt(a.Whitespace), " ")

	// The full postcode must be followed directly by the end of the input.
	// The Tokenizer wraps this handler so input can be matched against it;
	// the description string is what error messages report as expected.
	return parsekit.NewTokenizer(
		c.Seq(
			t.Str("PCD", fourDigits),
			separator,
			t.Str("PCL", twoLetters),
			a.EndOfFile,
		),
		"a Dutch postcode",
	)
}