74 lines
2.8 KiB
Go
74 lines
2.8 KiB
Go
// In this example, a tokenizer is created that can parse and normalize Dutch
// postcodes. The implementation uses only Handler functions and does not
// implement a full-fledged state-based Parser for it.
|
|
|
|
package examples
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
|
)
|
|
|
|
func Example_dutchPostcodeUsingTokenizer() {
|
|
tokenizer := createPostcodeTokenizer()
|
|
|
|
for i, input := range []string{
|
|
"1234 AB",
|
|
"2233Ab",
|
|
"1001\t\tab",
|
|
"1818ab",
|
|
"1212abc",
|
|
"1234",
|
|
"huh",
|
|
"",
|
|
"\xcd2222AB",
|
|
} {
|
|
result, err := tokenizer(input)
|
|
if err != nil {
|
|
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
|
} else {
|
|
fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result)
|
|
for _, t := range result.Tokens() {
|
|
fmt.Printf(" %s(%s)", t.Type, t.Value)
|
|
}
|
|
fmt.Printf("\n")
|
|
}
|
|
}
|
|
// Output:
|
|
// [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB)
|
|
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
|
|
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
|
|
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
|
|
// [4] Input: "1212abc" Error: mismatch at start of file
|
|
// [5] Input: "1234" Error: mismatch at start of file
|
|
// [6] Input: "huh" Error: mismatch at start of file
|
|
// [7] Input: "" Error: mismatch at start of file
|
|
// [8] Input: "\xcd2222AB" Error: mismatch at start of file
|
|
}
|
|
|
|
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
|
|
// Implementation of the parser
|
|
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
|
|
|
|
func createPostcodeTokenizer() tokenize.Func {
|
|
// Easy access to the tokenize definitions.
|
|
C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T
|
|
|
|
// Handler functions are created and combined to satisfy these rules:
|
|
// • A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
|
// • The first digit is never a zero.
|
|
// • A space between letters and digits is optional.
|
|
// • It is good form to write the letters in upper case.
|
|
// • It is good form to use a single space between digits and letters.
|
|
pcDigits := A.DigitNotZero.Then(A.Digit.Times(3))
|
|
pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
|
|
pcLetters := M.ToUpper(pcLetter.Times(2))
|
|
space := M.Replace(A.Blanks.Optional(), " ")
|
|
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
|
|
|
|
// Create a Tokenizer that wraps the 'postcode' Handler and allows
|
|
// us to match some input against that handler.
|
|
return tokenize.New(postcode)
|
|
}
|