// In this example, a Parser is created that can parse and normalize Dutch postcodes // The implementation uses only TokenHandler functions and does not implement a // full-fledged state-based Parser for it. package examples import ( "fmt" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_dutchPostcodeUsingTokenizer() { parser := createPostcodeTokenizer() for i, input := range []string{ "1234 AB", "2233Ab", "1001\t\tab", "1818ab", "1212abc", "1234", "huh", "", "\xcd2222AB", } { result, err := parser.Execute(input) if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result) for _, t := range result.Tokens() { fmt.Printf(" %s(%s)", t.Type, t.Value) } fmt.Printf("\n") } } // Output: // [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB) // [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB) // [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB) // [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB) // [4] Input: "1212abc" Error: mismatch at start of file // [5] Input: "1234" Error: mismatch at start of file // [6] Input: "huh" Error: mismatch at start of file // [7] Input: "" Error: mismatch at start of file // [8] Input: "\xcd2222AB" Error: mismatch at start of file } // --------------------------------------------------------------------------- // Implementation of the parser // --------------------------------------------------------------------------- func createPostcodeTokenizer() *tokenize.Tokenizer { // Easy access to the parsekit definitions. C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T // TokenHandler functions are created and combined to satisfy these rules: // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). // - The first digit is never a zero. // - A space between letters and digits is optional. // - It is good form to write the letters in upper case. // - It is good form to use a single space between digits and letters. pcDigits := A.DigitNotZero.Then(A.Digit.Times(3)) pcLetter := A.ASCIILower.Or(A.ASCIIUpper) pcLetters := M.ToUpper(pcLetter.Times(2)) space := M.Replace(A.Blanks.Optional(), " ") postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile) // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. return tokenize.NewTokenizer(postcode) }