diff --git a/parse/parse_test.go b/parse/parse_test.go index b28c23d..6d3cca6 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -46,10 +46,10 @@ func ExampleNew_usingTokens() { // Output: // Runes accepted: "¡ök!" // Tokens: - // [0] RUNE("¡", value = (int32)161) - // [1] RUNE("ö", value = (int32)246) - // [2] RUNE("k", value = (int32)107) - // [3] RUNE("!", value = (int32)33) + // [0] RUNE(161) + // [1] RUNE(246) + // [2] RUNE(107) + // [3] RUNE(33) } func ExampleAPI_Expected() { diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 0ce2389..9083ca8 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -15,7 +15,6 @@ func ExampleNewAPI() { func ExampleAPI_NextRune() { api := tokenize.NewAPI("The input that the API will handle") r, err := api.NextRune() - fmt.Printf("Rune read from input; %c\n", r) fmt.Printf("The error: %v\n", err) fmt.Printf("API results: %q\n", api.Result().String()) @@ -73,8 +72,8 @@ func ExampleAPI_Result() { // API result runes as string: "new set of runes" // API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's'] // API third rune: 'w' - // API result tokens: [42((string)towel) 73((string)Zaphod)] - // API second result token: 73((string)Zaphod) + // API result tokens: [42("towel") 73("Zaphod")] + // API second result token: 73("Zaphod") } func ExampleAPI_Reset() { diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 0674aa1..40481e9 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "net" - "runtime" "strconv" "strings" "unicode" @@ -124,6 +123,7 @@ var A = struct { Blank Handler Blanks Handler Whitespace Handler + UnicodeSpace Handler EndOfLine Handler Digit Handler DigitNotZero Handler @@ -208,6 +208,7 @@ var A = struct { Blank: MatchBlank(), Blanks: MatchBlanks(), Whitespace: MatchWhitespace(), + UnicodeSpace: MatchUnicodeSpace(), EndOfLine: MatchEndOfLine(), Digit: MatchDigit(), 
DigitNotZero: MatchDigitNotZero(), @@ -373,14 +374,23 @@ func MatchBlank() Handler { // or more blank characters, meaning tabs and spaces. // // When you need whitespace matching, which also includes characters like -// newlines, then make use of MatchSpace(). +// newlines, then make use of MatchWhitespace(). +// When you need unicode whitespace matching, which also includes characters +// like a vertical tab, then make use of MatchUnicodeSpace(). func MatchBlanks() Handler { return MatchOneOrMore(MatchBlank()) } // MatchWhitespace creates a Handler that matches the input against one or more -// whitespace characters, as defined by unicode. +// whitespace characters, defined as space ' ', tab '\t', newline '\n' (LF) and +// carriage return '\r' followed by a newline '\n' (CRLF). func MatchWhitespace() Handler { + return MatchOneOrMore(MatchBlank().Or(MatchNewline())) +} + +// MatchUnicodeSpace creates a Handler that matches the input against one or more +// whitespace characters, as defined by unicode. +func MatchUnicodeSpace() Handler { return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace)) } @@ -601,7 +611,10 @@ func MatchExcept(handler Handler, except Handler) Handler { } } -// TODO keep this? +// MatchFollowedBy creates a Handler that checks if the provided handler matches +// and if the provided lookAhead handler matches after the handler. +// When both handlers match, the match for the handler is accepted and the match +// for the lookAhead handler is ignored. func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { return func(t *API) bool { child := t.Fork() @@ -613,7 +626,10 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { } } -// TODO keep this? +// MatchNotFollowedBy creates a Handler that checks if the provided handler matches +// and if the provided lookAhead handler does not match after the handler. +// If the handler matches and the lookAhead handler doesn't, then the match for +// the handler is accepted. 
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { return func(t *API) bool { child := t.Fork() @@ -1158,7 +1174,7 @@ func MakeByteToken(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an int-representation // of the read Rune. func MakeIntToken(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { + return makeStrconvToken("int", toktype, handler, func(s string) (interface{}, error) { return strconv.Atoi(s) }) } @@ -1168,7 +1184,7 @@ func MakeIntToken(toktype interface{}, handler Handler) Handler { // of the read Rune. // TODO allow other Go types for oct and hex too. func MakeInt8Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("int8", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 8) if err == nil { @@ -1182,7 +1198,7 @@ func MakeInt8Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an int16-representation // of the read Rune. func MakeInt16Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("int16", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 16) if err == nil { @@ -1196,7 +1212,7 @@ func MakeInt16Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an int32-representation // of the read Rune. 
func MakeInt32Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("int32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, 10, 32) if err == nil { @@ -1211,7 +1227,11 @@ func MakeInt32Token(toktype interface{}, handler Handler) Handler { // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // 10 = decimal, 16 = hexadecimal). func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeInt64BaseToken(toktype, base, handler) +} + +func makeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { + return makeStrconvToken("int64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseInt(s, base, 64) if err == nil { @@ -1232,7 +1252,7 @@ func MakeInt64Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an uint-representation // of the read Rune. func MakeUintToken(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("uint", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, 10, 0) if err == nil { @@ -1247,7 +1267,7 @@ func MakeUintToken(toktype interface{}, handler Handler) Handler { // of the read Rune. // TODO allow other Go types for oct and hex too. func MakeUint8Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("uint8", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, 10, 8) if err == nil { @@ -1261,7 +1281,7 @@ func MakeUint8Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an uint16-representation // of the read Rune. 
func MakeUint16Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("uint16", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, 10, 16) if err == nil { @@ -1275,7 +1295,7 @@ func MakeUint16Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an uint32-representation // of the read Rune. func MakeUint32Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("uint32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, 10, 32) if err == nil { @@ -1290,7 +1310,7 @@ func MakeUint32Token(toktype interface{}, handler Handler) Handler { // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // 10 = decimal, 16 = hexadecimal). func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("uint64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseUint(s, base, 64) if err == nil { @@ -1311,7 +1331,7 @@ func MakeUint64Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an float32-representation // of the read Rune. func MakeFloat32Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("float32", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseFloat(s, 32) if err == nil { @@ -1325,7 +1345,7 @@ func MakeFloat32Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an float64-representation // of the read Rune. 
func MakeFloat64Token(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("float64", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseFloat(s, 64) if err == nil { @@ -1339,7 +1359,7 @@ func MakeFloat64Token(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to an bool-representation // of the read Rune. func MakeBooleanToken(toktype interface{}, handler Handler) Handler { - return makeStrconvToken(toktype, handler, + return makeStrconvToken("boolean", toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseBool(s) if err == nil { @@ -1349,19 +1369,12 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler { }) } -func makeStrconvToken(toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { - pc, _, _, _ := runtime.Caller(1) - fullName := runtime.FuncForPC(pc).Name() - parts := strings.Split(fullName, ".") - name := parts[len(parts)-1] +func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { value, err := convert(t.Result().String()) if err != nil { // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? - panic(fmt.Sprintf( - "Handler error: %s cannot handle input %q: %s "+ - "(only use a type conversion token maker, when the input has been "+ - "validated on beforehand)", name, t.Result().String(), err)) + panic(fmt.Sprintf("%s token invalid (%s)", name, err)) } return value }) @@ -1398,6 +1411,8 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t } } +// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will +// take the tokens as produced by the handler and group them together in a single token. 
func MakeTokenGroup(toktype interface{}, handler Handler) Handler { return func(t *API) bool { child := t.Fork() diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go index 525d44e..5d8a867 100644 --- a/tokenize/handlers_builtin_test.go +++ b/tokenize/handlers_builtin_test.go @@ -162,7 +162,11 @@ func TestAtoms(t *testing.T) { {"xxx", a.Whitespace, false, ""}, {" ", a.Whitespace, true, " "}, {"\t", a.Whitespace, true, "\t"}, - {" \t\r\n \r\v\f ", a.Whitespace, true, " \t\r\n \r\v\f "}, + {"\n", a.Whitespace, true, "\n"}, + {"\r\n", a.Whitespace, true, "\r\n"}, + {" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "}, + {"xxx", a.UnicodeSpace, false, ""}, + {" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "}, {"", a.EndOfLine, true, ""}, {"\r\n", a.EndOfLine, true, "\r\n"}, {"\n", a.EndOfLine, true, "\n"}, @@ -311,8 +315,7 @@ func TestTokenMakerErrorHandling(t *testing.T) { tokenizer := tokenize.New(invalid) AssertPanic(t, PanicT{ func() { tokenizer("no") }, false, - `Handler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + - `invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`, + `boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`, }) } diff --git a/tokenize/result_test.go b/tokenize/result_test.go index f188717..6966da2 100644 --- a/tokenize/result_test.go +++ b/tokenize/result_test.go @@ -29,11 +29,11 @@ func ExampleToken() { fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3) - // Output: - // () - // Number((int)224) - // 1((string)John) - // ((int)42) + // Result: [ip("0.0.0.0") mask((int8)0)] + // Result: [ip("192.168.0.1") mask((int8)24)] + // Result: [ip("255.255.255.255") mask((int8)32)] + // Error: mismatch at start of file + // Error: mismatch at start of file } func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index 
9361798..97b7d6d 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -46,9 +46,9 @@ func ExampleNew() { } } // Output: - // Result: [ip((string)0.0.0.0) mask((int8)0)] - // Result: [ip((string)192.168.0.1) mask((int8)24)] - // Result: [ip((string)255.255.255.255) mask((int8)32)] + // Result: [ip("0.0.0.0") mask((int8)0)] + // Result: [ip("192.168.0.1") mask((int8)24)] + // Result: [ip("255.255.255.255") mask((int8)32)] // Error: mismatch at start of file // Error: mismatch at start of file }