Made a distinction between MatchWhitespace() and MatchUnicodeSpace().

This commit is contained in:
Maurice Makaay 2019-07-04 11:32:07 +00:00
parent d96511ce0a
commit 583197c37a
6 changed files with 62 additions and 45 deletions

View File

@ -46,10 +46,10 @@ func ExampleNew_usingTokens() {
// Output: // Output:
// Runes accepted: "¡ök!" // Runes accepted: "¡ök!"
// Tokens: // Tokens:
// [0] RUNE("¡", value = (int32)161) // [0] RUNE(161)
// [1] RUNE("ö", value = (int32)246) // [1] RUNE(246)
// [2] RUNE("k", value = (int32)107) // [2] RUNE(107)
// [3] RUNE("!", value = (int32)33) // [3] RUNE(33)
} }
func ExampleAPI_Expected() { func ExampleAPI_Expected() {

View File

@ -15,7 +15,6 @@ func ExampleNewAPI() {
func ExampleAPI_NextRune() { func ExampleAPI_NextRune() {
api := tokenize.NewAPI("The input that the API will handle") api := tokenize.NewAPI("The input that the API will handle")
r, err := api.NextRune() r, err := api.NextRune()
fmt.Printf("Rune read from input; %c\n", r) fmt.Printf("Rune read from input; %c\n", r)
fmt.Printf("The error: %v\n", err) fmt.Printf("The error: %v\n", err)
fmt.Printf("API results: %q\n", api.Result().String()) fmt.Printf("API results: %q\n", api.Result().String())
@ -73,8 +72,8 @@ func ExampleAPI_Result() {
// API result runes as string: "new set of runes" // API result runes as string: "new set of runes"
// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's'] // API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
// API third rune: 'w' // API third rune: 'w'
// API result tokens: [42((string)towel) 73((string)Zaphod)] // API result tokens: [42("towel") 73("Zaphod")]
// API second result token: 73((string)Zaphod) // API second result token: 73("Zaphod")
} }
func ExampleAPI_Reset() { func ExampleAPI_Reset() {

View File

@ -4,7 +4,6 @@ import (
"fmt" "fmt"
"io" "io"
"net" "net"
"runtime"
"strconv" "strconv"
"strings" "strings"
"unicode" "unicode"
@ -124,6 +123,7 @@ var A = struct {
Blank Handler Blank Handler
Blanks Handler Blanks Handler
Whitespace Handler Whitespace Handler
UnicodeSpace Handler
EndOfLine Handler EndOfLine Handler
Digit Handler Digit Handler
DigitNotZero Handler DigitNotZero Handler
@ -208,6 +208,7 @@ var A = struct {
Blank: MatchBlank(), Blank: MatchBlank(),
Blanks: MatchBlanks(), Blanks: MatchBlanks(),
Whitespace: MatchWhitespace(), Whitespace: MatchWhitespace(),
UnicodeSpace: MatchUnicodeSpace(),
EndOfLine: MatchEndOfLine(), EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(), Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(), DigitNotZero: MatchDigitNotZero(),
@ -373,14 +374,23 @@ func MatchBlank() Handler {
// or more blank characters, meaning tabs and spaces. // or more blank characters, meaning tabs and spaces.
// //
// When you need whitespace matching, which also includes characters like // When you need whitespace matching, which also includes characters like
// newlines, then make use of MatchSpace(). // newlines, then make use of MatchWhitespace().
// When you need unicode whitespace matching, which also includes characters
// like a vertical tab, then make use of MatchUnicodeSpace().
func MatchBlanks() Handler { func MatchBlanks() Handler {
return MatchOneOrMore(MatchBlank()) return MatchOneOrMore(MatchBlank())
} }
// MatchWhitespace creates a Handler that matches the input against one or more // MatchWhitespace creates a Handler that matches the input against one or more
// whitespace characters, as defined by unicode. // whitespace characters, defined as space ' ', tab '\t', newline '\n' (LF) and
// carriage return '\r' followed by a newline '\n' (CRLF).
func MatchWhitespace() Handler { func MatchWhitespace() Handler {
return MatchOneOrMore(MatchBlank().Or(MatchNewline()))
}
// MatchUnicodeSpace creates a Handler that matches the input against one or more
// whitespace characters, as defined by unicode.
func MatchUnicodeSpace() Handler {
return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace)) return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
} }
@ -601,7 +611,10 @@ func MatchExcept(handler Handler, except Handler) Handler {
} }
} }
// TODO keep this? // MatchFollowedBy creates a Handler that checks if the provided handler matches
// and if the provided lookAhead handler matches after the handler.
// When both handlers match, the match for the handler is accepted and the match
// for the lookAhead handler is ignored.
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool { return func(t *API) bool {
child := t.Fork() child := t.Fork()
@ -613,7 +626,10 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
} }
} }
// TODO keep this? // MatchNotFollowedBy creates a Handler that checks if the provided handler matches
// and if the provided lookAhead handler does not match after the handler.
// If the handler matches and the lookAhead handler doesn't, then the match for
// the handler is accepted.
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool { return func(t *API) bool {
child := t.Fork() child := t.Fork()
@ -1158,7 +1174,7 @@ func MakeByteToken(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int-representation // Result, for which the Token.Value is set to an int-representation
// of the read Rune. // of the read Rune.
func MakeIntToken(toktype interface{}, handler Handler) Handler { func MakeIntToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { return makeStrconvToken("int", toktype, handler, func(s string) (interface{}, error) {
return strconv.Atoi(s) return strconv.Atoi(s)
}) })
} }
@ -1168,7 +1184,7 @@ func MakeIntToken(toktype interface{}, handler Handler) Handler {
// of the read Rune. // of the read Rune.
// TODO allow other Go types for oct and hex too. // TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler Handler) Handler { func MakeInt8Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("int8", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 8) value, err := strconv.ParseInt(s, 10, 8)
if err == nil { if err == nil {
@ -1182,7 +1198,7 @@ func MakeInt8Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int16-representation // Result, for which the Token.Value is set to an int16-representation
// of the read Rune. // of the read Rune.
func MakeInt16Token(toktype interface{}, handler Handler) Handler { func MakeInt16Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("int16", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 16) value, err := strconv.ParseInt(s, 10, 16)
if err == nil { if err == nil {
@ -1196,7 +1212,7 @@ func MakeInt16Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int32-representation // Result, for which the Token.Value is set to an int32-representation
// of the read Rune. // of the read Rune.
func MakeInt32Token(toktype interface{}, handler Handler) Handler { func MakeInt32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("int32", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 32) value, err := strconv.ParseInt(s, 10, 32)
if err == nil { if err == nil {
@ -1211,7 +1227,11 @@ func MakeInt32Token(toktype interface{}, handler Handler) Handler {
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
// 10 = decimal, 16 = hexadecimal). // 10 = decimal, 16 = hexadecimal).
func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler { func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeInt64BaseToken(toktype, base, handler)
}
func makeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken("int64", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, base, 64) value, err := strconv.ParseInt(s, base, 64)
if err == nil { if err == nil {
@ -1232,7 +1252,7 @@ func MakeInt64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an uint-representation // Result, for which the Token.Value is set to an uint-representation
// of the read Rune. // of the read Rune.
func MakeUintToken(toktype interface{}, handler Handler) Handler { func MakeUintToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("uint", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 0) value, err := strconv.ParseUint(s, 10, 0)
if err == nil { if err == nil {
@ -1247,7 +1267,7 @@ func MakeUintToken(toktype interface{}, handler Handler) Handler {
// of the read Rune. // of the read Rune.
// TODO allow other Go types for oct and hex too. // TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler Handler) Handler { func MakeUint8Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("uint8", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 8) value, err := strconv.ParseUint(s, 10, 8)
if err == nil { if err == nil {
@ -1261,7 +1281,7 @@ func MakeUint8Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an uint16-representation // Result, for which the Token.Value is set to an uint16-representation
// of the read Rune. // of the read Rune.
func MakeUint16Token(toktype interface{}, handler Handler) Handler { func MakeUint16Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("uint16", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 16) value, err := strconv.ParseUint(s, 10, 16)
if err == nil { if err == nil {
@ -1275,7 +1295,7 @@ func MakeUint16Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an uint32-representation // Result, for which the Token.Value is set to an uint32-representation
// of the read Rune. // of the read Rune.
func MakeUint32Token(toktype interface{}, handler Handler) Handler { func MakeUint32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("unit32", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 32) value, err := strconv.ParseUint(s, 10, 32)
if err == nil { if err == nil {
@ -1290,7 +1310,7 @@ func MakeUint32Token(toktype interface{}, handler Handler) Handler {
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal, // of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
// 10 = decimal, 16 = hexadecimal). // 10 = decimal, 16 = hexadecimal).
func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler { func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("uint64", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, base, 64) value, err := strconv.ParseUint(s, base, 64)
if err == nil { if err == nil {
@ -1311,7 +1331,7 @@ func MakeUint64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a float32-representation // Result, for which the Token.Value is set to a float32-representation
// of the read Rune. // of the read Rune.
func MakeFloat32Token(toktype interface{}, handler Handler) Handler { func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("float32", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 32) value, err := strconv.ParseFloat(s, 32)
if err == nil { if err == nil {
@ -1325,7 +1345,7 @@ func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a float64-representation // Result, for which the Token.Value is set to a float64-representation
// of the read Rune. // of the read Rune.
func MakeFloat64Token(toktype interface{}, handler Handler) Handler { func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("float64", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 64) value, err := strconv.ParseFloat(s, 64)
if err == nil { if err == nil {
@ -1339,7 +1359,7 @@ func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a bool-representation // Result, for which the Token.Value is set to a bool-representation
// of the read Rune. // of the read Rune.
func MakeBooleanToken(toktype interface{}, handler Handler) Handler { func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, return makeStrconvToken("boolean", toktype, handler,
func(s string) (interface{}, error) { func(s string) (interface{}, error) {
value, err := strconv.ParseBool(s) value, err := strconv.ParseBool(s)
if err == nil { if err == nil {
@ -1349,19 +1369,12 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
}) })
} }
func makeStrconvToken(toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
pc, _, _, _ := runtime.Caller(1)
fullName := runtime.FuncForPC(pc).Name()
parts := strings.Split(fullName, ".")
name := parts[len(parts)-1]
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
value, err := convert(t.Result().String()) value, err := convert(t.Result().String())
if err != nil { if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
panic(fmt.Sprintf( panic(fmt.Sprintf("%s token invalid (%s)", name, err))
"Handler error: %s cannot handle input %q: %s "+
"(only use a type conversion token maker, when the input has been "+
"validated on beforehand)", name, t.Result().String(), err))
} }
return value return value
}) })
@ -1398,6 +1411,8 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
} }
} }
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler { func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(t *API) bool { return func(t *API) bool {
child := t.Fork() child := t.Fork()

View File

@ -162,7 +162,11 @@ func TestAtoms(t *testing.T) {
{"xxx", a.Whitespace, false, ""}, {"xxx", a.Whitespace, false, ""},
{" ", a.Whitespace, true, " "}, {" ", a.Whitespace, true, " "},
{"\t", a.Whitespace, true, "\t"}, {"\t", a.Whitespace, true, "\t"},
{" \t\r\n \r\v\f ", a.Whitespace, true, " \t\r\n \r\v\f "}, {"\n", a.Whitespace, true, "\n"},
{"\r\n", a.Whitespace, true, "\r\n"},
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
{"xxx", a.UnicodeSpace, false, ""},
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
{"", a.EndOfLine, true, ""}, {"", a.EndOfLine, true, ""},
{"\r\n", a.EndOfLine, true, "\r\n"}, {"\r\n", a.EndOfLine, true, "\r\n"},
{"\n", a.EndOfLine, true, "\n"}, {"\n", a.EndOfLine, true, "\n"},
@ -311,8 +315,7 @@ func TestTokenMakerErrorHandling(t *testing.T) {
tokenizer := tokenize.New(invalid) tokenizer := tokenize.New(invalid)
AssertPanic(t, PanicT{ AssertPanic(t, PanicT{
func() { tokenizer("no") }, false, func() { tokenizer("no") }, false,
`Handler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + `boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
}) })
} }

View File

@ -29,11 +29,11 @@ func ExampleToken() {
fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3) fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)
// Output: // Result: [ip("0.0.0.0") mask((int8)0)]
// () // Result: [ip("192.168.0.1") mask((int8)24)]
// Number((int)224) // Result: [ip("255.255.255.255") mask((int8)32)]
// 1((string)John) // Error: mismatch at start of file
// ((int)42) // Error: mismatch at start of file
} }
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {

View File

@ -46,9 +46,9 @@ func ExampleNew() {
} }
} }
// Output: // Output:
// Result: [ip((string)0.0.0.0) mask((int8)0)] // Result: [ip("0.0.0.0") mask((int8)0)]
// Result: [ip((string)192.168.0.1) mask((int8)24)] // Result: [ip("192.168.0.1") mask((int8)24)]
// Result: [ip((string)255.255.255.255) mask((int8)32)] // Result: [ip("255.255.255.255") mask((int8)32)]
// Error: mismatch at start of file // Error: mismatch at start of file
// Error: mismatch at start of file // Error: mismatch at start of file
} }