Made a distinction between MatchWhitespace() and MatchUnicodeSpace().

This commit is contained in:
Maurice Makaay 2019-07-04 11:32:07 +00:00
parent d96511ce0a
commit 583197c37a
6 changed files with 62 additions and 45 deletions

View File

@ -46,10 +46,10 @@ func ExampleNew_usingTokens() {
// Output:
// Runes accepted: "¡ök!"
// Tokens:
// [0] RUNE("¡", value = (int32)161)
// [1] RUNE("ö", value = (int32)246)
// [2] RUNE("k", value = (int32)107)
// [3] RUNE("!", value = (int32)33)
// [0] RUNE(161)
// [1] RUNE(246)
// [2] RUNE(107)
// [3] RUNE(33)
}
func ExampleAPI_Expected() {

View File

@ -15,7 +15,6 @@ func ExampleNewAPI() {
func ExampleAPI_NextRune() {
api := tokenize.NewAPI("The input that the API will handle")
r, err := api.NextRune()
fmt.Printf("Rune read from input; %c\n", r)
fmt.Printf("The error: %v\n", err)
fmt.Printf("API results: %q\n", api.Result().String())
@ -73,8 +72,8 @@ func ExampleAPI_Result() {
// API result runes as string: "new set of runes"
// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
// API third rune: 'w'
// API result tokens: [42((string)towel) 73((string)Zaphod)]
// API second result token: 73((string)Zaphod)
// API result tokens: [42("towel") 73("Zaphod")]
// API second result token: 73("Zaphod")
}
func ExampleAPI_Reset() {

View File

@ -4,7 +4,6 @@ import (
"fmt"
"io"
"net"
"runtime"
"strconv"
"strings"
"unicode"
@ -124,6 +123,7 @@ var A = struct {
Blank Handler
Blanks Handler
Whitespace Handler
UnicodeSpace Handler
EndOfLine Handler
Digit Handler
DigitNotZero Handler
@ -208,6 +208,7 @@ var A = struct {
Blank: MatchBlank(),
Blanks: MatchBlanks(),
Whitespace: MatchWhitespace(),
UnicodeSpace: MatchUnicodeSpace(),
EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
@ -373,14 +374,23 @@ func MatchBlank() Handler {
// or more blank characters, meaning tabs and spaces.
//
// When you need whitespace matching, which also includes characters like
// newlines, then make use of MatchSpace().
// newlines, then make use of MatchWhitespace().
// When you need unicode whitespace matching, which also includes characters
// like a vertical tab, then make use of MatchUnicodeSpace().
func MatchBlanks() Handler {
	// Build the single-blank handler first, then wrap it so that it must
	// match one or more times in a row.
	blank := MatchBlank()
	return MatchOneOrMore(blank)
}
// MatchWhitespace creates a Handler that matches the input against one or more
// whitespace characters, as defined by unicode.
// whitespace characters, defined as space ' ', tab '\t', newline '\n' (LF) and
// carriage return '\r' followed by a newline '\n' (CRLF).
func MatchWhitespace() Handler {
	// A single whitespace unit is either a blank or a newline; the
	// resulting handler must match one or more of those units.
	blankOrNewline := MatchBlank().Or(MatchNewline())
	return MatchOneOrMore(blankOrNewline)
}
// MatchUnicodeSpace creates a Handler that matches the input against one or more
// whitespace characters, as defined by unicode.
func MatchUnicodeSpace() Handler {
	// unicode.IsSpace is the per-rune predicate; the resulting handler must
	// match one or more runes that satisfy it.
	unicodeSpace := MatchRuneByCallback(unicode.IsSpace)
	return MatchOneOrMore(unicodeSpace)
}
@ -601,7 +611,10 @@ func MatchExcept(handler Handler, except Handler) Handler {
}
}
// TODO keep this?
// MatchFollowedBy creates a Handler that checks if the provided handler matches
// and if the provided lookAhead handler matches after the handler.
// When both handlers match, the match for the handler is accepted and the match
// for the lookAhead handler is ignored.
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool {
child := t.Fork()
@ -613,7 +626,10 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
}
}
// TODO keep this?
// MatchNotFollowedBy creates a Handler that checks if the provided handler matches
// and if the provided lookAhead handler does not match after the handler.
// If the handler matches and the lookAhead handler doesn't, then the match for
// the handler is accepted.
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t *API) bool {
child := t.Fork()
@ -1158,7 +1174,7 @@ func MakeByteToken(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int-representation
// of the read Rune.
func MakeIntToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
return makeStrconvToken("int", toktype, handler, func(s string) (interface{}, error) {
return strconv.Atoi(s)
})
}
@ -1168,7 +1184,7 @@ func MakeIntToken(toktype interface{}, handler Handler) Handler {
// of the read Rune.
// TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("int8", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 8)
if err == nil {
@ -1182,7 +1198,7 @@ func MakeInt8Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int16-representation
// of the read Rune.
func MakeInt16Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("int16", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 16)
if err == nil {
@ -1196,7 +1212,7 @@ func MakeInt16Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to an int32-representation
// of the read Rune.
func MakeInt32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("int32", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 32)
if err == nil {
@ -1211,7 +1227,11 @@ func MakeInt32Token(toktype interface{}, handler Handler) Handler {
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
// 10 = decimal, 16 = hexadecimal).
func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeInt64BaseToken(toktype, base, handler)
}
func makeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken("int64", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, base, 64)
if err == nil {
@ -1232,7 +1252,7 @@ func MakeInt64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a uint-representation
// of the read Rune.
func MakeUintToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("uint", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 0)
if err == nil {
@ -1247,7 +1267,7 @@ func MakeUintToken(toktype interface{}, handler Handler) Handler {
// of the read Rune.
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("uint8", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 8)
if err == nil {
@ -1261,7 +1281,7 @@ func MakeUint8Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a uint16-representation
// of the read Rune.
func MakeUint16Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("uint16", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 16)
if err == nil {
@ -1275,7 +1295,7 @@ func MakeUint16Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a uint32-representation
// of the read Rune.
func MakeUint32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("uint32", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 32)
if err == nil {
@ -1290,7 +1310,7 @@ func MakeUint32Token(toktype interface{}, handler Handler) Handler {
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
// 10 = decimal, 16 = hexadecimal).
func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("uint64", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, base, 64)
if err == nil {
@ -1311,7 +1331,7 @@ func MakeUint64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a float32-representation
// of the read Rune.
func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("float32", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 32)
if err == nil {
@ -1325,7 +1345,7 @@ func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a float64-representation
// of the read Rune.
func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("float64", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 64)
if err == nil {
@ -1339,7 +1359,7 @@ func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a bool-representation
// of the read Rune.
func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
return makeStrconvToken(toktype, handler,
return makeStrconvToken("boolean", toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseBool(s)
if err == nil {
@ -1349,19 +1369,12 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
})
}
func makeStrconvToken(toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
pc, _, _, _ := runtime.Caller(1)
fullName := runtime.FuncForPC(pc).Name()
parts := strings.Split(fullName, ".")
name := parts[len(parts)-1]
func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
value, err := convert(t.Result().String())
if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
panic(fmt.Sprintf(
"Handler error: %s cannot handle input %q: %s "+
"(only use a type conversion token maker, when the input has been "+
"validated on beforehand)", name, t.Result().String(), err))
panic(fmt.Sprintf("%s token invalid (%s)", name, err))
}
return value
})
@ -1398,6 +1411,8 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
}
}
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(t *API) bool {
child := t.Fork()

View File

@ -162,7 +162,11 @@ func TestAtoms(t *testing.T) {
{"xxx", a.Whitespace, false, ""},
{" ", a.Whitespace, true, " "},
{"\t", a.Whitespace, true, "\t"},
{" \t\r\n \r\v\f ", a.Whitespace, true, " \t\r\n \r\v\f "},
{"\n", a.Whitespace, true, "\n"},
{"\r\n", a.Whitespace, true, "\r\n"},
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
{"xxx", a.UnicodeSpace, false, ""},
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
{"", a.EndOfLine, true, ""},
{"\r\n", a.EndOfLine, true, "\r\n"},
{"\n", a.EndOfLine, true, "\n"},
@ -311,8 +315,7 @@ func TestTokenMakerErrorHandling(t *testing.T) {
tokenizer := tokenize.New(invalid)
AssertPanic(t, PanicT{
func() { tokenizer("no") }, false,
`Handler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
})
}

View File

@ -29,11 +29,11 @@ func ExampleToken() {
fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)
// Output:
// ()
// Number((int)224)
// 1((string)John)
// ((int)42)
// Result: [ip("0.0.0.0") mask((int8)0)]
// Result: [ip("192.168.0.1") mask((int8)24)]
// Result: [ip("255.255.255.255") mask((int8)32)]
// Error: mismatch at start of file
// Error: mismatch at start of file
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {

View File

@ -46,9 +46,9 @@ func ExampleNew() {
}
}
// Output:
// Result: [ip((string)0.0.0.0) mask((int8)0)]
// Result: [ip((string)192.168.0.1) mask((int8)24)]
// Result: [ip((string)255.255.255.255) mask((int8)32)]
// Result: [ip("0.0.0.0") mask((int8)0)]
// Result: [ip("192.168.0.1") mask((int8)24)]
// Result: [ip("255.255.255.255") mask((int8)32)]
// Error: mismatch at start of file
// Error: mismatch at start of file
}