package tokenize_test import ( "fmt" "testing" tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func TestCombinators(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M AssertHandlers(t, []HandlerT{ {"", c.Not(a.Char('b')), false, ""}, {"abc not", c.Not(a.Char('b')), true, "a"}, {"bcd not", c.Not(a.Char('b')), false, ""}, {"aaaxxxb", c.OneOrMore(c.Not(a.Char('b'))), true, "aaaxxx"}, {"1010 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), true, "1"}, {"2020 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), false, ""}, {"abc any", c.Any(a.Char('a'), a.Char('b')), true, "a"}, {"bcd any", c.Any(a.Char('a'), a.Char('b')), true, "b"}, {"cde any", c.Any(a.Char('a'), a.Char('b')), false, ""}, {"ababc repeated", c.Repeated(4, a.Char('a', 'b')), true, "abab"}, {"ababc repeated", c.Repeated(5, a.Char('a', 'b')), false, ""}, {"", c.Min(0, a.Char('a')), true, ""}, {"a", c.Min(0, a.Char('a')), true, "a"}, {"aaaaa", c.Min(4, a.Char('a')), true, "aaaaa"}, {"aaaaa", c.Min(5, a.Char('a')), true, "aaaaa"}, {"aaaaa", c.Min(6, a.Char('a')), false, ""}, {"", c.Max(4, a.Char('b')), true, ""}, {"X", c.Max(4, a.Char('b')), true, ""}, {"bbbbbX", c.Max(4, a.Char('b')), true, "bbbb"}, {"bbbbbX", c.Max(5, a.Char('b')), true, "bbbbb"}, {"bbbbbX", c.Max(6, a.Char('b')), true, "bbbbb"}, {"", c.MinMax(0, 0, a.Char('c')), true, ""}, {"X", c.MinMax(0, 0, a.Char('c')), true, ""}, {"cccc", c.MinMax(0, 5, a.Char('c')), true, "cccc"}, {"ccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, {"cccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, {"cccccX", c.MinMax(0, 0, a.Char('c')), true, ""}, {"cccccX", c.MinMax(0, 1, a.Char('c')), true, "c"}, {"cccccX", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, {"cccccX", c.MinMax(0, 6, a.Char('c')), true, "ccccc"}, {"cccccX", c.MinMax(1, 1, a.Char('c')), true, "c"}, {"", c.MinMax(1, 1, a.Char('c')), false, ""}, {"X", c.MinMax(1, 1, a.Char('c')), false, ""}, {"cccccX", c.MinMax(1, 3, a.Char('c')), true, "ccc"}, {"cccccX", c.MinMax(1, 6, a.Char('c')), true, "ccccc"}, {"cccccX", c.MinMax(3, 4, a.Char('c')), true, "cccc"}, {"", c.OneOrMore(a.Char('d')), false, ""}, {"X", c.OneOrMore(a.Char('d')), false, ""}, {"dX", c.OneOrMore(a.Char('d')), true, "d"}, {"dddddX", c.OneOrMore(a.Char('d')), true, "ddddd"}, {"", c.ZeroOrMore(a.Char('e')), true, ""}, {"X", c.ZeroOrMore(a.Char('e')), true, ""}, {"eX", c.ZeroOrMore(a.Char('e')), true, "e"}, {"eeeeeX", c.ZeroOrMore(a.Char('e')), true, "eeeee"}, {"HI!", c.Seq(a.Char('H'), a.Char('I'), a.Char('!')), true, "HI!"}, {"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"}, {"101010123", c.OneOrMore(c.Seq(a.Char('1'), a.Char('0'))), true, "101010"}, {"", c.Optional(c.OneOrMore(a.Char('f'))), true, ""}, {"ghijkl", c.Optional(a.Char('h')), true, ""}, {"ghijkl", c.Optional(a.Char('g')), true, "g"}, {"fffffX", c.Optional(c.OneOrMore(a.Char('f'))), true, "fffff"}, {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"}, {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Char('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""}, {" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"}, {"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"}, {" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"}, {"ab", c.FollowedBy(a.Char('b'), a.Char('a')), true, "a"}, {"ba", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""}, {"aa", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""}, {"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), true, "aaabbbccc"}, {"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), false, ""}, {"xy", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"}, {"yx", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""}, {"xx", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"}, {"xa", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""}, {"xxxyyyzzzaaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), false, ""}, {"xxxyyyzzzbaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), true, "xxxyyyzzz"}, }) } func TestCombinatorPanics(t *testing.T) { var c, a = tokenize.C, tokenize.A AssertPanics(t, []PanicT{ {func() { a.CharRange('z', 'a') }, true, `Handler: MatchCharRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must be <= end 'a'`}, {func() { c.MinMax(-1, 1, a.Space) }, true, `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, {func() { c.MinMax(1, -1, a.Space) }, true, `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`}, {func() { c.MinMax(10, 5, a.Space) }, true, `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`}, {func() { c.Min(-10, a.Space) }, true, `Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, {func() { c.Max(-42, a.Space) }, true, `Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`}, {func() { a.IntegerBetween(10, -10) }, true, `Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`}, }) } func TestAtoms(t *testing.T) { var a, c = tokenize.A, tokenize.C AssertHandlers(t, []HandlerT{ {"dd", a.CharRange('b', 'e'), true, "d"}, {"ee", a.CharRange('b', 'e'), true, "e"}, {"ff", a.CharRange('b', 'e'), false, ""}, {"ff", a.CharRange('b', 'c', 'f', 'g'), true, "f"}, {"abc123_-,other", c.OneOrMore(a.CharRange('a', 'z', '0', '9', '_', '_', '-', '-')), true, "abc123_-"}, {"Hello, world 1!", a.Str("Hello"), true, "Hello"}, {"Hello, world 2!", a.StrNoCase("hElLo"), true, "Hello"}, {"H♥llÖ, wÖrld 3!", a.Str("H♥llÖ"), true, "H♥llÖ"}, {"H♥llÖ, world 4!", a.StrNoCase("h♥llö"), true, "H♥llÖ"}, {"+X", a.Char('+', '-', '*', '/'), true, "+"}, {"-X", a.Char('+', '-', '*', '/'), true, "-"}, {"*X", a.Char('+', '-', '*', '/'), true, "*"}, {"/X", a.Char('+', '-', '*', '/'), true, "/"}, {"!X", a.Char('+', '-', '*', '/'), false, ""}, {"xxx", a.Char('x'), true, "x"}, {"x ", a.Char(' '), false, ""}, {"aa", a.CharRange('b', 'e'), false, ""}, {"bb", a.CharRange('b', 'e'), true, "b"}, {"cc", a.CharRange('b', 'e'), true, "c"}, {"", a.EndOfFile, true, ""}, {"😂", a.AnyRune, true, "😂"}, {"\xbc with AnyRune", a.AnyRune, true, "�"}, {"", a.AnyRune, false, ""}, {"⌘", a.ValidRune, true, "⌘"}, {"\xbc with ValidRune", a.ValidRune, false, ""}, {"", a.ValidRune, false, ""}, {"", a.ValidRunes, false, ""}, {"v", a.ValidRunes, true, "v"}, {"v😂līd Rün€s\xbcstop here", a.ValidRunes, true, "v😂līd Rün€s"}, {"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", a.ValidRunes, true, "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567"}, {"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678", a.ValidRunes, true, "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"}, {"\xbc with InvalidRune", a.InvalidRune, true, "�"}, {"ok with InvalidRune", a.InvalidRune, false, ""}, {" ", a.Space, true, " "}, {"X", a.Space, false, ""}, {"\t", a.Tab, true, "\t"}, {"\r", a.CR, true, "\r"}, {"\n", a.LF, true, "\n"}, {"!", a.Excl, true, "!"}, {"\"", a.DoubleQuote, true, "\""}, {"#", a.Hash, true, "#"}, {"$", a.Dollar, true, "$"}, {"%", a.Percent, true, "%"}, {"&", a.Amp, true, "&"}, {"'", a.SingleQuote, true, "'"}, {"(", a.LeftParen, true, "("}, {"(", a.RoundOpen, true, "("}, {")", a.RightParen, true, ")"}, {")", a.RoundClose, true, ")"}, {"*", a.Asterisk, true, "*"}, {"*", a.Multiply, true, "*"}, {"+", a.Plus, true, "+"}, {"+", a.Add, true, "+"}, {",", a.Comma, true, ","}, {"-", a.Minus, true, "-"}, {"-", a.Subtract, true, "-"}, {".", a.Dot, true, "."}, {"/", a.Slash, true, "/"}, {"/", a.Divide, true, "/"}, {":", a.Colon, true, ":"}, {";", a.Semicolon, true, ";"}, {"<", a.AngleOpen, true, "<"}, {"<", a.LessThan, true, "<"}, {"=", a.Equal, true, "="}, {">", a.AngleClose, true, ">"}, {">", a.GreaterThan, true, ">"}, {"?", a.Question, true, "?"}, {"@", a.At, true, "@"}, {"[", a.SquareOpen, true, "["}, {"\\", a.Backslash, true, "\\"}, {"]", a.SquareClose, true, "]"}, {"^", a.Caret, true, "^"}, {"_", a.Underscore, true, "_"}, {"`", a.Backquote, true, "`"}, {"{", a.CurlyOpen, true, "{"}, {"|", a.Pipe, true, "|"}, {"}", a.CurlyClose, true, "}"}, {"~", a.Tilde, true, "~"}, {"\t \t \r\n", a.Blank, true, "\t"}, {" \t \t \r\n", a.Blanks, true, " \t \t "}, // {"xxx", a.Whitespace, false, ""}, // {" ", a.Whitespace, true, " "}, // {"\t", a.Whitespace, true, "\t"}, // {"\n", a.Whitespace, true, "\n"}, // {"\r\n", a.Whitespace, true, "\r\n"}, // {" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "}, // {"xxx", a.UnicodeSpace, false, ""}, // {" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "}, // {"", a.EndOfLine, true, ""}, // {"\r\n", a.EndOfLine, true, "\r\n"}, // {"\n", a.EndOfLine, true, "\n"}, // {"0", a.Digit, true, "0"}, // {"1", a.Digit, true, "1"}, // {"2", a.Digit, true, "2"}, // {"3", a.Digit, true, "3"}, // {"4", a.Digit, true, "4"}, // {"5", a.Digit, true, "5"}, // {"6", a.Digit, true, "6"}, // {"7", a.Digit, true, "7"}, // {"8", a.Digit, true, "8"}, // {"9", a.Digit, true, "9"}, // {"X", a.Digit, false, ""}, // {"a", a.ASCIILower, true, "a"}, // {"z", a.ASCIILower, true, "z"}, // {"A", a.ASCIILower, false, ""}, // {"Z", a.ASCIILower, false, ""}, // {"A", a.ASCIIUpper, true, "A"}, // {"Z", a.ASCIIUpper, true, "Z"}, // {"a", a.ASCIIUpper, false, ""}, // {"z", a.ASCIIUpper, false, ""}, // {"1", a.Letter, false, ""}, // {"a", a.Letter, true, "a"}, // {"Ø", a.Letter, true, "Ø"}, // {"Ë", a.Lower, false, ""}, // {"ë", a.Lower, true, "ë"}, // {"ä", a.Upper, false, "ä"}, // {"Ä", a.Upper, true, "Ä"}, // {"0", a.HexDigit, true, "0"}, // {"9", a.HexDigit, true, "9"}, // {"a", a.HexDigit, true, "a"}, // {"f", a.HexDigit, true, "f"}, // {"A", a.HexDigit, true, "A"}, // {"F", a.HexDigit, true, "F"}, // {"g", a.HexDigit, false, "g"}, // {"G", a.HexDigit, false, "G"}, // {"09", a.Integer, true, "9"}, // {"0000129", a.Integer, true, "129"}, // {"0", a.Integer, true, "0"}, // {"00000", a.Integer, true, "0"}, // {"1", a.Integer, true, "1"}, // {"-10X", a.Integer, false, ""}, // {"+10X", a.Integer, false, ""}, // {"-10X", a.Signed(a.Integer), true, "-10"}, // {"+10X", a.Signed(a.Integer), true, "+10"}, // {"+10.1X", a.Signed(a.Integer), true, "+10"}, // {"0X", a.Decimal, true, "0"}, // {"0000X", a.Decimal, true, "0"}, // {"1X", a.Decimal, true, "1"}, // {"01X", a.Decimal, true, "1"}, // {"000001X", a.Decimal, true, "1"}, // {"1.", a.Decimal, true, "1"}, // incomplete float, so only the 1 is picked up // {"123.321X", a.Decimal, true, "123.321"}, // {"0.6X", a.Decimal, true, "0.6"}, // {"-3.14X", a.Decimal, false, ""}, // {"-3.14X", a.Signed(a.Decimal), true, "-3.14"}, // {"-003.0014X", a.Signed(a.Decimal), true, "-3.0014"}, // {"-11", a.IntegerBetween(-10, 10), false, "0"}, // {"-10", a.IntegerBetween(-10, 10), true, "-10"}, // {"0", a.IntegerBetween(-10, 10), true, "0"}, // {"10", a.IntegerBetween(-10, 10), true, "10"}, // {"11", a.IntegerBetween(0, 10), false, ""}, // {"fifteen", a.IntegerBetween(0, 10), false, ""}, }) } func TestIPv4Atoms(t *testing.T) { var a = tokenize.A AssertHandlers(t, []HandlerT{ // Not normalized octet. {"0X", tokenize.MatchOctet(false), true, "0"}, {"00X", tokenize.MatchOctet(false), true, "00"}, {"000X", tokenize.MatchOctet(false), true, "000"}, {"10X", tokenize.MatchOctet(false), true, "10"}, {"010X", tokenize.MatchOctet(false), true, "010"}, {"255123", tokenize.MatchOctet(false), true, "255"}, {"256123", tokenize.MatchOctet(false), false, ""}, {"300", tokenize.MatchOctet(false), false, ""}, // // Octet. {"0", tokenize.MatchOctet(false), true, "0"}, {"02", tokenize.MatchOctet(false), true, "02"}, {"003", tokenize.MatchOctet(false), true, "003"}, {"256", tokenize.MatchOctet(false), false, ""}, {"0X", a.Octet, true, "0"}, {"00X", a.Octet, true, "0"}, {"000X", a.Octet, true, "0"}, {"10X", a.Octet, true, "10"}, {"010X", a.Octet, true, "10"}, {"255123", a.Octet, true, "255"}, {"256123", a.Octet, false, ""}, {"300", a.Octet, false, ""}, // IPv4 address. {"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"}, {"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"}, {"0.0.0.0", a.IPv4, true, "0.0.0.0"}, {"10.20.30.40", a.IPv4, true, "10.20.30.40"}, {"010.020.003.004", a.IPv4, true, "10.20.3.4"}, {"255.255.255.255", a.IPv4, true, "255.255.255.255"}, {"256.255.255.255", a.IPv4, false, ""}, // IPv4 CIDR netmask. {"0", tokenize.MatchIPv4CIDRMask(false), true, "0"}, {"000", tokenize.MatchIPv4CIDRMask(false), true, "000"}, {"0", a.IPv4CIDRMask, true, "0"}, {"00", a.IPv4CIDRMask, true, "0"}, {"000", a.IPv4CIDRMask, true, "0"}, {"32", a.IPv4CIDRMask, true, "32"}, {"032", a.IPv4CIDRMask, true, "32"}, {"33", a.IPv4CIDRMask, false, ""}, // IPv4 netmask in dotted quad format. {"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"}, {"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"}, {"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"}, {"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"}, {"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"}, {"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits) // IPv4 address + CIDR or dotted quad netmask. {"192.168.6.123", a.IPv4Net, false, ""}, {"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"}, {"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"}, {"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"}, {"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"}, {"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"}, {"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr {"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0" }) } func TestIPv6Atoms(t *testing.T) { var a = tokenize.A AssertHandlers(t, []HandlerT{ {"", a.IPv6, false, ""}, {"::", a.IPv6, true, "::"}, {"1::", a.IPv6, true, "1::"}, {"1::1", a.IPv6, true, "1::1"}, {"::1", a.IPv6, true, "::1"}, {"1:2:3:4:5:6:7::", a.IPv6, false, ""}, {"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"}, {"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"}, {"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"}, {"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"}, {"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"}, {"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"}, {"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"}, {"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""}, {"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"}, {"0", a.IPv6CIDRMask, true, "0"}, {"128", a.IPv6CIDRMask, true, "128"}, {"129", a.IPv6CIDRMask, false, ""}, {"::1/128", a.IPv6Net, true, "::1/128"}, {"::1/129", a.IPv6Net, false, ""}, {"1.1.1.1/24", a.IPv6Net, false, ""}, {"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"}, {"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"}, }) } func TestModifiers(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M AssertHandlers(t, []HandlerT{ {"missed me!", m.Drop(a.Char('w')), false, ""}, {"where are you?", m.Drop(a.Char('w')), true, ""}, {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {"cool", a.Str("cool"), true, "cool"}, {"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, {" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "}, {" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"}, {" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"}, {"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"}, {"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"}, {"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""}, {"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"}, {"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"}, }) } // When a TokenMaker encounters an error, this is considered a programmer error. // A TokenMaker should not be called, unless the input is already validated to // follow the correct pattern. Therefore, tokenmakers will panic when the // input cannot be processed successfully. func TestTokenMakerErrorHandling(t *testing.T) { var a, tok = tokenize.A, tokenize.T invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() tokenizer := tokenize.New(invalid) AssertPanic(t, PanicT{ func() { tokenizer("no") }, false, `boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`, }) } func TestTokenMakers(t *testing.T) { var c, a, tok = tokenize.C, tokenize.A, tokenize.T AssertTokenMakers(t, []TokenMakerT{ {`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)), []tokenize.Token{{Type: "A", Value: ""}}}, {`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}}, {`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}}, {`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}}, // I don't check the returned error here, but it's good enough to see that the parsing // stopped after the illegal \g escape sequence. {`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}}, {"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}}, {"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{ {Type: "bar", Value: byte('R')}, {Type: "bar", Value: byte('O')}, {Type: "bar", Value: byte('C')}, {Type: "bar", Value: byte('K')}, {Type: "bar", Value: byte('S')}, }}, {"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}}, {`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}}, {`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}}, {`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}}, {`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}}, {`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}}, {`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}}, {`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}}, {`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}}, {`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}}, {`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}}, {`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}}, {`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}}, {`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}}, {`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}}, {`3.1415=PI`, tok.Float32("N", a.Decimal), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}}, {`24.19287=PI`, tok.Float64("O", a.Decimal), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}}, {`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{ {Type: "P", Value: true}, {Type: "P", Value: true}, {Type: "P", Value: true}, {Type: "P", Value: true}, {Type: "P", Value: true}, {Type: "P", Value: true}, }}, {`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{ {Type: "P", Value: false}, {Type: "P", Value: false}, {Type: "P", Value: false}, {Type: "P", Value: false}, {Type: "P", Value: false}, {Type: "P", Value: false}, }}, {`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}}, }) } func TestTokenGroup_Match(t *testing.T) { var c, a, tok = tokenize.C, tokenize.A, tokenize.T tokenizer := tokenize.New(tok.Group("Group", c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter)))) api, err := tokenizer("xxxxx") AssertTrue(t, err == nil, "Tokenizer result") tokens := api.Tokens() AssertEqual(t, 1, len(tokens), "Length of tokens slice") contained := tokens[0].Value.([]tokenize.Token) AssertEqual(t, 3, len(contained), "Length of contained tokens") AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1") AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2") AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3") } func TestTokenGroup_Mismatch(t *testing.T) { var c, a, tok = tokenize.C, tokenize.A, tokenize.T tokenizer := tokenize.New(tok.Group("Group", c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional()) api, err := tokenizer("12345") AssertTrue(t, err == nil, "Tokenizer result") tokens := api.Tokens() AssertEqual(t, 0, len(tokens), "Length of tokens slice") } // I know, this is hell, but that's the whole point for this test :-> func TestCombination(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M demonic := c.Seq( c.Optional(a.SquareOpen), m.Trim( c.Seq( c.Optional(a.Blanks), c.Repeated(3, a.AngleClose), m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string { return fmt.Sprintf("%d", len(s)) }), m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "), m.ToUpper(c.Min(1, a.ASCIILower)), m.Drop(a.Excl), c.Repeated(3, a.AngleOpen), c.Optional(a.Blanks), ), " \t", ), c.Optional(a.SquareClose), ) AssertHandlers(t, []HandlerT{ {"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"}, {"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"}, {">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"}, {"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"}, }) } // 46709 ns/op func BenchmarkBoolean(b *testing.B) { tokenizer := tokenize.New(tokenize.A.Boolean) for i := 0; i < b.N; i++ { tokenizer("0") tokenizer("1") tokenizer("t") tokenizer("f") tokenizer("T") tokenizer("F") tokenizer("0XX") tokenizer("1XX") tokenizer("tXX") tokenizer("fXX") tokenizer("TXX") tokenizer("FXX") tokenizer("true") tokenizer("TRUE") tokenizer("True") tokenizer("false") tokenizer("FALSE") tokenizer("False") } }