// go-parsekit/tokenize2/handlers_builtin_test.go

package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
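
// Note on the test tables below: each HandlerT case reads as
// {input, handler, must-match flag, expected output}. The handler is run
// against the input; the test asserts whether it matches and, if so, what
// output it yields.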

// TestCombinatorsTempDebug is a scratch test, used during development to zoom
// in on a single handler (the commented-out cases are leftovers of that).
func TestCombinatorsTempDebug(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// {"024", a.IPv4CIDRMask, true, "24"},
		// {"024", a.Octet, true, "24"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
	})
}
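
// TestCombinators exercises the combinator handlers from tokenize.C: Not, Any,
// Repeated, Min, Max, MinMax, OneOrMore, ZeroOrMore, Seq, Optional, Separated,
// FollowedBy and NotFollowedBy, with a few tokenize.M modifiers mixed in.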
func TestCombinators(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"", c.Not(a.Rune('b')), false, ""},
		{"abc not", c.Not(a.Rune('b')), true, "a"},
		{"bcd not", c.Not(a.Rune('b')), false, ""},
		{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
		{"", c.Max(4, a.Rune('b')), true, ""},
		{"X", c.Max(4, a.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
		{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(a.Rune('d')), false, ""},
		{"X", c.OneOrMore(a.Rune('d')), false, ""},
		{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
		{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(a.Rune('h')), true, ""},
		{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
		{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
		{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
		{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
		{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
		{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
	})
}
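
// TestCombinatorPanics verifies that handler constructors treat invalid
// definitions (inverted ranges, negative repeat counts) as programmer errors
// and panic with a message pointing at the offending source location.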
func TestCombinatorPanics(t *testing.T) {
	var c, a = tokenize.C, tokenize.A
	AssertPanics(t, []PanicT{
		{func() { a.RuneRange('z', 'a') }, true,
			`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
		{func() { c.MinMax(-1, 1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.MinMax(1, -1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { c.MinMax(10, 5, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
		{func() { c.Min(-10, a.Space) }, true,
			`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.Max(-42, a.Space) }, true,
			`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { a.IntegerBetween(10, -10) }, true,
			`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test\.go:\d+: max -10 must not be < min 10`},
	})
}
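
// TestAtoms checks the atomic handlers from tokenize.A: single runes and
// strings, rune ranges and classes, whitespace, and the numeric matchers.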
func TestAtoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"dd", a.RuneRange('b', 'e'), true, "d"},
		{"ee", a.RuneRange('b', 'e'), true, "e"},
		{"ff", a.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", a.Str("Hello"), true, "Hello"},
		{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
		{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", a.Runes('+', '-', '*', '/'), false, ""},
		{"xxx", a.Rune('x'), true, "x"},
		{"x ", a.Rune(' '), false, ""},
		{"aa", a.RuneRange('b', 'e'), false, ""},
		{"bb", a.RuneRange('b', 'e'), true, "b"},
		{"cc", a.RuneRange('b', 'e'), true, "c"},
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		{"\xbc with AnyRune", a.AnyRune, true, "\ufffd"},
		{"", a.AnyRune, false, ""},
		{"⌘", a.ValidRune, true, "⌘"},
		{"\xbc with ValidRune", a.ValidRune, false, ""},
		{"", a.ValidRune, false, ""},
		{"\xbc with InvalidRune", a.InvalidRune, true, "\ufffd"},
		{"ok with InvalidRune", a.InvalidRune, false, ""},
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.LeftParen, true, "("},
		{"(", a.RoundOpen, true, "("},
		{")", a.RightParen, true, ")"},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"*", a.Multiply, true, "*"},
		{"+", a.Plus, true, "+"},
		{"+", a.Add, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{"-", a.Subtract, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{"/", a.Divide, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"<", a.LessThan, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{">", a.GreaterThan, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},
		{"\t \t \r\n", a.Blank, true, "\t"},
		{" \t \t \r\n", a.Blanks, true, " \t \t "},
		{"xxx", a.Whitespace, false, ""},
		{" ", a.Whitespace, true, " "},
		{"\t", a.Whitespace, true, "\t"},
		{"\n", a.Whitespace, true, "\n"},
		{"\r\n", a.Whitespace, true, "\r\n"},
		{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
		{"xxx", a.UnicodeSpace, false, ""},
		{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"1", a.Letter, false, ""},
		{"a", a.Letter, true, "a"},
		{"Ø", a.Letter, true, "Ø"},
		{"Ë", a.Lower, false, ""},
		{"ë", a.Lower, true, "ë"},
{"ä", a.Upper, false, "ä"},
{"Ä", a.Upper, true, "Ä"},
{"0", a.HexDigit, true, "0"},
{"9", a.HexDigit, true, "9"},
{"a", a.HexDigit, true, "a"},
{"f", a.HexDigit, true, "f"},
{"A", a.HexDigit, true, "A"},
{"F", a.HexDigit, true, "F"},
{"g", a.HexDigit, false, "g"},
{"G", a.HexDigit, false, "G"},
{"0", a.Integer, true, "0"},
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
{"1", a.Integer, true, "1"},
{"-10X", a.Integer, false, ""},
{"+10X", a.Integer, false, ""},
{"-10X", a.Signed(a.Integer), true, "-10"},
{"+10X", a.Signed(a.Integer), true, "+10"},
{"+10.1X", a.Signed(a.Integer), true, "+10"},
{"0X", a.Float, true, "0"},
{"0X", a.Float, true, "0"},
{"1X", a.Float, true, "1"},
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
{"123.321X", a.Float, true, "123.321"},
{"-3.14X", a.Float, false, ""},
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
{"-11", a.IntegerBetween(-10, 10), false, "0"},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
{"fifteen", a.IntegerBetween(0, 10), false, ""},
})
}
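
// TestIPv4Atoms checks the IPv4-related handlers, both in their raw Match*
// form (which keeps the matched input as-is) and in their tokenize.A form
// (which normalizes octets and netmasks, e.g. "010" to "10").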
func TestIPv4Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// Octet, not normalized.
		{"0X", tokenize.MatchOctet(false), true, "0"},
		{"00X", tokenize.MatchOctet(false), true, "00"},
		{"000X", tokenize.MatchOctet(false), true, "000"},
		{"10X", tokenize.MatchOctet(false), true, "10"},
		{"010X", tokenize.MatchOctet(false), true, "010"},
		{"255123", tokenize.MatchOctet(false), true, "255"},
		{"256123", tokenize.MatchOctet(false), false, ""},
		{"300", tokenize.MatchOctet(false), false, ""},
		// Octet.
		{"0", tokenize.MatchOctet(false), true, "0"},
		{"02", tokenize.MatchOctet(false), true, "02"},
		{"003", tokenize.MatchOctet(false), true, "003"},
		{"256", tokenize.MatchOctet(false), false, ""},
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "0"},
		{"000X", a.Octet, true, "0"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "10"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},
		// IPv4 address.
		{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
		{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},
		// IPv4 CIDR netmask.
		{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
		{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
		{"0", a.IPv4CIDRMask, true, "0"},
		{"00", a.IPv4CIDRMask, true, "0"},
		{"000", a.IPv4CIDRMask, true, "0"},
		{"32", a.IPv4CIDRMask, true, "32"},
		{"032", a.IPv4CIDRMask, true, "32"},
		{"33", a.IPv4CIDRMask, false, ""},
		// IPv4 netmask in dotted quad format.
		{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
		{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
		{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
		// IPv4 address + CIDR or dotted quad netmask.
		{"192.168.6.123", a.IPv4Net, false, ""},
		{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
		{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
	})
}
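
// TestIPv6Atoms checks the IPv6 handlers, which normalize addresses to their
// shortest form: leading zeros are stripped and the longest run of zero
// fields is collapsed into "::".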
func TestIPv6Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"", a.IPv6, false, ""},
		{"::", a.IPv6, true, "::"},
		{"1::", a.IPv6, true, "1::"},
		{"1::1", a.IPv6, true, "1::1"},
		{"::1", a.IPv6, true, "::1"},
		{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
		{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
		{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
		{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
		{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
		{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
		{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
		{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
		{"0", a.IPv6CIDRMask, true, "0"},
		{"128", a.IPv6CIDRMask, true, "128"},
		{"129", a.IPv6CIDRMask, false, ""},
		{"::1/128", a.IPv6Net, true, "::1/128"},
		{"::1/129", a.IPv6Net, false, ""},
		{"1.1.1.1/24", a.IPv6Net, false, ""},
		{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
		{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
	})
}
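
// TestModifiers checks the output modifiers from tokenize.M, which rewrite
// the output of a wrapped handler: dropping, trimming, replacing, or case
// folding the matched text.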
func TestModifiers(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"missed me!", m.Drop(a.Rune('w')), false, ""},
		{"where are you?", m.Drop(a.Rune('w')), true, ""},
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
		{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
		{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	})
}

// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called unless the input has already been validated
// to follow the correct pattern. Therefore, token makers panic when the input
// cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
	var a, tok = tokenize.A, tokenize.T
	invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
	tokenizer := tokenize.New(invalid)
	AssertPanic(t, PanicT{
		func() { tokenizer("no") }, false,
		`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
	})
}
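
// TestTokenMakers checks the token maker handlers from tokenize.T, which
// attach a typed Token value (string, int, float, bool, etc.) to the text
// matched by the wrapped handler.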
func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},
		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},
		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},
		{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "\ufffd invalid rune"}}},
		// I don't check the returned error here, but it's good enough to see that the parsing
		// stopped after the illegal \g escape sequence.
		{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},
		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},
		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},
		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},
		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},
		{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
	})
}
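
// TestTokenGroup_Match checks tok.Group, which wraps the tokens produced by
// its child handler into a single token whose Value holds the contained
// []tokenize.Token slice.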
func TestTokenGroup_Match(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
	api, err := tokenizer("xxxxx")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Length of tokens slice")
	contained := tokens[0].Value.([]tokenize.Token)
	AssertEqual(t, 3, len(contained), "Length of contained tokens")
	AssertEqual(t, 1, contained[0].Type.(int), "Type of contained Token 1")
	AssertEqual(t, 2, contained[1].Type.(int), "Type of contained Token 2")
	AssertEqual(t, 3, contained[2].Type.(int), "Type of contained Token 3")
}
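
// TestTokenGroup_Mismatch checks that no group token is emitted when the
// grouped handler does not match the input.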
func TestTokenGroup_Mismatch(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
	api, err := tokenizer("12345")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 0, len(tokens), "Length of tokens slice")
}

// I know, this is hell, but that's the whole point of this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	demonic := c.Seq(
		c.Optional(a.SquareOpen),
		m.Trim(
			c.Seq(
				c.Optional(a.Blanks),
				c.Repeated(3, a.AngleClose),
				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
					return fmt.Sprintf("%d", len(s))
				}),
				m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
				m.ToUpper(c.Min(1, a.ASCIILower)),
				m.Drop(a.Excl),
				c.Repeated(3, a.AngleOpen),
				c.Optional(a.Blanks),
			),
			" \t",
		),
		c.Optional(a.SquareClose),
	)
	AssertHandlers(t, []HandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	})
}
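
// For reference, the basic pattern exercised by the tests above, as a minimal
// sketch (it uses only API already seen in this file; the "word" token name
// and the input are made up for illustration):
//
//	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
//	handler := tok.Str("word", c.OneOrMore(a.Letter))
//	tokenizer := tokenize.New(handler)
//	api, err := tokenizer("hello")
//	if err == nil {
//		fmt.Println(api.Tokens()[0].Value) // prints: hello
//	}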