diff --git a/tokenhandlers_builtin.go b/tokenhandlers_builtin.go index 88aba77..f2525d1 100644 --- a/tokenhandlers_builtin.go +++ b/tokenhandlers_builtin.go @@ -3,10 +3,12 @@ package parsekit import ( "fmt" "io" + "net" "runtime" "strconv" "strings" "unicode" + "unicode/utf8" ) // C provides convenient access to a range of parser/combinators that can be @@ -67,6 +69,7 @@ var A = struct { StrNoCase func(string) TokenHandler EndOfFile TokenHandler AnyRune TokenHandler + ValidRune TokenHandler Space TokenHandler Tab TokenHandler CR TokenHandler @@ -130,7 +133,12 @@ var A = struct { HexDigit TokenHandler Octet TokenHandler IPv4 TokenHandler - IPv4MaskBits TokenHandler + IPv4CIDRMask TokenHandler + IPv4Netmask TokenHandler + IPv4Net TokenHandler + IPv6 TokenHandler + IPv6CIDRMask TokenHandler + IPv6Net TokenHandler }{ Rune: MatchRune, Runes: MatchRunes, @@ -139,6 +147,7 @@ var A = struct { StrNoCase: MatchStrNoCase, EndOfFile: MatchEndOfFile(), AnyRune: MatchAnyRune(), + ValidRune: MatchValidRune(), Space: MatchRune(' '), Tab: MatchRune('\t'), CR: MatchRune('\r'), @@ -200,8 +209,13 @@ var A = struct { ASCIIUpper: MatchASCIIUpper(), HexDigit: MatchHexDigit(), Octet: MatchOctet(false), - IPv4: MatchIPv4(), - IPv4MaskBits: MatchIntegerBetween(0, 32), + IPv4: MatchIPv4(true), + IPv4CIDRMask: MatchIPv4CIDRMask(), + IPv4Netmask: MatchIPv4Netmask(), + IPv4Net: MatchIPv4Net(true), + IPv6: MatchIPv6(true), + IPv6CIDRMask: MatchIPv6CIDRMask(), + IPv6Net: MatchIPv6Net(true), } // M provides convenient access to a range of modifiers (which in their nature are @@ -596,9 +610,9 @@ func MatchEndOfFile() TokenHandler { } } -// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be -// read from the input. It reports back a successful match if the end of the -// input has not yet been reached and the upcoming input is a valid UTF8 rune. +// MatchAnyRune creates a TokenHandler function that checks if a rune can be +// read from the input. 
Invalid runes on the input are replaced with the UTF8 +// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �. func MatchAnyRune() TokenHandler { return func(t *TokenAPI) bool { _, err := t.NextRune() @@ -610,6 +624,19 @@ func MatchAnyRune() TokenHandler { } } +// MatchValidRune creates a TokenHandler function that checks if a valid +// UTF8 rune can be read from the input. +func MatchValidRune() TokenHandler { + return func(t *TokenAPI) bool { + r, err := t.NextRune() + if err == nil && r != utf8.RuneError { + t.Accept() + return true + } + return false + } +} + // MatchDigit creates a TokenHandler that checks if a single digit can be read // from the input. func MatchDigit() TokenHandler { @@ -716,14 +743,148 @@ func MatchOctet(normalize bool) TokenHandler { // MatchIPv4 creates a TokenHandler function that checks if a valid IPv4 // IP address value can be read from the input. -// It will normalize IP-addresses that look like "192.168.001.012" to -// "192.168.1.12". -func MatchIPv4() TokenHandler { - octet := MatchOctet(true) +// +// When the normalize parameter is true, IP-addresses that look like +// "192.168.001.012" will be normalized to "192.168.1.12". +func MatchIPv4(normalize bool) TokenHandler { + octet := MatchOctet(normalize) dot := MatchRune('.') return MatchSeq(octet, dot, octet, dot, octet, dot, octet) } +// MatchIPv4CIDRMask creates a TokenHandler function that checks if a +// valid IPv4 CIDR mask (0 - 32) value can be read from the input. +func MatchIPv4CIDRMask() TokenHandler { + return MatchIntegerBetween(0, 32) +} + +// MatchIPv4Netmask creates a TokenHandler function that checks if a valid +// IPv4 netmask can be read from input (e.g. 255.255.255.0). +// Only netmasks in canonical form are accepted (meaning that in binary form +// it starts with zero or more 1-bits, followed by only 0-bits up to the +// 32 bit length). +// +// Netmasks that look like "255.255.192.000" will be normalized to "255.255.192.0". 
+func MatchIPv4Netmask() TokenHandler { + octet := MakeUint8Token(nil, MatchOctet(true)) + dot := MatchRune('.') + netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) + + return func(t *TokenAPI) bool { + if !netmask(t) { + return false + } + + // Check if the mask is in canonical form (ones followed by zeroes); mask.Size() returns 0, 0 when it is not. + r := t.Result() + mask := net.IPv4Mask(r.Value(0).(byte), r.Value(1).(byte), r.Value(2).(byte), r.Value(3).(byte)) + ones, bits := mask.Size() + if ones == 0 && bits == 0 { + return false + } + + r.ClearTokens() + return true + } +} + +// MatchIPv4Net creates a TokenHandler function that checks the input for an +// IPv4 + mask input. Both ip/cidr (e.g. 192.168.0.1/24) and ip/netmask +// (e.g. 172.16.10.254/255.255.192.0) are acceptable. +// +// When the normalize parameter is true, then the IP address and the mask are +// normalized. The mask will be normalized to cidr, so the above example would +// be normalized to 172.16.10.254/18. +func MatchIPv4Net(normalize bool) TokenHandler { + ip := MakeStrLiteralToken("ip", MatchIPv4(normalize)) + slash := MatchRune('/') + mask := MatchAny( + MakeStrLiteralToken("mask", MatchIPv4Netmask()), + MakeUint8Token("cidr", MatchIPv4CIDRMask())) + ipnet := MatchSeq(ip, slash, mask) + + return func(t *TokenAPI) bool { + if !ipnet(t) { + return false + } + + if !normalize { + return true + } + + r := t.Result() + maskToken := r.Token(1) + if maskToken.Type == "cidr" { + r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), r.Value(1).(uint8))) + } else { + o := strings.Split(r.Value(1).(string), ".") + b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) } + mask := net.IPv4Mask(b(0), b(1), b(2), b(3)) + bits, _ := mask.Size() + r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), bits)) + } + + r.ClearTokens() + return true + } +} + +// MatchIPv6 creates a TokenHandler function that checks if an IPv6 address +// can be read from the input. 
+func MatchIPv6(normalize bool) TokenHandler { + hextet := MatchMinMax(1, 4, MatchHexDigit()) + colon := MatchRune(':') + empty := MatchSeq(colon, colon) + + return func(t *TokenAPI) bool { + nrOfHextets := 0 + fork := t.Fork() + for nrOfHextets < 8 { + if hextet(fork) { + nrOfHextets++ + } else if empty(fork) { + nrOfHextets += 2 + } else if !colon(fork) { + break + } + } + // No hextets or too many hextets (e.g. 1:1:1:1:1:1:1::, because :: is counted as 2 hextets here). + if nrOfHextets == 0 || nrOfHextets > 8 { + return false + } + + // Reject the input as invalid IPv6 when net.ParseIP() returns nil for it. + parsed := net.ParseIP(fork.Result().String()) + if parsed == nil { + return false + } + + if normalize { + fork.Result().SetRunes(parsed.String()) + } + fork.Merge() + return true + } +} + +// MatchIPv6CIDRMask creates a TokenHandler function that checks if a +// valid IPv6 CIDR mask (0 - 128) value can be read from the input. +func MatchIPv6CIDRMask() TokenHandler { + return MatchIntegerBetween(0, 128) +} + +// MatchIPv6Net creates a TokenHandler function that checks the input for an +// IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64. +// +// When the normalize parameter is true, then the IP address is normalized +// (the flag is passed on to MatchIPv6 only). +func MatchIPv6Net(normalize bool) TokenHandler { + ip := MatchIPv6(normalize) + slash := MatchRune('/') + mask := MatchIPv6CIDRMask() + return MatchSeq(ip, slash, mask) +} + // ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies. // If it does, then its output is discarded completely. 
// diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 444e930..4b06b01 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -105,8 +105,11 @@ func TestAtoms(t *testing.T) { {"cc", a.RuneRange('b', 'e'), true, "c"}, {"", a.EndOfFile, true, ""}, {"⌘", a.AnyRune, true, "⌘"}, - {"\xbc", a.AnyRune, true, "�"}, // invalid UTF8 rune - {"", a.AnyRune, false, ""}, // false is for end of file + {"\xbc with AnyRune", a.AnyRune, true, "�"}, + {"", a.AnyRune, false, ""}, + {"⌘", a.ValidRune, true, "⌘"}, + {"\xbc with ValidRune", a.ValidRune, false, "�"}, + {"", a.ValidRune, false, ""}, {" ", a.Space, true, " "}, {"X", a.Space, false, ""}, {"\t", a.Tab, true, "\t"}, @@ -201,6 +204,17 @@ func TestAtoms(t *testing.T) { {"-3.14X", a.Float, false, ""}, {"-3.14X", a.Signed(a.Float), true, "-3.14"}, {"-003.0014X", a.Signed(a.Float), true, "-003.0014"}, + {"-11", a.IntegerBetween(-10, 10), false, "0"}, + {"-10", a.IntegerBetween(-10, 10), true, "-10"}, + {"0", a.IntegerBetween(-10, 10), true, "0"}, + {"10", a.IntegerBetween(-10, 10), true, "10"}, + {"11", a.IntegerBetween(0, 10), false, ""}, + }) +} + +func TestIPv4Atoms(t *testing.T) { + var a = parsekit.A + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ {"0X", a.Octet, true, "0"}, {"00X", a.Octet, true, "00"}, {"000X", a.Octet, true, "000"}, @@ -214,14 +228,44 @@ func TestAtoms(t *testing.T) { {"010.020.003.004", a.IPv4, true, "10.20.3.4"}, {"255.255.255.255", a.IPv4, true, "255.255.255.255"}, {"256.255.255.255", a.IPv4, false, ""}, - {"0", a.IPv4MaskBits, true, "0"}, - {"32", a.IPv4MaskBits, true, "32"}, - {"33", a.IPv4MaskBits, false, "0"}, - {"-11", a.IntegerBetween(-10, 10), false, "0"}, - {"-10", a.IntegerBetween(-10, 10), true, "-10"}, - {"0", a.IntegerBetween(-10, 10), true, "0"}, - {"10", a.IntegerBetween(-10, 10), true, "10"}, - {"11", a.IntegerBetween(0, 10), false, ""}, + {"0", a.IPv4CIDRMask, true, "0"}, + {"32", a.IPv4CIDRMask, true, "32"}, + {"33", 
a.IPv4CIDRMask, false, ""}, + {"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"}, + {"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"}, + {"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"}, + {"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits) + {"192.168.6.123", a.IPv4Net, false, ""}, + {"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"}, + {"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"}, + {"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"}, + {"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr + {"10.0.0.10/16.0.0.0", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0" + }) +} + +func TestIPv6Atoms(t *testing.T) { + var a = parsekit.A + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + {"", a.IPv6, false, ""}, + {"::", a.IPv6, true, "::"}, + {"1::", a.IPv6, true, "1::"}, + {"1::1", a.IPv6, true, "1::1"}, + {"::1", a.IPv6, true, "::1"}, + {"1:2:3:4:5:6:7::", a.IPv6, false, ""}, + {"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"}, + {"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"}, + {"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"}, + {"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"}, + {"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"}, + {"0", a.IPv6CIDRMask, true, "0"}, + {"128", a.IPv6CIDRMask, true, "128"}, + {"129", a.IPv6CIDRMask, false, ""}, + {"::1/128", a.IPv6Net, true, "::1/128"}, + {"::1/129", a.IPv6Net, false, ""}, + {"1.1.1.1/24", a.IPv6Net, false, ""}, + {"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"}, + {"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"}, }) } diff --git a/tokenizer_test.go b/tokenizer_test.go index 1b740fb..9b4b126 100644 --- a/tokenizer_test.go +++ b/tokenizer_test.go @@ -22,7 +22,7 @@ import ( func 
ExampleTokenizer_Execute() { // Build the tokenizer for ip/mask. ip := T.Str("ip", A.IPv4) - mask := T.Int8("mask", A.IPv4MaskBits) + mask := T.Int8("mask", A.IPv4CIDRMask) cidr := C.Seq(ip, A.Slash, mask) tokenizer := NewTokenizer(cidr, "cidr")