Added a lot of IP-address-related TokenHandlers, so we can now process IPv4 addresses, IPv6 addresses, CIDR netmasks, IPv4 dotted quad netmasks, IPv4Net (ipv4 + mask) and IPv6Net (ipv6 + mask).

This commit is contained in:
Maurice Makaay 2019-06-05 22:16:09 +00:00
parent 05585db341
commit 3d791233e0
3 changed files with 226 additions and 21 deletions

View File

@ -3,10 +3,12 @@ package parsekit
import (
"fmt"
"io"
"net"
"runtime"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// C provides convenient access to a range of parser/combinators that can be
@ -67,6 +69,7 @@ var A = struct {
StrNoCase func(string) TokenHandler
EndOfFile TokenHandler
AnyRune TokenHandler
ValidRune TokenHandler
Space TokenHandler
Tab TokenHandler
CR TokenHandler
@ -130,7 +133,12 @@ var A = struct {
HexDigit TokenHandler
Octet TokenHandler
IPv4 TokenHandler
IPv4MaskBits TokenHandler
IPv4CIDRMask TokenHandler
IPv4Netmask TokenHandler
IPv4Net TokenHandler
IPv6 TokenHandler
IPv6CIDRMask TokenHandler
IPv6Net TokenHandler
}{
Rune: MatchRune,
Runes: MatchRunes,
@ -139,6 +147,7 @@ var A = struct {
StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
Space: MatchRune(' '),
Tab: MatchRune('\t'),
CR: MatchRune('\r'),
@ -200,8 +209,13 @@ var A = struct {
ASCIIUpper: MatchASCIIUpper(),
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
IPv4: MatchIPv4(),
IPv4MaskBits: MatchIntegerBetween(0, 32),
IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(),
IPv4Netmask: MatchIPv4Netmask(),
IPv4Net: MatchIPv4Net(true),
IPv6: MatchIPv6(true),
IPv6CIDRMask: MatchIPv6CIDRMask(),
IPv6Net: MatchIPv6Net(true),
}
// M provides convenient access to a range of modifiers (which in their nature are
@ -596,9 +610,9 @@ func MatchEndOfFile() TokenHandler {
}
}
// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be
// read from the input. It reports back a successful match if the end of the
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
// MatchAnyRune creates a TokenHandler function that checks if a rune can be
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
func MatchAnyRune() TokenHandler {
return func(t *TokenAPI) bool {
_, err := t.NextRune()
@ -610,6 +624,19 @@ func MatchAnyRune() TokenHandler {
}
}
// MatchValidRune creates a TokenHandler function that accepts the next rune
// from the input, provided that the rune could be read without an error and
// that it is not utf8.RuneError (the rune that stands in for invalid UTF8
// input). In all other cases, the handler reports a failed match.
func MatchValidRune() TokenHandler {
	return func(t *TokenAPI) bool {
		next, err := t.NextRune()
		// Reject both read errors and the replacement rune.
		if err != nil || next == utf8.RuneError {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchDigit creates a TokenHandler that checks if a single digit can be read
// from the input.
func MatchDigit() TokenHandler {
@ -716,14 +743,148 @@ func MatchOctet(normalize bool) TokenHandler {
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
// IP address value can be read from the input.
// It will normalize IP-addresses that look like "192.168.001.012" to
// "192.168.1.12".
func MatchIPv4() TokenHandler {
octet := MatchOctet(true)
//
// When the normalize parameter is true, IP-addresses that look like
// "192.168.001.012" will be normalized to "192.168.1.12".
func MatchIPv4(normalize bool) TokenHandler {
	var (
		// The normalize flag is handed over to the octet matcher as-is.
		part = MatchOctet(normalize)
		sep  = MatchRune('.')
	)
	// A dotted quad: four octets, separated by three dots.
	return MatchSeq(part, sep, part, sep, part, sep, part)
}
// MatchIPv4CIDRMask creates a TokenHandler function that matches an IPv4
// CIDR mask, which is an integer value in the range 0 up to and including 32.
func MatchIPv4CIDRMask() TokenHandler {
	const minBits, maxBits = 0, 32
	return MatchIntegerBetween(minBits, maxBits)
}
// MatchIPv4Netmask creates a TokenHandler function that checks if a valid
// IPv4 netmask in dotted quad notation can be read from the input
// (e.g. 255.255.255.0).
//
// Only netmasks in canonical form are accepted, meaning that in binary form
// the mask starts with zero or more 1-bits, followed by only 0-bits up to
// the 32 bit length.
//
// The octets are always matched with normalization enabled, so a netmask that
// looks like "255.255.192.000" will be normalized to "255.255.192.0".
func MatchIPv4Netmask() TokenHandler {
	octet := MakeUint8Token(nil, MatchOctet(true))
	dot := MatchRune('.')
	quad := MatchSeq(octet, dot, octet, dot, octet, dot, octet)

	return func(t *TokenAPI) bool {
		if !quad(t) {
			return false
		}

		// Every octet produced a uint8 token; collect their values.
		res := t.Result()
		var o [4]byte
		for i := range o {
			o[i] = res.Value(i).(byte)
		}

		// Size() returns 0, 0 for a mask that is not in canonical form
		// (ones followed by zeroes).
		if ones, bits := net.IPv4Mask(o[0], o[1], o[2], o[3]).Size(); ones == 0 && bits == 0 {
			return false
		}

		// The octet tokens were only needed for the check above.
		res.ClearTokens()
		return true
	}
}
// MatchIPv4Net creates a TokenHandler function that matches an IPv4 address,
// followed by a slash and a mask. The mask can be provided as a dotted quad
// netmask (e.g. 172.16.10.254/255.255.192.0) or as a CIDR mask
// (e.g. 192.168.0.1/24). The netmask form is tried first.
//
// When the normalize parameter is true, the output is rewritten to the form
// <ip>/<cidr>. A dotted quad netmask is converted into its CIDR value, so the
// first example above is normalized to 172.16.10.254/18. The helper tokens
// are cleared from the result after this rewrite.
//
// When the normalize parameter is false, the handler returns directly after
// a successful match, without rewriting the output or clearing the tokens.
func MatchIPv4Net(normalize bool) TokenHandler {
	ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
	slash := MatchRune('/')
	mask := MatchAny(
		MakeStrLiteralToken("mask", MatchIPv4Netmask()),
		MakeUint8Token("cidr", MatchIPv4CIDRMask()))
	network := MatchSeq(ip, slash, mask)

	return func(t *TokenAPI) bool {
		matched := network(t)
		if !matched || !normalize {
			return matched
		}

		res := t.Result()
		var prefixLen int
		if res.Token(1).Type == "cidr" {
			prefixLen = int(res.Value(1).(uint8))
		} else {
			// Convert the dotted quad netmask into a number of leading 1-bits.
			parts := strings.Split(res.Value(1).(string), ".")
			var quad [4]byte
			for i := range quad {
				// The Atoi error is ignored on purpose: the netmask handler
				// has already validated the octets.
				n, _ := strconv.Atoi(parts[i])
				quad[i] = byte(n)
			}
			prefixLen, _ = net.IPv4Mask(quad[0], quad[1], quad[2], quad[3]).Size()
		}

		res.SetRunes(fmt.Sprintf("%s/%d", res.Value(0), prefixLen))
		res.ClearTokens()
		return true
	}
}
// MatchIPv6 creates a TokenHandler function that checks if an IPv6 address
// can be read from the input.
//
// The handler works on a fork of the TokenAPI. It reads hextets (1 to 4 hex
// digits), "::" sequences and single colons, until 8 or more hextets have
// been counted or none of these can be read anymore. A "::" is counted as
// two hextets. The text that was read is then validated using net.ParseIP().
//
// When the normalize parameter is true, the matched text is replaced with
// the string representation of the parsed address as produced by the net
// package (e.g. "000a:000b:0000:000:00:ffff:0000:1111" becomes
// "a:b::ffff:0:1111").
func MatchIPv6(normalize bool) TokenHandler {
	hextet := MatchMinMax(1, 4, MatchHexDigit())
	colon := MatchRune(':')
	empty := MatchSeq(colon, colon)

	return func(t *TokenAPI) bool {
		count := 0
		child := t.Fork()

	scan:
		for count < 8 {
			// The cases are tried top to bottom and the first matching
			// one wins, so "::" is always tried before a single ":".
			switch {
			case hextet(child):
				count++
			case empty(child):
				count += 2
			case colon(child):
				// A single separator was read; carry on scanning.
			default:
				break scan
			}
		}

		// No hextets or too many hextets (e.g. 1:1:1:1:1:1:1:: is rejected,
		// since :: is counted as 2 or more hextets).
		if count == 0 || count > 8 {
			return false
		}

		// The final verdict is left to net.ParseIP().
		addr := net.ParseIP(child.Result().String())
		if addr == nil {
			return false
		}

		if normalize {
			child.Result().SetRunes(addr.String())
		}
		child.Merge()
		return true
	}
}
// MatchIPv6CIDRMask creates a TokenHandler function that matches an IPv6
// CIDR mask, which is an integer value in the range 0 up to and including 128.
func MatchIPv6CIDRMask() TokenHandler {
	const minBits, maxBits = 0, 128
	return MatchIntegerBetween(minBits, maxBits)
}
// MatchIPv6Net creates a TokenHandler function that matches an IPv6 address,
// followed by a slash and an IPv6 CIDR mask,
// e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
//
// The normalize parameter is passed on to MatchIPv6, which takes care of
// normalizing the IP address part when the parameter is true. The mask part
// is matched using MatchIPv6CIDRMask.
func MatchIPv6Net(normalize bool) TokenHandler {
	return MatchSeq(
		MatchIPv6(normalize),
		MatchRune('/'),
		MatchIPv6CIDRMask(),
	)
}
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is discarded completely.
//

View File

@ -105,8 +105,11 @@ func TestAtoms(t *testing.T) {
{"cc", a.RuneRange('b', 'e'), true, "c"},
{"", a.EndOfFile, true, ""},
{"⌘", a.AnyRune, true, "⌘"},
{"\xbc", a.AnyRune, true, "<22>"}, // invalid UTF8 rune
{"", a.AnyRune, false, ""}, // false is for end of file
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
{"", a.AnyRune, false, ""},
{"⌘", a.ValidRune, true, "⌘"},
{"\xbc with ValidRune", a.ValidRune, false, "<22>"},
{"", a.ValidRune, false, ""},
{" ", a.Space, true, " "},
{"X", a.Space, false, ""},
{"\t", a.Tab, true, "\t"},
@ -201,6 +204,17 @@ func TestAtoms(t *testing.T) {
{"-3.14X", a.Float, false, ""},
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
{"-11", a.IntegerBetween(-10, 10), false, "0"},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
})
}
func TestIPv4Atoms(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "00"},
{"000X", a.Octet, true, "000"},
@ -214,14 +228,44 @@ func TestAtoms(t *testing.T) {
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
{"256.255.255.255", a.IPv4, false, ""},
{"0", a.IPv4MaskBits, true, "0"},
{"32", a.IPv4MaskBits, true, "32"},
{"33", a.IPv4MaskBits, false, "0"},
{"-11", a.IntegerBetween(-10, 10), false, "0"},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
{"0", a.IPv4CIDRMask, true, "0"},
{"32", a.IPv4CIDRMask, true, "32"},
{"33", a.IPv4CIDRMask, false, ""},
{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
{"192.168.6.123", a.IPv4Net, false, ""},
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
{"10.0.0.10/16.0.0.0", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
})
}
// TestIPv6Atoms runs the IPv6-related atoms (IPv6, IPv6CIDRMask and IPv6Net)
// against a table of inputs. Each row holds the input, the handler under
// test, whether or not a match is expected and the expected output.
func TestIPv6Atoms(t *testing.T) {
	a := parsekit.A
	cases := []parsekit.TokenHandlerT{
		// IPv6 addresses.
		{"", a.IPv6, false, ""},
		{"::", a.IPv6, true, "::"},
		{"1::", a.IPv6, true, "1::"},
		{"1::1", a.IPv6, true, "1::1"},
		{"::1", a.IPv6, true, "::1"},
		{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
		{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
		{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
		{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},

		// IPv6 CIDR masks.
		{"0", a.IPv6CIDRMask, true, "0"},
		{"128", a.IPv6CIDRMask, true, "128"},
		{"129", a.IPv6CIDRMask, false, ""},

		// IPv6 address + CIDR mask.
		{"::1/128", a.IPv6Net, true, "::1/128"},
		{"::1/129", a.IPv6Net, false, ""},
		{"1.1.1.1/24", a.IPv6Net, false, ""},
		{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
		{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
	}
	parsekit.AssertTokenHandlers(t, cases)
}

View File

@ -22,7 +22,7 @@ import (
func ExampleTokenizer_Execute() {
// Build the tokenizer for ip/mask.
ip := T.Str("ip", A.IPv4)
mask := T.Int8("mask", A.IPv4MaskBits)
mask := T.Int8("mask", A.IPv4CIDRMask)
cidr := C.Seq(ip, A.Slash, mask)
tokenizer := NewTokenizer(cidr, "cidr")