// Source file: go-parsekit/tokenhandlers_builtin_test.go

package parsekit_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestCombinators runs the combinator handlers from parsekit.C (plus a few
// parsekit.M trim modifiers on blank input) against a table of cases.
//
// Entries are positional; judging from the cases they hold the input, the
// handler under test, whether a match is expected and the expected output
// (TokenHandlerT itself is declared elsewhere).
func TestCombinators(t *testing.T) {
	var c, a, m = parsekit.C, parsekit.A, parsekit.M
	parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
		// Not
		{"abc", c.Not(a.Rune('b')), true, "a"},
		{"bcd", c.Not(a.Rune('b')), false, ""},
		{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},

		// Any
		{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},

		// Rep
		{"ababc", c.Rep(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc", c.Rep(5, a.Runes('a', 'b')), false, ""},

		// Min
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, a.Rune('a')), false, ""},

		// Max
		{"", c.Max(4, a.Rune('b')), true, ""},
		{"X", c.Max(4, a.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},

		// MinMax
		{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
		{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},

		// OneOrMore
		{"", c.OneOrMore(a.Rune('d')), false, ""},
		{"X", c.OneOrMore(a.Rune('d')), false, ""},
		{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},

		// ZeroOrMore
		{"", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},

		// Seq, on its own and nested in OneOrMore
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},

		// Opt
		{"", c.Opt(c.OneOrMore(a.Rune('f'))), true, ""},
		{"ghijkl", c.Opt(a.Rune('h')), true, ""},
		{"ghijkl", c.Opt(a.Rune('g')), true, "g"},
		{"fffffX", c.Opt(c.OneOrMore(a.Rune('f'))), true, "fffff"},

		// Separated
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},

		// A nested combination: repeated \xHH escapes.
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},

		// Trimming input that consists only of the trimmed character is
		// expected to leave an empty output.
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
	})
}
// TestCombinatorPanics checks that handler constructors panic with the
// expected message when they are given an invalid definition.
//
// Each entry holds the function that should panic, a flag that is false for
// every case here (its meaning is defined by parsekit.PanicT, which is not
// visible in this file) and the expected panic message.
func TestCombinatorPanics(t *testing.T) {
	var c, a = parsekit.C, parsekit.A

	cases := []parsekit.PanicT{
		{
			func() { a.RuneRange('z', 'a') }, false,
			"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'",
		},
		{
			func() { c.MinMax(-1, 1, a.Space) }, false,
			"TokenHandler bug: MatchMinMax definition error: min must be >= 0",
		},
		{
			func() { c.MinMax(1, -1, a.Space) }, false,
			"TokenHandler bug: MatchMinMax definition error: max must be >= 0",
		},
		{
			func() { c.MinMax(10, 5, a.Space) }, false,
			"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10",
		},
		{
			func() { c.Min(-10, a.Space) }, false,
			"TokenHandler bug: MatchMin definition error: min must be >= 0",
		},
		{
			func() { c.Max(-42, a.Space) }, false,
			"TokenHandler bug: MatchMax definition error: max must be >= 0",
		},
	}
	parsekit.AssertPanics(t, cases)
}
// TestAtoms runs the atom handlers from parsekit.A against a table of cases.
//
// Entries are positional; judging from the cases they hold the input, the
// handler under test, whether a match is expected and the expected output
// (TokenHandlerT itself is declared elsewhere).
func TestAtoms(t *testing.T) {
	var a = parsekit.A
	parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
		// Runes, rune ranges and strings
		{"dd", a.RuneRange('b', 'e'), true, "d"},
		{"ee", a.RuneRange('b', 'e'), true, "e"},
		{"ff", a.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", a.Str("Hello"), true, "Hello"},
		{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
		{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", a.Runes('+', '-', '*', '/'), false, ""},
		{"xxx", a.Rune('x'), true, "x"},
		{"x ", a.Rune(' '), false, ""},
		{"aa", a.RuneRange('b', 'e'), false, ""},
		{"bb", a.RuneRange('b', 'e'), true, "b"},
		{"cc", a.RuneRange('b', 'e'), true, "c"},

		// End of file and arbitrary runes
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		// An invalid UTF8 byte is expected to come out as the Unicode
		// replacement character (U+FFFD). The escape is used so the
		// expectation survives any re-encoding of this file.
		{"\xbc", a.AnyRune, true, "\uFFFD"},
		// false is for end of file
		{"", a.AnyRune, false, ""},

		// Single-character atoms
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.LeftParen, true, "("},
		{"(", a.RoundOpen, true, "("},
		{")", a.RightParen, true, ")"},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"*", a.Multiply, true, "*"},
		{"+", a.Plus, true, "+"},
		{"+", a.Add, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{"-", a.Subtract, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{"/", a.Divide, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"<", a.LessThan, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{">", a.GreaterThan, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},

		// Whitespace and line endings
		{" \t \t \r\n", a.Whitespace, true, " \t \t "},
		{"\r", a.WhitespaceAndNewlines, false, ""},
		{" \t\r\n \r", a.WhitespaceAndNewlines, true, " \t\r\n "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},

		// Digits and ASCII letter classes
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"0", a.HexDigit, true, "0"},
		{"9", a.HexDigit, true, "9"},
		{"a", a.HexDigit, true, "a"},
		{"f", a.HexDigit, true, "f"},
		{"A", a.HexDigit, true, "A"},
		{"F", a.HexDigit, true, "F"},
		{"g", a.HexDigit, false, "g"},
		{"G", a.HexDigit, false, "G"},

		// Integers and floats
		{"0", a.Integer, true, "0"},
		// following Go: 09 is invalid octal, so only 0 is valid for the integer
		{"09", a.Integer, true, "0"},
		{"1", a.Integer, true, "1"},
		{"-10X", a.Integer, false, ""},
		{"+10X", a.Integer, false, ""},
		{"-10X", a.Signed(a.Integer), true, "-10"},
		{"+10X", a.Signed(a.Integer), true, "+10"},
		{"+10.1X", a.Signed(a.Integer), true, "+10"},
		{"0X", a.Float, true, "0"},
		{"1X", a.Float, true, "1"},
		// incomplete float, so only the 1 is picked up
		{"1.", a.Float, true, "1"},
		{"123.321X", a.Float, true, "123.321"},
		{"-3.14X", a.Float, false, ""},
		{"-3.14X", a.Signed(a.Float), true, "-3.14"},
		{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},

		// Octets, IPv4 addresses and mask bits
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "00"},
		{"000X", a.Octet, true, "000"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "010"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},
		{"0", a.IPv4MaskBits, true, "0"},
		{"32", a.IPv4MaskBits, true, "32"},
		{"33", a.IPv4MaskBits, false, "0"},

		// Bounded integers
		{"-11", a.IntegerBetween(-10, 10), false, "0"},
		{"-10", a.IntegerBetween(-10, 10), true, "-10"},
		{"0", a.IntegerBetween(-10, 10), true, "0"},
		{"10", a.IntegerBetween(-10, 10), true, "10"},
		{"11", a.IntegerBetween(0, 10), false, ""},
	})
}
// TestModifiers runs the modifier handlers from parsekit.M against a table of
// cases, checking the output that remains after each modifier has been
// applied to the matched input.
func TestModifiers(t *testing.T) {
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// Most cases wrap a handler that simply consumes all of the input.
	anyRunes := c.OneOrMore(a.AnyRune)
	toX := func(string) string { return "X" }

	cases := []parsekit.TokenHandlerT{
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{" trim ", m.Trim(anyRunes, " "), true, "trim"},
		{" \t trim \t ", m.Trim(anyRunes, " \t"), true, "trim"},
		{" trim ", m.TrimLeft(anyRunes, " "), true, "trim "},
		{" trim ", m.TrimRight(anyRunes, " "), true, " trim"},
		{" \t trim \t ", m.TrimRight(anyRunes, " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(anyRunes, "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), toX), true, "X"},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	}
	parsekit.AssertTokenHandlers(t, cases)
}
// TestTokenMakerErrorHandling checks that a TokenMaker panics on input that it
// cannot convert.
//
// An error inside a TokenMaker is considered a programmer error: a TokenMaker
// should only be called on input that has already been validated to follow
// the correct pattern. Therefore, tokenmakers panic when the input cannot be
// processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
	var a, tok = parsekit.A, parsekit.T

	// The handler matches "no", which is not valid for strconv.ParseBool().
	notABool := tok.Boolean("BOOL", a.Str("no"))
	tokenizer := parsekit.NewTokenizer(notABool, "boolean")

	const wantPanic = `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
		`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`
	run := func() { tokenizer.Execute("no") }

	parsekit.AssertPanic(t, parsekit.PanicT{run, false, wantPanic})
}
// TestTokenMakers runs the token makers from parsekit.T against a table of
// cases. Each entry holds the input, the handler that produces tokens and the
// tokens that are expected to come out of it.
func TestTokenMakers(t *testing.T) {
	var c, a, tok = parsekit.C, parsekit.A, parsekit.T

	// byteTokens builds one expected "bar" token per rune of the input.
	byteTokens := func(input string) []parsekit.Token {
		tokens := make([]parsekit.Token, 0, len(input))
		for _, r := range input {
			tokens = append(tokens, parsekit.Token{Type: "bar", Runes: []rune{r}, Value: byte(r)})
		}
		return tokens
	}

	// boolTokens builds one expected "P" token per spelling, all carrying
	// the same boolean value.
	boolTokens := func(value bool, spellings ...string) []parsekit.Token {
		tokens := make([]parsekit.Token, 0, len(spellings))
		for _, spelling := range spellings {
			tokens = append(tokens, parsekit.Token{Type: "P", Runes: []rune(spelling), Value: value})
		}
		return tokens
	}

	cases := []parsekit.TokenMakerT{
		// Strings
		{
			`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}},
		},
		{
			`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}},
		},
		{
			`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}},
		},

		// Bytes and runes
		{
			"Ø*", tok.Byte("Q", a.AnyRune),
			[]parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}},
		},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), byteTokens("ROCKS")},
		{
			"Ø*", tok.Rune("P", a.AnyRune),
			[]parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}},
		},

		// Signed integers
		{
			`2147483647XYZ`, tok.Int("D", a.Integer),
			[]parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}},
		},
		{
			`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)),
			[]parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}},
		},
		{
			`127XYZ`, tok.Int8("E", a.Integer),
			[]parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}},
		},
		{
			`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)),
			[]parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}},
		},
		{
			`32767XYZ`, tok.Int16("F", a.Integer),
			[]parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}},
		},
		{
			`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)),
			[]parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}},
		},
		{
			`2147483647XYZ`, tok.Int32("G", a.Integer),
			[]parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}},
		},
		{
			`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)),
			[]parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}},
		},
		{
			`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)),
			[]parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}},
		},

		// Unsigned integers
		{
			`4294967295`, tok.Uint("I", a.Integer),
			[]parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}},
		},
		{
			`255XYZ`, tok.Uint8("J", a.Integer),
			[]parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}},
		},
		{
			`65535XYZ`, tok.Uint16("K", a.Integer),
			[]parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}},
		},
		{
			`4294967295XYZ`, tok.Uint32("L", a.Integer),
			[]parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}},
		},
		{
			`18446744073709551615XYZ`, tok.Uint64("M", a.Integer),
			[]parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}},
		},

		// Floats
		{
			`3.1415=PI`, tok.Float32("N", a.Float),
			[]parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}},
		},
		{
			`24.19287=PI`, tok.Float64("O", a.Float),
			[]parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}},
		},

		// Booleans, in every spelling used by the inputs below
		{
			`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)),
			boolTokens(true, "1", "t", "T", "true", "TRUE", "True"),
		},
		{
			`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)),
			boolTokens(false, "0", "f", "F", "false", "FALSE", "False"),
		},
	}
	parsekit.AssertTokenMakers(t, cases)
}
// TestSequenceOfRunes feeds a string of ASCII punctuation through a parser
// that accepts a sequence of the matching single-rune atoms, and checks that
// the accepted result is identical to the input.
func TestSequenceOfRunes(t *testing.T) {
	var c, a = parsekit.C, parsekit.A

	const input = "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

	// One atom per rune of the input, in the same order.
	punctuation := c.Seq(
		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
		a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
		a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
		a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
	)

	var got string
	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
		p.Expects("Sequence of runes")
		if !p.On(punctuation).Accept() {
			return
		}
		got = p.Result().String()
		p.Stop()
	})

	if err := parser.Execute(input); err != nil {
		t.Fatalf("Parsing failed: %s", err)
	}
	if got != input {
		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, got)
	}
}
// TestCombination stacks many combinators and modifiers into one deliberately
// convoluted handler and checks its output for a few inputs. Being hard to
// read is the whole point of this test.
func TestCombination(t *testing.T) {
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// One or more case-insensitive "hello"s; the callback swaps the matched
	// text for its length in bytes.
	helloCount := m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(matched string) string {
		return fmt.Sprintf("%d", len(matched))
	})
	// Comma-separated optional whitespace, rewritten to ", ".
	separator := m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", ")
	// At least one lowercase ASCII letter, converted to upper case.
	shouted := m.ToUpper(c.Min(1, a.ASCIILower))

	inner := c.Seq(
		c.Opt(a.Whitespace),
		c.Rep(3, a.AngleClose),
		helloCount,
		separator,
		shouted,
		m.Drop(a.Excl),
		c.Rep(3, a.AngleOpen),
		c.Opt(a.Whitespace),
	)
	demonic := c.Seq(
		c.Opt(a.SquareOpen),
		m.Trim(inner, " \t"),
		c.Opt(a.SquareClose),
	)

	cases := []parsekit.TokenHandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	}
	parsekit.AssertTokenHandlers(t, cases)
}