Cleanup of test code structure. Added support for creating parsers for either StateHandler or Matcher functions.
This commit is contained in:
parent
8a09b7ca49
commit
d9ab7298e7
|
@ -86,22 +86,19 @@ func TestAtoms(t *testing.T) {
|
|||
{"g", a.HexDigit, false},
|
||||
{"G", a.HexDigit, false},
|
||||
} {
|
||||
parser := parsekit.New(c.input, func(p *parsekit.P) {
|
||||
if p.On(c.matcher).Accept().End() {
|
||||
p.EmitLiteral(SuccessItem)
|
||||
} else {
|
||||
p.EmitLiteral(FailItem)
|
||||
}
|
||||
})
|
||||
parser := parsekit.New(c.matcher).Parse(c.input)
|
||||
item, err, ok := parser.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
||||
}
|
||||
if c.mustMatch && item.Type != SuccessItem {
|
||||
t.Fatalf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
||||
}
|
||||
if !c.mustMatch && item.Type != FailItem {
|
||||
t.Fatalf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
||||
if c.mustMatch {
|
||||
if !ok {
|
||||
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
||||
}
|
||||
if item.Type != parsekit.MatchedItem {
|
||||
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
||||
}
|
||||
} else {
|
||||
if ok {
|
||||
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -115,13 +112,13 @@ func TestSequenceOfRunes(t *testing.T) {
|
|||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
parser := parsekit.New(input, func(p *parsekit.P) {
|
||||
parser := parsekit.New(func(p *parsekit.P) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
})
|
||||
item, err, ok := parser.Next()
|
||||
item, err, ok := parser.Parse(input).Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ type MatchDialog struct {
|
|||
// case an invalid UTF8 rune or the end of the file was encountered.
|
||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||
if m.curRune == utf8.RuneError {
|
||||
panic("Matcher must not call NextRune() after it returned false")
|
||||
panic("internal parser error: Matcher must not call NextRune() after it returned false")
|
||||
}
|
||||
r, w, ok := m.p.peek(m.offset)
|
||||
m.offset += w
|
||||
|
@ -73,7 +73,7 @@ func (m *MatchDialog) Fork() *MatchDialog {
|
|||
// reused for performing another match.
|
||||
func (m *MatchDialog) Merge() bool {
|
||||
if m.parent == nil {
|
||||
panic("Cannot call Merge a a non-forked MatchDialog")
|
||||
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
||||
}
|
||||
m.parent.runes = append(m.parent.runes, m.runes...)
|
||||
m.parent.widths = append(m.parent.widths, m.widths...)
|
||||
|
@ -110,7 +110,7 @@ var C = struct {
|
|||
Max func(int, Matcher) Matcher
|
||||
ZeroOrMore func(Matcher) Matcher
|
||||
OneOrMore func(Matcher) Matcher
|
||||
Bounded func(int, int, Matcher) Matcher
|
||||
MinMax func(int, int, Matcher) Matcher
|
||||
Separated func(Matcher, Matcher) Matcher
|
||||
Drop func(Matcher) Matcher
|
||||
}{
|
||||
|
@ -128,7 +128,7 @@ var C = struct {
|
|||
Max: MatchMax,
|
||||
ZeroOrMore: MatchZeroOrMore,
|
||||
OneOrMore: MatchOneOrMore,
|
||||
Bounded: MatchBounded,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Drop: MatchDrop,
|
||||
}
|
||||
|
@ -225,30 +225,30 @@ func MatchNot(matcher Matcher) Matcher {
|
|||
}
|
||||
|
||||
func MatchRepeat(count int, matcher Matcher) Matcher {
|
||||
return MatchBounded(count, count, matcher)
|
||||
return MatchMinMax(count, count, matcher)
|
||||
}
|
||||
|
||||
func MatchMin(min int, matcher Matcher) Matcher {
|
||||
return MatchBounded(min, -1, matcher)
|
||||
return MatchMinMax(min, -1, matcher)
|
||||
}
|
||||
|
||||
func MatchMax(max int, matcher Matcher) Matcher {
|
||||
return MatchBounded(-1, max, matcher)
|
||||
return MatchMinMax(-1, max, matcher)
|
||||
}
|
||||
|
||||
func MatchZeroOrMore(matcher Matcher) Matcher {
|
||||
return MatchBounded(0, -1, matcher)
|
||||
return MatchMinMax(0, -1, matcher)
|
||||
}
|
||||
|
||||
func MatchOneOrMore(matcher Matcher) Matcher {
|
||||
return MatchBounded(1, -1, matcher)
|
||||
return MatchMinMax(1, -1, matcher)
|
||||
}
|
||||
|
||||
func MatchBounded(min int, max int, matcher Matcher) Matcher {
|
||||
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
if min >= 0 && max >= 0 && min > max {
|
||||
panic("MatchRepeat definition error: max must not be < min")
|
||||
panic("internal parser error: MatchRepeat definition error: max must not be < min")
|
||||
}
|
||||
total := 0
|
||||
// Specified min: check for the minimum required amount of matches.
|
||||
|
|
|
@ -7,365 +7,106 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func newParser(input string, Matcher parsekit.Matcher) *parsekit.P {
|
||||
stateFn := func(p *parsekit.P) {
|
||||
p.Expects("MATCH")
|
||||
if p.On(Matcher).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.RouteRepeat()
|
||||
}
|
||||
}
|
||||
return parsekit.New(input, stateFn)
|
||||
}
|
||||
|
||||
func ExampleTestMatchAny(t *testing.T) {
|
||||
func ExampleMatchAnyRune(t *testing.T) {
|
||||
parser := parsekit.New(
|
||||
"¡Any / valid / character will dö!",
|
||||
func(p *parsekit.P) {
|
||||
p.On(a.AnyRune).Accept()
|
||||
p.EmitLiteral(TestItem)
|
||||
p.Expects("Any valid rune")
|
||||
if p.On(a.AnyRune).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
})
|
||||
match, _, ok := parser.Next()
|
||||
run := parser.Parse("¡Any / valid / character will dö!")
|
||||
match, _, ok := run.Next()
|
||||
if ok {
|
||||
fmt.Printf("Match = %q\n", match)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAnyRune(t *testing.T) {
|
||||
p := newParser("o", a.AnyRune)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "o" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
|
||||
}
|
||||
}
|
||||
func TestCombinators(t *testing.T) {
|
||||
for i, c := range []struct {
|
||||
input string
|
||||
matcher parsekit.Matcher
|
||||
mustMatch bool
|
||||
expected string
|
||||
}{
|
||||
{"xxx", c.Rune('x'), true, "x"},
|
||||
{"x ", c.Rune(' '), false, ""},
|
||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", c.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", c.RuneRange('b', 'e'), true, "c"},
|
||||
{"dd", c.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", c.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", c.RuneRange('b', 'e'), false, ""},
|
||||
{"Hello, world!", c.String("Hello"), true, "Hello"},
|
||||
{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
|
||||
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"abc", c.Not(c.Rune('b')), true, "a"},
|
||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
||||
{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
|
||||
{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
|
||||
{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
|
||||
{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, c.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, c.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
|
||||
{"", c.Max(4, c.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, c.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(c.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(c.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
|
||||
{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
|
||||
{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Optional(c.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
|
||||
{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
} {
|
||||
parser := parsekit.New(c.matcher).Parse(c.input)
|
||||
item, err, ok := parser.Next()
|
||||
|
||||
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
|
||||
p := newParser("", a.AnyRune)
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
}
|
||||
expected := "unexpected end of file (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
|
||||
p := newParser("\xcd", a.AnyRune)
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
}
|
||||
expected := "invalid UTF8 character in input (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRune(t *testing.T) {
|
||||
p := newParser("xxx", c.Rune('x'))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "x" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRune_OnMismatch(t *testing.T) {
|
||||
p := newParser("x ", c.Rune(' '))
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing did not fail unexpectedly")
|
||||
}
|
||||
expected := "unexpected character 'x' (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRuneRange(t *testing.T) {
|
||||
m := c.RuneRange('b', 'y')
|
||||
s := "mnopqrstuvwxybcdefghijkl"
|
||||
p := newParser(s, m)
|
||||
for i := 0; i < len(s); i++ {
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if s[i] != r.Value[0] {
|
||||
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||
if c.mustMatch {
|
||||
if !ok {
|
||||
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
||||
} else if item.Type != parsekit.MatchedItem {
|
||||
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
||||
} else if item.Value != c.expected {
|
||||
t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i, c.input, c.expected, item.Value)
|
||||
}
|
||||
} else {
|
||||
if ok {
|
||||
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
||||
}
|
||||
}
|
||||
}
|
||||
if _, _, ok := newParser("a", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'a'")
|
||||
}
|
||||
if _, _, ok := newParser("z", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'z'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchString(t *testing.T) {
|
||||
p := newParser("Hello, world!", c.String("Hello"))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "Hello" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchStringNoCase(t *testing.T) {
|
||||
p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "HellÖ" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRunes(t *testing.T) {
|
||||
m := c.Runes('+', '-', '*', '/')
|
||||
s := "-+/*+++"
|
||||
p := newParser(s, m)
|
||||
for i := 0; i < len(s); i++ {
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if s[i] != r.Value[0] {
|
||||
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||
}
|
||||
}
|
||||
if _, _, ok := newParser("^", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input '^'")
|
||||
}
|
||||
if _, _, ok := newParser("x", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'x'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchNot(t *testing.T) {
|
||||
p := newParser("aabc", c.Not(c.Rune('b')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Value != "a" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchNot_Mismatch(t *testing.T) {
|
||||
p := newParser("aabc", c.Not(c.Rune('a')))
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
}
|
||||
expected := "unexpected character 'a' (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAnyOf(t *testing.T) {
|
||||
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "a" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||
}
|
||||
|
||||
r, err, ok = p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "b" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat(t *testing.T) {
|
||||
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "xxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat_Min(t *testing.T) {
|
||||
p := newParser("1111112345", c.Min(4, c.Rune('1')))
|
||||
r, _, _ := p.Next()
|
||||
if r.Value != "111111" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat_Max(t *testing.T) {
|
||||
p := newParser("1111112345", c.Max(4, c.Rune('1')))
|
||||
r, _, _ := p.Next()
|
||||
if r.Value != "1111" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat_Bounded(t *testing.T) {
|
||||
p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1')))
|
||||
r, _, _ := p.Next()
|
||||
if r.Value != "11111" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat_Mismatch(t *testing.T) {
|
||||
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing did not fail unexpectedly")
|
||||
}
|
||||
expected := "unexpected character 'x' (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchOneOrMore(t *testing.T) {
|
||||
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "xxxxxxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSequence(t *testing.T) {
|
||||
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "10" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
|
||||
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "101010" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSequence_WithRepeatedRunes(t *testing.T) {
|
||||
whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
|
||||
equal := c.Rune('=')
|
||||
ding := c.Optional(c.OneOrMore(c.Rune('x')))
|
||||
assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace)
|
||||
p := newParser(" = xxxx 16", assignment)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != " = xxxx " {
|
||||
t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchOptional(t *testing.T) {
|
||||
p := newParser("xyz", c.Optional(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "x" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
|
||||
p = newParser("xyz", c.Optional(c.Rune('y')))
|
||||
r, err, ok = p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchDrop(t *testing.T) {
|
||||
dashes := c.OneOrMore(c.Rune('-'))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "X" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSeparated(t *testing.T) {
|
||||
number := c.Bounded(1, 3, c.RuneRange('0', '9'))
|
||||
separators := c.Runes('|', ';', ',')
|
||||
separated_numbers := c.Separated(separators, number)
|
||||
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "1,2;3|44,55|66;777" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMixAndMatch(t *testing.T) {
|
||||
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||
backslash := c.Rune('\\')
|
||||
x := c.Rune('x')
|
||||
hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
|
||||
|
||||
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != `\x9a\x01\xF0\xfC` {
|
||||
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
|
||||
}
|
||||
}
|
||||
|
|
154
parsekit.go
154
parsekit.go
|
@ -6,7 +6,80 @@ import (
|
|||
"runtime"
|
||||
)
|
||||
|
||||
// P holds the internal state of the parser.
|
||||
// New instantiates a new Parser.
|
||||
// The logic parameter provides the parsing logic to apply. This can be:
|
||||
//
|
||||
// 1) A StateHandler function: in this case, a state machine-style
|
||||
// recursive descent parser is created, in which StateHandler functions
|
||||
// are used to move the state machine forward during parsing.
|
||||
// This type of parser offers a lot of flexibility and it is possible to
|
||||
// emit multiple items from the parse flow.
|
||||
//
|
||||
// This style of parser is typically used for parsing languages and
|
||||
// structured data formats (like json, toml, etc.)
|
||||
//
|
||||
// 2) A Matcher function: in this case, a parser/combinator-style parser
|
||||
// is created, which can be used to match against the provided logic.
|
||||
// The parser can only check input against the Matcher function, and
|
||||
// reports back a successful match or a failure.
|
||||
//
|
||||
// This style of parser can typically be used for validation and normalization
|
||||
// of input data. However, when you are about to use parsekit for that
|
||||
// task, consider using regular expressions instead. They might serve
|
||||
// you better.
|
||||
func New(logic interface{}) *Parser {
|
||||
switch logic := logic.(type) {
|
||||
case func(*P):
|
||||
return makeParserForStateHandler(logic)
|
||||
case StateHandler:
|
||||
return makeParserForStateHandler(logic)
|
||||
case func(m *MatchDialog) bool:
|
||||
return makeParserForMatcher(logic)
|
||||
case Matcher:
|
||||
return makeParserForMatcher(logic)
|
||||
default:
|
||||
panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
|
||||
}
|
||||
}
|
||||
|
||||
func makeParserForStateHandler(handler StateHandler) *Parser {
|
||||
return &Parser{startState: handler}
|
||||
}
|
||||
|
||||
func makeParserForMatcher(matcher Matcher) *Parser {
|
||||
return New(StateHandler(func(p *P) {
|
||||
p.Expects("match")
|
||||
if p.On(matcher).Accept().RouteRepeat().End() {
|
||||
p.EmitLiteral(MatchedItem)
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
// Parser is the top-level parser.
|
||||
type Parser struct {
|
||||
startState StateHandler // the function that handles the very first state
|
||||
}
|
||||
|
||||
// Parse starts a parse run on the provided input data.
|
||||
func (p *Parser) Parse(input string) *Run {
|
||||
return &Run{
|
||||
p: &P{
|
||||
input: input,
|
||||
len: len(input),
|
||||
cursorLine: 1,
|
||||
cursorColumn: 1,
|
||||
nextState: p.startState,
|
||||
items: make(chan Item, 2),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Run represents a single parse run for a Parser.
|
||||
type Run struct {
|
||||
p *P // a struct holding the internal state of a parse run
|
||||
}
|
||||
|
||||
// P holds the internal state of a parse run.
|
||||
type P struct {
|
||||
state StateHandler // the function that handles the current state
|
||||
nextState StateHandler // the function that will handle the next state
|
||||
|
@ -26,48 +99,37 @@ type P struct {
|
|||
LastMatch string // a string representation of the last matched input data
|
||||
}
|
||||
|
||||
// StateHandler defines the type of function that can be used to
|
||||
// handle a parser state.
|
||||
type StateHandler func(*P)
|
||||
|
||||
// New takes an input string and a start state,
|
||||
// and initializes the parser for it.
|
||||
func New(input string, start StateHandler) *P {
|
||||
return &P{
|
||||
input: input,
|
||||
len: len(input),
|
||||
cursorLine: 1,
|
||||
cursorColumn: 1,
|
||||
nextState: start,
|
||||
items: make(chan Item, 2),
|
||||
}
|
||||
}
|
||||
|
||||
// Next retrieves the next parsed item.
|
||||
// Next retrieves the next parsed item for a parse run.
|
||||
// When a valid item was found, then the boolean return parameter will be true.
|
||||
// On error or when successfully reaching the end of the input, false is returned.
|
||||
// When an error occurred, it will be set in the error return value, nil otherwise.
|
||||
func (p *P) Next() (Item, *Error, bool) {
|
||||
func (run *Run) Next() (Item, *Error, bool) {
|
||||
for {
|
||||
select {
|
||||
case i := <-p.items:
|
||||
return p.makeReturnValues(i)
|
||||
case i := <-run.p.items:
|
||||
return run.makeReturnValues(i)
|
||||
default:
|
||||
p.runStatusHandler()
|
||||
run.runStatusHandler()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// StateHandler defines the type of function that can be used to
|
||||
// handle a parser state.
|
||||
type StateHandler func(*P)
|
||||
|
||||
// runStatusHandler moves the parser, which is basically a state machine,
|
||||
// to its next status. It does so by invoking a function of the
|
||||
// type StateHandler. This function represents the current status.
|
||||
func (p *P) runStatusHandler() {
|
||||
if state, ok := p.getNextStateHandler(); ok {
|
||||
p.invokeNextStatusHandler(state)
|
||||
// type StateHandler. This function represents the current status and
|
||||
// is responsible for moving the parser to its next status, depending
|
||||
// on the parsed input data.
|
||||
func (run *Run) runStatusHandler() {
|
||||
if state, ok := run.getNextStateHandler(); ok {
|
||||
run.invokeNextStatusHandler(state)
|
||||
}
|
||||
}
|
||||
|
||||
// getNextStateHandler determintes the next StatusHandler to invoke in order
|
||||
// getNextStateHandler determines the next StateHandler to invoke in order
|
||||
// to move the parsing state machine one step further.
|
||||
//
|
||||
// When implementing a parser, the StateHandler functions must provide
|
||||
|
@ -92,39 +154,39 @@ func (p *P) runStatusHandler() {
|
|||
//
|
||||
// When no routing decision is provided by a StateHandler, then this is
|
||||
// considered a bug in the state handler, and the parser will panic.
|
||||
func (p *P) getNextStateHandler() (StateHandler, bool) {
|
||||
func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
||||
switch {
|
||||
case p.nextState != nil:
|
||||
return p.nextState, true
|
||||
case len(p.routeStack) > 0:
|
||||
return p.popRoute(), true
|
||||
case p.expecting != "":
|
||||
p.UnexpectedInput()
|
||||
case run.p.nextState != nil:
|
||||
return run.p.nextState, true
|
||||
case len(run.p.routeStack) > 0:
|
||||
return run.p.popRoute(), true
|
||||
case run.p.expecting != "":
|
||||
run.p.UnexpectedInput()
|
||||
return nil, false
|
||||
default:
|
||||
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
|
||||
panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", name))
|
||||
name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
|
||||
panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
|
||||
}
|
||||
}
|
||||
|
||||
// invokeNextStatusHandler moves the parser state to the provided state
|
||||
// and invokes the StateHandler function.
|
||||
func (p *P) invokeNextStatusHandler(state StateHandler) {
|
||||
p.state = state
|
||||
p.nextState = nil
|
||||
p.expecting = ""
|
||||
p.state(p)
|
||||
func (run *Run) invokeNextStatusHandler(state StateHandler) {
|
||||
run.p.state = state
|
||||
run.p.nextState = nil
|
||||
run.p.expecting = ""
|
||||
run.p.state(run.p)
|
||||
}
|
||||
|
||||
func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||
switch {
|
||||
case i.Type == ItemEOF:
|
||||
return i, nil, false
|
||||
case i.Type == ItemError:
|
||||
p.err = &Error{i.Value, p.cursorLine, p.cursorColumn}
|
||||
return i, p.err, false
|
||||
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
||||
return i, run.p.err, false
|
||||
default:
|
||||
p.item = i
|
||||
run.p.item = i
|
||||
return i, nil, true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,10 +2,6 @@ package parsekit_test
|
|||
|
||||
import "git.makaay.nl/mauricem/go-parsekit"
|
||||
|
||||
const (
|
||||
TestItem parsekit.ItemType = 1
|
||||
SuccessItem parsekit.ItemType = 2
|
||||
FailItem parsekit.ItemType = 3
|
||||
)
|
||||
const TestItem parsekit.ItemType = 1
|
||||
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
|
|
|
@ -16,6 +16,10 @@ const ItemEOF ItemType = -1
|
|||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
||||
// MatchedItem is a built-in parser item type that is used for indicating a
|
||||
// successful match when using a parser that is based on a Matcher.
|
||||
const MatchedItem ItemType = -3
|
||||
|
||||
// Item represents an item that can be emitted from the parser.
|
||||
type Item struct {
|
||||
Type ItemType
|
||||
|
@ -69,7 +73,7 @@ type Error struct {
|
|||
|
||||
func (err *Error) Error() string {
|
||||
if err == nil {
|
||||
panic("Error method called on the parser, but no error was set")
|
||||
panic("internal parser error: Error() method called on the parser, but no error was set")
|
||||
}
|
||||
return err.Message
|
||||
}
|
||||
|
@ -98,7 +102,7 @@ func (p *P) UnexpectedInput() {
|
|||
case r == INVALID:
|
||||
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
|
||||
default:
|
||||
panic("Unhandled output from peek()")
|
||||
panic("parsekit bug: Unhandled output from peek()")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,8 +35,7 @@ package parsekit
|
|||
func (p *P) On(matcher Matcher) *matchAction {
|
||||
m := &MatchDialog{p: p}
|
||||
if matcher == nil {
|
||||
p.EmitError("internal parser error: matcher argument for On() is nil")
|
||||
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
|
||||
panic("internal parser error: matcher argument for On() is nil")
|
||||
}
|
||||
ok := matcher(m)
|
||||
|
||||
|
|
Loading…
Reference in New Issue