Cleanup of test code structure. Added support for creating parsers for either StateHandler or Matcher functions.

This commit is contained in:
Maurice Makaay 2019-05-23 00:04:10 +00:00
parent 8a09b7ca49
commit d9ab7298e7
7 changed files with 232 additions and 433 deletions

View File

@ -86,22 +86,19 @@ func TestAtoms(t *testing.T) {
{"g", a.HexDigit, false}, {"g", a.HexDigit, false},
{"G", a.HexDigit, false}, {"G", a.HexDigit, false},
} { } {
parser := parsekit.New(c.input, func(p *parsekit.P) { parser := parsekit.New(c.matcher).Parse(c.input)
if p.On(c.matcher).Accept().End() {
p.EmitLiteral(SuccessItem)
} else {
p.EmitLiteral(FailItem)
}
})
item, err, ok := parser.Next() item, err, ok := parser.Next()
if !ok { if c.mustMatch {
t.Fatalf("Test [%d] %q failed with error: %s", i+1, c.input, err) if !ok {
} t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
if c.mustMatch && item.Type != SuccessItem { }
t.Fatalf("Test [%d] %q failed: should match, but it didn't", i+1, c.input) if item.Type != parsekit.MatchedItem {
} t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
if !c.mustMatch && item.Type != FailItem { }
t.Fatalf("Test [%d] %q failed: should not match, but it did", i+1, c.input) } else {
if ok {
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
}
} }
} }
} }
@ -115,13 +112,13 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
) )
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.New(input, func(p *parsekit.P) { parser := parsekit.New(func(p *parsekit.P) {
p.Expects("Sequence of runes") p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() { if p.On(sequence).Accept().End() {
p.EmitLiteral(TestItem) p.EmitLiteral(TestItem)
} }
}) })
item, err, ok := parser.Next() item, err, ok := parser.Parse(input).Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s", err) t.Fatalf("Parsing failed: %s", err)
} }

View File

@ -31,7 +31,7 @@ type MatchDialog struct {
// case an invalid UTF8 rune or the end of the file was encountered. // case an invalid UTF8 rune or the end of the file was encountered.
func (m *MatchDialog) NextRune() (rune, bool) { func (m *MatchDialog) NextRune() (rune, bool) {
if m.curRune == utf8.RuneError { if m.curRune == utf8.RuneError {
panic("Matcher must not call NextRune() after it returned false") panic("internal parser error: Matcher must not call NextRune() after it returned false")
} }
r, w, ok := m.p.peek(m.offset) r, w, ok := m.p.peek(m.offset)
m.offset += w m.offset += w
@ -73,7 +73,7 @@ func (m *MatchDialog) Fork() *MatchDialog {
// reused for performing another match. // reused for performing another match.
func (m *MatchDialog) Merge() bool { func (m *MatchDialog) Merge() bool {
if m.parent == nil { if m.parent == nil {
panic("Cannot call Merge a a non-forked MatchDialog") panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
} }
m.parent.runes = append(m.parent.runes, m.runes...) m.parent.runes = append(m.parent.runes, m.runes...)
m.parent.widths = append(m.parent.widths, m.widths...) m.parent.widths = append(m.parent.widths, m.widths...)
@ -110,7 +110,7 @@ var C = struct {
Max func(int, Matcher) Matcher Max func(int, Matcher) Matcher
ZeroOrMore func(Matcher) Matcher ZeroOrMore func(Matcher) Matcher
OneOrMore func(Matcher) Matcher OneOrMore func(Matcher) Matcher
Bounded func(int, int, Matcher) Matcher MinMax func(int, int, Matcher) Matcher
Separated func(Matcher, Matcher) Matcher Separated func(Matcher, Matcher) Matcher
Drop func(Matcher) Matcher Drop func(Matcher) Matcher
}{ }{
@ -128,7 +128,7 @@ var C = struct {
Max: MatchMax, Max: MatchMax,
ZeroOrMore: MatchZeroOrMore, ZeroOrMore: MatchZeroOrMore,
OneOrMore: MatchOneOrMore, OneOrMore: MatchOneOrMore,
Bounded: MatchBounded, MinMax: MatchMinMax,
Separated: MatchSeparated, Separated: MatchSeparated,
Drop: MatchDrop, Drop: MatchDrop,
} }
@ -225,30 +225,30 @@ func MatchNot(matcher Matcher) Matcher {
} }
func MatchRepeat(count int, matcher Matcher) Matcher { func MatchRepeat(count int, matcher Matcher) Matcher {
return MatchBounded(count, count, matcher) return MatchMinMax(count, count, matcher)
} }
func MatchMin(min int, matcher Matcher) Matcher { func MatchMin(min int, matcher Matcher) Matcher {
return MatchBounded(min, -1, matcher) return MatchMinMax(min, -1, matcher)
} }
func MatchMax(max int, matcher Matcher) Matcher { func MatchMax(max int, matcher Matcher) Matcher {
return MatchBounded(-1, max, matcher) return MatchMinMax(-1, max, matcher)
} }
func MatchZeroOrMore(matcher Matcher) Matcher { func MatchZeroOrMore(matcher Matcher) Matcher {
return MatchBounded(0, -1, matcher) return MatchMinMax(0, -1, matcher)
} }
func MatchOneOrMore(matcher Matcher) Matcher { func MatchOneOrMore(matcher Matcher) Matcher {
return MatchBounded(1, -1, matcher) return MatchMinMax(1, -1, matcher)
} }
func MatchBounded(min int, max int, matcher Matcher) Matcher { func MatchMinMax(min int, max int, matcher Matcher) Matcher {
return func(m *MatchDialog) bool { return func(m *MatchDialog) bool {
child := m.Fork() child := m.Fork()
if min >= 0 && max >= 0 && min > max { if min >= 0 && max >= 0 && min > max {
panic("MatchRepeat definition error: max must not be < min") panic("internal parser error: MatchRepeat definition error: max must not be < min")
} }
total := 0 total := 0
// Specified min: check for the minimum required amount of matches. // Specified min: check for the minimum required amount of matches.

View File

@ -7,365 +7,106 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
func newParser(input string, Matcher parsekit.Matcher) *parsekit.P { func ExampleMatchAnyRune(t *testing.T) {
stateFn := func(p *parsekit.P) {
p.Expects("MATCH")
if p.On(Matcher).Accept().End() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}
}
return parsekit.New(input, stateFn)
}
func ExampleTestMatchAny(t *testing.T) {
parser := parsekit.New( parser := parsekit.New(
"¡Any / valid / character will dö!",
func(p *parsekit.P) { func(p *parsekit.P) {
p.On(a.AnyRune).Accept() p.Expects("Any valid rune")
p.EmitLiteral(TestItem) if p.On(a.AnyRune).Accept().End() {
p.EmitLiteral(TestItem)
}
}) })
match, _, ok := parser.Next() run := parser.Parse("¡Any / valid / character will dö!")
match, _, ok := run.Next()
if ok { if ok {
fmt.Printf("Match = %q\n", match) fmt.Printf("Match = %q\n", match)
} }
} }
func TestMatchAnyRune(t *testing.T) { func TestCombinators(t *testing.T) {
p := newParser("o", a.AnyRune) for i, c := range []struct {
r, err, ok := p.Next() input string
if !ok { matcher parsekit.Matcher
t.Fatalf("Parsing failed: %s", err) mustMatch bool
} expected string
if r.Type != TestItem { }{
t.Error("Parser item type not expected TestTitem") {"xxx", c.Rune('x'), true, "x"},
} {"x ", c.Rune(' '), false, ""},
if r.Value != "o" { {"aa", c.RuneRange('b', 'e'), false, ""},
t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value) {"bb", c.RuneRange('b', 'e'), true, "b"},
} {"cc", c.RuneRange('b', 'e'), true, "c"},
} {"dd", c.RuneRange('b', 'e'), true, "d"},
{"ee", c.RuneRange('b', 'e'), true, "e"},
{"ff", c.RuneRange('b', 'e'), false, ""},
{"Hello, world!", c.String("Hello"), true, "Hello"},
{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
{"abc", c.Not(c.Rune('b')), true, "a"},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
{"", c.Min(0, c.Rune('a')), true, ""},
{"a", c.Min(0, c.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
{"", c.Max(4, c.Rune('b')), true, ""},
{"X", c.Max(4, c.Rune('b')), true, ""},
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
{"", c.OneOrMore(c.Rune('d')), false, ""},
{"X", c.OneOrMore(c.Rune('d')), false, ""},
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
{"ghijkl", c.Optional(c.Rune('h')), true, ""},
{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
} {
parser := parsekit.New(c.matcher).Parse(c.input)
item, err, ok := parser.Next()
func TestMatchAnyRune_AtEndOfFile(t *testing.T) { if c.mustMatch {
p := newParser("", a.AnyRune) if !ok {
_, err, ok := p.Next() t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
if ok { } else if item.Type != parsekit.MatchedItem {
t.Fatalf("Parsing unexpectedly succeeded") t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
} } else if item.Value != c.expected {
expected := "unexpected end of file (expected MATCH)" t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i, c.input, c.expected, item.Value)
if err.Error() != expected { }
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) } else {
} if ok {
} t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
}
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", a.AnyRune)
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
}
expected := "invalid UTF8 character in input (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchRune(t *testing.T) {
p := newParser("xxx", c.Rune('x'))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
}
func TestMatchRune_OnMismatch(t *testing.T) {
p := newParser("x ", c.Rune(' '))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing did not fail unexpectedly")
}
expected := "unexpected character 'x' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchRuneRange(t *testing.T) {
m := c.RuneRange('b', 'y')
s := "mnopqrstuvwxybcdefghijkl"
p := newParser(s, m)
for i := 0; i < len(s); i++ {
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if s[i] != r.Value[0] {
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
} }
} }
if _, _, ok := newParser("a", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'a'")
}
if _, _, ok := newParser("z", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'z'")
}
}
func TestMatchString(t *testing.T) {
p := newParser("Hello, world!", c.String("Hello"))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "Hello" {
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
}
}
func TestMatchStringNoCase(t *testing.T) {
p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "HellÖ" {
t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value)
}
}
func TestMatchRunes(t *testing.T) {
m := c.Runes('+', '-', '*', '/')
s := "-+/*+++"
p := newParser(s, m)
for i := 0; i < len(s); i++ {
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if s[i] != r.Value[0] {
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
}
}
if _, _, ok := newParser("^", m).Next(); ok {
t.Fatalf("Unexpected parse success for input '^'")
}
if _, _, ok := newParser("x", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'x'")
}
}
func TestMatchNot(t *testing.T) {
p := newParser("aabc", c.Not(c.Rune('b')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Value != "a" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
}
func TestMatchNot_Mismatch(t *testing.T) {
p := newParser("aabc", c.Not(c.Rune('a')))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
}
expected := "unexpected character 'a' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchAnyOf(t *testing.T) {
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "a" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
r, err, ok = p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "b" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
}
func TestMatchRepeat(t *testing.T) {
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
}
}
func TestMatchRepeat_Min(t *testing.T) {
p := newParser("1111112345", c.Min(4, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "111111" {
t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value)
}
}
func TestMatchRepeat_Max(t *testing.T) {
p := newParser("1111112345", c.Max(4, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "1111" {
t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value)
}
}
func TestMatchRepeat_Bounded(t *testing.T) {
p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "11111" {
t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value)
}
}
func TestMatchRepeat_Mismatch(t *testing.T) {
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing did not fail unexpectedly")
}
expected := "unexpected character 'x' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchOneOrMore(t *testing.T) {
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxxxxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
}
}
func TestMatchSequence(t *testing.T) {
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "10" {
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
}
}
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "101010" {
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
}
}
func TestSequence_WithRepeatedRunes(t *testing.T) {
whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
equal := c.Rune('=')
ding := c.Optional(c.OneOrMore(c.Rune('x')))
assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace)
p := newParser(" = xxxx 16", assignment)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != " = xxxx " {
t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value)
}
}
func TestMatchOptional(t *testing.T) {
p := newParser("xyz", c.Optional(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
p = newParser("xyz", c.Optional(c.Rune('y')))
r, err, ok = p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "" {
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
}
}
func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "X" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
}
func TestMatchSeparated(t *testing.T) {
number := c.Bounded(1, 3, c.RuneRange('0', '9'))
separators := c.Runes('|', ';', ',')
separated_numbers := c.Separated(separators, number)
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "1,2;3|44,55|66;777" {
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
}
}
func TestMixAndMatch(t *testing.T) {
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
backslash := c.Rune('\\')
x := c.Rune('x')
hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != `\x9a\x01\xF0\xfC` {
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
}
} }

View File

@ -6,7 +6,80 @@ import (
"runtime" "runtime"
) )
// P holds the internal state of the parser. // New instantiates a new Parser.
// The logic parameter provides the parsing logic to apply. This can be:
//
// 1) A StateHandler function: in this case, a state machine-style
// recursive descent parser is created, in which StateHandler functions
// are used to move the state machine forward during parsing.
// This type of parser offers a lot of flexibility and it is possible to
// emit multiple items from the parse flow.
//
// This style of parser is typically used for parsing languages and
// structured data formats (like json, toml, etc.)
//
// 2) A Matcher function: in this case, a parser/combinator-style parser
// is created, which can be used to match against the provided logic.
// The parser can only check input against the Matcher function, and
// reports back a successful match or a failure.
//
// This style of parser can typically be used for validation and normalization
// of input data. However, when you are about to use parsekit for that
// task, consider using regular expressions instead. They might serve
// you better.
func New(logic interface{}) *Parser {
	// State machine-style logic: accepted both as the named StateHandler
	// type and as a plain function with the same signature.
	if handler, ok := logic.(StateHandler); ok {
		return makeParserForStateHandler(handler)
	}
	if handler, ok := logic.(func(*P)); ok {
		return makeParserForStateHandler(handler)
	}

	// Matcher-style logic: accepted both as the named Matcher type and as
	// a plain function with the same signature.
	if matcher, ok := logic.(Matcher); ok {
		return makeParserForMatcher(matcher)
	}
	if matcher, ok := logic.(func(m *MatchDialog) bool); ok {
		return makeParserForMatcher(matcher)
	}

	// Anything else (including a nil interface value) is a programming
	// error on the caller's side.
	panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
}
// makeParserForStateHandler builds a Parser whose parse runs start in the
// provided StateHandler.
func makeParserForStateHandler(handler StateHandler) *Parser {
	parser := new(Parser)
	parser.startState = handler
	return parser
}
// makeParserForMatcher builds a Parser around a single Matcher. It does so
// by wrapping the Matcher in a StateHandler and handing that to New.
//
// The generated handler announces that it expects "match", tries the Matcher
// via On(), and emits a MatchedItem literal when the chained
// Accept().RouteRepeat().End() call reports true.
func makeParserForMatcher(matcher Matcher) *Parser {
	var handler StateHandler = func(p *P) {
		p.Expects("match")
		accepted := p.On(matcher).Accept().RouteRepeat().End()
		if accepted {
			p.EmitLiteral(MatchedItem)
		}
	}
	return New(handler)
}
// Parser is the top-level parser. It only stores the StateHandler that a
// parse run starts with; the state of an actual parse run is created by
// Parse for each input that is parsed.
type Parser struct {
startState StateHandler // the function that handles the very first state
}
// Parse begins a new parse run over the provided input and returns the Run
// that represents it. Every call builds a fresh parse state: the cursor is
// placed at line 1, column 1, the Parser's start state is scheduled as the
// first state to handle, and a buffered channel (capacity 2) is created for
// the items emitted during the run.
func (p *Parser) Parse(input string) *Run {
	state := &P{
		input:        input,
		len:          len(input),
		cursorLine:   1,
		cursorColumn: 1,
		nextState:    p.startState,
		items:        make(chan Item, 2),
	}
	return &Run{p: state}
}
// Run represents a single parse run for a Parser. It is created by
// Parser.Parse and wraps the P struct that tracks the state of that run.
type Run struct {
p *P // a struct holding the internal state of a parse run
}
// P holds the internal state of a parse run.
type P struct { type P struct {
state StateHandler // the function that handles the current state state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state nextState StateHandler // the function that will handle the next state
@ -26,48 +99,37 @@ type P struct {
LastMatch string // a string representation of the last matched input data LastMatch string // a string representation of the last matched input data
} }
// StateHandler defines the type of function that can be used to // Next retrieves the next parsed item for a parse run.
// handle a parser state.
type StateHandler func(*P)
// New takes an input string and a start state,
// and initializes the parser for it.
func New(input string, start StateHandler) *P {
return &P{
input: input,
len: len(input),
cursorLine: 1,
cursorColumn: 1,
nextState: start,
items: make(chan Item, 2),
}
}
// Next retrieves the next parsed item.
// When a valid item was found, then the boolean return parameter will be true. // When a valid item was found, then the boolean return parameter will be true.
// On error or when successfully reaching the end of the input, false is returned. // On error or when successfully reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise. // When an error occurred, it will be set in the error return value, nil otherwise.
func (p *P) Next() (Item, *Error, bool) { func (run *Run) Next() (Item, *Error, bool) {
for { for {
select { select {
case i := <-p.items: case i := <-run.p.items:
return p.makeReturnValues(i) return run.makeReturnValues(i)
default: default:
p.runStatusHandler() run.runStatusHandler()
} }
} }
} }
// StateHandler defines the type of function that can be used to
// handle a parser state. A StateHandler is invoked with the *P that
// holds the internal state of the active parse run.
type StateHandler func(*P)
// runStatusHandler moves the parser, which is basically a state machine, // runStatusHandler moves the parser, which is basically a state machine,
// to its next status. It does so by invoking a function of the // to its next status. It does so by invoking a function of the
// type StateHandler. This function represents the current status. // type StateHandler. This function represents the current status and
func (p *P) runStatusHandler() { // is responsible for moving the parser to its next status, depending
if state, ok := p.getNextStateHandler(); ok { // on the parsed input data.
p.invokeNextStatusHandler(state) func (run *Run) runStatusHandler() {
if state, ok := run.getNextStateHandler(); ok {
run.invokeNextStatusHandler(state)
} }
} }
// getNextStateHandler determintes the next StatusHandler to invoke in order // getNextStateHandler determines the next StatusHandler to invoke in order
// to move the parsing state machine one step further. // to move the parsing state machine one step further.
// //
// When implementing a parser, the StateHandler functions must provide // When implementing a parser, the StateHandler functions must provide
@ -92,39 +154,39 @@ func (p *P) runStatusHandler() {
// //
// When no routing decision is provided by a StateHandler, then this is // When no routing decision is provided by a StateHandler, then this is
// considered a bug in the state handler, and the parser will panic. // considered a bug in the state handler, and the parser will panic.
func (p *P) getNextStateHandler() (StateHandler, bool) { func (run *Run) getNextStateHandler() (StateHandler, bool) {
switch { switch {
case p.nextState != nil: case run.p.nextState != nil:
return p.nextState, true return run.p.nextState, true
case len(p.routeStack) > 0: case len(run.p.routeStack) > 0:
return p.popRoute(), true return run.p.popRoute(), true
case p.expecting != "": case run.p.expecting != "":
p.UnexpectedInput() run.p.UnexpectedInput()
return nil, false return nil, false
default: default:
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name() name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", name)) panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
} }
} }
// invokeNextStatusHandler moves the parser state to the provided state // invokeNextStatusHandler moves the parser state to the provided state
// and invokes the StatusHandler function. // and invokes the StatusHandler function.
func (p *P) invokeNextStatusHandler(state StateHandler) { func (run *Run) invokeNextStatusHandler(state StateHandler) {
p.state = state run.p.state = state
p.nextState = nil run.p.nextState = nil
p.expecting = "" run.p.expecting = ""
p.state(p) run.p.state(run.p)
} }
func (p *P) makeReturnValues(i Item) (Item, *Error, bool) { func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
switch { switch {
case i.Type == ItemEOF: case i.Type == ItemEOF:
return i, nil, false return i, nil, false
case i.Type == ItemError: case i.Type == ItemError:
p.err = &Error{i.Value, p.cursorLine, p.cursorColumn} run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
return i, p.err, false return i, run.p.err, false
default: default:
p.item = i run.p.item = i
return i, nil, true return i, nil, true
} }
} }

View File

@ -2,10 +2,6 @@ package parsekit_test
import "git.makaay.nl/mauricem/go-parsekit" import "git.makaay.nl/mauricem/go-parsekit"
const ( const TestItem parsekit.ItemType = 1
TestItem parsekit.ItemType = 1
SuccessItem parsekit.ItemType = 2
FailItem parsekit.ItemType = 3
)
var c, a = parsekit.C, parsekit.A var c, a = parsekit.C, parsekit.A

View File

@ -16,6 +16,10 @@ const ItemEOF ItemType = -1
// an error has occurred during parsing. // an error has occurred during parsing.
const ItemError ItemType = -2 const ItemError ItemType = -2
// MatchedItem is a built-in parser item type that is used for indicating a
// successful match when using a parser that is based on a Matcher.
const MatchedItem ItemType = -3
// Item represents an item that can be emitted from the parser. // Item represents an item that can be emitted from the parser.
type Item struct { type Item struct {
Type ItemType Type ItemType
@ -69,7 +73,7 @@ type Error struct {
func (err *Error) Error() string { func (err *Error) Error() string {
if err == nil { if err == nil {
panic("Error method called on the parser, but no error was set") panic("internal parser error: Error() method called on the parser, but no error was set")
} }
return err.Message return err.Message
} }
@ -98,7 +102,7 @@ func (p *P) UnexpectedInput() {
case r == INVALID: case r == INVALID:
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p)) p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
default: default:
panic("Unhandled output from peek()") panic("parsekit bug: Unhandled output from peek()")
} }
} }

View File

@ -35,8 +35,7 @@ package parsekit
func (p *P) On(matcher Matcher) *matchAction { func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p} m := &MatchDialog{p: p}
if matcher == nil { if matcher == nil {
p.EmitError("internal parser error: matcher argument for On() is nil") panic("internal parser error: matcher argument for On() is nil")
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
} }
ok := matcher(m) ok := matcher(m)