From d9ab7298e7b6fba9da253fdddd6c67079c0034ec Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Thu, 23 May 2019 00:04:10 +0000 Subject: [PATCH] Cleanup of test code structure. Added support for creating parsers for either StateHandler or Matcher functions. --- atoms_test.go | 31 ++- combinators.go | 22 +-- combinators_test.go | 441 +++++++++---------------------------------- parsekit.go | 154 ++++++++++----- parsekit_test.go | 6 +- statehandler_emit.go | 8 +- statehandler_on.go | 3 +- 7 files changed, 232 insertions(+), 433 deletions(-) diff --git a/atoms_test.go b/atoms_test.go index dd63535..2a20c06 100644 --- a/atoms_test.go +++ b/atoms_test.go @@ -86,22 +86,19 @@ func TestAtoms(t *testing.T) { {"g", a.HexDigit, false}, {"G", a.HexDigit, false}, } { - parser := parsekit.New(c.input, func(p *parsekit.P) { - if p.On(c.matcher).Accept().End() { - p.EmitLiteral(SuccessItem) - } else { - p.EmitLiteral(FailItem) - } - }) + parser := parsekit.New(c.matcher).Parse(c.input) item, err, ok := parser.Next() - if !ok { - t.Fatalf("Test [%d] %q failed with error: %s", i+1, c.input, err) - } - if c.mustMatch && item.Type != SuccessItem { - t.Fatalf("Test [%d] %q failed: should match, but it didn't", i+1, c.input) - } - if !c.mustMatch && item.Type != FailItem { - t.Fatalf("Test [%d] %q failed: should not match, but it did", i+1, c.input) + if c.mustMatch { + if !ok { + t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err) + } + if item.Type != parsekit.MatchedItem { + t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input) + } + } else { + if ok { + t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input) + } } } } @@ -115,13 +112,13 @@ func TestSequenceOfRunes(t *testing.T) { a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, ) input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - parser := parsekit.New(input, func(p *parsekit.P) { + parser := parsekit.New(func(p *parsekit.P) { p.Expects("Sequence of runes") if 
p.On(sequence).Accept().End() { p.EmitLiteral(TestItem) } }) - item, err, ok := parser.Next() + item, err, ok := parser.Parse(input).Next() if !ok { t.Fatalf("Parsing failed: %s", err) } diff --git a/combinators.go b/combinators.go index 25b3851..287d74c 100644 --- a/combinators.go +++ b/combinators.go @@ -31,7 +31,7 @@ type MatchDialog struct { // case an invalid UTF8 rune of the end of the file was encountered. func (m *MatchDialog) NextRune() (rune, bool) { if m.curRune == utf8.RuneError { - panic("Matcher must not call NextRune() after it returned false") + panic("internal parser error: Matcher must not call NextRune() after it returned false") } r, w, ok := m.p.peek(m.offset) m.offset += w @@ -73,7 +73,7 @@ func (m *MatchDialog) Fork() *MatchDialog { // reused for performing another match. func (m *MatchDialog) Merge() bool { if m.parent == nil { - panic("Cannot call Merge a a non-forked MatchDialog") + panic("internal parser error: Cannot call Merge a a non-forked MatchDialog") } m.parent.runes = append(m.parent.runes, m.runes...) m.parent.widths = append(m.parent.widths, m.widths...) 
@@ -110,7 +110,7 @@ var C = struct { Max func(int, Matcher) Matcher ZeroOrMore func(Matcher) Matcher OneOrMore func(Matcher) Matcher - Bounded func(int, int, Matcher) Matcher + MinMax func(int, int, Matcher) Matcher Separated func(Matcher, Matcher) Matcher Drop func(Matcher) Matcher }{ @@ -128,7 +128,7 @@ var C = struct { Max: MatchMax, ZeroOrMore: MatchZeroOrMore, OneOrMore: MatchOneOrMore, - Bounded: MatchBounded, + MinMax: MatchMinMax, Separated: MatchSeparated, Drop: MatchDrop, } @@ -225,30 +225,30 @@ func MatchNot(matcher Matcher) Matcher { } func MatchRepeat(count int, matcher Matcher) Matcher { - return MatchBounded(count, count, matcher) + return MatchMinMax(count, count, matcher) } func MatchMin(min int, matcher Matcher) Matcher { - return MatchBounded(min, -1, matcher) + return MatchMinMax(min, -1, matcher) } func MatchMax(max int, matcher Matcher) Matcher { - return MatchBounded(-1, max, matcher) + return MatchMinMax(-1, max, matcher) } func MatchZeroOrMore(matcher Matcher) Matcher { - return MatchBounded(0, -1, matcher) + return MatchMinMax(0, -1, matcher) } func MatchOneOrMore(matcher Matcher) Matcher { - return MatchBounded(1, -1, matcher) + return MatchMinMax(1, -1, matcher) } -func MatchBounded(min int, max int, matcher Matcher) Matcher { +func MatchMinMax(min int, max int, matcher Matcher) Matcher { return func(m *MatchDialog) bool { child := m.Fork() if min >= 0 && max >= 0 && min > max { - panic("MatchRepeat definition error: max must not be < min") + panic("internal parser error: MatchRepeat definition error: max must not be < min") } total := 0 // Specified min: check for the minimum required amount of matches. 
diff --git a/combinators_test.go b/combinators_test.go index 0c5e798..3b3aa16 100644 --- a/combinators_test.go +++ b/combinators_test.go @@ -7,365 +7,106 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func newParser(input string, Matcher parsekit.Matcher) *parsekit.P { - stateFn := func(p *parsekit.P) { - p.Expects("MATCH") - if p.On(Matcher).Accept().End() { - p.EmitLiteral(TestItem) - p.RouteRepeat() - } - } - return parsekit.New(input, stateFn) -} - -func ExampleTestMatchAny(t *testing.T) { +func ExampleMatchAnyRune(t *testing.T) { parser := parsekit.New( - "¡Any / valid / character will dö!", func(p *parsekit.P) { - p.On(a.AnyRune).Accept() - p.EmitLiteral(TestItem) + p.Expects("Any valid rune") + if p.On(a.AnyRune).Accept().End() { + p.EmitLiteral(TestItem) + } }) - match, _, ok := parser.Next() + run := parser.Parse("¡Any / valid / character will dö!") + match, _, ok := run.Next() if ok { fmt.Printf("Match = %q\n", match) } } -func TestMatchAnyRune(t *testing.T) { - p := newParser("o", a.AnyRune) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected TestTitem") - } - if r.Value != "o" { - t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value) - } -} +func TestCombinators(t *testing.T) { + for i, c := range []struct { + input string + matcher parsekit.Matcher + mustMatch bool + expected string + }{ + {"xxx", c.Rune('x'), true, "x"}, + {"x ", c.Rune(' '), false, ""}, + {"aa", c.RuneRange('b', 'e'), false, ""}, + {"bb", c.RuneRange('b', 'e'), true, "b"}, + {"cc", c.RuneRange('b', 'e'), true, "c"}, + {"dd", c.RuneRange('b', 'e'), true, "d"}, + {"ee", c.RuneRange('b', 'e'), true, "e"}, + {"ff", c.RuneRange('b', 'e'), false, ""}, + {"Hello, world!", c.String("Hello"), true, "Hello"}, + {"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"}, + {"+X", c.Runes('+', '-', '*', '/'), true, "+"}, + {"-X", c.Runes('+', '-', '*', '/'), true, "-"}, + {"*X", 
c.Runes('+', '-', '*', '/'), true, "*"}, + {"/X", c.Runes('+', '-', '*', '/'), true, "/"}, + {"!X", c.Runes('+', '-', '*', '/'), false, ""}, + {"abc", c.Not(c.Rune('b')), true, "a"}, + {"bcd", c.Not(c.Rune('b')), false, ""}, + {"bcd", c.Not(c.Rune('b')), false, ""}, + {"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"}, + {"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"}, + {"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""}, + {"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"}, + {"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""}, + {"", c.Min(0, c.Rune('a')), true, ""}, + {"a", c.Min(0, c.Rune('a')), true, "a"}, + {"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"}, + {"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"}, + {"aaaaa", c.Min(6, c.Rune('a')), false, ""}, + {"", c.Max(4, c.Rune('b')), true, ""}, + {"X", c.Max(4, c.Rune('b')), true, ""}, + {"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"}, + {"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"}, + {"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"}, + {"", c.MinMax(0, 0, c.Rune('c')), true, ""}, + {"X", c.MinMax(0, 0, c.Rune('c')), true, ""}, + {"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""}, + {"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"}, + {"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"}, + {"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"}, + {"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"}, + {"", c.MinMax(1, 1, c.Rune('c')), false, ""}, + {"X", c.MinMax(1, 1, c.Rune('c')), false, ""}, + {"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"}, + {"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"}, + {"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"}, + {"", c.OneOrMore(c.Rune('d')), false, ""}, + {"X", c.OneOrMore(c.Rune('d')), false, ""}, + {"dX", c.OneOrMore(c.Rune('d')), true, "d"}, + {"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"}, + {"", c.ZeroOrMore(c.Rune('e')), true, ""}, + {"X", c.ZeroOrMore(c.Rune('e')), true, ""}, + {"eX", c.ZeroOrMore(c.Rune('e')), true, "e"}, + 
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"}, + {"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"}, + {"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"}, + {"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""}, + {"ghijkl", c.Optional(c.Rune('h')), true, ""}, + {"ghijkl", c.Optional(c.Rune('g')), true, "g"}, + {"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"}, + {"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"}, + {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"}, + {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, + } { + parser := parsekit.New(c.matcher).Parse(c.input) + item, err, ok := parser.Next() -func TestMatchAnyRune_AtEndOfFile(t *testing.T) { - p := newParser("", a.AnyRune) - _, err, ok := p.Next() - if ok { - t.Fatalf("Parsing unexpectedly succeeded") - } - expected := "unexpected end of file (expected MATCH)" - if err.Error() != expected { - t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) - } -} - -func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) { - p := newParser("\xcd", a.AnyRune) - _, err, ok := p.Next() - if ok { - t.Fatalf("Parsing unexpectedly succeeded") - } - expected := "invalid UTF8 character in input (expected MATCH)" - if err.Error() != expected { - t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) - } -} - -func TestMatchRune(t *testing.T) { - p := newParser("xxx", c.Rune('x')) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected TestTitem") - } - if r.Value != "x" { - t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) - } -} - -func TestMatchRune_OnMismatch(t *testing.T) { - p := 
newParser("x ", c.Rune(' ')) - _, err, ok := p.Next() - if ok { - t.Fatalf("Parsing did not fail unexpectedly") - } - expected := "unexpected character 'x' (expected MATCH)" - if err.Error() != expected { - t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) - } -} - -func TestMatchRuneRange(t *testing.T) { - m := c.RuneRange('b', 'y') - s := "mnopqrstuvwxybcdefghijkl" - p := newParser(s, m) - for i := 0; i < len(s); i++ { - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if s[i] != r.Value[0] { - t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0]) + if c.mustMatch { + if !ok { + t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err) + } else if item.Type != parsekit.MatchedItem { + t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input) + } else if item.Value != c.expected { + t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i, c.input, c.expected, item.Value) + } + } else { + if ok { + t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input) + } } } - if _, _, ok := newParser("a", m).Next(); ok { - t.Fatalf("Unexpected parse success for input 'a'") - } - if _, _, ok := newParser("z", m).Next(); ok { - t.Fatalf("Unexpected parse success for input 'z'") - } -} - -func TestMatchString(t *testing.T) { - p := newParser("Hello, world!", c.String("Hello")) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected TestTitem") - } - if r.Value != "Hello" { - t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value) - } -} - -func TestMatchStringNoCase(t *testing.T) { - p := newParser("HellÖ, world!", c.StringNoCase("hellö")) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected 
TestTitem") - } - if r.Value != "HellÖ" { - t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value) - } -} - -func TestMatchRunes(t *testing.T) { - m := c.Runes('+', '-', '*', '/') - s := "-+/*+++" - p := newParser(s, m) - for i := 0; i < len(s); i++ { - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if s[i] != r.Value[0] { - t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0]) - } - } - if _, _, ok := newParser("^", m).Next(); ok { - t.Fatalf("Unexpected parse success for input '^'") - } - if _, _, ok := newParser("x", m).Next(); ok { - t.Fatalf("Unexpected parse success for input 'x'") - } -} - -func TestMatchNot(t *testing.T) { - p := newParser("aabc", c.Not(c.Rune('b'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Value != "a" { - t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value) - } -} - -func TestMatchNot_Mismatch(t *testing.T) { - p := newParser("aabc", c.Not(c.Rune('a'))) - _, err, ok := p.Next() - if ok { - t.Fatalf("Parsing unexpectedly succeeded") - } - expected := "unexpected character 'a' (expected MATCH)" - if err.Error() != expected { - t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) - } -} - -func TestMatchAnyOf(t *testing.T) { - p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected TestTitem") - } - if r.Value != "a" { - t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value) - } - - r, err, ok = p.Next() - if !ok { - t.Fatalf("Parsing failed: %s", err) - } - if r.Type != TestItem { - t.Error("Parser item type not expected TestTitem") - } - if r.Value != "b" { - t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value) - } -} - -func TestMatchRepeat(t *testing.T) { - p := 
newParser("xxxxyyyy", c.Repeat(4, c.Rune('x'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "xxxx" { - t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value) - } -} - -func TestMatchRepeat_Min(t *testing.T) { - p := newParser("1111112345", c.Min(4, c.Rune('1'))) - r, _, _ := p.Next() - if r.Value != "111111" { - t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value) - } -} - -func TestMatchRepeat_Max(t *testing.T) { - p := newParser("1111112345", c.Max(4, c.Rune('1'))) - r, _, _ := p.Next() - if r.Value != "1111" { - t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value) - } -} - -func TestMatchRepeat_Bounded(t *testing.T) { - p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1'))) - r, _, _ := p.Next() - if r.Value != "11111" { - t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value) - } -} - -func TestMatchRepeat_Mismatch(t *testing.T) { - p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x'))) - _, err, ok := p.Next() - if ok { - t.Fatalf("Parsing did not fail unexpectedly") - } - expected := "unexpected character 'x' (expected MATCH)" - if err.Error() != expected { - t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error()) - } -} - -func TestMatchOneOrMore(t *testing.T) { - p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "xxxxxxxx" { - t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value) - } -} - -func TestMatchSequence(t *testing.T) { - p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "10" { - t.Errorf("Parser item value is %q instead 
of expected \"10\"", r.Value) - } -} - -func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) { - p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0')))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "101010" { - t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value) - } -} - -func TestSequence_WithRepeatedRunes(t *testing.T) { - whitespace := c.Optional(c.OneOrMore(c.Rune(' '))) - equal := c.Rune('=') - ding := c.Optional(c.OneOrMore(c.Rune('x'))) - assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace) - p := newParser(" = xxxx 16", assignment) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != " = xxxx " { - t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value) - } -} - -func TestMatchOptional(t *testing.T) { - p := newParser("xyz", c.Optional(c.Rune('x'))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "x" { - t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) - } - - p = newParser("xyz", c.Optional(c.Rune('y'))) - r, err, ok = p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "" { - t.Errorf("Parser item value is %q instead of expected \"\"", r.Value) - } -} - -func TestMatchDrop(t *testing.T) { - dashes := c.OneOrMore(c.Rune('-')) - p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes))) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "X" { - t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) - } -} - -func TestMatchSeparated(t *testing.T) { - number := c.Bounded(1, 3, 
c.RuneRange('0', '9')) - separators := c.Runes('|', ';', ',') - separated_numbers := c.Separated(separators, number) - p := newParser("1,2;3|44,55|66;777,abc", separated_numbers) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != "1,2;3|44,55|66;777" { - t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value) - } -} - -func TestMixAndMatch(t *testing.T) { - hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F')) - backslash := c.Rune('\\') - x := c.Rune('x') - hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex)) - - p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte)) - r, err, ok := p.Next() - if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) - } - if r.Value != `\x9a\x01\xF0\xfC` { - t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`) - } } diff --git a/parsekit.go b/parsekit.go index e633550..9f18f11 100644 --- a/parsekit.go +++ b/parsekit.go @@ -6,7 +6,80 @@ import ( "runtime" ) -// P holds the internal state of the parser. +// New instantiates a new Parser. +// The logic parameter provides the parsing logic to apply. This can be: +// +// 1) A StateHandler function: in this case, a state machine-style +// recursive descent parser is created, in which StateHandler functions +// are used to move the state machine forward during parsing. +// This type of parser offers a lot of flexibility and it is possible to +// emit multiple items from the parse flow. +// +// This style of parser is typically used for parsing languages and +// structured data formats (like json, toml, etc.) +// +// 2) A Matcher function: in this case, a parser/combinator-style parser +// is created, which can be used to match against the provided logic. 
+// The parser can only check input against the Matcher function, and +// reports back a successful match or a failure. +// +// This style of parser can typically be used for validation and normalization +// of input data. However, when you are about to use parsekit for that +// task, consider using regular expressions instead. They might serve +// you better. +func New(logic interface{}) *Parser { + switch logic := logic.(type) { + case func(*P): + return makeParserForStateHandler(logic) + case StateHandler: + return makeParserForStateHandler(logic) + case func(m *MatchDialog) bool: + return makeParserForMatcher(logic) + case Matcher: + return makeParserForMatcher(logic) + default: + panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic)) + } +} + +func makeParserForStateHandler(handler StateHandler) *Parser { + return &Parser{startState: handler} +} + +func makeParserForMatcher(matcher Matcher) *Parser { + return New(StateHandler(func(p *P) { + p.Expects("match") + if p.On(matcher).Accept().RouteRepeat().End() { + p.EmitLiteral(MatchedItem) + } + })) +} + +// Parser is the top-level parser. +type Parser struct { + startState StateHandler // the function that handles the very first state +} + +// Parse starts a parse run on the provided input data. +func (p *Parser) Parse(input string) *Run { + return &Run{ + p: &P{ + input: input, + len: len(input), + cursorLine: 1, + cursorColumn: 1, + nextState: p.startState, + items: make(chan Item, 2), + }, + } +} + +// Run represents a single parse run for a Parser. +type Run struct { + p *P // a struct holding the internal state of a parse run +} + +// P holds the internal state of a parse run. 
type P struct { state StateHandler // the function that handles the current state nextState StateHandler // the function that will handle the next state @@ -26,48 +99,37 @@ type P struct { LastMatch string // a string representation of the last matched input data } -// StateHandler defines the type of function that can be used to -// handle a parser state. -type StateHandler func(*P) - -// New takes an input string and a start state, -// and initializes the parser for it. -func New(input string, start StateHandler) *P { - return &P{ - input: input, - len: len(input), - cursorLine: 1, - cursorColumn: 1, - nextState: start, - items: make(chan Item, 2), - } -} - -// Next retrieves the next parsed item. +// Next retrieves the next parsed item for a parse run. // When a valid item was found, then the boolean return parameter will be true. // On error or when successfully reaching the end of the input, false is returned. // When an error occurred, it will be set in the error return value, nil otherwise. -func (p *P) Next() (Item, *Error, bool) { +func (run *Run) Next() (Item, *Error, bool) { for { select { - case i := <-p.items: - return p.makeReturnValues(i) + case i := <-run.p.items: + return run.makeReturnValues(i) default: - p.runStatusHandler() + run.runStatusHandler() } } } +// StateHandler defines the type of function that can be used to +// handle a parser state. +type StateHandler func(*P) + // runStatusHandler moves the parser, which is bascially a state machine, // to its next status. It does so by invoking a function of the -// type StateHandler. This function represents the current status. -func (p *P) runStatusHandler() { - if state, ok := p.getNextStateHandler(); ok { - p.invokeNextStatusHandler(state) +// type StateHandler. This function represents the current status and +// is responsible for moving the parser to its next status, depending +// on the parsed input data. 
+func (run *Run) runStatusHandler() { + if state, ok := run.getNextStateHandler(); ok { + run.invokeNextStatusHandler(state) } } -// getNextStateHandler determintes the next StatusHandler to invoke in order +// getNextStateHandler determines the next StatusHandler to invoke in order // to move the parsing state machine one step further. // // When implementing a parser, the StateHandler functions must provide @@ -92,39 +154,39 @@ func (p *P) runStatusHandler() { // // When no routing decision is provided by a StateHandler, then this is // considered a bug in the state handler, and the parser will panic. -func (p *P) getNextStateHandler() (StateHandler, bool) { +func (run *Run) getNextStateHandler() (StateHandler, bool) { switch { - case p.nextState != nil: - return p.nextState, true - case len(p.routeStack) > 0: - return p.popRoute(), true - case p.expecting != "": - p.UnexpectedInput() + case run.p.nextState != nil: + return run.p.nextState, true + case len(run.p.routeStack) > 0: + return run.p.popRoute(), true + case run.p.expecting != "": + run.p.UnexpectedInput() return nil, false default: - name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name() - panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", name)) + name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name() + panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name)) } } // invokeNextStatusHandler moves the parser state to the provided state // and invokes the StatusHandler function. 
-func (p *P) invokeNextStatusHandler(state StateHandler) { - p.state = state - p.nextState = nil - p.expecting = "" - p.state(p) +func (run *Run) invokeNextStatusHandler(state StateHandler) { + run.p.state = state + run.p.nextState = nil + run.p.expecting = "" + run.p.state(run.p) } -func (p *P) makeReturnValues(i Item) (Item, *Error, bool) { +func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) { switch { case i.Type == ItemEOF: return i, nil, false case i.Type == ItemError: - p.err = &Error{i.Value, p.cursorLine, p.cursorColumn} - return i, p.err, false + run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} + return i, run.p.err, false default: - p.item = i + run.p.item = i return i, nil, true } } diff --git a/parsekit_test.go b/parsekit_test.go index 058b6b5..2720f28 100644 --- a/parsekit_test.go +++ b/parsekit_test.go @@ -2,10 +2,6 @@ package parsekit_test import "git.makaay.nl/mauricem/go-parsekit" -const ( - TestItem parsekit.ItemType = 1 - SuccessItem parsekit.ItemType = 2 - FailItem parsekit.ItemType = 3 -) +const TestItem parsekit.ItemType = 1 var c, a = parsekit.C, parsekit.A diff --git a/statehandler_emit.go b/statehandler_emit.go index 2e20966..220964a 100644 --- a/statehandler_emit.go +++ b/statehandler_emit.go @@ -16,6 +16,10 @@ const ItemEOF ItemType = -1 // an error has occurred during parsing. const ItemError ItemType = -2 +// MatchedItem is a built-in parser item type that is used for indicating a +// successful match when using a parser that is based on a Matcher. +const MatchedItem ItemType = -3 + // Item represents an item that can be emitted from the parser. 
type Item struct { Type ItemType @@ -69,7 +73,7 @@ type Error struct { func (err *Error) Error() string { if err == nil { - panic("Error method called on the parser, but no error was set") + panic("internal parser error: Error() method called on the parser, but no error was set") } return err.Message } @@ -98,7 +102,7 @@ func (p *P) UnexpectedInput() { case r == INVALID: p.EmitError("invalid UTF8 character in input%s", fmtExpects(p)) default: - panic("Unhandled output from peek()") + panic("parsekit bug: Unhandled output from peek()") } } diff --git a/statehandler_on.go b/statehandler_on.go index 93691f9..3ac0c0c 100644 --- a/statehandler_on.go +++ b/statehandler_on.go @@ -35,8 +35,7 @@ package parsekit func (p *P) On(matcher Matcher) *matchAction { m := &MatchDialog{p: p} if matcher == nil { - p.EmitError("internal parser error: matcher argument for On() is nil") - return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}} + panic("internal parser error: matcher argument for On() is nil") } ok := matcher(m)