Cleanup of test code structure. Added support for creating parsers for either StateHandler or Matcher functions.

This commit is contained in:
Maurice Makaay 2019-05-23 00:04:10 +00:00
parent 8a09b7ca49
commit d9ab7298e7
7 changed files with 232 additions and 433 deletions

View File

@ -86,22 +86,19 @@ func TestAtoms(t *testing.T) {
{"g", a.HexDigit, false},
{"G", a.HexDigit, false},
} {
parser := parsekit.New(c.input, func(p *parsekit.P) {
if p.On(c.matcher).Accept().End() {
p.EmitLiteral(SuccessItem)
} else {
p.EmitLiteral(FailItem)
}
})
parser := parsekit.New(c.matcher).Parse(c.input)
item, err, ok := parser.Next()
if !ok {
t.Fatalf("Test [%d] %q failed with error: %s", i+1, c.input, err)
}
if c.mustMatch && item.Type != SuccessItem {
t.Fatalf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
}
if !c.mustMatch && item.Type != FailItem {
t.Fatalf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
if c.mustMatch {
if !ok {
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
}
if item.Type != parsekit.MatchedItem {
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
}
} else {
if ok {
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
}
}
}
}
@ -115,13 +112,13 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.New(input, func(p *parsekit.P) {
parser := parsekit.New(func(p *parsekit.P) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() {
p.EmitLiteral(TestItem)
}
})
item, err, ok := parser.Next()
item, err, ok := parser.Parse(input).Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}

View File

@ -31,7 +31,7 @@ type MatchDialog struct {
// case an invalid UTF8 rune or the end of the file was encountered.
func (m *MatchDialog) NextRune() (rune, bool) {
if m.curRune == utf8.RuneError {
panic("Matcher must not call NextRune() after it returned false")
panic("internal parser error: Matcher must not call NextRune() after it returned false")
}
r, w, ok := m.p.peek(m.offset)
m.offset += w
@ -73,7 +73,7 @@ func (m *MatchDialog) Fork() *MatchDialog {
// reused for performing another match.
func (m *MatchDialog) Merge() bool {
if m.parent == nil {
panic("Cannot call Merge a a non-forked MatchDialog")
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
}
m.parent.runes = append(m.parent.runes, m.runes...)
m.parent.widths = append(m.parent.widths, m.widths...)
@ -110,7 +110,7 @@ var C = struct {
Max func(int, Matcher) Matcher
ZeroOrMore func(Matcher) Matcher
OneOrMore func(Matcher) Matcher
Bounded func(int, int, Matcher) Matcher
MinMax func(int, int, Matcher) Matcher
Separated func(Matcher, Matcher) Matcher
Drop func(Matcher) Matcher
}{
@ -128,7 +128,7 @@ var C = struct {
Max: MatchMax,
ZeroOrMore: MatchZeroOrMore,
OneOrMore: MatchOneOrMore,
Bounded: MatchBounded,
MinMax: MatchMinMax,
Separated: MatchSeparated,
Drop: MatchDrop,
}
@ -225,30 +225,30 @@ func MatchNot(matcher Matcher) Matcher {
}
func MatchRepeat(count int, matcher Matcher) Matcher {
return MatchBounded(count, count, matcher)
return MatchMinMax(count, count, matcher)
}
func MatchMin(min int, matcher Matcher) Matcher {
return MatchBounded(min, -1, matcher)
return MatchMinMax(min, -1, matcher)
}
func MatchMax(max int, matcher Matcher) Matcher {
return MatchBounded(-1, max, matcher)
return MatchMinMax(-1, max, matcher)
}
func MatchZeroOrMore(matcher Matcher) Matcher {
return MatchBounded(0, -1, matcher)
return MatchMinMax(0, -1, matcher)
}
func MatchOneOrMore(matcher Matcher) Matcher {
return MatchBounded(1, -1, matcher)
return MatchMinMax(1, -1, matcher)
}
func MatchBounded(min int, max int, matcher Matcher) Matcher {
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
return func(m *MatchDialog) bool {
child := m.Fork()
if min >= 0 && max >= 0 && min > max {
panic("MatchRepeat definition error: max must not be < min")
panic("internal parser error: MatchRepeat definition error: max must not be < min")
}
total := 0
// Specified min: check for the minimum required amount of matches.

View File

@ -7,365 +7,106 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func newParser(input string, Matcher parsekit.Matcher) *parsekit.P {
stateFn := func(p *parsekit.P) {
p.Expects("MATCH")
if p.On(Matcher).Accept().End() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}
}
return parsekit.New(input, stateFn)
}
func ExampleTestMatchAny(t *testing.T) {
func ExampleMatchAnyRune(t *testing.T) {
parser := parsekit.New(
"¡Any / valid / character will dö!",
func(p *parsekit.P) {
p.On(a.AnyRune).Accept()
p.EmitLiteral(TestItem)
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept().End() {
p.EmitLiteral(TestItem)
}
})
match, _, ok := parser.Next()
run := parser.Parse("¡Any / valid / character will dö!")
match, _, ok := run.Next()
if ok {
fmt.Printf("Match = %q\n", match)
}
}
func TestMatchAnyRune(t *testing.T) {
p := newParser("o", a.AnyRune)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "o" {
t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
}
}
func TestCombinators(t *testing.T) {
for i, c := range []struct {
input string
matcher parsekit.Matcher
mustMatch bool
expected string
}{
{"xxx", c.Rune('x'), true, "x"},
{"x ", c.Rune(' '), false, ""},
{"aa", c.RuneRange('b', 'e'), false, ""},
{"bb", c.RuneRange('b', 'e'), true, "b"},
{"cc", c.RuneRange('b', 'e'), true, "c"},
{"dd", c.RuneRange('b', 'e'), true, "d"},
{"ee", c.RuneRange('b', 'e'), true, "e"},
{"ff", c.RuneRange('b', 'e'), false, ""},
{"Hello, world!", c.String("Hello"), true, "Hello"},
{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
{"abc", c.Not(c.Rune('b')), true, "a"},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
{"", c.Min(0, c.Rune('a')), true, ""},
{"a", c.Min(0, c.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
{"", c.Max(4, c.Rune('b')), true, ""},
{"X", c.Max(4, c.Rune('b')), true, ""},
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
{"", c.OneOrMore(c.Rune('d')), false, ""},
{"X", c.OneOrMore(c.Rune('d')), false, ""},
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
{"ghijkl", c.Optional(c.Rune('h')), true, ""},
{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
} {
parser := parsekit.New(c.matcher).Parse(c.input)
item, err, ok := parser.Next()
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
p := newParser("", a.AnyRune)
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
}
expected := "unexpected end of file (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", a.AnyRune)
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
}
expected := "invalid UTF8 character in input (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchRune(t *testing.T) {
p := newParser("xxx", c.Rune('x'))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
}
func TestMatchRune_OnMismatch(t *testing.T) {
p := newParser("x ", c.Rune(' '))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing did not fail unexpectedly")
}
expected := "unexpected character 'x' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchRuneRange(t *testing.T) {
m := c.RuneRange('b', 'y')
s := "mnopqrstuvwxybcdefghijkl"
p := newParser(s, m)
for i := 0; i < len(s); i++ {
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if s[i] != r.Value[0] {
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
if c.mustMatch {
if !ok {
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
} else if item.Type != parsekit.MatchedItem {
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
} else if item.Value != c.expected {
t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i, c.input, c.expected, item.Value)
}
} else {
if ok {
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
}
}
}
if _, _, ok := newParser("a", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'a'")
}
if _, _, ok := newParser("z", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'z'")
}
}
func TestMatchString(t *testing.T) {
p := newParser("Hello, world!", c.String("Hello"))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "Hello" {
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
}
}
func TestMatchStringNoCase(t *testing.T) {
p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "HellÖ" {
t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value)
}
}
func TestMatchRunes(t *testing.T) {
m := c.Runes('+', '-', '*', '/')
s := "-+/*+++"
p := newParser(s, m)
for i := 0; i < len(s); i++ {
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if s[i] != r.Value[0] {
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
}
}
if _, _, ok := newParser("^", m).Next(); ok {
t.Fatalf("Unexpected parse success for input '^'")
}
if _, _, ok := newParser("x", m).Next(); ok {
t.Fatalf("Unexpected parse success for input 'x'")
}
}
func TestMatchNot(t *testing.T) {
p := newParser("aabc", c.Not(c.Rune('b')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Value != "a" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
}
func TestMatchNot_Mismatch(t *testing.T) {
p := newParser("aabc", c.Not(c.Rune('a')))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
}
expected := "unexpected character 'a' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchAnyOf(t *testing.T) {
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "a" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
r, err, ok = p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "b" {
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
}
}
func TestMatchRepeat(t *testing.T) {
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
}
}
func TestMatchRepeat_Min(t *testing.T) {
p := newParser("1111112345", c.Min(4, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "111111" {
t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value)
}
}
func TestMatchRepeat_Max(t *testing.T) {
p := newParser("1111112345", c.Max(4, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "1111" {
t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value)
}
}
func TestMatchRepeat_Bounded(t *testing.T) {
p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1')))
r, _, _ := p.Next()
if r.Value != "11111" {
t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value)
}
}
func TestMatchRepeat_Mismatch(t *testing.T) {
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing did not fail unexpectedly")
}
expected := "unexpected character 'x' (expected MATCH)"
if err.Error() != expected {
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
}
}
func TestMatchOneOrMore(t *testing.T) {
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxxxxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
}
}
func TestMatchSequence(t *testing.T) {
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "10" {
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
}
}
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "101010" {
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
}
}
func TestSequence_WithRepeatedRunes(t *testing.T) {
whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
equal := c.Rune('=')
ding := c.Optional(c.OneOrMore(c.Rune('x')))
assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace)
p := newParser(" = xxxx 16", assignment)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != " = xxxx " {
t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value)
}
}
func TestMatchOptional(t *testing.T) {
p := newParser("xyz", c.Optional(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
p = newParser("xyz", c.Optional(c.Rune('y')))
r, err, ok = p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "" {
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
}
}
func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "X" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
}
func TestMatchSeparated(t *testing.T) {
number := c.Bounded(1, 3, c.RuneRange('0', '9'))
separators := c.Runes('|', ';', ',')
separated_numbers := c.Separated(separators, number)
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "1,2;3|44,55|66;777" {
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
}
}
func TestMixAndMatch(t *testing.T) {
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
backslash := c.Rune('\\')
x := c.Rune('x')
hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != `\x9a\x01\xF0\xfC` {
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
}
}

View File

@ -6,7 +6,80 @@ import (
"runtime"
)
// P holds the internal state of the parser.
// New instantiates a new Parser.
//
// The logic parameter provides the parsing logic to apply. It must be one
// of the following:
//
// 1) A StateHandler function (or a plain func(*P)): in this case, a state
// machine-style recursive descent parser is created, in which StateHandler
// functions are used to move the state machine forward during parsing.
// This type of parser offers a lot of flexibility and it is possible to
// emit multiple items from the parse flow.
//
// This style of parser is typically used for parsing languages and
// structured data formats (like json, toml, etc.)
//
// 2) A Matcher function (or a plain func(*MatchDialog) bool): in this case,
// a parser/combinator-style parser is created, which can be used to match
// against the provided logic. The parser can only check input against the
// Matcher function, and reports back a successful match or a failure.
//
// This style of parser can typically be used for validation and
// normalization of input data. However, when you are about to use parsekit
// for that task, consider using regular expressions instead. They might
// serve you better.
//
// New panics when logic is of any other type.
func New(logic interface{}) *Parser {
	// The named types and their underlying function types are distinct
	// dynamic types, so each needs its own case in the type switch.
	switch fn := logic.(type) {
	case StateHandler:
		return makeParserForStateHandler(fn)
	case func(*P):
		return makeParserForStateHandler(fn)
	case Matcher:
		return makeParserForMatcher(fn)
	case func(m *MatchDialog) bool:
		return makeParserForMatcher(fn)
	}
	panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
}
// makeParserForStateHandler creates a Parser that uses the provided
// StateHandler as its start state.
func makeParserForStateHandler(handler StateHandler) *Parser {
	parser := new(Parser)
	parser.startState = handler
	return parser
}
// makeParserForMatcher creates a Parser around a single Matcher by wrapping
// the Matcher in a StateHandler and passing that handler to New.
//
// The generated handler registers the expectation "match" and emits a
// MatchedItem literal when the On(matcher).Accept().RouteRepeat().End()
// chain reports success. When the chain reports failure, nothing is emitted
// by the handler itself.
func makeParserForMatcher(matcher Matcher) *Parser {
	matchHandler := func(p *P) {
		p.Expects("match")
		if !p.On(matcher).Accept().RouteRepeat().End() {
			return
		}
		p.EmitLiteral(MatchedItem)
	}
	return New(StateHandler(matchHandler))
}
// Parser is the top-level parser. It only holds the parsing logic (the
// start state); the state of an actual parse run is kept in a Run, which
// is created by calling Parse.
type Parser struct {
startState StateHandler // the function that handles the very first state
}
// Parse starts a parse run on the provided input data.
//
// It returns a new Run whose internal state begins at line 1, column 1 of
// the input, has the Parser's start state scheduled as the next state to
// handle, and uses an item channel with a buffer of 2.
func (p *Parser) Parse(input string) *Run {
	state := &P{
		input:        input,
		len:          len(input),
		cursorLine:   1,
		cursorColumn: 1,
		nextState:    p.startState,
		items:        make(chan Item, 2),
	}
	return &Run{p: state}
}
// Run represents a single parse run for a Parser.
// A Run is created by Parser.Parse and wraps the internal state (P) of
// that one run.
type Run struct {
p *P // a struct holding the internal state of a parse run
}
// P holds the internal state of a parse run.
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
@ -26,48 +99,37 @@ type P struct {
LastMatch string // a string representation of the last matched input data
}
// StateHandler defines the type of function that can be used to
// handle a parser state.
type StateHandler func(*P)
// New takes an input string and a start state,
// and initializes the parser for it.
func New(input string, start StateHandler) *P {
return &P{
input: input,
len: len(input),
cursorLine: 1,
cursorColumn: 1,
nextState: start,
items: make(chan Item, 2),
}
}
// Next retrieves the next parsed item.
// Next retrieves the next parsed item for a parse run.
// When a valid item was found, then the boolean return parameter will be true.
// On error or when successfully reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise.
func (p *P) Next() (Item, *Error, bool) {
func (run *Run) Next() (Item, *Error, bool) {
for {
select {
case i := <-p.items:
return p.makeReturnValues(i)
case i := <-run.p.items:
return run.makeReturnValues(i)
default:
p.runStatusHandler()
run.runStatusHandler()
}
}
}
// StateHandler defines the type of function that can be used to
// handle a parser state. The function is invoked with the internal
// state (*P) of the parse run that it must act upon.
type StateHandler func(*P)
// runStatusHandler moves the parser, which is basically a state machine,
// to its next status. It does so by invoking a function of the
// type StateHandler. This function represents the current status.
func (p *P) runStatusHandler() {
if state, ok := p.getNextStateHandler(); ok {
p.invokeNextStatusHandler(state)
// type StateHandler. This function represents the current status and
// is responsible for moving the parser to its next status, depending
// on the parsed input data.
func (run *Run) runStatusHandler() {
if state, ok := run.getNextStateHandler(); ok {
run.invokeNextStatusHandler(state)
}
}
// getNextStateHandler determintes the next StatusHandler to invoke in order
// getNextStateHandler determines the next StateHandler to invoke in order
// to move the parsing state machine one step further.
//
// When implementing a parser, the StateHandler functions must provide
@ -92,39 +154,39 @@ func (p *P) runStatusHandler() {
//
// When no routing decision is provided by a StateHandler, then this is
// considered a bug in the state handler, and the parser will panic.
func (p *P) getNextStateHandler() (StateHandler, bool) {
func (run *Run) getNextStateHandler() (StateHandler, bool) {
switch {
case p.nextState != nil:
return p.nextState, true
case len(p.routeStack) > 0:
return p.popRoute(), true
case p.expecting != "":
p.UnexpectedInput()
case run.p.nextState != nil:
return run.p.nextState, true
case len(run.p.routeStack) > 0:
return run.p.popRoute(), true
case run.p.expecting != "":
run.p.UnexpectedInput()
return nil, false
default:
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", name))
name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
}
}
// invokeNextStatusHandler moves the parser state to the provided state
// and invokes the StateHandler function.
func (p *P) invokeNextStatusHandler(state StateHandler) {
p.state = state
p.nextState = nil
p.expecting = ""
p.state(p)
func (run *Run) invokeNextStatusHandler(state StateHandler) {
run.p.state = state
run.p.nextState = nil
run.p.expecting = ""
run.p.state(run.p)
}
func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
switch {
case i.Type == ItemEOF:
return i, nil, false
case i.Type == ItemError:
p.err = &Error{i.Value, p.cursorLine, p.cursorColumn}
return i, p.err, false
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
return i, run.p.err, false
default:
p.item = i
run.p.item = i
return i, nil, true
}
}

View File

@ -2,10 +2,6 @@ package parsekit_test
import "git.makaay.nl/mauricem/go-parsekit"
const (
TestItem parsekit.ItemType = 1
SuccessItem parsekit.ItemType = 2
FailItem parsekit.ItemType = 3
)
const TestItem parsekit.ItemType = 1
var c, a = parsekit.C, parsekit.A

View File

@ -16,6 +16,10 @@ const ItemEOF ItemType = -1
// an error has occurred during parsing.
const ItemError ItemType = -2
// MatchedItem is a built-in parser item type that is used for indicating a
// successful match when using a parser that is based on a Matcher.
const MatchedItem ItemType = -3
// Item represents an item that can be emitted from the parser.
type Item struct {
Type ItemType
@ -69,7 +73,7 @@ type Error struct {
func (err *Error) Error() string {
if err == nil {
panic("Error method called on the parser, but no error was set")
panic("internal parser error: Error() method called on the parser, but no error was set")
}
return err.Message
}
@ -98,7 +102,7 @@ func (p *P) UnexpectedInput() {
case r == INVALID:
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
default:
panic("Unhandled output from peek()")
panic("parsekit bug: Unhandled output from peek()")
}
}

View File

@ -35,8 +35,7 @@ package parsekit
func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p}
if matcher == nil {
p.EmitError("internal parser error: matcher argument for On() is nil")
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
panic("internal parser error: matcher argument for On() is nil")
}
ok := matcher(m)