From 355f995388a16af66cdaeba76aa541ae99940e38 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Tue, 21 May 2019 23:24:01 +0000 Subject: [PATCH] Switched to using a function signature for all matchers, instead of an interface. There's no need for all those structs with methods. Also some little things to see if that improves documentation. --- parsekit.go | 3 +- parser_combinators.go | 512 ++++++++++++++++++------------------- parser_combinators_test.go | 26 +- statehandler_on.go | 4 +- statehandler_on_match.go | 4 - 5 files changed, 267 insertions(+), 282 deletions(-) diff --git a/parsekit.go b/parsekit.go index b266cf3..e633550 100644 --- a/parsekit.go +++ b/parsekit.go @@ -19,10 +19,11 @@ type P struct { cursorColumn int // current column position in the input expecting string // a description of what the current state expects to find buffer stringBuffer // an efficient buffer, used to build string values - LastMatch string // a string representation of the last matched input data items chan Item // channel of resulting Parser items item Item // the current item as reached by Next() and retrieved by Get() err *Error // an error when lexing failed, retrieved by Error() + + LastMatch string // a string representation of the last matched input data } // StateHandler defines the type of function that can be used to diff --git a/parser_combinators.go b/parser_combinators.go index a4e02b3..004376e 100644 --- a/parser_combinators.go +++ b/parser_combinators.go @@ -8,9 +8,10 @@ import ( // Not in need of it myself, but nice to have I guess: // - LookAhead -// MatchDialog is used by Matcher implementations as a means -// to retrieve data to match against and to report back -// successful matches. +type Matcher func(m *MatchDialog) bool + +// MatchDialog is used by Matcher functions to retrieve data from the parser +// input to match against and to report back successful matches. 
type MatchDialog struct { p *P runes []rune @@ -21,44 +22,9 @@ type MatchDialog struct { parent *MatchDialog } -// Fork splits off a child MatchDialog, containing the same -// offset as the parent MatchDialog, but with all other data -// in a new state. -// By forking, a Matcher implementation can freely work with -// a MatchDialog, without affecting the parent MatchDialog. -// When the Matcher decides that a match was found, it can -// use the Merge() method on the child to merge the child's -// matching data into the parent MatchDialog. -func (m *MatchDialog) Fork() *MatchDialog { - child := &MatchDialog{ - p: m.p, - offset: m.offset, - parent: m, - } - return child -} - -// Merge merges the data for a a forked child MatchDialog back -// into its parent: -// * the runes that are accumulated in the child are added -// to the parent's runes -// * the parent's offset is set to the child's offset -// After a Merge, the child MatchDialog is reset so it can -// immediately be reused for performing another match. -func (m *MatchDialog) Merge() bool { - if m.parent == nil { - panic("Cannot call Merge a a non-forked MatchDialog") - } - m.parent.runes = append(m.parent.runes, m.runes...) - m.parent.widths = append(m.parent.widths, m.widths...) - m.parent.offset = m.offset - m.Clear() - return true -} - // NextRune can be called by a Matcher on a MatchDialog in order // to receive the next rune from the input. -// The rune is automatically added to the MatchDialog's runes. +// The rune is automatically added to the MatchDialog's slice of runes. // Returns the rune and a boolean. The boolean will be false in // case an invalid UTF8 rune of the end of the file was encountered. func (m *MatchDialog) NextRune() (rune, bool) { @@ -74,267 +40,273 @@ func (m *MatchDialog) NextRune() (rune, bool) { return r, ok } -// Clear empties out the accumulated runes that are stored -// in the MatchDialog. The offset is kept as-is. 
+// Fork splits off a child MatchDialog, containing the same offset as the +// parent MatchDialog, but with all other data in a new state. +// +// By forking, a Matcher implementation can freely work with a MatchDialog, +// without affecting the parent MatchDialog. This is for example useful when +// the Matcher is checking for a sequence of runes. When there are first +// 3 runes returned from NextRune() which match the expectations, then the +// slice of runes inside the MatchDialog will contain these 3 runes. +// When after this the 4th rune turns out to be a mismatch, the forked +// MatchDialog can simply be discarded, and the state in the parent will be +// kept as-is. +// +// When a forked MatchDialog is in use, and the Matcher decides that a +// successful match was found, then the Merge() method can be called in +// order to transport the collected runes to the parent MatchDialog. +func (m *MatchDialog) Fork() *MatchDialog { + child := &MatchDialog{ + p: m.p, + offset: m.offset, + parent: m, + } + return child +} + +// Merge merges the data from a forked child MatchDialog back into its parent: +// * the runes that are accumulated in the child are added to the parent runes +// * the parent's offset is set to the child's offset +// After a Merge, the child MatchDialog is reset so it can immediately be +// reused for performing another match. +func (m *MatchDialog) Merge() bool { + if m.parent == nil { + panic("Cannot call Merge a a non-forked MatchDialog") + } + m.parent.runes = append(m.parent.runes, m.runes...) + m.parent.widths = append(m.parent.widths, m.widths...) + m.parent.offset = m.offset + m.Clear() + return true +} + +// Clear empties out the accumulated runes that are stored in the MatchDialog. +// The offset is kept as-is. func (m *MatchDialog) Clear() { m.runes = []rune{} m.widths = []int{} } -// Matcher is the interface that must be implemented to provide -// a matching stategy for the match() function. -// A MatchDialog is provided as input. 
This implements a -// specific set of methods that a Matcher needs to retrieve data -// from the parser and to report back results. -type Matcher interface { - Match(*MatchDialog) bool -} - -type MatcherConstructors struct { - EndOfFile func() MatchEndOfFile - AnyRune func() MatchAny - Rune func(rune) MatchRune - RuneRange func(rune, rune) MatchRuneRange - Runes func(...rune) MatchAnyOf - String func(string) MatchSequence - StringNoCase func(string) MatchSequence - AnyOf func(...Matcher) MatchAnyOf - Not func(Matcher) MatchNot - Optional func(Matcher) MatchOptional - Sequence func(...Matcher) MatchSequence - Repeat func(int, Matcher) MatchRepeat - Min func(int, Matcher) MatchRepeat - Max func(int, Matcher) MatchRepeat - Bounded func(int, int, Matcher) MatchRepeat - ZeroOrMore func(Matcher) MatchRepeat - OneOrMore func(Matcher) MatchRepeat - Separated func(Matcher, Matcher) MatchSeparated - Drop func(Matcher) MatchDrop -} - -// C provides access to a wide range of parser/combinator +// C provides convenient access to a wide range of parser/combinator // constructors that can be used to build matching expressions. 
+// // When using C in your own parser, then it is advised to create -// an alias in your own package for easy reference: -// var c = parsekit.C -var C = MatcherConstructors{ - EndOfFile: func() MatchEndOfFile { - return MatchEndOfFile{} - }, - AnyRune: func() MatchAny { - return MatchAny{} - }, - Rune: func(rune rune) MatchRune { - return MatchRune(rune) - }, - RuneRange: func(start rune, end rune) MatchRuneRange { - return MatchRuneRange{start, end} - }, - Runes: func(runes ...rune) MatchAnyOf { - m := make([]Matcher, len(runes)) - for i, r := range runes { - m[i] = MatchRune(r) - } - return MatchAnyOf{m} - }, - String: func(s string) MatchSequence { - var m = []Matcher{} - for _, r := range s { - m = append(m, MatchRune(r)) - } - return MatchSequence{m} - }, - StringNoCase: func(s string) MatchSequence { - var m = []Matcher{} - for _, r := range s { - u := MatchRune(unicode.ToUpper(r)) - l := MatchRune(unicode.ToLower(r)) - m = append(m, MatchAnyOf{[]Matcher{u, l}}) - } - return MatchSequence{m} - }, - Optional: func(Matcher Matcher) MatchOptional { - return MatchOptional{Matcher} - }, - Not: func(Matcher Matcher) MatchNot { - return MatchNot{Matcher} - }, - AnyOf: func(Matchers ...Matcher) MatchAnyOf { - return MatchAnyOf{Matchers} - }, - Sequence: func(Matchers ...Matcher) MatchSequence { - return MatchSequence{Matchers} - }, - Repeat: func(count int, Matcher Matcher) MatchRepeat { - return MatchRepeat{count, count, Matcher} - }, - Min: func(min int, Matcher Matcher) MatchRepeat { - return MatchRepeat{min, -1, Matcher} - }, - Max: func(max int, Matcher Matcher) MatchRepeat { - return MatchRepeat{-1, max, Matcher} - }, - Bounded: func(min int, max int, Matcher Matcher) MatchRepeat { - return MatchRepeat{min, max, Matcher} - }, - OneOrMore: func(Matcher Matcher) MatchRepeat { - return MatchRepeat{1, -1, Matcher} - }, - ZeroOrMore: func(Matcher Matcher) MatchRepeat { - return MatchRepeat{0, -1, Matcher} - }, - Separated: func(separator Matcher, Matcher Matcher) 
MatchSeparated { - return MatchSeparated{separator, Matcher} - }, - Drop: func(Matcher Matcher) MatchDrop { - return MatchDrop{Matcher} - }, +// a variable in your own package to reference it (var c = parsekit.C). +// This saves a lot of typing, and it makes your code a lot cleaner. +var C = struct { + EndOfFile func() Matcher + AnyRune func() Matcher + Rune func(rune) Matcher + Runes func(...rune) Matcher + RuneRange func(rune, rune) Matcher + String func(string) Matcher + StringNoCase func(string) Matcher + AnyOf func(...Matcher) Matcher + Not func(Matcher) Matcher + Optional func(Matcher) Matcher + Sequence func(...Matcher) Matcher + Repeat func(int, Matcher) Matcher + Min func(int, Matcher) Matcher + Max func(int, Matcher) Matcher + ZeroOrMore func(Matcher) Matcher + OneOrMore func(Matcher) Matcher + Bounded func(int, int, Matcher) Matcher + Separated func(Matcher, Matcher) Matcher + Drop func(Matcher) Matcher +}{ + EndOfFile: MatchEndOfFile, + AnyRune: MatchAnyRune, + Rune: MatchRune, + Runes: MatchRunes, + RuneRange: MatchRuneRange, + String: MatchString, + StringNoCase: MatchStringNoCase, + Optional: MatchOptional, + AnyOf: MatchAnyOf, + Not: MatchNot, + Sequence: MatchSequence, + Repeat: MatchRepeat, + Min: MatchMin, + Max: MatchMax, + ZeroOrMore: MatchZeroOrMore, + OneOrMore: MatchOneOrMore, + Bounded: MatchBounded, + Separated: MatchSeparated, + Drop: MatchDrop, } -type MatchEndOfFile struct{} - -func (c MatchEndOfFile) Match(m *MatchDialog) bool { - r, ok := m.NextRune() - return !ok && r == EOF -} - -type MatchAny struct{} - -func (c MatchAny) Match(m *MatchDialog) bool { - _, ok := m.NextRune() - return ok -} - -type MatchNot struct { - Matcher Matcher -} - -func (c MatchNot) Match(m *MatchDialog) bool { - child := m.Fork() - if !c.Matcher.Match(child) { - child.Merge() - return true +func MatchEndOfFile() Matcher { + return func(m *MatchDialog) bool { + input, ok := m.NextRune() + return !ok && input == EOF } - return false } -type MatchOptional 
struct { - Matcher Matcher -} - -func (c MatchOptional) Match(m *MatchDialog) bool { - child := m.Fork() - if c.Matcher.Match(child) { - child.Merge() +func MatchAnyRune() Matcher { + return func(m *MatchDialog) bool { + _, ok := m.NextRune() + return ok } - return true } -type MatchRune rune - -func (c MatchRune) Match(m *MatchDialog) bool { - r, ok := m.NextRune() - return ok && r == rune(c) +func MatchRune(r rune) Matcher { + return func(m *MatchDialog) bool { + input, ok := m.NextRune() + return ok && input == r + } } -type MatchRuneRange struct { - start rune - end rune +func MatchRunes(runes ...rune) Matcher { + return func(m *MatchDialog) bool { + input, ok := m.NextRune() + if ok { + for _, r := range runes { + if input == r { + return true + } + } + } + return false + } } -func (c MatchRuneRange) Match(m *MatchDialog) bool { - r, ok := m.NextRune() - return ok && r >= c.start && r <= c.end +func MatchRuneRange(start rune, end rune) Matcher { + return func(m *MatchDialog) bool { + input, ok := m.NextRune() + return ok && input >= start && input <= end + } } -type MatchAnyOf struct { - Matcher []Matcher +func MatchString(s string) Matcher { + var matchers = []Matcher{} + for _, r := range s { + matchers = append(matchers, MatchRune(r)) + } + return MatchSequence(matchers...) } -func (c MatchAnyOf) Match(m *MatchDialog) bool { - for _, Matcher := range c.Matcher { +func MatchStringNoCase(s string) Matcher { + var matchers = []Matcher{} + for _, r := range s { + u := unicode.ToUpper(r) + l := unicode.ToLower(r) + matchers = append(matchers, MatchRunes(u, l)) + } + return MatchSequence(matchers...) 
+} + +func MatchOptional(matcher Matcher) Matcher { + return func(m *MatchDialog) bool { child := m.Fork() - if Matcher.Match(child) { - return child.Merge() - } - } - return false -} - -type MatchRepeat struct { - min int - max int - Matcher Matcher -} - -func (c MatchRepeat) Match(m *MatchDialog) bool { - child := m.Fork() - if c.min >= 0 && c.max >= 0 && c.min > c.max { - panic("MatchRepeat definition error: max must not be < min") - } - total := 0 - // Specified min: check for the minimal required amount of matches. - for total < c.min { - total++ - if !c.Matcher.Match(child) { - return false - } - } - // No specified max: include the rest of the available matches. - if c.max < 0 { - child.Merge() - for c.Matcher.Match(child) { + if matcher(child) { child.Merge() } return true } - // Specified max: include the rest of the availble matches, up to the max. - child.Merge() - for total < c.max { - total++ - if !c.Matcher.Match(child) { - break +} + +func MatchSequence(matchers ...Matcher) Matcher { + return func(m *MatchDialog) bool { + child := m.Fork() + for _, matcher := range matchers { + if !matcher(child) { + return false + } } child.Merge() - } - return true -} - -type MatchSequence struct { - Matchers []Matcher -} - -func (c MatchSequence) Match(m *MatchDialog) bool { - child := m.Fork() - for _, Matcher := range c.Matchers { - if !Matcher.Match(child) { - return false - } - } - child.Merge() - return true -} - -type MatchSeparated struct { - separator Matcher - Matcher Matcher -} - -func (c MatchSeparated) Match(m *MatchDialog) bool { - seq := C.Sequence(c.Matcher, C.ZeroOrMore(C.Sequence(c.separator, c.Matcher))) - return seq.Match(m) -} - -type MatchDrop struct { - Matcher Matcher -} - -func (c MatchDrop) Match(m *MatchDialog) bool { - child := m.Fork() - if c.Matcher.Match(child) { - child.Clear() - child.Merge() return true } - return false +} + +func MatchAnyOf(matchers ...Matcher) Matcher { + return func(m *MatchDialog) bool { + for _, matcher := 
range matchers { + child := m.Fork() + if matcher(child) { + return child.Merge() + } + } + return false + } +} + +func MatchNot(matcher Matcher) Matcher { + return func(m *MatchDialog) bool { + child := m.Fork() + if !matcher(child) { + return child.Merge() + } + return false + } +} + +func MatchRepeat(count int, matcher Matcher) Matcher { + return MatchBounded(count, count, matcher) +} + +func MatchMin(min int, matcher Matcher) Matcher { + return MatchBounded(min, -1, matcher) +} + +func MatchMax(max int, matcher Matcher) Matcher { + return MatchBounded(-1, max, matcher) +} + +func MatchZeroOrMore(matcher Matcher) Matcher { + return MatchBounded(0, -1, matcher) +} + +func MatchOneOrMore(matcher Matcher) Matcher { + return MatchBounded(1, -1, matcher) +} + +func MatchBounded(min int, max int, matcher Matcher) Matcher { + return func(m *MatchDialog) bool { + child := m.Fork() + if min >= 0 && max >= 0 && min > max { + panic("MatchRepeat definition error: max must not be < min") + } + total := 0 + // Specified min: check for the minimum required amount of matches. + for min > 0 && total < min { + total++ + if !matcher(child) { + return false + } + } + // No specified max: include the rest of the available matches. + if max < 0 { + child.Merge() + for matcher(child) { + child.Merge() + } + return true + } + // Specified max: include the rest of the available matches, up to the max. 
+ child.Merge() + for total < max { + total++ + if !matcher(child) { + break + } + child.Merge() + } + return true + } +} + +func MatchSeparated(separator Matcher, separated Matcher) Matcher { + return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated))) +} + +func MatchDrop(matcher Matcher) Matcher { + return func(m *MatchDialog) bool { + child := m.Fork() + if matcher(child) { + child.Clear() + child.Merge() + return true + } + return false + } } diff --git a/parser_combinators_test.go b/parser_combinators_test.go index b26f6b9..449f0b1 100644 --- a/parser_combinators_test.go +++ b/parser_combinators_test.go @@ -1,9 +1,11 @@ package parsekit_test import ( + "fmt" "testing" - p "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit" + p "git.makaay.nl/mauricem/go-parsekit" ) var c = p.C @@ -21,6 +23,19 @@ func newParser(input string, Matcher p.Matcher) *p.P { return p.New(input, stateFn) } +func ExampleTestMatchAny(t *testing.T) { + parser := parsekit.New( + "¡Any / valid / character will dö!", + func(p *parsekit.P) { + p.On(parsekit.MatchAnyRune()).Accept() + p.EmitLiteral(TestItem) + }) + match, _, ok := parser.Next() + if ok { + fmt.Printf("Match = %q\n", match) + } +} + func TestMatchAnyRune(t *testing.T) { p := newParser("o", c.AnyRune()) r, err, ok := p.Next() @@ -286,14 +301,15 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) { func TestSequence_WithRepeatedRunes(t *testing.T) { whitespace := c.Optional(c.OneOrMore(c.Rune(' '))) equal := c.Rune('=') - assignment := c.Sequence(whitespace, equal, whitespace) - p := newParser(" == 10", assignment) + ding := c.Optional(c.OneOrMore(c.Rune('x'))) + assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace) + p := newParser(" = xxxx 16", assignment) r, err, ok := p.Next() if !ok { t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } - if r.Value != " =" { - t.Errorf("Parser item value is %q instead of expected 
\" =\"", r.Value) + if r.Value != " = xxxx " { + t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value) } } diff --git a/statehandler_on.go b/statehandler_on.go index bbc7a49..bd737da 100644 --- a/statehandler_on.go +++ b/statehandler_on.go @@ -27,9 +27,9 @@ package parsekit // // Here's a complete example chain: // p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End() -func (p *P) On(Matcher Matcher) *matchAction { +func (p *P) On(matcher Matcher) *matchAction { m := &MatchDialog{p: p} - ok := Matcher.Match(m) + ok := matcher(m) // Keep track of the last match, to allow parser implementations // to access it in an easy way. Typical use would be something like: diff --git a/statehandler_on_match.go b/statehandler_on_match.go index 874e661..8beb6cb 100644 --- a/statehandler_on_match.go +++ b/statehandler_on_match.go @@ -33,10 +33,6 @@ func (a *matchAction) Accept() *routeAction { func (a *matchAction) Skip() *routeAction { if a.ok { for i, r := range a.runes { - type C struct { - Rune MatchRune - } - a.p.advanceCursor(r, a.widths[i]) } }