Switched to using a function signature for all matchers, instead of an interface. There's no need for all those structs with methods. Also some little things to see if that improves documentation.

This commit is contained in:
Maurice Makaay 2019-05-21 23:24:01 +00:00
parent 7474565179
commit 355f995388
5 changed files with 267 additions and 282 deletions

View File

@ -19,10 +19,11 @@ type P struct {
cursorColumn int // current column position in the input cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values buffer stringBuffer // an efficient buffer, used to build string values
LastMatch string // a string representation of the last matched input data
items chan Item // channel of resulting Parser items items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get() item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error() err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
} }
// StateHandler defines the type of function that can be used to // StateHandler defines the type of function that can be used to

View File

@ -8,9 +8,10 @@ import (
// Not in need of it myself, but nice to have I guess: // Not in need of it myself, but nice to have I guess:
// - LookAhead // - LookAhead
// MatchDialog is used by Matcher implementations as a means type Matcher func(m *MatchDialog) bool
// to retrieve data to match against and to report back
// successful matches. // MatchDialog is used by Matcher functions to retrieve data from the parser
// input to match against and to report back successful matches.
type MatchDialog struct { type MatchDialog struct {
p *P p *P
runes []rune runes []rune
@ -21,44 +22,9 @@ type MatchDialog struct {
parent *MatchDialog parent *MatchDialog
} }
// Fork creates a child MatchDialog for the receiver. The child refers to
// the same parser and starts at the same offset as its parent, but it
// begins without any accumulated runes or widths of its own.
//
// A Matcher implementation can work on the child freely, without
// affecting the parent MatchDialog. Once the Matcher decides that a match
// was found, it can call Merge() on the child to move the child's
// matching data into the parent MatchDialog.
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{p: m.p, offset: m.offset, parent: m}
}
// Merge moves the data of a forked child MatchDialog back into its parent:
//   * the runes (and their widths) that were accumulated in the child are
//     appended to those of the parent
//   * the parent's offset is set to the child's offset
// After a Merge, the child MatchDialog is cleared, so it can immediately
// be reused for performing another match. Merge always returns true.
//
// Merge panics when it is called on a MatchDialog that has no parent.
func (m *MatchDialog) Merge() bool {
	dst := m.parent
	if dst == nil {
		panic("Cannot call Merge a a non-forked MatchDialog")
	}
	dst.runes, dst.widths = append(dst.runes, m.runes...), append(dst.widths, m.widths...)
	dst.offset = m.offset
	m.Clear()
	return true
}
// NextRune can be called by a Matcher on a MatchDialog in order // NextRune can be called by a Matcher on a MatchDialog in order
// to receive the next rune from the input. // to receive the next rune from the input.
// The rune is automatically added to the MatchDialog's runes. // The rune is automatically added to the MatchDialog's slice of runes.
// Returns the rune and a boolean. The boolean will be false in // Returns the rune and a boolean. The boolean will be false in
// case an invalid UTF8 rune or the end of the file was encountered. // case an invalid UTF8 rune or the end of the file was encountered.
func (m *MatchDialog) NextRune() (rune, bool) { func (m *MatchDialog) NextRune() (rune, bool) {
@ -74,267 +40,273 @@ func (m *MatchDialog) NextRune() (rune, bool) {
return r, ok return r, ok
} }
// Clear empties out the accumulated runes that are stored // Fork splits off a child MatchDialog, containing the same offset as the
// in the MatchDialog. The offset is kept as-is. // parent MatchDialog, but with all other data in a new state.
//
// By forking, a Matcher implementation can freely work with a MatchDialog,
// without affecting the parent MatchDialog. This is for example useful when
// the Matcher is checking for a sequence of runes. When there are first
// 3 runes returned from NextRune() which match the expectations, then the
// slice of runes inside the MatchDialog will contain these 3 runes.
// When after this the 4th rune turns out to be a mismatch, the forked
// MatchDialog can simply be discarded, and the state in the parent will be
// kept as-is.
//
// When a forked MatchDialog is in use, and the Matcher decides that a
// successful match was found, then the Merge() method can be called in
// order to transport the collected runes to the parent MatchDialog.
func (m *MatchDialog) Fork() *MatchDialog {
	// Only the parser, the offset and the link to the parent are carried
	// over; the fork starts out without any collected runes or widths.
	fork := new(MatchDialog)
	fork.p = m.p
	fork.offset = m.offset
	fork.parent = m
	return fork
}
// Merge hands the results of a forked child MatchDialog over to its parent:
//   * the child's accumulated runes and widths are appended to the parent's
//   * the parent's offset is moved to the child's offset
// Afterwards the child is cleared (its offset is kept), which makes it
// directly reusable for a next match attempt. The return value is always
// true, so a Matcher can use "return child.Merge()" to report success.
//
// Calling Merge on a MatchDialog that was not created by Fork() panics.
func (m *MatchDialog) Merge() bool {
	parent := m.parent
	if parent == nil {
		panic("Cannot call Merge a a non-forked MatchDialog")
	}
	parent.runes = append(parent.runes, m.runes...)
	parent.widths = append(parent.widths, m.widths...)
	parent.offset = m.offset
	m.Clear()
	return true
}
// Clear empties out the accumulated runes that are stored in the MatchDialog.
// The offset is kept as-is.
func (m *MatchDialog) Clear() { func (m *MatchDialog) Clear() {
m.runes = []rune{} m.runes = []rune{}
m.widths = []int{} m.widths = []int{}
} }
// Matcher is the interface that must be implemented to provide // C provides convenient access to a wide range of parser/combinator
// a matching strategy for the match() function.
// A MatchDialog is provided as input. This implements a
// specific set of methods that a Matcher needs to retrieve data
// from the parser and to report back results.
type Matcher interface {
	// Match runs the matching strategy against the provided MatchDialog.
	// The returned boolean tells the caller whether or not the input
	// matched.
	Match(m *MatchDialog) bool
}
// MatcherConstructors describes a collection of constructor functions.
// Every field holds a function that takes the arguments for one kind of
// matcher and returns the concrete matcher type for it.
type MatcherConstructors struct {
// Constructors that take no arguments.
EndOfFile func() MatchEndOfFile
AnyRune func() MatchAny
// Constructors that are configured using runes or a string.
Rune func(rune) MatchRune
RuneRange func(rune, rune) MatchRuneRange
Runes func(...rune) MatchAnyOf
String func(string) MatchSequence
StringNoCase func(string) MatchSequence
// Constructors that wrap one or more other Matchers.
AnyOf func(...Matcher) MatchAnyOf
Not func(Matcher) MatchNot
Optional func(Matcher) MatchOptional
Sequence func(...Matcher) MatchSequence
// Constructors for repetition; the int arguments are the repeat bounds.
Repeat func(int, Matcher) MatchRepeat
Min func(int, Matcher) MatchRepeat
Max func(int, Matcher) MatchRepeat
Bounded func(int, int, Matcher) MatchRepeat
ZeroOrMore func(Matcher) MatchRepeat
OneOrMore func(Matcher) MatchRepeat
// Separated takes the separator Matcher first, the separated Matcher second.
Separated func(Matcher, Matcher) MatchSeparated
Drop func(Matcher) MatchDrop
}
// C provides access to a wide range of parser/combinator
// constructors that can be used to build matching expressions. // constructors that can be used to build matching expressions.
//
// When using C in your own parser, then it is advised to create // When using C in your own parser, then it is advised to create
// an alias in your own package for easy reference: // a variable in your own package to reference it (var c = parsekit.C).
// var c = parsekit.C // This saves a lot of typing, and it makes your code a lot cleaner.
var C = MatcherConstructors{ var C = struct {
EndOfFile: func() MatchEndOfFile { EndOfFile func() Matcher
return MatchEndOfFile{} AnyRune func() Matcher
}, Rune func(rune) Matcher
AnyRune: func() MatchAny { Runes func(...rune) Matcher
return MatchAny{} RuneRange func(rune, rune) Matcher
}, String func(string) Matcher
Rune: func(rune rune) MatchRune { StringNoCase func(string) Matcher
return MatchRune(rune) AnyOf func(...Matcher) Matcher
}, Not func(Matcher) Matcher
RuneRange: func(start rune, end rune) MatchRuneRange { Optional func(Matcher) Matcher
return MatchRuneRange{start, end} Sequence func(...Matcher) Matcher
}, Repeat func(int, Matcher) Matcher
Runes: func(runes ...rune) MatchAnyOf { Min func(int, Matcher) Matcher
m := make([]Matcher, len(runes)) Max func(int, Matcher) Matcher
for i, r := range runes { ZeroOrMore func(Matcher) Matcher
m[i] = MatchRune(r) OneOrMore func(Matcher) Matcher
} Bounded func(int, int, Matcher) Matcher
return MatchAnyOf{m} Separated func(Matcher, Matcher) Matcher
}, Drop func(Matcher) Matcher
String: func(s string) MatchSequence { }{
var m = []Matcher{} EndOfFile: MatchEndOfFile,
for _, r := range s { AnyRune: MatchAnyRune,
m = append(m, MatchRune(r)) Rune: MatchRune,
} Runes: MatchRunes,
return MatchSequence{m} RuneRange: MatchRuneRange,
}, String: MatchString,
StringNoCase: func(s string) MatchSequence { StringNoCase: MatchStringNoCase,
var m = []Matcher{} Optional: MatchOptional,
for _, r := range s { AnyOf: MatchAnyOf,
u := MatchRune(unicode.ToUpper(r)) Not: MatchNot,
l := MatchRune(unicode.ToLower(r)) Sequence: MatchSequence,
m = append(m, MatchAnyOf{[]Matcher{u, l}}) Repeat: MatchRepeat,
} Min: MatchMin,
return MatchSequence{m} Max: MatchMax,
}, ZeroOrMore: MatchZeroOrMore,
Optional: func(Matcher Matcher) MatchOptional { OneOrMore: MatchOneOrMore,
return MatchOptional{Matcher} Bounded: MatchBounded,
}, Separated: MatchSeparated,
Not: func(Matcher Matcher) MatchNot { Drop: MatchDrop,
return MatchNot{Matcher}
},
AnyOf: func(Matchers ...Matcher) MatchAnyOf {
return MatchAnyOf{Matchers}
},
Sequence: func(Matchers ...Matcher) MatchSequence {
return MatchSequence{Matchers}
},
Repeat: func(count int, Matcher Matcher) MatchRepeat {
return MatchRepeat{count, count, Matcher}
},
Min: func(min int, Matcher Matcher) MatchRepeat {
return MatchRepeat{min, -1, Matcher}
},
Max: func(max int, Matcher Matcher) MatchRepeat {
return MatchRepeat{-1, max, Matcher}
},
Bounded: func(min int, max int, Matcher Matcher) MatchRepeat {
return MatchRepeat{min, max, Matcher}
},
OneOrMore: func(Matcher Matcher) MatchRepeat {
return MatchRepeat{1, -1, Matcher}
},
ZeroOrMore: func(Matcher Matcher) MatchRepeat {
return MatchRepeat{0, -1, Matcher}
},
Separated: func(separator Matcher, Matcher Matcher) MatchSeparated {
return MatchSeparated{separator, Matcher}
},
Drop: func(Matcher Matcher) MatchDrop {
return MatchDrop{Matcher}
},
} }
type MatchEndOfFile struct{} func MatchEndOfFile() Matcher {
return func(m *MatchDialog) bool {
func (c MatchEndOfFile) Match(m *MatchDialog) bool { input, ok := m.NextRune()
r, ok := m.NextRune() return !ok && input == EOF
return !ok && r == EOF
}
type MatchAny struct{}
func (c MatchAny) Match(m *MatchDialog) bool {
_, ok := m.NextRune()
return ok
}
type MatchNot struct {
Matcher Matcher
}
func (c MatchNot) Match(m *MatchDialog) bool {
child := m.Fork()
if !c.Matcher.Match(child) {
child.Merge()
return true
} }
return false
} }
type MatchOptional struct { func MatchAnyRune() Matcher {
Matcher Matcher return func(m *MatchDialog) bool {
} _, ok := m.NextRune()
return ok
func (c MatchOptional) Match(m *MatchDialog) bool {
child := m.Fork()
if c.Matcher.Match(child) {
child.Merge()
} }
return true
} }
type MatchRune rune func MatchRune(r rune) Matcher {
return func(m *MatchDialog) bool {
func (c MatchRune) Match(m *MatchDialog) bool { input, ok := m.NextRune()
r, ok := m.NextRune() return ok && input == r
return ok && r == rune(c) }
} }
type MatchRuneRange struct { func MatchRunes(runes ...rune) Matcher {
start rune return func(m *MatchDialog) bool {
end rune input, ok := m.NextRune()
if ok {
for _, r := range runes {
if input == r {
return true
}
}
}
return false
}
} }
func (c MatchRuneRange) Match(m *MatchDialog) bool { func MatchRuneRange(start rune, end rune) Matcher {
r, ok := m.NextRune() return func(m *MatchDialog) bool {
return ok && r >= c.start && r <= c.end input, ok := m.NextRune()
return ok && input >= start && input <= end
}
} }
type MatchAnyOf struct { func MatchString(s string) Matcher {
Matcher []Matcher var matchers = []Matcher{}
for _, r := range s {
matchers = append(matchers, MatchRune(r))
}
return MatchSequence(matchers...)
} }
func (c MatchAnyOf) Match(m *MatchDialog) bool { func MatchStringNoCase(s string) Matcher {
for _, Matcher := range c.Matcher { var matchers = []Matcher{}
for _, r := range s {
u := unicode.ToUpper(r)
l := unicode.ToLower(r)
matchers = append(matchers, MatchRunes(u, l))
}
return MatchSequence(matchers...)
}
func MatchOptional(matcher Matcher) Matcher {
return func(m *MatchDialog) bool {
child := m.Fork() child := m.Fork()
if Matcher.Match(child) { if matcher(child) {
return child.Merge()
}
}
return false
}
type MatchRepeat struct {
min int
max int
Matcher Matcher
}
func (c MatchRepeat) Match(m *MatchDialog) bool {
child := m.Fork()
if c.min >= 0 && c.max >= 0 && c.min > c.max {
panic("MatchRepeat definition error: max must not be < min")
}
total := 0
// Specified min: check for the minimal required amount of matches.
for total < c.min {
total++
if !c.Matcher.Match(child) {
return false
}
}
// No specified max: include the rest of the available matches.
if c.max < 0 {
child.Merge()
for c.Matcher.Match(child) {
child.Merge() child.Merge()
} }
return true return true
} }
// Specified max: include the rest of the availble matches, up to the max. }
child.Merge()
for total < c.max { func MatchSequence(matchers ...Matcher) Matcher {
total++ return func(m *MatchDialog) bool {
if !c.Matcher.Match(child) { child := m.Fork()
break for _, matcher := range matchers {
if !matcher(child) {
return false
}
} }
child.Merge() child.Merge()
}
return true
}
type MatchSequence struct {
Matchers []Matcher
}
func (c MatchSequence) Match(m *MatchDialog) bool {
child := m.Fork()
for _, Matcher := range c.Matchers {
if !Matcher.Match(child) {
return false
}
}
child.Merge()
return true
}
type MatchSeparated struct {
separator Matcher
Matcher Matcher
}
func (c MatchSeparated) Match(m *MatchDialog) bool {
seq := C.Sequence(c.Matcher, C.ZeroOrMore(C.Sequence(c.separator, c.Matcher)))
return seq.Match(m)
}
type MatchDrop struct {
Matcher Matcher
}
func (c MatchDrop) Match(m *MatchDialog) bool {
child := m.Fork()
if c.Matcher.Match(child) {
child.Clear()
child.Merge()
return true return true
} }
return false }
// MatchAnyOf returns a Matcher that tries the provided matchers one by
// one, in the order in which they were given. Every attempt is performed
// on a fresh fork of the MatchDialog. The first matcher that succeeds has
// its fork merged into the parent, after which true is returned. When none
// of the matchers succeeds (or no matchers were provided at all), false is
// returned and the parent MatchDialog is left untouched.
func MatchAnyOf(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := range matchers {
			attempt := m.Fork()
			if !matchers[i](attempt) {
				continue
			}
			return attempt.Merge()
		}
		return false
	}
}
// MatchNot returns a Matcher that inverts the outcome of the provided
// matcher. The provided matcher is executed on a fork of the MatchDialog.
// When it succeeds, MatchNot reports false and the fork is thrown away.
// When it fails, the fork is merged into the parent (including whatever
// the fork collected during the failed attempt) and true is reported.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		probe := m.Fork()
		if matcher(probe) {
			return false
		}
		return probe.Merge()
	}
}
// MatchRepeat returns the Matcher that MatchBounded builds when both the
// minimum and the maximum number of matches are set to count.
func MatchRepeat(count int, matcher Matcher) Matcher {
return MatchBounded(count, count, matcher)
}
// MatchMin returns the Matcher that MatchBounded builds for a minimum of
// min matches and a maximum of -1. MatchBounded treats a negative maximum
// as "no upper limit".
func MatchMin(min int, matcher Matcher) Matcher {
return MatchBounded(min, -1, matcher)
}
// MatchMax returns the Matcher that MatchBounded builds for a minimum of
// -1 and a maximum of max matches. With a negative minimum, MatchBounded
// does not require any match up front.
func MatchMax(max int, matcher Matcher) Matcher {
return MatchBounded(-1, max, matcher)
}
// MatchZeroOrMore returns the Matcher that MatchBounded builds for a
// minimum of 0 matches and a maximum of -1 (no upper limit).
func MatchZeroOrMore(matcher Matcher) Matcher {
return MatchBounded(0, -1, matcher)
}
// MatchOneOrMore returns the Matcher that MatchBounded builds for a
// minimum of 1 match and a maximum of -1 (no upper limit).
func MatchOneOrMore(matcher Matcher) Matcher {
return MatchBounded(1, -1, matcher)
}
// MatchBounded returns a Matcher that applies the provided matcher
// repeatedly: at least min times and at most max times. A negative min
// means that no matches are required up front; a negative max means that
// there is no upper limit on the number of matches.
//
// The returned Matcher works on a fork of the MatchDialog that it is
// given. When fewer than min matches are found, it returns false and the
// parent MatchDialog is left untouched. Otherwise, every successful match
// is merged into the parent and true is returned.
//
// When there is no upper limit, repeating stops as soon as the matcher
// fails, or as soon as it succeeds without moving the offset forward.
// Without that second condition, a matcher that can succeed on empty
// input (e.g. an optional matcher) would keep the loop running forever.
//
// The returned Matcher panics when min and max are both non-negative and
// min is greater than max.
func MatchBounded(min int, max int, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if min >= 0 && max >= 0 && min > max {
			panic("MatchRepeat definition error: max must not be < min")
		}
		child := m.Fork()
		total := 0
		// Specified min: check for the minimum required amount of matches.
		// For min <= 0 this loop is skipped entirely.
		for total < min {
			total++
			if !matcher(child) {
				return false
			}
		}
		// The minimum was reached, so what was matched so far is final.
		child.Merge()
		// No specified max: include the rest of the available matches.
		if max < 0 {
			for {
				before := child.offset
				if !matcher(child) {
					return true
				}
				child.Merge()
				if child.offset == before {
					// Successful match that consumed no input: trying
					// again would yield the same result over and over.
					return true
				}
			}
		}
		// Specified max: include the rest of the available matches, up to the max.
		for total < max {
			total++
			if !matcher(child) {
				break
			}
			child.Merge()
		}
		return true
	}
}
// MatchSeparated returns a Matcher for one or more occurrences of
// separated, with separator in between every two occurrences. It is
// composed as: separated, followed by zero or more repetitions of the
// sequence (separator, separated).
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
	following := MatchSequence(separator, separated)
	return MatchSequence(separated, MatchZeroOrMore(following))
}
func MatchDrop(matcher Matcher) Matcher {
return func(m *MatchDialog) bool {
child := m.Fork()
if matcher(child) {
child.Clear()
child.Merge()
return true
}
return false
}
} }

View File

@ -1,9 +1,11 @@
package parsekit_test package parsekit_test
import ( import (
"fmt"
"testing" "testing"
p "git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
p "git.makaay.nl/mauricem/go-parsekit"
) )
var c = p.C var c = p.C
@ -21,6 +23,19 @@ func newParser(input string, Matcher p.Matcher) *p.P {
return p.New(input, stateFn) return p.New(input, stateFn)
} }
func ExampleTestMatchAny(t *testing.T) {
parser := parsekit.New(
"¡Any / valid / character will dö!",
func(p *parsekit.P) {
p.On(parsekit.MatchAnyRune()).Accept()
p.EmitLiteral(TestItem)
})
match, _, ok := parser.Next()
if ok {
fmt.Printf("Match = %q\n", match)
}
}
func TestMatchAnyRune(t *testing.T) { func TestMatchAnyRune(t *testing.T) {
p := newParser("o", c.AnyRune()) p := newParser("o", c.AnyRune())
r, err, ok := p.Next() r, err, ok := p.Next()
@ -286,14 +301,15 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
func TestSequence_WithRepeatedRunes(t *testing.T) { func TestSequence_WithRepeatedRunes(t *testing.T) {
whitespace := c.Optional(c.OneOrMore(c.Rune(' '))) whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
equal := c.Rune('=') equal := c.Rune('=')
assignment := c.Sequence(whitespace, equal, whitespace) ding := c.Optional(c.OneOrMore(c.Rune('x')))
p := newParser(" == 10", assignment) assignment := c.Sequence(whitespace, equal, whitespace, ding, whitespace)
p := newParser(" = xxxx 16", assignment)
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != " =" { if r.Value != " = xxxx " {
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value) t.Errorf("Parser item value is %q instead of expected \" = xxxx \"", r.Value)
} }
} }

View File

@ -27,9 +27,9 @@ package parsekit
// //
// Here's a complete example chain: // Here's a complete example chain:
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End() // p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(Matcher Matcher) *matchAction { func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p} m := &MatchDialog{p: p}
ok := Matcher.Match(m) ok := matcher(m)
// Keep track of the last match, to allow parser implementations // Keep track of the last match, to allow parser implementations
// to access it in an easy way. Typical use would be something like: // to access it in an easy way. Typical use would be something like:

View File

@ -33,10 +33,6 @@ func (a *matchAction) Accept() *routeAction {
func (a *matchAction) Skip() *routeAction { func (a *matchAction) Skip() *routeAction {
if a.ok { if a.ok {
for i, r := range a.runes { for i, r := range a.runes {
type C struct {
Rune MatchRune
}
a.p.advanceCursor(r, a.widths[i]) a.p.advanceCursor(r, a.widths[i])
} }
} }