343 lines
8.2 KiB
Go
343 lines
8.2 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// TODO: not needed by the author right now, but possibly nice to have:
//   - LookAhead
|
|
|
|
// MatchDialog is used by Matcher implementations as a means
|
|
// to retrieve data to match against and to report back
|
|
// successful matches.
|
|
type MatchDialog struct {
|
|
p *P
|
|
runes []rune
|
|
widths []int
|
|
offset int
|
|
curRune rune
|
|
curWidth int
|
|
parent *MatchDialog
|
|
}
|
|
|
|
// Fork splits off a child MatchDialog, containing the same
|
|
// offset as the parent MatchDialog, but with all other data
|
|
// in a new state.
|
|
// By forking, a Matcher implementation can freely work with
|
|
// a MatchDialog, without affecting the parent MatchDialog.
|
|
// When the Matcher decides that a match was found, it can
|
|
// use the Merge() method on the child to merge the child's
|
|
// matching data into the parent MatchDialog.
|
|
func (m *MatchDialog) Fork() *MatchDialog {
|
|
child := &MatchDialog{
|
|
p: m.p,
|
|
offset: m.offset,
|
|
parent: m,
|
|
}
|
|
return child
|
|
}
|
|
|
|
// Merge merges the data for a a forked child MatchDialog back
|
|
// into its parent:
|
|
// * the runes that are accumulated in the child are added
|
|
// to the parent's runes
|
|
// * the parent's offset is set to the child's offset
|
|
// After a Merge, the child MatchDialog is reset so it can
|
|
// immediately be reused for performing another match.
|
|
func (m *MatchDialog) Merge() bool {
|
|
if m.parent == nil {
|
|
panic("Cannot call Merge a a non-forked MatchDialog")
|
|
}
|
|
m.parent.runes = append(m.parent.runes, m.runes...)
|
|
m.parent.widths = append(m.parent.widths, m.widths...)
|
|
m.parent.offset = m.offset
|
|
m.Clear()
|
|
return true
|
|
}
|
|
|
|
// NextRune can be called by a Matcher on a MatchDialog in order
|
|
// to receive the next rune from the input.
|
|
// The rune is automatically added to the MatchDialog's runes.
|
|
// Returns the rune and a boolean. The boolean will be false in
|
|
// case an invalid UTF8 rune of the end of the file was encountered.
|
|
func (m *MatchDialog) NextRune() (rune, bool) {
|
|
if m.curRune == utf8.RuneError {
|
|
panic("Matcher must not call NextRune() after it returned false")
|
|
}
|
|
r, w, ok := m.p.peek(m.offset)
|
|
m.offset += w
|
|
m.curRune = r
|
|
m.curWidth = w
|
|
m.runes = append(m.runes, r)
|
|
m.widths = append(m.widths, w)
|
|
return r, ok
|
|
}
|
|
|
|
// Clear empties out the accumulated runes that are stored
|
|
// in the MatchDialog. The offset is kept as-is.
|
|
func (m *MatchDialog) Clear() {
|
|
m.runes = []rune{}
|
|
m.widths = []int{}
|
|
}
|
|
|
|
// Matcher is the interface that must be implemented to provide
|
|
// a matching stategy for the match() function.
|
|
// A MatchDialog is provided as input. This implements a
|
|
// specific set of methods that a Matcher needs to retrieve data
|
|
// from the parser and to report back results.
|
|
type Matcher interface {
|
|
Match(*MatchDialog) bool
|
|
}
|
|
|
|
type MatcherConstructors struct {
|
|
EndOfFile func() MatchEndOfFile
|
|
AnyRune func() MatchAny
|
|
Rune func(rune) MatchRune
|
|
RuneRange func(rune, rune) MatchRuneRange
|
|
Runes func(...rune) MatchAnyOf
|
|
String func(string) MatchSequence
|
|
StringNoCase func(string) MatchSequence
|
|
AnyOf func(...Matcher) MatchAnyOf
|
|
Not func(Matcher) MatchNot
|
|
Optional func(Matcher) MatchOptional
|
|
Sequence func(...Matcher) MatchSequence
|
|
Repeat func(int, Matcher) MatchRepeat
|
|
Min func(int, Matcher) MatchRepeat
|
|
Max func(int, Matcher) MatchRepeat
|
|
Bounded func(int, int, Matcher) MatchRepeat
|
|
ZeroOrMore func(Matcher) MatchRepeat
|
|
OneOrMore func(Matcher) MatchRepeat
|
|
Separated func(Matcher, Matcher) MatchSeparated
|
|
Drop func(Matcher) MatchDrop
|
|
}
|
|
|
|
// C provides access to a wide range of parser/combinator
|
|
// constructorshat can be used to build matching expressions.
|
|
// When using C in your own parser, then it is advised to create
|
|
// an alias in your own package for easy reference:
|
|
// var c = parsekit.C
|
|
var C = MatcherConstructors{
|
|
EndOfFile: func() MatchEndOfFile {
|
|
return MatchEndOfFile{}
|
|
},
|
|
AnyRune: func() MatchAny {
|
|
return MatchAny{}
|
|
},
|
|
Rune: func(rune rune) MatchRune {
|
|
return MatchRune{rune}
|
|
},
|
|
RuneRange: func(start rune, end rune) MatchRuneRange {
|
|
return MatchRuneRange{start, end}
|
|
},
|
|
Runes: func(runes ...rune) MatchAnyOf {
|
|
m := make([]Matcher, len(runes))
|
|
for i, r := range runes {
|
|
m[i] = MatchRune{r}
|
|
}
|
|
return MatchAnyOf{m}
|
|
},
|
|
String: func(s string) MatchSequence {
|
|
var m = []Matcher{}
|
|
for _, r := range s {
|
|
m = append(m, MatchRune{r})
|
|
}
|
|
return MatchSequence{m}
|
|
},
|
|
StringNoCase: func(s string) MatchSequence {
|
|
var m = []Matcher{}
|
|
for _, r := range s {
|
|
u := MatchRune{unicode.ToUpper(r)}
|
|
l := MatchRune{unicode.ToLower(r)}
|
|
m = append(m, MatchAnyOf{[]Matcher{u, l}})
|
|
}
|
|
return MatchSequence{m}
|
|
},
|
|
Optional: func(Matcher Matcher) MatchOptional {
|
|
return MatchOptional{Matcher}
|
|
},
|
|
Not: func(Matcher Matcher) MatchNot {
|
|
return MatchNot{Matcher}
|
|
},
|
|
AnyOf: func(Matchers ...Matcher) MatchAnyOf {
|
|
return MatchAnyOf{Matchers}
|
|
},
|
|
Sequence: func(Matchers ...Matcher) MatchSequence {
|
|
return MatchSequence{Matchers}
|
|
},
|
|
Repeat: func(count int, Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{count, count, Matcher}
|
|
},
|
|
Min: func(min int, Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{min, -1, Matcher}
|
|
},
|
|
Max: func(max int, Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{-1, max, Matcher}
|
|
},
|
|
Bounded: func(min int, max int, Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{min, max, Matcher}
|
|
},
|
|
OneOrMore: func(Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{1, -1, Matcher}
|
|
},
|
|
ZeroOrMore: func(Matcher Matcher) MatchRepeat {
|
|
return MatchRepeat{0, -1, Matcher}
|
|
},
|
|
Separated: func(separator Matcher, Matcher Matcher) MatchSeparated {
|
|
return MatchSeparated{separator, Matcher}
|
|
},
|
|
Drop: func(Matcher Matcher) MatchDrop {
|
|
return MatchDrop{Matcher}
|
|
},
|
|
}
|
|
|
|
type MatchEndOfFile struct{}
|
|
|
|
func (c MatchEndOfFile) Match(m *MatchDialog) bool {
|
|
r, ok := m.NextRune()
|
|
return !ok && r == EOF
|
|
}
|
|
|
|
type MatchAny struct{}
|
|
|
|
func (c MatchAny) Match(m *MatchDialog) bool {
|
|
_, ok := m.NextRune()
|
|
return ok
|
|
}
|
|
|
|
type MatchNot struct {
|
|
Matcher Matcher
|
|
}
|
|
|
|
func (c MatchNot) Match(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if !c.Matcher.Match(child) {
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
type MatchOptional struct {
|
|
Matcher Matcher
|
|
}
|
|
|
|
func (c MatchOptional) Match(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if c.Matcher.Match(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
|
|
type MatchRune struct {
|
|
match rune
|
|
}
|
|
|
|
func (c MatchRune) Match(m *MatchDialog) bool {
|
|
r, ok := m.NextRune()
|
|
return ok && r == c.match
|
|
}
|
|
|
|
type MatchRuneRange struct {
|
|
start rune
|
|
end rune
|
|
}
|
|
|
|
func (c MatchRuneRange) Match(m *MatchDialog) bool {
|
|
r, ok := m.NextRune()
|
|
return ok && r >= c.start && r <= c.end
|
|
}
|
|
|
|
type MatchAnyOf struct {
|
|
Matcher []Matcher
|
|
}
|
|
|
|
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
|
for _, Matcher := range c.Matcher {
|
|
child := m.Fork()
|
|
if Matcher.Match(child) {
|
|
return child.Merge()
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
type MatchRepeat struct {
|
|
min int
|
|
max int
|
|
Matcher Matcher
|
|
}
|
|
|
|
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if c.min >= 0 && c.max >= 0 && c.min > c.max {
|
|
panic("MatchRepeat definition error: max must not be < min")
|
|
}
|
|
total := 0
|
|
// Specified min: check for the minimal required amount of matches.
|
|
for total < c.min {
|
|
total++
|
|
if !c.Matcher.Match(child) {
|
|
return false
|
|
}
|
|
}
|
|
// No specified max: include the rest of the available matches.
|
|
if c.max < 0 {
|
|
child.Merge()
|
|
for c.Matcher.Match(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
// Specified max: include the rest of the availble matches, up to the max.
|
|
child.Merge()
|
|
for total < c.max {
|
|
total++
|
|
if !c.Matcher.Match(child) {
|
|
break
|
|
}
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
|
|
type MatchSequence struct {
|
|
Matchers []Matcher
|
|
}
|
|
|
|
func (c MatchSequence) Match(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
for _, Matcher := range c.Matchers {
|
|
if !Matcher.Match(child) {
|
|
return false
|
|
}
|
|
}
|
|
child.Merge()
|
|
return true
|
|
}
|
|
|
|
type MatchSeparated struct {
|
|
separator Matcher
|
|
Matcher Matcher
|
|
}
|
|
|
|
func (c MatchSeparated) Match(m *MatchDialog) bool {
|
|
seq := C.Sequence(c.Matcher, C.ZeroOrMore(C.Sequence(c.separator, c.Matcher)))
|
|
return seq.Match(m)
|
|
}
|
|
|
|
type MatchDrop struct {
|
|
Matcher Matcher
|
|
}
|
|
|
|
func (c MatchDrop) Match(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if c.Matcher.Match(child) {
|
|
child.Clear()
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|