go-parsekit/combinators.go

297 lines
7.6 KiB
Go

package parsekit
import (
"unicode"
"unicode/utf8"
)
// Nice to have I guess:
// - LookAhead
// - Ready to go combinators for various number notations
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)

// Matcher is the function signature that is shared by all matching functions
// in this file. A Matcher receives a MatchDialog, through which it reads runes
// from the input, and it returns true when the input matched or false when
// it did not.
type Matcher func(m *MatchDialog) bool
// MatchDialog is used by Matcher functions to retrieve data from the parser
// input to match against and to report back successful matches.
type MatchDialog struct {
	p        *P           // parser that the input is read from, through p.peek()
	runes    []rune       // runes collected so far through NextRune() and Merge()
	widths   []int        // widths as reported by p.peek(), one entry per collected rune
	offset   int          // offset that is passed to p.peek() for the next read
	curRune  rune         // rune returned by the most recent NextRune() call
	curWidth int          // width returned by the most recent NextRune() call
	parent   *MatchDialog // MatchDialog that this one was forked from; set by Fork()
}
// NextRune reads the next rune from the input, at the MatchDialog's current
// offset, and hands it to the calling Matcher.
//
// The rune and its width are always appended to the runes and widths that
// are collected in the MatchDialog, and the offset is moved forward by the
// width, regardless of the boolean that is returned.
//
// Returns the rune and a boolean. The boolean is the status as reported by
// the parser's peek() (NOTE(review): presumably false in case an invalid
// UTF8 rune or the end of the file was encountered; peek() is not visible
// from here, please confirm).
//
// Once utf8.RuneError has been read, every following call to NextRune() on
// this MatchDialog panics.
func (m *MatchDialog) NextRune() (rune, bool) {
	// The previous read already produced an error rune, so the Matcher
	// should have stopped reading by now.
	if m.curRune == utf8.RuneError {
		panic("Matcher must not call NextRune() after it returned false")
	}

	next, width, ok := m.p.peek(m.offset)

	// Remember what was read last and add it to the collected data.
	m.curRune, m.curWidth = next, width
	m.runes = append(m.runes, next)
	m.widths = append(m.widths, width)
	m.offset += width

	return next, ok
}
// Fork creates a child MatchDialog that reads from the same parser and that
// starts at the same offset as the MatchDialog it is forked from. All other
// data in the child starts out empty.
//
// A Matcher can work on the forked child freely, without affecting the
// parent. This is for example useful when matching a sequence of runes:
// when the first 3 runes from NextRune() match the expectations, the child
// holds these 3 runes. When the 4th rune then turns out to be a mismatch,
// the child can simply be discarded and the parent is left as it was.
//
// When the Matcher decides that a successful match was found using the
// child, it calls Merge() on the child to transport the collected runes
// and the reached offset to the parent.
func (m *MatchDialog) Fork() *MatchDialog {
	fork := new(MatchDialog)
	fork.p = m.p
	fork.offset = m.offset
	fork.parent = m
	return fork
}
// Merge merges the data from a forked child MatchDialog back into its parent:
//   - the runes and widths that are accumulated in the child are appended
//     to those of the parent
//   - the parent's offset is set to the child's offset
//
// After a Merge, the child MatchDialog is cleared (its offset is kept), so it
// can immediately be reused for performing another match.
//
// Merge always returns true, which allows a Matcher to report a successful
// match using "return child.Merge()".
//
// Merge panics when it is called on a MatchDialog that has no parent, i.e.
// one that was not created through Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.runes = append(m.parent.runes, m.runes...)
	m.parent.widths = append(m.parent.widths, m.widths...)
	m.parent.offset = m.offset
	m.Clear()
	return true
}
// Clear throws away the runes and widths that were collected in the
// MatchDialog so far, replacing them with fresh empty slices.
// All other fields, including the offset, are kept as-is.
func (m *MatchDialog) Clear() {
	m.runes, m.widths = []rune{}, []int{}
}
// C provides convenient access to a range of parser/combinator
// constructors that can be used to build matching expressions.
// Every field simply refers to the Match* function of the same name.
//
// When using C in your own parser, then it is advised to create
// a variable in your own package to reference it (var c = parsekit.C).
// This saves a lot of typing, and it makes your code a lot cleaner.
var C = struct {
	Rune         func(rune) Matcher              // one specific rune
	Runes        func(...rune) Matcher           // any one of the provided runes
	RuneRange    func(rune, rune) Matcher        // one rune from an inclusive range
	String       func(string) Matcher            // the runes of a string, in order
	StringNoCase func(string) Matcher            // like String, but ignoring case
	AnyOf        func(...Matcher) Matcher        // the first matcher that matches
	Not          func(Matcher) Matcher           // succeeds when the matcher fails
	Optional     func(Matcher) Matcher           // always succeeds, uses the match if there is one
	Sequence     func(...Matcher) Matcher        // all matchers, one after the other
	Repeat       func(int, Matcher) Matcher      // a fixed number of repetitions
	Min          func(int, Matcher) Matcher      // a minimum number of repetitions
	Max          func(int, Matcher) Matcher      // a maximum number of repetitions
	ZeroOrMore   func(Matcher) Matcher           // any number of repetitions
	OneOrMore    func(Matcher) Matcher           // at least one repetition
	Bounded      func(int, int, Matcher) Matcher // repetitions between a min and a max
	Separated    func(Matcher, Matcher) Matcher  // (separator, separated): items with separators in between
	Drop         func(Matcher) Matcher           // match, but discard the matched runes
}{
	Rune:         MatchRune,
	Runes:        MatchRunes,
	RuneRange:    MatchRuneRange,
	String:       MatchString,
	StringNoCase: MatchStringNoCase,
	Optional:     MatchOptional,
	AnyOf:        MatchAnyOf,
	Not:          MatchNot,
	Sequence:     MatchSequence,
	Repeat:       MatchRepeat,
	Min:          MatchMin,
	Max:          MatchMax,
	ZeroOrMore:   MatchZeroOrMore,
	OneOrMore:    MatchOneOrMore,
	Bounded:      MatchBounded,
	Separated:    MatchSeparated,
	Drop:         MatchDrop,
}
// MatchRune creates a Matcher that reads one rune from the input and that
// matches when this rune could be read successfully and is equal to r.
func MatchRune(r rune) Matcher {
	return func(m *MatchDialog) bool {
		if got, ok := m.NextRune(); ok {
			return got == r
		}
		return false
	}
}
// MatchRunes creates a Matcher that reads one rune from the input and that
// matches when this rune could be read successfully and is equal to any one
// of the provided runes. Without any provided runes, it never matches.
func MatchRunes(runes ...rune) Matcher {
	return func(m *MatchDialog) bool {
		got, ok := m.NextRune()
		if !ok {
			return false
		}
		for i := range runes {
			if runes[i] == got {
				return true
			}
		}
		return false
	}
}
// MatchRuneRange creates a Matcher that reads one rune from the input and
// that matches when this rune could be read successfully and lies within the
// range start - end. Both start and end are part of the range.
func MatchRuneRange(start rune, end rune) Matcher {
	return func(m *MatchDialog) bool {
		got, ok := m.NextRune()
		if !ok {
			return false
		}
		return start <= got && got <= end
	}
}
// MatchString creates a Matcher that matches the runes of the string s, in
// the order in which they appear in s. It is built as a MatchSequence of one
// MatchRune per rune of s.
func MatchString(s string) Matcher {
	perRune := make([]Matcher, 0, len(s))
	for _, char := range s {
		perRune = append(perRune, MatchRune(char))
	}
	return MatchSequence(perRune...)
}
// MatchStringNoCase creates a Matcher that matches the runes of the string s,
// in the order in which they appear in s, while ignoring case.
//
// Case is ignored on a rune by rune basis. For every rune r in s, the input
// rune is accepted when it is equal to r itself, to unicode.ToUpper(r), to
// unicode.ToLower(r), or to any rune that is equivalent to r under Unicode
// simple case folding (unicode.SimpleFold). Including r and its fold
// equivalents makes sure that runes that are neither upper nor lower case,
// like the title case rune U+01C5, do match themselves and their variants.
func MatchStringNoCase(s string) Matcher {
	matchers := make([]Matcher, 0, len(s))
	for _, r := range s {
		variants := []rune{r, unicode.ToUpper(r), unicode.ToLower(r)}
		// SimpleFold cycles through all runes that are case-equivalent to r
		// and eventually arrives back at r (for a rune without equivalents,
		// it returns r right away).
		for f := unicode.SimpleFold(r); f != r; f = unicode.SimpleFold(f) {
			variants = append(variants, f)
		}
		matchers = append(matchers, MatchRunes(variants...))
	}
	return MatchSequence(matchers...)
}
// MatchOptional creates a Matcher that always succeeds. The provided matcher
// is tried on a fork of the MatchDialog. When it matches, its result is
// merged into the MatchDialog. When it does not match, the MatchDialog is
// left untouched.
func MatchOptional(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if attempt := m.Fork(); matcher(attempt) {
			attempt.Merge()
		}
		return true
	}
}
// MatchSequence creates a Matcher that applies all provided matchers, one
// after the other, to a single fork of the MatchDialog. As soon as one of the
// matchers fails, the sequence fails and the MatchDialog is left untouched.
// When all matchers succeed, the collected result is merged into the
// MatchDialog. A sequence without any matchers always succeeds.
func MatchSequence(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		child := m.Fork()
		for i := 0; i < len(matchers); i++ {
			if matched := matchers[i](child); !matched {
				return false
			}
		}
		// Merge() always reports true.
		return child.Merge()
	}
}
// MatchAnyOf creates a Matcher that tries the provided matchers in the order
// in which they are provided, each one on a fresh fork of the MatchDialog.
// The result of the first matcher that succeeds is merged into the
// MatchDialog and no further matchers are tried. When none of the matchers
// succeeds, the match fails and the MatchDialog is left untouched.
func MatchAnyOf(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := range matchers {
			attempt := m.Fork()
			if !matchers[i](attempt) {
				continue
			}
			attempt.Merge()
			return true
		}
		return false
	}
}
// MatchNot creates a Matcher that succeeds when the provided matcher fails
// and that fails when the provided matcher succeeds.
//
// The provided matcher is applied to a fork of the MatchDialog. When it
// fails, whatever it left behind in that fork is merged into the MatchDialog.
//
// NOTE(review): this means that a matcher that reads directly from the
// MatchDialog it is given (like MatchRune) gets its mismatching rune accepted
// as the match, while a matcher that works on a fork of its own (like
// MatchSequence) leaves nothing behind to be merged.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		probe := m.Fork()
		if matcher(probe) {
			return false
		}
		probe.Merge()
		return true
	}
}
// MatchRepeat creates a Matcher that applies the provided matcher count
// times. It is MatchBounded with both the minimum and the maximum set to
// count.
func MatchRepeat(count int, matcher Matcher) Matcher {
	lower, upper := count, count
	return MatchBounded(lower, upper, matcher)
}
// MatchMin creates a Matcher that applies the provided matcher at least min
// times, without an upper limit. It is MatchBounded with the maximum set to
// -1 (no maximum).
func MatchMin(min int, matcher Matcher) Matcher {
	const noMax = -1
	return MatchBounded(min, noMax, matcher)
}
// MatchMax creates a Matcher that applies the provided matcher at most max
// times, without a required minimum. It is MatchBounded with the minimum set
// to -1 (no minimum).
func MatchMax(max int, matcher Matcher) Matcher {
	const noMin = -1
	return MatchBounded(noMin, max, matcher)
}
// MatchZeroOrMore creates a Matcher that applies the provided matcher as
// often as it matches, where not matching at all is fine too. It is
// MatchBounded with a minimum of 0 and the maximum set to -1 (no maximum).
func MatchZeroOrMore(matcher Matcher) Matcher {
	const (
		atLeast = 0
		noMax   = -1
	)
	return MatchBounded(atLeast, noMax, matcher)
}
// MatchOneOrMore creates a Matcher that applies the provided matcher as
// often as it matches, requiring at least one match. It is MatchBounded with
// a minimum of 1 and the maximum set to -1 (no maximum).
func MatchOneOrMore(matcher Matcher) Matcher {
	const (
		atLeast = 1
		noMax   = -1
	)
	return MatchBounded(atLeast, noMax, matcher)
}
// MatchBounded creates a Matcher that applies the provided matcher
// repeatedly: at least min times and at most max times.
//
// A min of zero or less means that no minimum number of matches is required.
// A negative max means that there is no upper limit to the number of matches.
//
// The created Matcher fails when the provided matcher matches fewer than min
// times in a row, in which case the MatchDialog is left untouched. Otherwise,
// it succeeds and all repetitions that matched (up to max) are merged into
// the MatchDialog.
//
// MatchBounded panics right away, at definition time, when both min and max
// are specified (>= 0) and min is greater than max.
func MatchBounded(min int, max int, matcher Matcher) Matcher {
	// The bounds are fixed for the lifetime of the Matcher, so they are
	// checked once here, instead of on every match attempt.
	if min >= 0 && max >= 0 && min > max {
		panic("MatchBounded definition error: max must not be < min")
	}
	return func(m *MatchDialog) bool {
		child := m.Fork()
		total := 0

		// Specified min: check for the minimum required amount of matches.
		// These are collected in the child without merging, so nothing ends
		// up in the parent when the minimum is not reached.
		for ; total < min; total++ {
			if !matcher(child) {
				return false
			}
		}
		child.Merge()

		// Include the rest of the available matches, up to the max (if any).
		// Every extra match is merged on its own, so the leftovers of the
		// final, failed attempt stay behind in the child.
		for max < 0 || total < max {
			before := child.offset
			if !matcher(child) {
				break
			}
			child.Merge()
			total++
			// Without a max, a matcher that succeeds without consuming any
			// input (e.g. an optional one) would keep on succeeding at the
			// very same offset forever. One such match is enough.
			if max < 0 && child.offset == before {
				break
			}
		}
		return true
	}
}
// MatchSeparated creates a Matcher that matches one or more occurrences of
// separated, with a separator in between every two occurrences. It is built
// as: separated, followed by zero or more times (separator, separated).
// A separator that is not followed by another separated is not included
// in the match.
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
	nextItem := MatchSequence(separator, separated)
	moreItems := MatchZeroOrMore(nextItem)
	return MatchSequence(separated, moreItems)
}
// MatchDrop creates a Matcher that succeeds when the provided matcher
// succeeds, but that throws away the runes that were matched. The matcher is
// applied to a fork of the MatchDialog. On a match, the fork's collected
// runes are cleared before merging, so only the reached offset is
// transported to the MatchDialog. On a mismatch, the MatchDialog is left
// untouched.
func MatchDrop(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		scratch := m.Fork()
		if !matcher(scratch) {
			return false
		}
		scratch.Clear()
		scratch.Merge()
		return true
	}
}