297 lines
7.6 KiB
Go
297 lines
7.6 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Nice to have I guess:
|
|
// - LookAhead
|
|
// - Ready to go combinators for various number notations
|
|
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
|
|
|
|
// Matcher is the function signature shared by all matching expressions in
// this file. A Matcher reads input through the provided MatchDialog and
// returns true when the input matches, false otherwise.
type Matcher func(m *MatchDialog) bool
|
|
|
|
// MatchDialog is used by Matcher functions to retrieve data from the parser
|
|
// input to match against and to report back successful matches.
|
|
type MatchDialog struct {
|
|
p *P
|
|
runes []rune
|
|
widths []int
|
|
offset int
|
|
curRune rune
|
|
curWidth int
|
|
parent *MatchDialog
|
|
}
|
|
|
|
// NextRune can be called by a Matcher on a MatchDialog in order
|
|
// to receive the next rune from the input.
|
|
// The rune is automatically added to the MatchDialog's slice of runes.
|
|
// Returns the rune and a boolean. The boolean will be false in
|
|
// case an invalid UTF8 rune of the end of the file was encountered.
|
|
func (m *MatchDialog) NextRune() (rune, bool) {
|
|
if m.curRune == utf8.RuneError {
|
|
panic("Matcher must not call NextRune() after it returned false")
|
|
}
|
|
r, w, ok := m.p.peek(m.offset)
|
|
m.offset += w
|
|
m.curRune = r
|
|
m.curWidth = w
|
|
m.runes = append(m.runes, r)
|
|
m.widths = append(m.widths, w)
|
|
return r, ok
|
|
}
|
|
|
|
// Fork splits off a child MatchDialog, containing the same offset as the
|
|
// parent MatchDialog, but with all other data in a new state.
|
|
//
|
|
// By forking, a Matcher implementation can freely work with a MatchDialog,
|
|
// without affecting the parent MatchDialog. This is for example useful when
|
|
// the Matcher is checking for a sequence of runes. When there are first
|
|
// 3 runes returned from NextRune() which match the expectations, then the
|
|
// slice of runes inside the MatchDialog will contain these 3 runes.
|
|
// When after this the 4th rune turns out to be a mismatch, the forked
|
|
// MatchDialog can simply be discarded, and the state in the parent will be
|
|
// kept as-is.
|
|
//
|
|
// When a forked MatchDialog is in use, and the Matcher decides that a
|
|
// successul match was found, then the Merge() method can be called in
|
|
// order to transport the collected runes to the parent MatchDialog.
|
|
func (m *MatchDialog) Fork() *MatchDialog {
|
|
child := &MatchDialog{
|
|
p: m.p,
|
|
offset: m.offset,
|
|
parent: m,
|
|
}
|
|
return child
|
|
}
|
|
|
|
// Merge merges the data from a forked child MatchDialog back into its parent:
|
|
// * the runes that are accumulated in the child are added to the parent runes
|
|
// * the parent's offset is set to the child's offset
|
|
// After a Merge, the child MatchDialog is reset so it can immediately be
|
|
// reused for performing another match.
|
|
func (m *MatchDialog) Merge() bool {
|
|
if m.parent == nil {
|
|
panic("Cannot call Merge a a non-forked MatchDialog")
|
|
}
|
|
m.parent.runes = append(m.parent.runes, m.runes...)
|
|
m.parent.widths = append(m.parent.widths, m.widths...)
|
|
m.parent.offset = m.offset
|
|
m.Clear()
|
|
return true
|
|
}
|
|
|
|
// Clear empties out the accumulated runes that are stored in the MatchDialog.
|
|
// The offset is kept as-is.
|
|
func (m *MatchDialog) Clear() {
|
|
m.runes = []rune{}
|
|
m.widths = []int{}
|
|
}
|
|
|
|
// C provides convenient access to a range of parser/combinator
|
|
// constructors that can be used to build matching expressions.
|
|
//
|
|
// When using C in your own parser, then it is advised to create
|
|
// a variable in your own package to reference it (var c = parsekit.C).
|
|
// This saves a lot of typing, and it makes your code a lot cleaner.
|
|
var C = struct {
|
|
Rune func(rune) Matcher
|
|
Runes func(...rune) Matcher
|
|
RuneRange func(rune, rune) Matcher
|
|
String func(string) Matcher
|
|
StringNoCase func(string) Matcher
|
|
AnyOf func(...Matcher) Matcher
|
|
Not func(Matcher) Matcher
|
|
Optional func(Matcher) Matcher
|
|
Sequence func(...Matcher) Matcher
|
|
Repeat func(int, Matcher) Matcher
|
|
Min func(int, Matcher) Matcher
|
|
Max func(int, Matcher) Matcher
|
|
ZeroOrMore func(Matcher) Matcher
|
|
OneOrMore func(Matcher) Matcher
|
|
Bounded func(int, int, Matcher) Matcher
|
|
Separated func(Matcher, Matcher) Matcher
|
|
Drop func(Matcher) Matcher
|
|
}{
|
|
Rune: MatchRune,
|
|
Runes: MatchRunes,
|
|
RuneRange: MatchRuneRange,
|
|
String: MatchString,
|
|
StringNoCase: MatchStringNoCase,
|
|
Optional: MatchOptional,
|
|
AnyOf: MatchAnyOf,
|
|
Not: MatchNot,
|
|
Sequence: MatchSequence,
|
|
Repeat: MatchRepeat,
|
|
Min: MatchMin,
|
|
Max: MatchMax,
|
|
ZeroOrMore: MatchZeroOrMore,
|
|
OneOrMore: MatchOneOrMore,
|
|
Bounded: MatchBounded,
|
|
Separated: MatchSeparated,
|
|
Drop: MatchDrop,
|
|
}
|
|
|
|
func MatchRune(r rune) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
input, ok := m.NextRune()
|
|
return ok && input == r
|
|
}
|
|
}
|
|
|
|
func MatchRunes(runes ...rune) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
input, ok := m.NextRune()
|
|
if ok {
|
|
for _, r := range runes {
|
|
if input == r {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
func MatchRuneRange(start rune, end rune) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
input, ok := m.NextRune()
|
|
return ok && input >= start && input <= end
|
|
}
|
|
}
|
|
|
|
func MatchString(s string) Matcher {
|
|
var matchers = []Matcher{}
|
|
for _, r := range s {
|
|
matchers = append(matchers, MatchRune(r))
|
|
}
|
|
return MatchSequence(matchers...)
|
|
}
|
|
|
|
func MatchStringNoCase(s string) Matcher {
|
|
var matchers = []Matcher{}
|
|
for _, r := range s {
|
|
u := unicode.ToUpper(r)
|
|
l := unicode.ToLower(r)
|
|
matchers = append(matchers, MatchRunes(u, l))
|
|
}
|
|
return MatchSequence(matchers...)
|
|
}
|
|
|
|
func MatchOptional(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
func MatchSequence(matchers ...Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
for _, matcher := range matchers {
|
|
if !matcher(child) {
|
|
return false
|
|
}
|
|
}
|
|
child.Merge()
|
|
return true
|
|
}
|
|
}
|
|
|
|
func MatchAnyOf(matchers ...Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
for _, matcher := range matchers {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
return child.Merge()
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
func MatchNot(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if !matcher(child) {
|
|
return child.Merge()
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
func MatchRepeat(count int, matcher Matcher) Matcher {
|
|
return MatchBounded(count, count, matcher)
|
|
}
|
|
|
|
func MatchMin(min int, matcher Matcher) Matcher {
|
|
return MatchBounded(min, -1, matcher)
|
|
}
|
|
|
|
func MatchMax(max int, matcher Matcher) Matcher {
|
|
return MatchBounded(-1, max, matcher)
|
|
}
|
|
|
|
func MatchZeroOrMore(matcher Matcher) Matcher {
|
|
return MatchBounded(0, -1, matcher)
|
|
}
|
|
|
|
func MatchOneOrMore(matcher Matcher) Matcher {
|
|
return MatchBounded(1, -1, matcher)
|
|
}
|
|
|
|
func MatchBounded(min int, max int, matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if min >= 0 && max >= 0 && min > max {
|
|
panic("MatchRepeat definition error: max must not be < min")
|
|
}
|
|
total := 0
|
|
// Specified min: check for the minimum required amount of matches.
|
|
for min > 0 && total < min {
|
|
total++
|
|
if !matcher(child) {
|
|
return false
|
|
}
|
|
}
|
|
// No specified max: include the rest of the available matches.
|
|
if max < 0 {
|
|
child.Merge()
|
|
for matcher(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
// Specified max: include the rest of the availble matches, up to the max.
|
|
child.Merge()
|
|
for total < max {
|
|
total++
|
|
if !matcher(child) {
|
|
break
|
|
}
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
|
|
return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated)))
|
|
}
|
|
|
|
func MatchDrop(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
child.Clear()
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|