package parsekit import ( "unicode" "unicode/utf8" ) // Not in need of it myself, but nice to have I guess: // - LookAhead // MatchDialog is used by Matcher implementations as a means // to retrieve data to match against and to report back // successful matches. type MatchDialog struct { p *P runes []rune widths []int offset int curRune rune curWidth int parent *MatchDialog } // Fork splits off a child MatchDialog, containing the same // offset as the parent MatchDialog, but with all other data // in a new state. // By forking, a Matcher implementation can freely work with // a MatchDialog, without affecting the parent MatchDialog. // When the Matcher decides that a match was found, it can // use the Merge() method on the child to merge the child's // matching data into the parent MatchDialog. func (m *MatchDialog) Fork() *MatchDialog { child := &MatchDialog{ p: m.p, offset: m.offset, parent: m, } return child } // Merge merges the data for a a forked child MatchDialog back // into its parent: // * the runes that are accumulated in the child are added // to the parent's runes // * the parent's offset is set to the child's offset // After a Merge, the child MatchDialog is reset so it can // immediately be reused for performing another match. func (m *MatchDialog) Merge() bool { if m.parent == nil { panic("Cannot call Merge a a non-forked MatchDialog") } m.parent.runes = append(m.parent.runes, m.runes...) m.parent.widths = append(m.parent.widths, m.widths...) m.parent.offset = m.offset m.Clear() return true } // NextRune can be called by a Matcher on a MatchDialog in order // to receive the next rune from the input. // The rune is automatically added to the MatchDialog's runes. // Returns the rune and a boolean. The boolean will be false in // case an invalid UTF8 rune of the end of the file was encountered. func (m *MatchDialog) NextRune() (rune, bool) { if m.curRune == utf8.RuneError { panic("Matcher must not call NextRune() after it returned false") } r, w, ok := m.p.peek(m.offset) m.offset += w m.curRune = r m.curWidth = w m.runes = append(m.runes, r) m.widths = append(m.widths, w) return r, ok } // Clear empties out the accumulated runes that are stored // in the MatchDialog. The offset is kept as-is. func (m *MatchDialog) Clear() { m.runes = []rune{} m.widths = []int{} } // Matcher is the interface that must be implemented to provide // a matching stategy for the match() function. // A MatchDialog is provided as input. This implements a // specific set of methods that a Matcher needs to retrieve data // from the parser and to report back results. type Matcher interface { Match(*MatchDialog) bool } type MatcherConstructors struct { EndOfFile func() MatchEndOfFile AnyRune func() MatchAny Rune func(rune) MatchRune RuneRange func(rune, rune) MatchRuneRange Runes func(...rune) MatchAnyOf String func(string) MatchSequence StringNoCase func(string) MatchSequence AnyOf func(...Matcher) MatchAnyOf Not func(Matcher) MatchNot Optional func(Matcher) MatchOptional Sequence func(...Matcher) MatchSequence Repeat func(int, Matcher) MatchRepeat Min func(int, Matcher) MatchRepeat Max func(int, Matcher) MatchRepeat Bounded func(int, int, Matcher) MatchRepeat ZeroOrMore func(Matcher) MatchRepeat OneOrMore func(Matcher) MatchRepeat Separated func(Matcher, Matcher) MatchSeparated Drop func(Matcher) MatchDrop } // C provides access to a wide range of parser/combinator // constructorshat can be used to build matching expressions. // When using C in your own parser, then it is advised to create // an alias in your own package for easy reference: // var c = parsekit.C var C = MatcherConstructors{ EndOfFile: func() MatchEndOfFile { return MatchEndOfFile{} }, AnyRune: func() MatchAny { return MatchAny{} }, Rune: func(rune rune) MatchRune { return MatchRune{rune} }, RuneRange: func(start rune, end rune) MatchRuneRange { return MatchRuneRange{start, end} }, Runes: func(runes ...rune) MatchAnyOf { m := make([]Matcher, len(runes)) for i, r := range runes { m[i] = MatchRune{r} } return MatchAnyOf{m} }, String: func(s string) MatchSequence { var m = []Matcher{} for _, r := range s { m = append(m, MatchRune{r}) } return MatchSequence{m} }, StringNoCase: func(s string) MatchSequence { var m = []Matcher{} for _, r := range s { u := MatchRune{unicode.ToUpper(r)} l := MatchRune{unicode.ToLower(r)} m = append(m, MatchAnyOf{[]Matcher{u, l}}) } return MatchSequence{m} }, Optional: func(Matcher Matcher) MatchOptional { return MatchOptional{Matcher} }, Not: func(Matcher Matcher) MatchNot { return MatchNot{Matcher} }, AnyOf: func(Matchers ...Matcher) MatchAnyOf { return MatchAnyOf{Matchers} }, Sequence: func(Matchers ...Matcher) MatchSequence { return MatchSequence{Matchers} }, Repeat: func(count int, Matcher Matcher) MatchRepeat { return MatchRepeat{count, count, Matcher} }, Min: func(min int, Matcher Matcher) MatchRepeat { return MatchRepeat{min, -1, Matcher} }, Max: func(max int, Matcher Matcher) MatchRepeat { return MatchRepeat{-1, max, Matcher} }, Bounded: func(min int, max int, Matcher Matcher) MatchRepeat { return MatchRepeat{min, max, Matcher} }, OneOrMore: func(Matcher Matcher) MatchRepeat { return MatchRepeat{1, -1, Matcher} }, ZeroOrMore: func(Matcher Matcher) MatchRepeat { return MatchRepeat{0, -1, Matcher} }, Separated: func(separator Matcher, Matcher Matcher) MatchSeparated { return MatchSeparated{separator, Matcher} }, Drop: func(Matcher Matcher) MatchDrop { return MatchDrop{Matcher} }, } type MatchEndOfFile struct{} func (c MatchEndOfFile) Match(m *MatchDialog) bool { r, ok := m.NextRune() return !ok && r == EOF } type MatchAny struct{} func (c MatchAny) Match(m *MatchDialog) bool { _, ok := m.NextRune() return ok } type MatchNot struct { Matcher Matcher } func (c MatchNot) Match(m *MatchDialog) bool { child := m.Fork() if !c.Matcher.Match(child) { child.Merge() return true } return false } type MatchOptional struct { Matcher Matcher } func (c MatchOptional) Match(m *MatchDialog) bool { child := m.Fork() if c.Matcher.Match(child) { child.Merge() } return true } type MatchRune struct { match rune } func (c MatchRune) Match(m *MatchDialog) bool { r, ok := m.NextRune() return ok && r == c.match } type MatchRuneRange struct { start rune end rune } func (c MatchRuneRange) Match(m *MatchDialog) bool { r, ok := m.NextRune() return ok && r >= c.start && r <= c.end } type MatchAnyOf struct { Matcher []Matcher } func (c MatchAnyOf) Match(m *MatchDialog) bool { for _, Matcher := range c.Matcher { child := m.Fork() if Matcher.Match(child) { return child.Merge() } } return false } type MatchRepeat struct { min int max int Matcher Matcher } func (c MatchRepeat) Match(m *MatchDialog) bool { child := m.Fork() if c.min >= 0 && c.max >= 0 && c.min > c.max { panic("MatchRepeat definition error: max must not be < min") } total := 0 // Specified min: check for the minimal required amount of matches. for total < c.min { total++ if !c.Matcher.Match(child) { return false } } // No specified max: include the rest of the available matches. if c.max < 0 { child.Merge() for c.Matcher.Match(child) { child.Merge() } return true } // Specified max: include the rest of the availble matches, up to the max. child.Merge() for total < c.max { total++ if !c.Matcher.Match(child) { break } child.Merge() } return true } type MatchSequence struct { Matchers []Matcher } func (c MatchSequence) Match(m *MatchDialog) bool { child := m.Fork() for _, Matcher := range c.Matchers { if !Matcher.Match(child) { return false } } child.Merge() return true } type MatchSeparated struct { separator Matcher Matcher Matcher } func (c MatchSeparated) Match(m *MatchDialog) bool { seq := C.Sequence(c.Matcher, C.ZeroOrMore(C.Sequence(c.separator, c.Matcher))) return seq.Match(m) } type MatchDrop struct { Matcher Matcher } func (c MatchDrop) Match(m *MatchDialog) bool { child := m.Fork() if c.Matcher.Match(child) { child.Clear() child.Merge() return true } return false }