go-parsekit/matcher_builtin.go

559 lines
18 KiB
Go

package parsekit
import (
"fmt"
"strings"
"unicode"
)
// C provides convenient access to a range of parser/combinators
// that can be used to construct Matcher functions.
//
// Every field is bound to the Match* constructor of the corresponding name
// (C.Rune is MatchRune, C.Separated is MatchSeparated, and so on), so both
// spellings can be used interchangeably.
//
// Mind the argument order of the two-Matcher combinators, which mirrors the
// functions they are bound to: Separated takes the separator first and the
// separated Matcher second; Except takes the exception first and the actual
// Matcher second.
//
// When using C in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
	Rune       func(rune) Matcher
	Runes      func(...rune) Matcher
	RuneRange  func(rune, rune) Matcher
	Str        func(string) Matcher
	StrNoCase  func(string) Matcher
	Any        func(...Matcher) Matcher
	Not        func(Matcher) Matcher
	Opt        func(Matcher) Matcher
	Seq        func(...Matcher) Matcher
	Rep        func(int, Matcher) Matcher
	Min        func(int, Matcher) Matcher
	Max        func(int, Matcher) Matcher
	ZeroOrMore func(Matcher) Matcher
	OneOrMore  func(Matcher) Matcher
	MinMax     func(int, int, Matcher) Matcher
	Separated  func(separator Matcher, separated Matcher) Matcher
	Except     func(except Matcher, matcher Matcher) Matcher
}{
	Rune:       MatchRune,
	Runes:      MatchRunes,
	RuneRange:  MatchRuneRange,
	Str:        MatchStr,
	StrNoCase:  MatchStrNoCase,
	Any:        MatchAny,
	Not:        MatchNot,
	Opt:        MatchOpt,
	Seq:        MatchSeq,
	Rep:        MatchRep,
	Min:        MatchMin,
	Max:        MatchMax,
	ZeroOrMore: MatchZeroOrMore,
	OneOrMore:  MatchOneOrMore,
	MinMax:     MatchMinMax,
	Separated:  MatchSeparated,
	Except:     MatchExcept,
}
// MatchRune creates a Matcher that succeeds when the next rune read from the
// input equals the expected rune. On success the rune is accepted through
// m.Accept; in every other case (no rune available, or a different rune)
// the Matcher reports a mismatch without accepting anything.
func MatchRune(expected rune) Matcher {
	return func(m *MatchDialog) bool {
		next, ok := m.NextRune()
		if !ok || next != expected {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRunes creates a Matcher that checks if the next rune from the input
// is one of the provided runes. A matching rune is accepted through
// m.Accept.
//
// The candidate runes are converted to a string once, when the Matcher is
// created, so that each match attempt is a single rune lookup.
func MatchRunes(expected ...rune) Matcher {
	candidates := string(expected)
	return func(m *MatchDialog) bool {
		next, ok := m.NextRune()
		if !ok || !strings.ContainsRune(candidates, next) {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRuneRange creates a Matcher that checks if the next rune from the
// input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
//
//	MatchRuneRange('g', 'k')
//
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// A range whose end lies before its start is a definition error. It is
// reported by a panic the first time the resulting Matcher is run (not when
// MatchRuneRange itself is called).
func MatchRuneRange(start rune, end rune) Matcher {
	return func(m *MatchDialog) bool {
		if end < start {
			// The condition that triggers this panic is start > end, so
			// that is what the message must say.
			panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be > end %q", start, end))
		}
		input, ok := m.NextRune()
		if !ok || input < start || input > end {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchStr creates a Matcher that will check if the upcoming runes on the
// input match the provided string.
//
// The string is split into its runes and each rune gets its own MatchRune
// Matcher; those are then chained using MatchSeq.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) Matcher {
	runes := []rune(expected)
	sequence := make([]Matcher, len(runes))
	for i, r := range runes {
		sequence[i] = MatchRune(r)
	}
	return MatchSeq(sequence...)
}
// MatchStrNoCase creates a Matcher that will check if the upcoming runes
// on the input match the provided string in a case-insensitive manner.
//
// For every rune of the string, a MatchRunes Matcher is built that accepts
// either the upper case or the lower case variant of that rune; those are
// then chained using MatchSeq.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) Matcher {
	runes := []rune(expected)
	sequence := make([]Matcher, len(runes))
	for i, r := range runes {
		sequence[i] = MatchRunes(unicode.ToUpper(r), unicode.ToLower(r))
	}
	return MatchSeq(sequence...)
}
// MatchOpt creates a Matcher that makes the provided Matcher optional.
// The provided Matcher is tried on a fork of the MatchDialog. When it
// applies, the fork is merged back; when it does not, the fork is simply
// dropped. Either way a successful match is reported.
func MatchOpt(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if attempt := m.Fork(); matcher(attempt) {
			attempt.Merge()
		}
		return true
	}
}
// MatchSeq creates a Matcher that checks if the provided Matchers can be
// applied in their exact order. All Matchers run against one shared fork of
// the MatchDialog. The fork is merged back only after every Matcher has
// applied; the first Matcher that fails makes the sequence as a whole fail,
// and the fork is left unmerged.
func MatchSeq(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		for i := range matchers {
			if applies := matchers[i](fork); !applies {
				return false
			}
		}
		fork.Merge()
		return true
	}
}
// MatchAny creates a Matcher that checks if any of the provided Matchers
// can be applied. They are tried in their provided order, each one on a
// fresh fork of the MatchDialog. The fork of the first Matcher that applies
// is merged back and the result of that merge is returned. When none of
// the Matchers applies, a mismatch is reported.
func MatchAny(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := 0; i < len(matchers); i++ {
			attempt := m.Fork()
			if !matchers[i](attempt) {
				continue
			}
			return attempt.Merge()
		}
		return false
	}
}
// MatchNot creates a Matcher that checks if the provided Matcher applies to
// the current input. The check is done on a fork that is never merged back.
// If the provided Matcher applies, a failed match is reported. If it does
// not, the next rune from the input is accepted and reported as a match;
// when no next rune can be read, a failed match is reported as well.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if matcher(m.Fork()) {
			return false
		}
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRep creates a Matcher that checks if the provided Matcher can be
// applied exactly the provided amount of times.
//
// Note that the input can contain more Matches for the provided matcher, e.g.:
//
//	MatchRep(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" of the input.
//
// MatchRep panics when times is negative. Without this check a negative
// value would be handed to matchMinMax as the maximum, where a negative
// maximum means "no upper limit", silently turning the Matcher into a
// zero-or-more match.
func MatchRep(times int, matcher Matcher) Matcher {
	if times < 0 {
		panic("internal parser error: MatchRep definition error: times must be >= 0")
	}
	return matchMinMax(times, times, matcher)
}
// MatchMin creates a Matcher that checks if the provided Matcher can be
// applied at least the provided minimum number of times.
// When more matches are possible, these will be included in the output.
//
// MatchMin panics when min is negative, in line with the validation that
// MatchMinMax performs on its min argument.
func MatchMin(min int, matcher Matcher) Matcher {
	if min < 0 {
		panic("internal parser error: MatchMin definition error: min must be >= 0")
	}
	// A negative max tells matchMinMax that there is no upper limit.
	return matchMinMax(min, -1, matcher)
}
// MatchMax creates a Matcher that checks if the provided Matcher can be
// applied at maximum the provided maximum number of times.
// When more matches are possible than the maximum, the Matcher stops after
// the maximum number of matches; the additional matches are not included
// in the output.
// Zero matches are considered a successful match.
//
// MatchMax panics when max is negative. Without this check a negative value
// would be interpreted by matchMinMax as "no upper limit", which is the
// opposite of what a maximum is meant to express.
func MatchMax(max int, matcher Matcher) Matcher {
	if max < 0 {
		panic("internal parser error: MatchMax definition error: max must be >= 0")
	}
	return matchMinMax(0, max, matcher)
}
// MatchZeroOrMore creates a Matcher that applies the provided Matcher as
// often as it keeps matching. All matches will be included in the output.
// Zero matches are considered a successful match.
func MatchZeroOrMore(matcher Matcher) Matcher {
	const (
		atLeast   = 0
		unlimited = -1 // a negative max tells matchMinMax not to cap the count
	)
	return matchMinMax(atLeast, unlimited, matcher)
}
// MatchOneOrMore creates a Matcher that requires the provided Matcher to
// apply at least once and then keeps applying it as often as it keeps
// matching. All matches will be included in the output.
func MatchOneOrMore(matcher Matcher) Matcher {
	const (
		atLeast   = 1
		unlimited = -1 // a negative max tells matchMinMax not to cap the count
	)
	return matchMinMax(atLeast, unlimited, matcher)
}
// MatchMinMax creates a Matcher that checks if the provided Matcher can
// be applied between the provided minimum and maximum number of times,
// inclusive. All matches will be included in the output.
//
// MatchMinMax panics right away when max or min is negative (max is checked
// first). A min that is larger than max is not detected here; matchMinMax
// panics on that when the resulting Matcher is run.
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
	switch {
	case max < 0:
		panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
	case min < 0:
		panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
	}
	return matchMinMax(min, max, matcher)
}
// matchMinMax is the shared implementation behind MatchRep, MatchMin,
// MatchMax, MatchZeroOrMore, MatchOneOrMore and MatchMinMax.
//
// The returned Matcher applies matcher repeatedly on one fork of the
// MatchDialog: first min times, all of which must succeed, and after that
// for as long as it keeps succeeding, up to max applications in total.
// A negative max means that there is no upper limit.
//
// The Matcher reports a mismatch when fewer than min applications succeed;
// in that case the fork is never merged back. Otherwise it reports a match.
//
// When max is non-negative and min is larger than max, the Matcher panics
// when it is run. Of the exported constructors, only MatchMinMax can pass
// such a combination, hence the panic message names MatchMinMax.
//
// NOTE(review): when max is negative and matcher can succeed without
// consuming input, the second loop looks like it would never terminate —
// confirm against the MatchDialog implementation.
func matchMinMax(min int, max int, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		child := m.Fork()
		if max >= 0 && min > max {
			panic(fmt.Sprintf("internal parser error: MatchMinMax definition error: max %d must not be < min %d", max, min))
		}
		total := 0
		// Check for the minimum required amount of matches.
		for total < min {
			total++
			if !matcher(child) {
				return false
			}
		}
		// The minimum has been met, so what was matched so far is merged.
		child.Merge()
		// No specified max: include the rest of the available matches.
		// Specified max: include the rest of the available matches, up to the max.
		// Every additional match is merged right away, so a failing attempt
		// leaves the earlier matches in place.
		for max < 0 || total < max {
			total++
			if !matcher(child) {
				break
			}
			child.Merge()
		}
		return true
	}
}
// MatchSeparated creates a Matcher that checks for a pattern of one or more
// Matchers of one type (the separated), separated by Matches of another type
// (the separator). All matches (separated + separator) are included in the
// output.
//
// Mind the argument order: the separator comes first, the separated Matcher
// second.
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
	// One follow-up item: a separator directly followed by a separated match.
	followUp := MatchSeq(separator, separated)
	return MatchSeq(separated, MatchZeroOrMore(followUp))
}
// MatchExcept creates a Matcher that checks if the provided matcher can be
// applied to the upcoming input, unless the except Matcher applies to that
// same input. The except Matcher is tried first, on a fork that is never
// merged back. When it applies, the match as a whole is treated as a
// mismatch and matcher is not tried at all. Otherwise the result of
// applying matcher to the MatchDialog itself is reported.
func MatchExcept(except Matcher, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		excluded := except(m.Fork())
		return !excluded && matcher(m)
	}
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// In parsekit, an atom is defined as a ready to go Matcher function.
//
// Most atoms match one specific rune and are named after that rune. The
// composed atoms are:
//
//   - CRLF: the string "\r\n".
//   - Newline: either "\r\n" or "\n".
//   - Whitespace: one or more spaces and/or tabs.
//   - WhitespaceAndNewlines: one or more spaces, tabs, "\r\n" and/or "\n".
//   - EndOfLine: "\r\n", "\n" or the end of the input.
//   - Digit, ASCII, ASCIILower, ASCIIUpper: a rune from the range '0'-'9',
//     '\x00'-'\x7F', 'a'-'z' or 'A'-'Z' respectively.
//   - HexDigit: a rune from '0'-'9', 'a'-'f' or 'A'-'F'.
//
// Every field is initialized here. In particular Newline is set, since an
// unset field would hold a nil Matcher, which panics as soon as it is
// called.
//
// When using A in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
	EndOfFile             Matcher
	AnyRune               Matcher
	Space                 Matcher
	Tab                   Matcher
	CR                    Matcher
	LF                    Matcher
	CRLF                  Matcher
	Excl                  Matcher
	DoubleQuote           Matcher
	Hash                  Matcher
	Dollar                Matcher
	Percent               Matcher
	Amp                   Matcher
	SingleQuote           Matcher
	RoundOpen             Matcher
	RoundClose            Matcher
	Asterisk              Matcher
	Plus                  Matcher
	Comma                 Matcher
	Minus                 Matcher
	Dot                   Matcher
	Slash                 Matcher
	Colon                 Matcher
	Semicolon             Matcher
	AngleOpen             Matcher
	Equal                 Matcher
	AngleClose            Matcher
	Question              Matcher
	At                    Matcher
	SquareOpen            Matcher
	Backslash             Matcher
	SquareClose           Matcher
	Caret                 Matcher
	Underscore            Matcher
	Backquote             Matcher
	CurlyOpen             Matcher
	Pipe                  Matcher
	CurlyClose            Matcher
	Tilde                 Matcher
	Newline               Matcher
	Whitespace            Matcher
	WhitespaceAndNewlines Matcher
	EndOfLine             Matcher
	Digit                 Matcher
	ASCII                 Matcher
	ASCIILower            Matcher
	ASCIIUpper            Matcher
	HexDigit              Matcher
}{
	EndOfFile:             MatchEndOfFile(),
	AnyRune:               MatchAnyRune(),
	Space:                 C.Rune(' '),
	Tab:                   C.Rune('\t'),
	CR:                    C.Rune('\r'),
	LF:                    C.Rune('\n'),
	CRLF:                  C.Str("\r\n"),
	Excl:                  C.Rune('!'),
	DoubleQuote:           C.Rune('"'),
	Hash:                  C.Rune('#'),
	Dollar:                C.Rune('$'),
	Percent:               C.Rune('%'),
	Amp:                   C.Rune('&'),
	SingleQuote:           C.Rune('\''),
	RoundOpen:             C.Rune('('),
	RoundClose:            C.Rune(')'),
	Asterisk:              C.Rune('*'),
	Plus:                  C.Rune('+'),
	Comma:                 C.Rune(','),
	Minus:                 C.Rune('-'),
	Dot:                   C.Rune('.'),
	Slash:                 C.Rune('/'),
	Colon:                 C.Rune(':'),
	Semicolon:             C.Rune(';'),
	AngleOpen:             C.Rune('<'),
	Equal:                 C.Rune('='),
	AngleClose:            C.Rune('>'),
	Question:              C.Rune('?'),
	At:                    C.Rune('@'),
	SquareOpen:            C.Rune('['),
	Backslash:             C.Rune('\\'),
	SquareClose:           C.Rune(']'),
	Caret:                 C.Rune('^'),
	Underscore:            C.Rune('_'),
	Backquote:             C.Rune('`'),
	CurlyOpen:             C.Rune('{'),
	Pipe:                  C.Rune('|'),
	CurlyClose:            C.Rune('}'),
	Tilde:                 C.Rune('~'),
	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that checks if the end of the input data
// has been reached. It reads a rune from a fork that is never merged back
// and it never accepts anything, so this Matcher will never produce output.
// It only reports a successful or a failing match through its boolean
// return value: successful when no rune could be read and the value that
// was returned instead is EOF.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		r, ok := m.Fork().NextRune()
		if ok {
			return false
		}
		return r == EOF
	}
}
// MatchAnyRune creates a Matcher function that checks if a valid rune can be
// read from the input. It reports back a successful match if the end of the
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
// The rune that was read is accepted through m.Accept.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}
// M provides convenient access to a range of modifiers that can be
// used when creating Matcher functions.
//
// In parsekit, a modifier is defined as a Matcher function that modifies the
// resulting output of another Matcher in some way. It does not do any matching
// against input of its own.
//
// Every field is bound to the Modify* function of the corresponding name
// (M.Drop is ModifyDrop, M.Trim is ModifyTrim, and so on), so both
// spellings can be used interchangeably.
//
// When using M in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
// var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
// Drop discards the output of the wrapped Matcher.
Drop func(Matcher) Matcher
// Trim, TrimLeft and TrimRight strip the characters from the cutset string
// from both sides, the left side or the right side of the output.
Trim func(Matcher, string) Matcher
TrimLeft func(Matcher, string) Matcher
TrimRight func(Matcher, string) Matcher
// ToLower and ToUpper convert the output to lower case or upper case.
ToLower func(Matcher) Matcher
ToUpper func(Matcher) Matcher
// Replace substitutes the output with the provided string.
Replace func(Matcher, string) Matcher
// ModifyByCallback feeds the output to the provided function and uses
// the return value of that function as the new output.
ModifyByCallback func(Matcher, func(string) string) Matcher
}{
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ModifyByCallback: ModifyByCallback,
}
// ModifyDrop creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is discarded completely (it is replaced by
// an empty string).
//
// Note that if the Matcher does not apply, a mismatch will be reported back,
// even though we would have dropped the output anyway. So if you would like
// to drop optional whitespace, then use something like:
//
//	M.Drop(C.Opt(A.Whitespace))
//
// instead of:
//
//	M.Drop(A.Whitespace)
//
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(matcher Matcher) Matcher {
	discard := func(string) string { return "" }
	return ModifyByCallback(matcher, discard)
}
// ModifyTrim creates a Matcher that checks if the provided Matcher applies.
// If it does, then the characters from the provided cutset are trimmed from
// both the left and the right of its output.
func ModifyTrim(matcher Matcher, cutset string) Matcher {
	const left, right = true, true
	return modifyTrim(matcher, cutset, left, right)
}
// ModifyTrimLeft creates a Matcher that checks if the provided Matcher
// applies. If it does, then the characters from the provided cutset are
// trimmed from the left of its output. The right side is left as it is.
func ModifyTrimLeft(matcher Matcher, cutset string) Matcher {
	const left, right = true, false
	return modifyTrim(matcher, cutset, left, right)
}
// ModifyTrimRight creates a Matcher that checks if the provided Matcher
// applies. If it does, then the characters from the provided cutset are
// trimmed from the right of its output. The left side is left as it is.
func ModifyTrimRight(matcher Matcher, cutset string) Matcher {
	const left, right = false, true
	return modifyTrim(matcher, cutset, left, right)
}
// modifyTrim is the shared implementation behind ModifyTrim, ModifyTrimLeft
// and ModifyTrimRight. It wraps matcher using ModifyByCallback, with a
// callback that trims the characters from cutset from the left of the
// output (when trimLeft is set) and after that from the right of the output
// (when trimRight is set). With neither flag set, the output is passed on
// unchanged.
func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) Matcher {
	// Collect the trim operations to apply, in the order left, right.
	var steps []func(string, string) string
	if trimLeft {
		steps = append(steps, strings.TrimLeft)
	}
	if trimRight {
		steps = append(steps, strings.TrimRight)
	}
	return ModifyByCallback(matcher, func(output string) string {
		for _, trim := range steps {
			output = trim(output, cutset)
		}
		return output
	})
}
// ModifyToUpper creates a Matcher that checks if the provided Matcher
// applies. If it does, then its output is converted to upper case using
// strings.ToUpper.
func ModifyToUpper(matcher Matcher) Matcher {
	upper := func(output string) string { return strings.ToUpper(output) }
	return ModifyByCallback(matcher, upper)
}
// ModifyToLower creates a Matcher that checks if the provided Matcher
// applies. If it does, then its output is converted to lower case using
// strings.ToLower.
func ModifyToLower(matcher Matcher) Matcher {
	lower := func(output string) string { return strings.ToLower(output) }
	return ModifyByCallback(matcher, lower)
}
// ModifyReplace creates a Matcher that checks if the provided Matcher
// applies. If it does, then its output is thrown away and the provided
// string s is used as the output instead.
func ModifyReplace(matcher Matcher, s string) Matcher {
	replacement := func(_ string) string { return s }
	return ModifyByCallback(matcher, replacement)
}
// ModifyByCallback creates a Matcher that checks if the provided matcher
// applies. The matcher is run on a fork of the MatchDialog. If it applies,
// then the output of the fork is converted to a string and fed to the
// provided modfunc. This is a simple function that takes a string on input
// and returns a possibly modified string on output. The return value of the
// modfunc replaces the output of the fork, after which the fork is merged
// back. If the matcher does not apply, a mismatch is reported and modfunc
// is not called.
func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		if !matcher(fork) {
			return false
		}
		modified := modfunc(string(fork.output))
		fork.output = []rune(modified)
		fork.Merge()
		return true
	}
}