478 lines
15 KiB
Go
478 lines
15 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// C provides convenient access to a range of parser/combinators
|
|
// that can be used to build Matcher functions.
|
|
//
|
|
// When using C in your own parser, then it is advised to create
|
|
// a variable in your own package to reference it:
|
|
//
|
|
// var c = parsekit.C
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var C = struct {
|
|
Rune func(rune) Matcher
|
|
Runes func(...rune) Matcher
|
|
RuneRange func(rune, rune) Matcher
|
|
String func(string) Matcher
|
|
StringNoCase func(string) Matcher
|
|
AnyOf func(...Matcher) Matcher
|
|
Not func(Matcher) Matcher
|
|
Optional func(Matcher) Matcher
|
|
Sequence func(...Matcher) Matcher
|
|
Repeat func(int, Matcher) Matcher
|
|
Min func(int, Matcher) Matcher
|
|
Max func(int, Matcher) Matcher
|
|
ZeroOrMore func(Matcher) Matcher
|
|
OneOrMore func(Matcher) Matcher
|
|
MinMax func(int, int, Matcher) Matcher
|
|
Separated func(Matcher, Matcher) Matcher
|
|
Drop func(Matcher) Matcher
|
|
Trim func(Matcher, string) Matcher
|
|
TrimLeft func(Matcher, string) Matcher
|
|
TrimRight func(Matcher, string) Matcher
|
|
}{
|
|
Rune: MatchRune,
|
|
Runes: MatchRunes,
|
|
RuneRange: MatchRuneRange,
|
|
String: MatchString,
|
|
StringNoCase: MatchStringNoCase,
|
|
Optional: MatchOptional,
|
|
AnyOf: MatchAnyOf,
|
|
Not: MatchNot,
|
|
Sequence: MatchSequence,
|
|
Repeat: MatchRepeat,
|
|
Min: MatchMin,
|
|
Max: MatchMax,
|
|
ZeroOrMore: MatchZeroOrMore,
|
|
OneOrMore: MatchOneOrMore,
|
|
MinMax: MatchMinMax,
|
|
Separated: MatchSeparated,
|
|
Drop: MatchDrop,
|
|
Trim: MatchTrim,
|
|
TrimLeft: MatchTrimLeft,
|
|
TrimRight: MatchTrimRight,
|
|
}
|
|
|
|
// A provides convenient access to a range of atoms that can be used to
|
|
// build combinators or parsing rules.
|
|
//
|
|
// In parsekit, an atom is defined as a ready to go Matcher function.
|
|
var A = struct {
|
|
EndOfFile Matcher
|
|
AnyRune Matcher
|
|
Space Matcher
|
|
Tab Matcher
|
|
CR Matcher
|
|
LF Matcher
|
|
CRLF Matcher
|
|
Excl Matcher
|
|
DoubleQuote Matcher
|
|
Hash Matcher
|
|
Dollar Matcher
|
|
Percent Matcher
|
|
Amp Matcher
|
|
SingleQuote Matcher
|
|
RoundOpen Matcher
|
|
RoundClose Matcher
|
|
Asterisk Matcher
|
|
Plus Matcher
|
|
Comma Matcher
|
|
Minus Matcher
|
|
Dot Matcher
|
|
Slash Matcher
|
|
Colon Matcher
|
|
Semicolon Matcher
|
|
AngleOpen Matcher
|
|
Equal Matcher
|
|
AngleClose Matcher
|
|
Question Matcher
|
|
At Matcher
|
|
SquareOpen Matcher
|
|
Backslash Matcher
|
|
SquareClose Matcher
|
|
Caret Matcher
|
|
Underscore Matcher
|
|
Backquote Matcher
|
|
CurlyOpen Matcher
|
|
Pipe Matcher
|
|
CurlyClose Matcher
|
|
Tilde Matcher
|
|
Newline Matcher
|
|
Whitespace Matcher
|
|
WhitespaceAndNewlines Matcher
|
|
EndOfLine Matcher
|
|
Digit Matcher
|
|
ASCII Matcher
|
|
ASCIILower Matcher
|
|
ASCIIUpper Matcher
|
|
HexDigit Matcher
|
|
}{
|
|
EndOfFile: MatchEndOfFile(),
|
|
AnyRune: MatchAnyRune(),
|
|
Space: C.Rune(' '),
|
|
Tab: C.Rune('\t'),
|
|
CR: C.Rune('\r'),
|
|
LF: C.Rune('\n'),
|
|
CRLF: C.String("\r\n"),
|
|
Excl: C.Rune('!'),
|
|
DoubleQuote: C.Rune('"'),
|
|
Hash: C.Rune('#'),
|
|
Dollar: C.Rune('$'),
|
|
Percent: C.Rune('%'),
|
|
Amp: C.Rune('&'),
|
|
SingleQuote: C.Rune('\''),
|
|
RoundOpen: C.Rune('('),
|
|
RoundClose: C.Rune(')'),
|
|
Asterisk: C.Rune('*'),
|
|
Plus: C.Rune('+'),
|
|
Comma: C.Rune(','),
|
|
Minus: C.Rune('-'),
|
|
Dot: C.Rune('.'),
|
|
Slash: C.Rune('/'),
|
|
Colon: C.Rune(':'),
|
|
Semicolon: C.Rune(';'),
|
|
AngleOpen: C.Rune('<'),
|
|
Equal: C.Rune('='),
|
|
AngleClose: C.Rune('>'),
|
|
Question: C.Rune('?'),
|
|
At: C.Rune('@'),
|
|
SquareOpen: C.Rune('['),
|
|
Backslash: C.Rune('\\'),
|
|
SquareClose: C.Rune(']'),
|
|
Caret: C.Rune('^'),
|
|
Underscore: C.Rune('_'),
|
|
Backquote: C.Rune('`'),
|
|
CurlyOpen: C.Rune('{'),
|
|
Pipe: C.Rune('|'),
|
|
CurlyClose: C.Rune('}'),
|
|
Tilde: C.Rune('~'),
|
|
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
|
|
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.String("\r\n"), C.Rune('\n'))),
|
|
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
|
Digit: C.RuneRange('0', '9'),
|
|
ASCII: C.RuneRange('\x00', '\x7F'),
|
|
ASCIILower: C.RuneRange('a', 'z'),
|
|
ASCIIUpper: C.RuneRange('A', 'Z'),
|
|
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
|
}
|
|
|
|
// MatchRune creates a Matcher function that checks if the next rune from
|
|
// the input matches the provided rune.
|
|
func MatchRune(expected rune) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
input, ok := m.NextRune()
|
|
if ok && input == expected {
|
|
m.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRunes creates a Matcher function that that checks if the next rune
|
|
// from the input is one of the provided runes.
|
|
func MatchRunes(expected ...rune) Matcher {
|
|
s := string(expected)
|
|
return func(m *MatchDialog) bool {
|
|
input, ok := m.NextRune()
|
|
if ok {
|
|
if strings.ContainsRune(s, input) {
|
|
m.Accept()
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRuneRange creates a Matcher function that that checks if the next rune
|
|
// from the input is contained by the provided rune range.
|
|
//
|
|
// The rune range is defined by a start and an end rune, inclusive, so:
|
|
//
|
|
// MatchRuneRange('g', 'k')
|
|
//
|
|
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
|
func MatchRuneRange(start rune, end rune) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
if end < start {
|
|
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
|
}
|
|
input, ok := m.NextRune()
|
|
if ok && input >= start && input <= end {
|
|
m.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchString creater a Matcher that will check if the upcoming runes on the
|
|
// input match the provided string.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchString(expected string) Matcher {
|
|
var matchers = []Matcher{}
|
|
for _, r := range expected {
|
|
matchers = append(matchers, MatchRune(r))
|
|
}
|
|
return MatchSequence(matchers...)
|
|
}
|
|
|
|
// MatchStringNoCase creater a Matcher that will check if the upcoming runes
|
|
// on the input match the provided string in a case-insensitive manner.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchStringNoCase(expected string) Matcher {
|
|
var matchers = []Matcher{}
|
|
for _, r := range expected {
|
|
u := unicode.ToUpper(r)
|
|
l := unicode.ToLower(r)
|
|
matchers = append(matchers, MatchRunes(u, l))
|
|
}
|
|
return MatchSequence(matchers...)
|
|
}
|
|
|
|
// MatchOptional creates a Matcher that makes the provided Matcher optional.
|
|
// When the provided Matcher applies, then its output is used, otherwise
|
|
// no output is generated but still a successful match is reported.
|
|
func MatchOptional(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSequence creates a Matcher that checks if the provided Matchers can be
|
|
// applied in their exact order. Only if all matcher apply, the sequence
|
|
// reports successful match.
|
|
func MatchSequence(matchers ...Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
for _, matcher := range matchers {
|
|
if !matcher(child) {
|
|
return false
|
|
}
|
|
}
|
|
child.Merge()
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchAnyOf creates a Matcher that checks if any of the provided Matchers
|
|
// can be applied. They are applied in their provided order. The first Matcher
|
|
// that applies is used for reporting back a match.
|
|
func MatchAnyOf(matchers ...Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
for _, matcher := range matchers {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
return child.Merge()
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchNot creates a Matcher that checks if the provided Matcher applies to
|
|
// the current input. If it does, then a failed match will be reported. If it
|
|
// does not, then the next rune from the input will be reported as a match.
|
|
func MatchNot(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
probe := m.Fork()
|
|
if matcher(probe) {
|
|
return false
|
|
}
|
|
_, ok := m.NextRune()
|
|
if ok {
|
|
m.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRepeat creates a Matcher that checks if the provided Matcher can be
|
|
// applied exactly the provided amount of times.
|
|
//
|
|
// Note that the input can contain more Matches for the provided matcher, e.g.:
|
|
//
|
|
// MatchRepeat(4, MatchRune('X'))
|
|
//
|
|
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
|
// In that last case, there will be a remainder "XX" of the input.
|
|
func MatchRepeat(times int, matcher Matcher) Matcher {
|
|
return matchMinMax(times, times, matcher)
|
|
}
|
|
|
|
// MatchMin creates a Matcher that checks if the provided Matcher can be
|
|
// applied at least the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
func MatchMin(min int, matcher Matcher) Matcher {
|
|
return matchMinMax(min, -1, matcher)
|
|
}
|
|
|
|
// MatchMax creates a Matcher that checks if the provided Matcher can be
|
|
// applied at maximum the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchMax(max int, matcher Matcher) Matcher {
|
|
return matchMinMax(0, max, matcher)
|
|
}
|
|
|
|
// MatchZeroOrMore creates a Matcher that checks if the provided Matcher can
|
|
// be applied zero or more times. All matches will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchZeroOrMore(matcher Matcher) Matcher {
|
|
return matchMinMax(0, -1, matcher)
|
|
}
|
|
|
|
// MatchOneOrMore creates a Matcher that checks if the provided Matcher can
|
|
// be applied one or more times. All matches will be included in the output.
|
|
func MatchOneOrMore(matcher Matcher) Matcher {
|
|
return matchMinMax(1, -1, matcher)
|
|
}
|
|
|
|
// MatchMinMax creates a Matcher that checks if the provided Matcher can
|
|
// be applied between the provided minimum and maximum number of times,
|
|
// inclusive. All matches will be included in the output.
|
|
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
|
if max < 0 {
|
|
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
|
}
|
|
if min < 0 {
|
|
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
|
}
|
|
return matchMinMax(min, max, matcher)
|
|
}
|
|
|
|
func matchMinMax(min int, max int, matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if max >= 0 && min > max {
|
|
panic(fmt.Sprintf("internal parser error: MatchRepeat definition error: max %d must not be < min %d", max, min))
|
|
}
|
|
total := 0
|
|
// Check for the minimum required amount of matches.
|
|
for total < min {
|
|
total++
|
|
if !matcher(child) {
|
|
return false
|
|
}
|
|
}
|
|
// No specified max: include the rest of the available matches.
|
|
// Specified max: include the rest of the availble matches, up to the max.
|
|
child.Merge()
|
|
for max < 0 || total < max {
|
|
total++
|
|
if !matcher(child) {
|
|
break
|
|
}
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSeparated creates a Matcher that checks for a pattern of one or more
|
|
// Matchers of one type (the separated), separated by Matches of another type
|
|
// (the separator). All matches (separated + separator) are included in the
|
|
// output.
|
|
func MatchSeparated(separated Matcher, separator Matcher) Matcher {
|
|
return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated)))
|
|
}
|
|
|
|
// MatchDrop creates a Matcher that checks if the provided Matcher applies.
|
|
// If it does, then a successful match is reported, but its output is not used.
|
|
// If the Matcher does not apply, a successful match is reported as well.
|
|
func MatchDrop(matcher Matcher) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
child.ClearOutput()
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchTrim creates a Matcher that checks if the provided Matcher applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from both the left and the right of the output.
|
|
// The trimmed output is reported back as the match output.
|
|
func MatchTrim(matcher Matcher, cutset string) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
return matchTrim(m, cutset, matcher, true, true)
|
|
}
|
|
}
|
|
|
|
// MatchTrimLeft creates a Matcher that checks if the provided Matcher applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the left of the output.
|
|
// The trimmed output is reported back as the match output.
|
|
func MatchTrimLeft(matcher Matcher, cutset string) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
return matchTrim(m, cutset, matcher, true, false)
|
|
}
|
|
}
|
|
|
|
// MatchTrimRight creates a Matcher that checks if the provided Matcher applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the right of the output.
|
|
// The trimmed output is reported back as the match output.
|
|
func MatchTrimRight(matcher Matcher, cutset string) Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
return matchTrim(m, cutset, matcher, false, true)
|
|
}
|
|
}
|
|
|
|
func matchTrim(m *MatchDialog, cutset string, matcher Matcher, trimLeft bool, trimRight bool) bool {
|
|
child := m.Fork()
|
|
if matcher(child) {
|
|
child.Merge()
|
|
s := string(m.output)
|
|
if trimLeft {
|
|
s = strings.TrimLeft(s, cutset)
|
|
}
|
|
if trimRight {
|
|
s = strings.TrimRight(s, cutset)
|
|
}
|
|
m.output = []rune(s)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// MatchEndOfFile creates a Matcher that checks if the end of the input data
|
|
// has been reached. This Matcher will never produce output. It only reports
|
|
// a successful or a failing match through its boolean return value.
|
|
func MatchEndOfFile() Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
fork := m.Fork()
|
|
input, ok := fork.NextRune()
|
|
return !ok && input == EOF
|
|
}
|
|
}
|
|
|
|
// MatchAnyRune creates a Matcher function that checks if a valid rune can be
|
|
// read from the input. It reports back a successful match if the end of the
|
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
|
func MatchAnyRune() Matcher {
|
|
return func(m *MatchDialog) bool {
|
|
_, ok := m.NextRune()
|
|
if ok {
|
|
m.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|