646 lines
23 KiB
Go
646 lines
23 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// C provides convenient access to a range of parser/combinators that can be
|
|
// used to construct TokenHandler functions.
|
|
//
|
|
// When using C in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var c = parsekit.C
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var C = struct {
|
|
Rune func(rune) TokenHandler
|
|
Runes func(...rune) TokenHandler
|
|
RuneRange func(rune, rune) TokenHandler
|
|
Str func(string) TokenHandler
|
|
StrNoCase func(string) TokenHandler
|
|
Any func(...TokenHandler) TokenHandler
|
|
Not func(TokenHandler) TokenHandler
|
|
Opt func(TokenHandler) TokenHandler
|
|
Seq func(...TokenHandler) TokenHandler
|
|
Rep func(times int, handler TokenHandler) TokenHandler
|
|
Min func(min int, handler TokenHandler) TokenHandler
|
|
Max func(max int, handler TokenHandler) TokenHandler
|
|
ZeroOrMore func(TokenHandler) TokenHandler
|
|
OneOrMore func(TokenHandler) TokenHandler
|
|
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
|
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
|
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
|
Signed func(TokenHandler) TokenHandler
|
|
}{
|
|
Rune: MatchRune,
|
|
Runes: MatchRunes,
|
|
RuneRange: MatchRuneRange,
|
|
Str: MatchStr,
|
|
StrNoCase: MatchStrNoCase,
|
|
Opt: MatchOpt,
|
|
Any: MatchAny,
|
|
Not: MatchNot,
|
|
Seq: MatchSeq,
|
|
Rep: MatchRep,
|
|
Min: MatchMin,
|
|
Max: MatchMax,
|
|
ZeroOrMore: MatchZeroOrMore,
|
|
OneOrMore: MatchOneOrMore,
|
|
MinMax: MatchMinMax,
|
|
Separated: MatchSeparated,
|
|
Except: MatchExcept,
|
|
Signed: MatchSigned,
|
|
}
|
|
|
|
// MatchRune creates a TokenHandler function that checks if the next rune from
|
|
// the input matches the provided rune.
|
|
func MatchRune(expected rune) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
input, ok := t.NextRune()
|
|
if ok && input == expected {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRunes creates a TokenHandler function that that checks if the next rune
|
|
// from the input is one of the provided runes.
|
|
func MatchRunes(expected ...rune) TokenHandler {
|
|
s := string(expected)
|
|
return func(t *TokenAPI) bool {
|
|
input, ok := t.NextRune()
|
|
if ok {
|
|
if strings.ContainsRune(s, input) {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRuneRange creates a TokenHandler function that that checks if the next rune
|
|
// from the input is contained by the provided rune range.
|
|
//
|
|
// The rune range is defined by a start and an end rune, inclusive, so:
|
|
//
|
|
// MatchRuneRange('g', 'k')
|
|
//
|
|
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
|
func MatchRuneRange(start rune, end rune) TokenHandler {
|
|
if end < start {
|
|
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
|
}
|
|
return func(t *TokenAPI) bool {
|
|
input, ok := t.NextRune()
|
|
if ok && input >= start && input <= end {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchStr creates a TokenHandler that will check if the upcoming runes on the
|
|
// input match the provided string.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchStr(expected string) TokenHandler {
|
|
var handlers = []TokenHandler{}
|
|
for _, r := range expected {
|
|
handlers = append(handlers, MatchRune(r))
|
|
}
|
|
return MatchSeq(handlers...)
|
|
}
|
|
|
|
// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes
|
|
// on the input match the provided string in a case-insensitive manner.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchStrNoCase(expected string) TokenHandler {
|
|
var handlers = []TokenHandler{}
|
|
for _, r := range expected {
|
|
u := unicode.ToUpper(r)
|
|
l := unicode.ToLower(r)
|
|
handlers = append(handlers, MatchRunes(u, l))
|
|
}
|
|
return MatchSeq(handlers...)
|
|
}
|
|
|
|
// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
|
|
// When the provided TokenHandler applies, then its output is used, otherwise
|
|
// no output is generated but still a successful match is reported.
|
|
func MatchOpt(handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
|
|
// applied in their exact order. Only if all matcher apply, the sequence
|
|
// reports successful match.
|
|
func MatchSeq(handlers ...TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
for _, matcher := range handlers {
|
|
if !matcher(child) {
|
|
return false
|
|
}
|
|
}
|
|
child.Merge()
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers
|
|
// can be applied. They are applied in their provided order. The first TokenHandler
|
|
// that applies is used for reporting back a match.
|
|
func MatchAny(handlers ...TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
for _, handler := range handlers {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
return child.Merge()
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to
|
|
// the current input. If it does, then a failed match will be reported. If it
|
|
// does not, then the next rune from the input will be reported as a match.
|
|
func MatchNot(handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
probe := t.Fork()
|
|
if handler(probe) {
|
|
return false
|
|
}
|
|
_, ok := t.NextRune()
|
|
if ok {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied exactly the provided amount of times.
|
|
//
|
|
// Note that the input can contain more than the provided number of matches, e.g.:
|
|
//
|
|
// MatchRep(4, MatchRune('X'))
|
|
//
|
|
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
|
// In that last case, there will be a remainder "XX" on the input.
|
|
func MatchRep(times int, handler TokenHandler) TokenHandler {
|
|
return matchMinMax(times, times, handler, "MatchRep")
|
|
}
|
|
|
|
// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied at least the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
func MatchMin(min int, handler TokenHandler) TokenHandler {
|
|
if min < 0 {
|
|
panic("TokenHandler bug: MatchMin definition error: min must be >= 0")
|
|
}
|
|
return matchMinMax(min, -1, handler, "MatchMin")
|
|
}
|
|
|
|
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied at maximum the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchMax(max int, handler TokenHandler) TokenHandler {
|
|
if max < 0 {
|
|
panic("TokenHandler bug: MatchMax definition error: max must be >= 0")
|
|
}
|
|
return matchMinMax(0, max, handler, "MatchMax")
|
|
}
|
|
|
|
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied zero or more times. All matches will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
|
|
return matchMinMax(0, -1, handler, "MatchZeroOfMore")
|
|
}
|
|
|
|
// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied one or more times. All matches will be included in the output.
|
|
func MatchOneOrMore(handler TokenHandler) TokenHandler {
|
|
return matchMinMax(1, -1, handler, "MatchOneOrMore")
|
|
}
|
|
|
|
// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied between the provided minimum and maximum number of times,
|
|
// inclusive. All matches will be included in the output.
|
|
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
|
if max < 0 {
|
|
panic("TokenHandler bug: MatchMinMax definition error: max must be >= 0")
|
|
}
|
|
if min < 0 {
|
|
panic("TokenHandler bug: MatchMinMax definition error: min must be >= 0")
|
|
}
|
|
return matchMinMax(min, max, handler, "MatchMinMax")
|
|
}
|
|
|
|
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
|
|
if max >= 0 && min > max {
|
|
panic(fmt.Sprintf("TokenHandler bug: %s definition error: max %d must not be < min %d", name, max, min))
|
|
}
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
total := 0
|
|
// Check for the minimum required amount of matches.
|
|
for total < min {
|
|
total++
|
|
if !handler(child) {
|
|
return false
|
|
}
|
|
}
|
|
// No specified max: include the rest of the available matches.
|
|
// Specified max: include the rest of the availble matches, up to the max.
|
|
child.Merge()
|
|
for max < 0 || total < max {
|
|
total++
|
|
if !handler(child) {
|
|
break
|
|
}
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSeparated creates a TokenHandler that checks for a pattern of one or more
|
|
// TokenHandlers of one type (the separated), separated by TokenHandler of another type
|
|
// (the separator). All matches (separated + separator) are included in the
|
|
// output.
|
|
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
|
|
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
|
}
|
|
|
|
// MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied to the upcoming input. It also checks if the except TokenHandler can be
|
|
// applied. If the handler applies, but the except TokenHandler as well, then the match
|
|
// as a whole will be treated as a mismatch.
|
|
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
if except(t.Fork()) {
|
|
return false
|
|
}
|
|
return handler(t)
|
|
}
|
|
}
|
|
|
|
// MatchSigned creates a TokenHandler that checks if the provided TokenHandler is
|
|
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
|
// atoms into a signed version, e.g.
|
|
//
|
|
// C.Signed(A.Integer)
|
|
func MatchSigned(handler TokenHandler) TokenHandler {
|
|
sign := MatchOpt(MatchAny(MatchRune('+'), MatchRune('-')))
|
|
return MatchSeq(sign, handler)
|
|
}
|
|
|
|
// A provides convenient access to a range of atoms that can be used to
|
|
// build TokenHandlers or parser rules.
|
|
//
|
|
// In parsekit, an atom is defined as a ready for use TokenHandler function.
|
|
//
|
|
// When using A in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var a = parsekit.A
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var A = struct {
|
|
EndOfFile TokenHandler
|
|
AnyRune TokenHandler
|
|
Space TokenHandler
|
|
Tab TokenHandler
|
|
CR TokenHandler
|
|
LF TokenHandler
|
|
CRLF TokenHandler
|
|
Excl TokenHandler
|
|
DoubleQuote TokenHandler
|
|
Hash TokenHandler
|
|
Dollar TokenHandler
|
|
Percent TokenHandler
|
|
Amp TokenHandler
|
|
SingleQuote TokenHandler
|
|
RoundOpen TokenHandler
|
|
LeftParen TokenHandler
|
|
RoundClose TokenHandler
|
|
RightParen TokenHandler
|
|
Asterisk TokenHandler
|
|
Multiply TokenHandler
|
|
Plus TokenHandler
|
|
Add TokenHandler
|
|
Comma TokenHandler
|
|
Minus TokenHandler
|
|
Subtract TokenHandler
|
|
Dot TokenHandler
|
|
Slash TokenHandler
|
|
Divide TokenHandler
|
|
Colon TokenHandler
|
|
Semicolon TokenHandler
|
|
AngleOpen TokenHandler
|
|
LessThan TokenHandler
|
|
Equal TokenHandler
|
|
AngleClose TokenHandler
|
|
GreaterThan TokenHandler
|
|
Question TokenHandler
|
|
At TokenHandler
|
|
SquareOpen TokenHandler
|
|
Backslash TokenHandler
|
|
SquareClose TokenHandler
|
|
Caret TokenHandler
|
|
Underscore TokenHandler
|
|
Backquote TokenHandler
|
|
CurlyOpen TokenHandler
|
|
Pipe TokenHandler
|
|
CurlyClose TokenHandler
|
|
Tilde TokenHandler
|
|
Newline TokenHandler
|
|
Whitespace TokenHandler
|
|
WhitespaceAndNewlines TokenHandler
|
|
EndOfLine TokenHandler
|
|
Digit TokenHandler
|
|
DigitNotZero TokenHandler
|
|
Digits TokenHandler
|
|
Float TokenHandler
|
|
Integer TokenHandler
|
|
ASCII TokenHandler
|
|
ASCIILower TokenHandler
|
|
ASCIIUpper TokenHandler
|
|
HexDigit TokenHandler
|
|
}{
|
|
EndOfFile: MatchEndOfFile(),
|
|
AnyRune: MatchAnyRune(),
|
|
Space: C.Rune(' '),
|
|
Tab: C.Rune('\t'),
|
|
CR: C.Rune('\r'),
|
|
LF: C.Rune('\n'),
|
|
CRLF: C.Str("\r\n"),
|
|
Excl: C.Rune('!'),
|
|
DoubleQuote: C.Rune('"'),
|
|
Hash: C.Rune('#'),
|
|
Dollar: C.Rune('$'),
|
|
Percent: C.Rune('%'),
|
|
Amp: C.Rune('&'),
|
|
SingleQuote: C.Rune('\''),
|
|
RoundOpen: C.Rune('('),
|
|
LeftParen: C.Rune('('),
|
|
RoundClose: C.Rune(')'),
|
|
RightParen: C.Rune(')'),
|
|
Asterisk: C.Rune('*'),
|
|
Multiply: C.Rune('*'),
|
|
Plus: C.Rune('+'),
|
|
Add: C.Rune('+'),
|
|
Comma: C.Rune(','),
|
|
Minus: C.Rune('-'),
|
|
Subtract: C.Rune('-'),
|
|
Dot: C.Rune('.'),
|
|
Slash: C.Rune('/'),
|
|
Divide: C.Rune('/'),
|
|
Colon: C.Rune(':'),
|
|
Semicolon: C.Rune(';'),
|
|
AngleOpen: C.Rune('<'),
|
|
LessThan: C.Rune('<'),
|
|
Equal: C.Rune('='),
|
|
AngleClose: C.Rune('>'),
|
|
GreaterThan: C.Rune('>'),
|
|
Question: C.Rune('?'),
|
|
At: C.Rune('@'),
|
|
SquareOpen: C.Rune('['),
|
|
Backslash: C.Rune('\\'),
|
|
SquareClose: C.Rune(']'),
|
|
Caret: C.Rune('^'),
|
|
Underscore: C.Rune('_'),
|
|
Backquote: C.Rune('`'),
|
|
CurlyOpen: C.Rune('{'),
|
|
Pipe: C.Rune('|'),
|
|
CurlyClose: C.Rune('}'),
|
|
Tilde: C.Rune('~'),
|
|
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
|
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
|
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
|
Digit: MatchDigit(),
|
|
DigitNotZero: MatchDigitNotZero(),
|
|
Digits: MatchDigits(),
|
|
Integer: MatchInteger(),
|
|
Float: MatchFloat(),
|
|
ASCII: C.RuneRange('\x00', '\x7F'),
|
|
ASCIILower: C.RuneRange('a', 'z'),
|
|
ASCIIUpper: C.RuneRange('A', 'Z'),
|
|
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
|
}
|
|
|
|
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
|
// has been reached. This TokenHandler will never produce output. It only reports
|
|
// a successful or a failing match through its boolean return value.
|
|
func MatchEndOfFile() TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
fork := t.Fork()
|
|
input, ok := fork.NextRune()
|
|
return !ok && input == eofRune
|
|
}
|
|
}
|
|
|
|
// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be
|
|
// read from the input. It reports back a successful match if the end of the
|
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
|
func MatchAnyRune() TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
_, ok := t.NextRune()
|
|
if ok {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchDigit creates a TokenHandler that checks if a single digit can be read
|
|
// from the input.
|
|
func MatchDigit() TokenHandler {
|
|
return MatchRuneRange('0', '9')
|
|
}
|
|
|
|
// MatchDigits creates a TokenHandler that checks if one or more digits can be read
|
|
// from the input.
|
|
func MatchDigits() TokenHandler {
|
|
return MatchOneOrMore(MatchRuneRange('0', '9'))
|
|
}
|
|
|
|
// MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal
|
|
// to zero '0' can be read from the input.
|
|
func MatchDigitNotZero() TokenHandler {
|
|
return MatchRuneRange('1', '9')
|
|
}
|
|
|
|
// MatchInteger creates a TokenHandler function that checks if a valid integer
|
|
// can be read from the input. In line with Go, a integer cannot start with
|
|
// a zero. Starting with a zero is used to indicate other bases, like octal or
|
|
// hexadecimal.
|
|
func MatchInteger() TokenHandler {
|
|
justZero := MatchRune('0')
|
|
integer := C.Seq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
|
return MatchAny(integer, justZero)
|
|
}
|
|
|
|
// MatchFloat creates a TokenHandler function that checks if a valid float value
|
|
// can be read from the input. In case the fractional part is missing, this
|
|
// TokenHandler will report a match, so both "123" and "123.123" will match.
|
|
func MatchFloat() TokenHandler {
|
|
digits := MatchDigits()
|
|
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
|
}
|
|
|
|
// M provides convenient access to a range of modifiers (which in their nature are
|
|
// parser/combinators) that can be used when creating TokenHandler functions.
|
|
//
|
|
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
|
|
// resulting output of another TokenHandler in some way. It does not do any matching
|
|
// against input of its own.
|
|
//
|
|
// When using M in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var m = parsekit.M
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var M = struct {
|
|
Drop func(TokenHandler) TokenHandler
|
|
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimSpace func(handler TokenHandler) TokenHandler
|
|
ToLower func(TokenHandler) TokenHandler
|
|
ToUpper func(TokenHandler) TokenHandler
|
|
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
|
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
|
|
}{
|
|
Drop: ModifyDrop,
|
|
Trim: ModifyTrim,
|
|
TrimLeft: ModifyTrimLeft,
|
|
TrimRight: ModifyTrimRight,
|
|
TrimSpace: ModifyTrimSpace,
|
|
ToLower: ModifyToLower,
|
|
ToUpper: ModifyToUpper,
|
|
Replace: ModifyReplace,
|
|
ModifyByCallback: ModifyByCallback,
|
|
}
|
|
|
|
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is discarded completely.
|
|
//
|
|
// Note that if the TokenHandler does not apply, a mismatch will be reported back,
|
|
// even though we would have dropped the output anyway. So if you would like
|
|
// to drop optional whitespace, then use something like:
|
|
//
|
|
// M.Drop(C.Opt(A.Whitespace))
|
|
//
|
|
// instead of:
|
|
//
|
|
// M.Drop(A.Whitespace)
|
|
//
|
|
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
|
// string "bork" would not match against the second form, but " bork" would.
|
|
// In both cases, it would match the first form.
|
|
func ModifyDrop(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, func(s string) string {
|
|
return ""
|
|
})
|
|
}
|
|
|
|
// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from both the left and the right of the output.
|
|
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, true, true)
|
|
}
|
|
|
|
// ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the left of the output.
|
|
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, true, false)
|
|
}
|
|
|
|
// ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the right of the output.
|
|
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, false, true)
|
|
}
|
|
|
|
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
|
|
modfunc := func(s string) string {
|
|
if trimLeft {
|
|
s = strings.TrimLeft(s, cutset)
|
|
}
|
|
if trimRight {
|
|
s = strings.TrimRight(s, cutset)
|
|
}
|
|
return s
|
|
}
|
|
return ModifyByCallback(handler, modfunc)
|
|
}
|
|
|
|
// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and all leading and trailing whitespace charcters,
|
|
// as defined by Unicode (spaces, tabs, carriage returns and newlines) are removed from it.
|
|
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.TrimSpace)
|
|
}
|
|
|
|
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are converted into upper case.
|
|
func ModifyToUpper(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.ToUpper)
|
|
}
|
|
|
|
// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are converted into lower case.
|
|
func ModifyToLower(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.ToLower)
|
|
}
|
|
|
|
// ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is replaced by the provided string.
|
|
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
|
|
return ModifyByCallback(handler, func(string) string {
|
|
return replaceWith
|
|
})
|
|
}
|
|
|
|
// ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and it is fed to the provided modfunc.
|
|
// This is a simple function that takes a string on input and returns a possibly
|
|
// modified string on output. The return value of the modfunc will replace the
|
|
// resulting output.
|
|
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
s := modfunc(string(child.output))
|
|
child.output = []rune(s)
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|