go-parsekit/tokenhandlers_builtin.go

1329 lines
45 KiB
Go
Raw Blame History

package parsekit
import (
"fmt"
"io"
"net"
"runtime"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// C provides convenient access to a range of parser/combinators that can be
// used to construct TokenHandler functions.
//
// Parser/combinators are so called higher order functions that take in one
// or more other TokenHandlers and output a new TokenHandler. They can be
// used to combine TokenHandlers in useful ways to create new more complex
// TokenHandlers.
//
// When using C in your own parser, then it is advised to create a variable
// to reference it:
//
//     var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field is bound to the Match* function of the same name (e.g. C.Seq
// is MatchSeq). The parameter names in the field types below mirror the
// parameter order of those functions.
var C = struct {
	Any        func(...TokenHandler) TokenHandler
	Not        func(TokenHandler) TokenHandler
	Opt        func(TokenHandler) TokenHandler
	Seq        func(...TokenHandler) TokenHandler
	Rep        func(times int, handler TokenHandler) TokenHandler
	Min        func(min int, handler TokenHandler) TokenHandler
	Max        func(max int, handler TokenHandler) TokenHandler
	ZeroOrMore func(TokenHandler) TokenHandler
	OneOrMore  func(TokenHandler) TokenHandler
	MinMax     func(min int, max int, handler TokenHandler) TokenHandler
	// Separated is bound to MatchSeparated, which takes the separator as
	// its first argument and the separated TokenHandler as its second.
	// TODO reverse args for consistency, use string?
	Separated func(separator TokenHandler, separated TokenHandler) TokenHandler
	Except    func(except TokenHandler, handler TokenHandler) TokenHandler
}{
	Any:        MatchAny,
	Not:        MatchNot,
	Opt:        MatchOpt,
	Seq:        MatchSeq,
	Rep:        MatchRep,
	Min:        MatchMin,
	Max:        MatchMax,
	ZeroOrMore: MatchZeroOrMore,
	OneOrMore:  MatchOneOrMore,
	MinMax:     MatchMinMax,
	Separated:  MatchSeparated,
	Except:     MatchExcept,
}
// A provides convenient access to a range of atoms or functions to build atoms.
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
//     var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Fields of type TokenHandler are ready-made handlers that are constructed
// once, when the package is initialized. Fields with a func type are
// constructors that build a new TokenHandler from their arguments.
var A = struct {
	Rune           func(rune) TokenHandler
	Runes          func(...rune) TokenHandler
	RuneRange      func(rune, rune) TokenHandler
	Str            func(string) TokenHandler
	StrNoCase      func(string) TokenHandler
	EndOfFile      TokenHandler
	AnyRune        TokenHandler
	ValidRune      TokenHandler
	Space          TokenHandler
	Tab            TokenHandler
	CR             TokenHandler
	LF             TokenHandler
	CRLF           TokenHandler
	Excl           TokenHandler
	DoubleQuote    TokenHandler
	Hash           TokenHandler
	Dollar         TokenHandler
	Percent        TokenHandler
	Amp            TokenHandler
	SingleQuote    TokenHandler
	RoundOpen      TokenHandler
	LeftParen      TokenHandler
	RoundClose     TokenHandler
	RightParen     TokenHandler
	Asterisk       TokenHandler
	Multiply       TokenHandler
	Plus           TokenHandler
	Add            TokenHandler
	Comma          TokenHandler
	Minus          TokenHandler
	Subtract       TokenHandler
	Dot            TokenHandler
	Slash          TokenHandler
	Divide         TokenHandler
	Colon          TokenHandler
	Semicolon      TokenHandler
	AngleOpen      TokenHandler
	LessThan       TokenHandler
	Equal          TokenHandler
	AngleClose     TokenHandler
	GreaterThan    TokenHandler
	Question       TokenHandler
	At             TokenHandler
	SquareOpen     TokenHandler
	Backslash      TokenHandler
	SquareClose    TokenHandler
	Caret          TokenHandler
	Underscore     TokenHandler
	Backquote      TokenHandler
	CurlyOpen      TokenHandler
	Pipe           TokenHandler
	CurlyClose     TokenHandler
	Tilde          TokenHandler
	Newline        TokenHandler
	Blank          TokenHandler
	Blanks         TokenHandler
	Whitespace     TokenHandler
	EndOfLine      TokenHandler
	Digit          TokenHandler
	DigitNotZero   TokenHandler
	Digits         TokenHandler
	Float          TokenHandler
	Boolean        TokenHandler
	Integer        TokenHandler
	Signed         func(TokenHandler) TokenHandler
	IntegerBetween func(min int64, max int64) TokenHandler
	ASCII          TokenHandler
	ASCIILower     TokenHandler
	ASCIIUpper     TokenHandler
	Letter         TokenHandler
	Lower          TokenHandler
	Upper          TokenHandler
	HexDigit       TokenHandler
	Octet          TokenHandler
	IPv4           TokenHandler
	IPv4CIDRMask   TokenHandler
	IPv4Netmask    TokenHandler
	IPv4Net        TokenHandler
	IPv6           TokenHandler
	IPv6CIDRMask   TokenHandler
	IPv6Net        TokenHandler
}{
	Rune:        MatchRune,
	Runes:       MatchRunes,
	RuneRange:   MatchRuneRange,
	Str:         MatchStr,
	StrNoCase:   MatchStrNoCase,
	EndOfFile:   MatchEndOfFile(),
	AnyRune:     MatchAnyRune(),
	ValidRune:   MatchValidRune(),
	Space:       MatchRune(' '),
	Tab:         MatchRune('\t'),
	CR:          MatchRune('\r'),
	LF:          MatchRune('\n'),
	CRLF:        MatchStr("\r\n"),
	Excl:        MatchRune('!'),
	DoubleQuote: MatchRune('"'),
	Hash:        MatchRune('#'),
	Dollar:      MatchRune('$'),
	Percent:     MatchRune('%'),
	Amp:         MatchRune('&'),
	SingleQuote: MatchRune('\''),
	RoundOpen:   MatchRune('('),
	LeftParen:   MatchRune('('),
	RoundClose:  MatchRune(')'),
	RightParen:  MatchRune(')'),
	Asterisk:    MatchRune('*'),
	Multiply:    MatchRune('*'),
	Plus:        MatchRune('+'),
	Add:         MatchRune('+'),
	Comma:       MatchRune(','),
	Minus:       MatchRune('-'),
	Subtract:    MatchRune('-'),
	Dot:         MatchRune('.'),
	Slash:       MatchRune('/'),
	Divide:      MatchRune('/'),
	Colon:       MatchRune(':'),
	Semicolon:   MatchRune(';'),
	AngleOpen:   MatchRune('<'),
	LessThan:    MatchRune('<'),
	Equal:       MatchRune('='),
	AngleClose:  MatchRune('>'),
	GreaterThan: MatchRune('>'),
	Question:    MatchRune('?'),
	At:          MatchRune('@'),
	SquareOpen:  MatchRune('['),
	Backslash:   MatchRune('\\'),
	SquareClose: MatchRune(']'),
	Caret:       MatchRune('^'),
	Underscore:  MatchRune('_'),
	Backquote:   MatchRune('`'),
	CurlyOpen:   MatchRune('{'),
	Pipe:        MatchRune('|'),
	CurlyClose:  MatchRune('}'),
	Tilde:       MatchRune('~'),
	// Newline was declared in the struct but never initialized, which left
	// A.Newline a nil func that panics when called. It matches a single
	// newline: either "\r\n" or "\n" (EndOfLine without the end-of-file case).
	Newline:        MatchAny(MatchStr("\r\n"), MatchRune('\n')),
	Blank:          MatchBlank(),
	Blanks:         MatchBlanks(),
	Whitespace:     MatchWhitespace(),
	EndOfLine:      MatchEndOfLine(),
	Digit:          MatchDigit(),
	DigitNotZero:   MatchDigitNotZero(),
	Digits:         MatchDigits(),
	Integer:        MatchInteger(),
	Signed:         MatchSigned,
	IntegerBetween: MatchIntegerBetween,
	Float:          MatchFloat(),
	Boolean:        MatchBoolean(),
	ASCII:          MatchASCII(),
	ASCIILower:     MatchASCIILower(),
	ASCIIUpper:     MatchASCIIUpper(),
	Letter:         MatchUnicodeLetter(),
	Lower:          MatchUnicodeLower(),
	Upper:          MatchUnicodeUpper(),
	HexDigit:       MatchHexDigit(),
	Octet:          MatchOctet(false),
	IPv4:           MatchIPv4(true),
	IPv4CIDRMask:   MatchIPv4CIDRMask(true),
	IPv4Netmask:    MatchIPv4Netmask(true),
	IPv4Net:        MatchIPv4Net(true),
	IPv6:           MatchIPv6(true),
	IPv6CIDRMask:   MatchIPv6CIDRMask(true),
	IPv6Net:        MatchIPv6Net(true),
}
// M bundles the built-in modifiers: TokenHandler functions (parser/combinators
// by nature) that wrap another TokenHandler and alter the output that it
// produces.
//
// In parsekit, a modifier does not do any matching against the input of its
// own. It only modifies the resulting output of the TokenHandler it wraps.
//
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
//     var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field is bound to the Modify* function of the same name.
var M = struct {
	Drop       func(handler TokenHandler) TokenHandler
	Trim       func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimLeft   func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimRight  func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
	TrimSpace  func(handler TokenHandler) TokenHandler
	ToLower    func(handler TokenHandler) TokenHandler
	ToUpper    func(handler TokenHandler) TokenHandler
	Replace    func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
	ByCallback func(handler TokenHandler, modfunc func(string) string) TokenHandler
}{
	Drop:       ModifyDrop,
	Trim:       ModifyTrim,
	TrimLeft:   ModifyTrimLeft,
	TrimRight:  ModifyTrimRight,
	TrimSpace:  ModifyTrimSpace,
	ToLower:    ModifyToLower,
	ToUpper:    ModifyToUpper,
	Replace:    ModifyReplace,
	ByCallback: ModifyByCallback,
}
// T bundles the built-in Token producers: TokenHandler functions
// (parser/combinators by nature) that wrap another TokenHandler and are
// used for producing Tokens.
//
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
//     var t = parsekit.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field is bound to a Make*Token function. All of them, except for
// ByCallback, take the token type as their first argument and the
// TokenHandler to wrap as their second.
var T = struct {
	Str            func(toktype interface{}, handler TokenHandler) TokenHandler
	StrInterpreted func(toktype interface{}, handler TokenHandler) TokenHandler
	Byte           func(toktype interface{}, handler TokenHandler) TokenHandler
	Rune           func(toktype interface{}, handler TokenHandler) TokenHandler
	Int            func(toktype interface{}, handler TokenHandler) TokenHandler
	Int8           func(toktype interface{}, handler TokenHandler) TokenHandler
	Int16          func(toktype interface{}, handler TokenHandler) TokenHandler
	Int32          func(toktype interface{}, handler TokenHandler) TokenHandler
	Int64          func(toktype interface{}, handler TokenHandler) TokenHandler
	Uint           func(toktype interface{}, handler TokenHandler) TokenHandler
	Uint8          func(toktype interface{}, handler TokenHandler) TokenHandler
	Uint16         func(toktype interface{}, handler TokenHandler) TokenHandler
	Uint32         func(toktype interface{}, handler TokenHandler) TokenHandler
	Uint64         func(toktype interface{}, handler TokenHandler) TokenHandler
	Float32        func(toktype interface{}, handler TokenHandler) TokenHandler
	Float64        func(toktype interface{}, handler TokenHandler) TokenHandler
	Boolean        func(toktype interface{}, handler TokenHandler) TokenHandler
	ByCallback     func(handler TokenHandler, makeToken func(t *TokenAPI) *Token) TokenHandler
}{
	Str:            MakeStrLiteralToken,
	StrInterpreted: MakeStrInterpretedToken,
	Byte:           MakeByteToken,
	Rune:           MakeRuneToken,
	Int:            MakeIntToken,
	Int8:           MakeInt8Token,
	Int16:          MakeInt16Token,
	Int32:          MakeInt32Token,
	Int64:          MakeInt64Token,
	Uint:           MakeUintToken,
	Uint8:          MakeUint8Token,
	Uint16:         MakeUint16Token,
	Uint32:         MakeUint32Token,
	Uint64:         MakeUint64Token,
	Float32:        MakeFloat32Token,
	Float64:        MakeFloat64Token,
	Boolean:        MakeBooleanToken,
	ByCallback:     MakeTokenByCallback,
}
// MatchRune creates a TokenHandler function that reads a single rune from
// the input and compares it to the provided rune. The rune is accepted and a
// match is reported only when reading succeeded and the rune read is equal
// to the expected one.
func MatchRune(expected rune) TokenHandler {
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		if err != nil || r != expected {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRunes creates a TokenHandler function that reads a single rune from
// the input and reports a match when that rune is one of the provided runes.
func MatchRunes(expected ...rune) TokenHandler {
	// The candidates are stored as a string, so membership can be checked
	// using strings.ContainsRune.
	candidates := string(expected)
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		if err != nil || !strings.ContainsRune(candidates, r) {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRuneRange creates a TokenHandler function that checks if the input
// matches the provided rune range. The rune range is defined by a start and
// an end rune, inclusive, so:
//
//     MatchRuneRange('g', 'k')
//
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// The end rune must not be smaller than the start rune. When it is, the
// definition error is reported through callerPanic.
func MatchRuneRange(start rune, end rune) TokenHandler {
	if end < start {
		// The message names the actual violation (end < start), in the same
		// form as the "max must not be < min" messages used by the other
		// range-like handlers in this file.
		callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: end %q must not be < start %q", end, start)
	}
	return func(t *TokenAPI) bool {
		input, err := t.NextRune()
		if err == nil && input >= start && input <= end {
			t.Accept()
			return true
		}
		return false
	}
}
// MatchBlank creates a TokenHandler that matches a single blank character
// from the input, a blank being either a space or a tab.
//
// When you need whitespace matching, which also includes characters like
// newlines, then take a look at MatchWhitespace().
func MatchBlank() TokenHandler {
	space := MatchRune(' ')
	tab := MatchRune('\t')
	return MatchAny(space, tab)
}
// MatchBlanks creates a TokenHandler that matches a run of one or more blank
// characters (spaces and/or tabs) from the input.
//
// When you need whitespace matching, which also includes characters like
// newlines, then make use of MatchWhitespace().
func MatchBlanks() TokenHandler {
	blank := MatchBlank()
	return MatchOneOrMore(blank)
}
// MatchWhitespace creates a TokenHandler that matches a run of one or more
// whitespace characters from the input. What counts as whitespace is decided
// by unicode.IsSpace.
func MatchWhitespace() TokenHandler {
	whitespaceRune := MatchRuneByCallback(unicode.IsSpace)
	return MatchOneOrMore(whitespaceRune)
}
// MatchRuneByCallback creates a TokenHandler that reads a single rune from
// the input and hands it to the provided callback function. The rune is
// accepted and a match is reported when the callback returns true.
//
// The callback has the same signature as the unicode.Is* functions, so those
// can be passed in directly, e.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) TokenHandler {
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		// The callback is only consulted for runes that were read successfully.
		if err != nil || !callback(r) {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchEndOfLine creates a TokenHandler that matches the end of a line:
// a newline ("\r\n" or "\n") or the end of the file. The alternatives are
// tried in that order.
func MatchEndOfLine() TokenHandler {
	crlf := MatchStr("\r\n")
	lf := MatchRune('\n')
	eof := MatchEndOfFile()
	return MatchAny(crlf, lf, eof)
}
// MatchStr creates a TokenHandler that matches the input against the provided
// string. It is built as a sequence of MatchRune handlers, one for every rune
// in the string.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) TokenHandler {
	// len(expected) is the byte length, which is an upper bound for the
	// number of runes in the string.
	perRune := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRune(r))
	}
	return MatchSeq(perRune...)
}
// MatchStrNoCase creates a TokenHandler that matches the input against the
// provided string in a case-insensitive manner. It is built as a sequence of
// handlers, one per rune in the string, that each accept both the upper case
// and the lower case form of that rune.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) TokenHandler {
	// len(expected) is the byte length, which is an upper bound for the
	// number of runes in the string.
	perRune := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSeq(perRune...)
}
// MatchOpt creates a TokenHandler that makes the provided TokenHandler
// optional. The provided TokenHandler is tried on a fork of the TokenAPI.
// When it applies, the fork is merged, so its output is used. When it does
// not apply, nothing is merged. In both cases a successful match is reported.
func MatchOpt(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if attempt := t.Fork(); handler(attempt) {
			attempt.Merge()
		}
		return true
	}
}
// MatchSeq creates a TokenHandler that applies the provided TokenHandlers
// one after the other, in the order in which they were provided. A successful
// match is reported only when every one of them applies. As soon as one of
// them does not apply, a failed match is reported and nothing is merged back
// into the TokenAPI that the sequence was called with.
func MatchSeq(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		sequence := t.Fork()
		for i := range handlers {
			// Every step runs on its own fork of the sequence fork, and is
			// merged into the sequence fork only after it matched.
			step := sequence.Fork()
			if matched := handlers[i](step); !matched {
				return false
			}
			step.Merge()
		}
		sequence.Merge()
		return true
	}
}
// MatchAny creates a TokenHandler that tries the provided TokenHandlers in
// the order in which they were provided, each on a fresh fork of the
// TokenAPI. The first one that applies is merged and reported as a match.
// When none of them applies, a failed match is reported.
func MatchAny(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		for i := range handlers {
			if attempt := t.Fork(); handlers[i](attempt) {
				attempt.Merge()
				return true
			}
		}
		return false
	}
}
// MatchNot creates a TokenHandler that inverts the provided TokenHandler.
// The provided TokenHandler is probed on a fork that is never merged. When
// it applies, a failed match is reported. When it does not apply, the next
// rune is read from the input and accepted as the match. When no rune can be
// read, a failed match is reported as well.
func MatchNot(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if handler(t.Fork()) {
			return false
		}
		if _, err := t.NextRune(); err != nil {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
// applied exactly the provided amount of times.
//
// Note that the input can contain more than the provided number of matches, e.g.:
//
//     MatchRep(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input.
//
// The provided times must be >= 0. When it is not, the definition error is
// reported through callerPanic, like MatchMin and MatchMax do for their
// bounds.
func MatchRep(times int, handler TokenHandler) TokenHandler {
	// Without this check, a negative value would be passed on as the max
	// for matchMinMax, which treats a negative max as "no maximum". That
	// would silently turn the handler into a zero-or-more matcher.
	if times < 0 {
		callerPanic(1, "TokenHandler: MatchRep definition error at {caller}: times must be >= 0")
	}
	return matchMinMax(times, times, handler, "MatchRep")
}
// MatchMin creates a TokenHandler that requires the provided TokenHandler to
// apply at least min times. There is no upper bound: when more matches are
// possible, these are included in the output as well.
//
// The provided min must be >= 0, otherwise a definition error is reported
// through callerPanic.
func MatchMin(min int, handler TokenHandler) TokenHandler {
	// A negative max tells matchMinMax that there is no maximum.
	const noMax = -1
	if min < 0 {
		callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
	}
	return matchMinMax(min, noMax, handler, "MatchMin")
}
// MatchMax creates a TokenHandler that applies the provided TokenHandler at
// most max times. Matching stops once max matches have been made, even when
// more matches would be possible. Zero matches are considered a successful
// match.
//
// The provided max must be >= 0, otherwise a definition error is reported
// through callerPanic.
func MatchMax(max int, handler TokenHandler) TokenHandler {
	// No matches are required at all.
	const noMin = 0
	if max < 0 {
		callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
	}
	return matchMinMax(noMin, max, handler, "MatchMax")
}
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
// be applied zero or more times. All matches will be included in the output.
// Zero matches are considered a successful match.
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
	// The name is passed on for use in definition error messages. It was
	// previously misspelled as "MatchZeroOfMore".
	return matchMinMax(0, -1, handler, "MatchZeroOrMore")
}
// MatchOneOrMore creates a TokenHandler that requires the provided
// TokenHandler to apply at least once. There is no upper bound: all matches
// that can be made are included in the output.
func MatchOneOrMore(handler TokenHandler) TokenHandler {
	// A negative max tells matchMinMax that there is no maximum.
	const atLeast, noMax = 1, -1
	return matchMinMax(atLeast, noMax, handler, "MatchOneOrMore")
}
// MatchMinMax creates a TokenHandler that requires the provided TokenHandler
// to apply at least min times and that applies it at most max times (both
// inclusive). All matches that are made are included in the output.
//
// Both min and max must be >= 0, and max must not be smaller than min,
// otherwise a definition error is reported through callerPanic.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
	// The max bound is checked before the min bound.
	if max < 0 {
		callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
	}
	if min < 0 {
		callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
	}
	bounded := matchMinMax(min, max, handler, "MatchMinMax")
	return bounded
}
// matchMinMax is the shared implementation behind MatchRep, MatchMin,
// MatchMax, MatchZeroOrMore, MatchOneOrMore and MatchMinMax.
//
// It creates a TokenHandler that requires the provided handler to apply at
// least min times, and that applies it at most max times. A negative max
// means that there is no maximum. The name is the name of the exported
// function that is being defined, for use in the definition error that is
// reported through callerPanic when a non-negative max is smaller than min.
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
	if max >= 0 && min > max {
		callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
	}
	return func(t *TokenAPI) bool {
		child := t.Fork()
		matched := 0

		// The required matches: when one of them fails, the match as a whole
		// fails, and nothing has been merged yet.
		for ; matched < min; matched++ {
			if !handler(child) {
				return false
			}
		}
		child.Merge()

		// The optional matches: continue until the handler no longer applies
		// or until the max (if any) is reached. Every successful match is
		// merged right away.
		for ; max < 0 || matched < max; matched++ {
			if !handler(child) {
				break
			}
			child.Merge()
		}
		return true
	}
}
// MatchSeparated creates a TokenHandler that matches a list of one or more
// items (the separated TokenHandler) with a separator (the separator
// TokenHandler) between every two items. All matches (items and separators)
// are included in the output.
//
// Note the argument order: the separator comes first, the separated
// TokenHandler second.
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
	// One item, followed by zero or more (separator, item) pairs.
	moreItems := MatchZeroOrMore(MatchSeq(separator, separated))
	return MatchSeq(separated, moreItems)
}
// MatchExcept creates a TokenHandler that applies the provided handler,
// unless the except TokenHandler applies to the upcoming input. The except
// TokenHandler is probed first, on a fork that is never merged. When it
// applies, the match as a whole is treated as a mismatch. Otherwise, the
// result of the provided handler is reported.
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		probe := t.Fork()
		if excluded := except(probe); excluded {
			return false
		}
		return handler(t)
	}
}
// MatchSigned creates a TokenHandler that matches an optional '+' or '-'
// sign, followed by a match for the provided TokenHandler. This can be used
// to turn numeric atoms into a signed version, e.g.
//
//     A.Signed(A.Integer)
func MatchSigned(handler TokenHandler) TokenHandler {
	plus, minus := MatchRune('+'), MatchRune('-')
	optionalSign := MatchOpt(MatchAny(plus, minus))
	return MatchSeq(optionalSign, handler)
}
// MatchIntegerBetween creates a TokenHandler that checks for an integer
// value between the provided min and max boundaries (inclusive).
// It uses an int64 for checking internally, so you can check values
// ranging from -9223372036854775808 to 9223372036854775807.
//
// The input that is matched is an optional sign ('+' or '-'), followed by
// one or more digits. Input that cannot be represented as an int64 is
// reported as a failed match.
//
// The provided max must not be smaller than the provided min, otherwise a
// definition error is reported through callerPanic.
func MatchIntegerBetween(min int64, max int64) TokenHandler {
	if max < min {
		callerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
	}
	digits := MatchSigned(MatchDigits())
	return func(t *TokenAPI) bool {
		if !digits(t) {
			return false
		}
		value, err := strconv.ParseInt(t.Result().String(), 10, 64)
		if err != nil {
			// On overflow, ParseInt returns the nearest int64 boundary along
			// with an error. Ignoring that error would let an out-of-range
			// input pass as a match whenever min or max is that boundary.
			return false
		}
		return value >= min && value <= max
	}
}
// MatchEndOfFile creates a TokenHandler that checks if the end of the input
// data has been reached. It reads a rune on a fork that is never merged, and
// reports a successful match when that read returns io.EOF. This
// TokenHandler never produces output. It only reports a successful or a
// failing match through its boolean return value.
func MatchEndOfFile() TokenHandler {
	return func(t *TokenAPI) bool {
		_, err := t.Fork().NextRune()
		return err == io.EOF
	}
}
// MatchAnyRune creates a TokenHandler function that checks if a rune can be
// read from the input. Any rune that can be read without an error is
// accepted. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError).
func MatchAnyRune() TokenHandler {
	return func(t *TokenAPI) bool {
		if _, err := t.NextRune(); err != nil {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchValidRune creates a TokenHandler function that checks if a valid
// UTF8 rune can be read from the input. A rune is accepted when it can be
// read without an error and when it is not utf8.RuneError.
func MatchValidRune() TokenHandler {
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		if err != nil || r == utf8.RuneError {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchDigit creates a TokenHandler that matches a single decimal digit
// ('0' up to and including '9') from the input.
func MatchDigit() TokenHandler {
	const first, last = '0', '9'
	return MatchRuneRange(first, last)
}
// MatchDigits creates a TokenHandler that matches a run of one or more
// decimal digits from the input.
func MatchDigits() TokenHandler {
	digit := MatchDigit()
	return MatchOneOrMore(digit)
}
// MatchDigitNotZero creates a TokenHandler that matches a single decimal
// digit other than zero ('1' up to and including '9') from the input.
func MatchDigitNotZero() TokenHandler {
	const first, last = '1', '9'
	return MatchRuneRange(first, last)
}
// MatchInteger creates a TokenHandler function that checks if a valid integer
// can be read from the input. In line with Go, an integer cannot start with
// a zero. Starting with a zero is used to indicate other bases, like octal or
// hexadecimal.
//
// The match is either a non-zero digit followed by zero or more digits, or
// a single '0'.
func MatchInteger() TokenHandler {
	nonZeroLed := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
	zero := MatchRune('0')
	return MatchAny(nonZeroLed, zero)
}
// MatchFloat creates a TokenHandler function that checks if a valid float
// value can be read from the input: one or more digits, optionally followed
// by a dot and one or more digits. Since the fractional part is optional,
// both "123" and "123.123" will match.
func MatchFloat() TokenHandler {
	digits := MatchDigits()
	fraction := MatchSeq(MatchRune('.'), digits)
	return MatchSeq(digits, MatchOpt(fraction))
}
// MatchBoolean creates a TokenHandler function that checks if a boolean
// value can be read from the input. It supports the boolean values as understood
// by Go's strconv.ParseBool() function.
//
// True values: true, TRUE, True, 1, t, T
//
// False values: false, FALSE, False, 0, f, F
//
// The full words are tried before the single character forms.
func MatchBoolean() TokenHandler {
	truthy := MatchAny(
		MatchStr("true"), MatchStr("TRUE"), MatchStr("True"),
		MatchRune('1'), MatchRune('t'), MatchRune('T'),
	)
	falsy := MatchAny(
		MatchStr("false"), MatchStr("FALSE"), MatchStr("False"),
		MatchRune('0'), MatchRune('f'), MatchRune('F'),
	)
	return MatchAny(truthy, falsy)
}
// MatchASCII creates a TokenHandler function that matches a single ASCII
// character (\x00 up to and including \x7F) from the input.
func MatchASCII() TokenHandler {
	const first, last = '\x00', '\x7F'
	return MatchRuneRange(first, last)
}
// MatchASCIILower creates a TokenHandler function that matches a single
// lower case ASCII letter (a - z) from the input.
func MatchASCIILower() TokenHandler {
	const first, last = 'a', 'z'
	return MatchRuneRange(first, last)
}
// MatchASCIIUpper creates a TokenHandler function that matches a single
// upper case ASCII letter (A - Z) from the input.
func MatchASCIIUpper() TokenHandler {
	const first, last = 'A', 'Z'
	return MatchRuneRange(first, last)
}
// MatchUnicodeLetter creates a TokenHandler function that matches a single
// unicode letter from the input (see unicode.IsLetter(rune)).
func MatchUnicodeLetter() TokenHandler {
	isLetter := unicode.IsLetter
	return MatchRuneByCallback(isLetter)
}
// MatchUnicodeUpper creates a TokenHandler function that matches a single
// upper case unicode letter from the input (see unicode.IsUpper(rune)).
func MatchUnicodeUpper() TokenHandler {
	isUpper := unicode.IsUpper
	return MatchRuneByCallback(isUpper)
}
// MatchUnicodeLower creates a TokenHandler function that matches a single
// lower case unicode letter from the input (see unicode.IsLower(rune)).
func MatchUnicodeLower() TokenHandler {
	isLower := unicode.IsLower
	return MatchRuneByCallback(isLower)
}
// MatchHexDigit creates a TokenHandler function that checks if a single
// hexadecimal digit (0 - 9, a - f or A - F) can be read from the input.
func MatchHexDigit() TokenHandler {
	decimal := MatchRuneRange('0', '9')
	lower := MatchRuneRange('a', 'f')
	upper := MatchRuneRange('A', 'F')
	return MatchAny(decimal, lower, upper)
}
// MatchOctet creates a TokenHandler function that checks if a valid octet
// value can be read from the input (octet = byte value representation, with
// a value between 0 and 255 inclusive). It only looks at the first 1 to 3
// upcoming digits, not if there's a non-digit after it, meaning that
// "123255" would be a valid sequence of two octets.
//
// When the normalize parameter is set to true, then leading zeroes will be
// stripped from the octet. At least one digit is always kept, so "000" is
// normalized to "0".
func MatchOctet(normalize bool) TokenHandler {
	upTo3Digits := MatchMinMax(1, 3, MatchDigit())
	return func(t *TokenAPI) bool {
		if !upTo3Digits(t) {
			return false
		}
		// The parse error is ignored here, as it was before.
		if number, _ := strconv.ParseInt(t.Result().String(), 10, 16); number > 255 {
			return false
		}
		if !normalize {
			return true
		}
		stripped := t.Result().Runes()
		for len(stripped) > 1 && stripped[0] == '0' {
			stripped = stripped[1:]
		}
		t.Result().SetRunes(stripped)
		return true
	}
}
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
// IP address value can be read from the input: four octets (see MatchOctet),
// separated by dots.
//
// When the normalize parameter is true, IP-addresses that look like
// "192.168.001.012" will be normalized to "192.168.1.12".
func MatchIPv4(normalize bool) TokenHandler {
	octet, dot := MatchOctet(normalize), MatchRune('.')
	return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
}
// MatchIPv4CIDRMask creates a TokenHandler function that checks if a
// valid IPv4 CIDR mask (0 - 32) value can be read from the input.
//
// The normalize parameter is passed on to matchCIDRMask.
func MatchIPv4CIDRMask(normalize bool) TokenHandler {
	const ipv4Bits = 32
	return matchCIDRMask(ipv4Bits, normalize)
}
// MatchIPv4Netmask creates a TokenHandler function that checks if a valid
// IPv4 netmask can be read from input (e.g. 255.255.255.0).
// Only a netmask in canonical form is accepted (meaning that in binary form
// it starts with zero or more 1-bits, followed by only 0-bits up to the
// 32 bit length).
//
// When the normalize parameter is true, netmasks that look like
// "255.255.192.000" will be normalized to "255.255.192.0".
func MatchIPv4Netmask(normalize bool) TokenHandler {
	// Every octet is turned into a uint8 Token, so the four octet values can
	// be retrieved from the result for building the mask.
	octet := MakeUint8Token(nil, MatchOctet(normalize))
	dot := MatchRune('.')
	dottedQuad := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
	return func(t *TokenAPI) bool {
		if !dottedQuad(t) {
			return false
		}
		result := t.Result()
		a, b := result.Value(0).(byte), result.Value(1).(byte)
		c, d := result.Value(2).(byte), result.Value(3).(byte)
		// IPMask.Size() returns 0, 0 for a mask that is not in canonical
		// form (ones followed by zeroes).
		if ones, bits := net.IPv4Mask(a, b, c, d).Size(); ones == 0 && bits == 0 {
			return false
		}
		// The octet Tokens were only needed for the check above.
		result.ClearTokens()
		return true
	}
}
// MatchIPv4Net creates a TokenHandler function that checks the input for an
// IPv4 + mask input. Both <ip>/<cidr> (e.g. 192.168.0.1/24) and <ip>/<netmask>
// (e.g. 172.16.10.254/255.255.192.0) are acceptable.
//
// When the normalize parameter is true, then the IP address and the mask are
// normalized. The mask will be normalized to cidr, so the above example would
// be normalized to 172.16.10.254/18.
func MatchIPv4Net(normalize bool) TokenHandler {
	// The ip and the mask are turned into Tokens, so the normalization code
	// below can tell which of the two mask forms was matched.
	address := MakeStrLiteralToken("ip", MatchIPv4(normalize))
	separator := MatchRune('/')
	anyMask := MatchAny(
		MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
		MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
	network := MatchSeq(address, separator, anyMask)
	return func(t *TokenAPI) bool {
		if !network(t) {
			return false
		}
		// NOTE(review): in this case, the function returns without calling
		// ClearTokens() - confirm that this is intended.
		if !normalize {
			return true
		}
		result := t.Result()
		switch result.Token(1).Type {
		case "cidr":
			result.SetRunes(fmt.Sprintf("%s/%d", result.Value(0), result.Value(1).(uint8)))
		default:
			// A dotted netmask: convert it into the number of leading one
			// bits, which is the cidr form of the mask.
			octets := strings.Split(result.Value(1).(string), ".")
			octetAt := func(idx int) byte { n, _ := strconv.Atoi(octets[idx]); return byte(n) }
			ones, _ := net.IPv4Mask(octetAt(0), octetAt(1), octetAt(2), octetAt(3)).Size()
			result.SetRunes(fmt.Sprintf("%s/%d", result.Value(0), ones))
		}
		result.ClearTokens()
		return true
	}
}
// MatchIPv6 creates a TokenHandler function that checks if an IPv6 address
// can be read from the input.
//
// The input is first scanned for hextets (1 to 4 hexadecimal digits) and
// colons. What was scanned is then validated using net.ParseIP().
//
// When the normalize parameter is true, the output is replaced with the
// string representation of the parsed IP address.
func MatchIPv6(normalize bool) TokenHandler {
	hextet := MatchMinMax(1, 4, MatchHexDigit())
	colon := MatchRune(':')
	empty := MatchSeq(colon, colon)
	return func(t *TokenAPI) bool {
		hextets := 0
		for hextets < 8 {
			if hextet(t) {
				hextets++
				continue
			}
			if empty(t) {
				// A "::" stands for two or more hextets.
				hextets += 2
				continue
			}
			if !colon(t) {
				break
			}
		}

		// No hextets or too many hextets (e.g. 1:1:1:1:1:1:1:: <-- since :: is 2 or more hextets).
		if hextets == 0 || hextets > 8 {
			return false
		}

		// Invalid IPv6, when net.ParseIP() cannot handle it.
		ip := net.ParseIP(t.Result().String())
		if ip == nil {
			return false
		}

		if normalize {
			t.Result().SetRunes(ip.String())
		}
		return true
	}
}
// MatchIPv6CIDRMask creates a TokenHandler function that checks if a
// valid IPv6 CIDR mask (0 - 128) value can be read from the input.
//
// The normalize parameter is passed on to matchCIDRMask.
func MatchIPv6CIDRMask(normalize bool) TokenHandler {
	const ipv6Bits = 128
	return matchCIDRMask(ipv6Bits, normalize)
}
// matchCIDRMask is the shared implementation behind MatchIPv4CIDRMask and
// MatchIPv6CIDRMask. It creates a TokenHandler that matches an integer
// between 0 and the provided number of bits (inclusive).
//
// When the normalize parameter is true, the output is replaced with the
// plain decimal representation of the matched value.
func matchCIDRMask(bits int64, normalize bool) TokenHandler {
	inRange := MatchIntegerBetween(0, bits)
	if normalize {
		return func(t *TokenAPI) bool {
			if !inRange(t) {
				return false
			}
			// The conversion error is ignored here, as it was before.
			value, _ := strconv.Atoi(t.Result().String())
			t.Result().SetRunes(strconv.Itoa(value))
			return true
		}
	}
	return inRange
}
// MatchIPv6Net creates a TokenHandler function that checks the input for an
// IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
//
// When the normalize parameter is true, then the IP address and the mask are
// normalized. The above example would be normalized to fe80::216:3eff:fe96:2/64.
func MatchIPv6Net(normalize bool) TokenHandler {
	return MatchSeq(MatchIPv6(normalize), MatchRune('/'), MatchIPv6CIDRMask(normalize))
}
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler
// applies. If it does, then its output is replaced with an empty string,
// which effectively discards it.
//
// Note that if the TokenHandler does not apply, a mismatch will be reported
// back, even though we would have dropped the output anyway. So if you would
// like to drop optional blanks (spaces and tabs), then use something like:
//
//     M.Drop(C.Opt(A.Blanks))
//
// instead of:
//
//     M.Drop(A.Blanks)
//
// Since A.Blanks is defined as "1 or more spaces and/or tabs", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler TokenHandler) TokenHandler {
	discard := func(string) string { return "" }
	return ModifyByCallback(handler, discard)
}
// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler
// applies. If it does, then the characters from the provided cutset are
// trimmed from both the left and the right of its output.
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, true
	return modifyTrim(handler, cutset, left, right)
}
// ModifyTrimLeft creates a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then the characters from the provided
// cutset are trimmed from the left of its output.
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, false
	return modifyTrim(handler, cutset, left, right)
}
// ModifyTrimRight creates a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then the characters from the provided
// cutset are trimmed from the right of its output.
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
	const left, right = false, true
	return modifyTrim(handler, cutset, left, right)
}
// modifyTrim is the shared implementation behind ModifyTrim, ModifyTrimLeft
// and ModifyTrimRight. It creates a TokenHandler that trims the characters
// from the provided cutset from the left and/or the right of the output of
// the provided handler, depending on the trimLeft and trimRight flags.
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
	var trim func(string) string
	switch {
	case trimLeft && trimRight:
		trim = func(s string) string {
			return strings.TrimRight(strings.TrimLeft(s, cutset), cutset)
		}
	case trimLeft:
		trim = func(s string) string { return strings.TrimLeft(s, cutset) }
	case trimRight:
		trim = func(s string) string { return strings.TrimRight(s, cutset) }
	default:
		// Nothing to trim: the output is passed through as-is.
		trim = func(s string) string { return s }
	}
	return ModifyByCallback(handler, trim)
}
// ModifyTrimSpace creates a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then all leading and trailing whitespace
// characters, as defined by Unicode, are removed from its output (see
// strings.TrimSpace).
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
	trimSpace := strings.TrimSpace
	return ModifyByCallback(handler, trimSpace)
}
// ModifyToUpper creates a TokenHandler that checks if the provided
// TokenHandler applies. If it does, then its output is converted into
// upper case (see strings.ToUpper).
func ModifyToUpper(handler TokenHandler) TokenHandler {
	toUpper := strings.ToUpper
	return ModifyByCallback(handler, toUpper)
}
// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and it is converted into lower case
// using strings.ToLower.
func ModifyToLower(handler TokenHandler) TokenHandler {
return ModifyByCallback(handler, strings.ToLower)
}
// ModifyReplace builds a TokenHandler around the provided TokenHandler. When
// the wrapped handler applies, the output that it produced is discarded and
// the replaceWith string is used as the output instead.
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
	substitute := func(string) string { return replaceWith }
	return ModifyByCallback(handler, substitute)
}
// ModifyByCallback builds a TokenHandler around the provided TokenHandler.
// The wrapped handler is run against a fork of the TokenAPI. When it does not
// apply, the new handler reports false and the fork is not merged. When it
// does apply, the string output of the fork is passed to modfunc, the string
// that modfunc returns is stored as the fork's output, and the fork is merged
// back into the TokenAPI that it was forked from.
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !handler(fork) {
			return false
		}
		modified := modfunc(fork.Result().String())
		fork.Result().SetRunes(modified)
		fork.Merge()
		return true
	}
}
// MakeStrLiteralToken builds a TokenHandler that adds a Token of type toktype
// to the result when the provided handler applies. The Token.Value is the
// string form of the runes that were read, taken literally: an escape
// sequence like "\n" stays a backslash character followed by an 'n'-character.
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		token := &Token{Type: toktype}
		token.Value = t.Result().String()
		token.Runes = t.Result().Runes()
		return token
	}
	return MakeTokenByCallback(handler, build)
}
// MakeStrInterpretedToken builds a TokenHandler that adds a Token of type
// toktype to the result when the provided handler applies. The Token.Value is
// the string form of the runes that were read, after interpreting escape
// sequences: "\n", for example, is translated to an actual newline control
// character.
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		// TODO ERROR HANDLING
		// The error from interpretString is dropped for now, since this
		// callback has no way to report it. The token is created with
		// whatever string interpretString returned.
		value, _ := interpretString(t.Result().String())
		return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
	}
	return MakeTokenByCallback(handler, build)
}
// interpretString translates the escape sequences in str (as understood by
// strconv.UnquoteChar for a double-quoted Go string) into the characters
// that they represent, e.g. the two characters `\n` become a newline.
//
// Byte escapes (`\xNN` and octal `\NNN`) are written to the output as a
// single raw byte, just like Go does for string literals. This makes it
// possible to spell out a multi-byte UTF-8 sequence byte by byte
// (`\xe2\x82\xac` results in "€"). All other input is written as a UTF-8
// encoded rune.
//
// When an invalid escape sequence or an unescaped double quote is
// encountered, the error from strconv.UnquoteChar is returned, together with
// the part of the string that was successfully interpreted up to that point.
func interpretString(str string) (string, error) {
	var sb strings.Builder
	for len(str) > 0 {
		r, multibyte, remainder, err := strconv.UnquoteChar(str, '"')
		if err != nil {
			return sb.String(), err
		}
		str = remainder
		if multibyte {
			sb.WriteRune(r)
		} else {
			// Not multibyte means that r fits in a single byte: either plain
			// ASCII or a byte value produced by a \x or octal escape.
			sb.WriteByte(byte(r))
		}
	}
	return sb.String(), nil
}
// MakeRuneToken builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the first
// rune (index 0) of the handler's output.
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		// TODO ERROR HANDLING --- not a 1 rune input
		token := &Token{Type: toktype}
		token.Runes = t.Result().Runes()
		token.Value = t.Result().Rune(0)
		return token
	}
	return MakeTokenByCallback(handler, build)
}
// MakeByteToken builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the first
// rune (index 0) of the handler's output, converted to a byte. The conversion
// keeps only the lowest 8 bits of the rune value.
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
	build := func(t *TokenAPI) *Token {
		// TODO ERROR HANDLING --- not a 1 byte input
		token := &Token{Type: toktype}
		token.Runes = t.Result().Runes()
		token.Value = byte(t.Result().Rune(0))
		return token
	}
	return MakeTokenByCallback(handler, build)
}
// MakeIntToken builds a TokenHandler that adds a Token of type toktype to the
// result when the provided handler applies. The Token.Value is the handler's
// output, converted to an int using strconv.Atoi. The conversion is done by
// makeStrconvToken, which panics when the output cannot be converted.
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt := func(s string) (interface{}, error) {
		parsed, err := strconv.Atoi(s)
		return parsed, err
	}
	return makeStrconvToken(toktype, handler, toInt)
}
// MakeInt8Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 integer and converted to an int8.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed or does not fit in 8 bits.
// TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt8 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseInt(s, 10, 8)
		if err != nil {
			return parsed, err
		}
		return int8(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toInt8)
}
// MakeInt16Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 integer and converted to an int16.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed or does not fit in 16 bits.
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt16 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseInt(s, 10, 16)
		if err != nil {
			return parsed, err
		}
		return int16(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toInt16)
}
// MakeInt32Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 integer and converted to an int32.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed or does not fit in 32 bits.
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt32 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseInt(s, 10, 32)
		if err != nil {
			return parsed, err
		}
		return int32(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toInt32)
}
// MakeInt64Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 integer of type int64.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed or does not fit in 64 bits.
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toInt64 := func(s string) (interface{}, error) {
		// strconv.ParseInt already yields an int64, so its results can be
		// handed over as-is, both on success and on failure.
		return strconv.ParseInt(s, 10, 64)
	}
	return makeStrconvToken(toktype, handler, toInt64)
}
// MakeUintToken builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 unsigned integer (strconv.ParseUint
// with bit size 0) and converted to a uint.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed.
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
	toUint := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseUint(s, 10, 0)
		if err != nil {
			return parsed, err
		}
		return uint(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toUint)
}
// MakeUint8Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 unsigned integer and converted to a
// uint8. The conversion is done by makeStrconvToken, which panics when the
// output cannot be parsed or does not fit in 8 bits.
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toUint8 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseUint(s, 10, 8)
		if err != nil {
			return parsed, err
		}
		return uint8(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toUint8)
}
// MakeUint16Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 unsigned integer and converted to a
// uint16. The conversion is done by makeStrconvToken, which panics when the
// output cannot be parsed or does not fit in 16 bits.
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toUint16 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseUint(s, 10, 16)
		if err != nil {
			return parsed, err
		}
		return uint16(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toUint16)
}
// MakeUint32Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 unsigned integer and converted to a
// uint32. The conversion is done by makeStrconvToken, which panics when the
// output cannot be parsed or does not fit in 32 bits.
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toUint32 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseUint(s, 10, 32)
		if err != nil {
			return parsed, err
		}
		return uint32(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toUint32)
}
// MakeUint64Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed as a base 10 unsigned integer of type uint64.
// The conversion is done by makeStrconvToken, which panics when the output
// cannot be parsed or does not fit in 64 bits.
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toUint64 := func(s string) (interface{}, error) {
		// strconv.ParseUint already yields a uint64, so its results can be
		// handed over as-is, both on success and on failure.
		return strconv.ParseUint(s, 10, 64)
	}
	return makeStrconvToken(toktype, handler, toUint64)
}
// MakeFloat32Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed by strconv.ParseFloat with bit size 32 and
// converted to a float32. The conversion is done by makeStrconvToken, which
// panics when strconv.ParseFloat returns an error.
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toFloat32 := func(s string) (interface{}, error) {
		parsed, err := strconv.ParseFloat(s, 32)
		if err != nil {
			return parsed, err
		}
		return float32(parsed), nil
	}
	return makeStrconvToken(toktype, handler, toFloat32)
}
// MakeFloat64Token builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, parsed by strconv.ParseFloat with bit size 64, which
// yields a float64. The conversion is done by makeStrconvToken, which panics
// when strconv.ParseFloat returns an error.
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
	toFloat64 := func(s string) (interface{}, error) {
		// strconv.ParseFloat already yields a float64, so its results can be
		// handed over as-is, both on success and on failure.
		return strconv.ParseFloat(s, 64)
	}
	return makeStrconvToken(toktype, handler, toFloat64)
}
// MakeBooleanToken builds a TokenHandler that adds a Token of type toktype to
// the result when the provided handler applies. The Token.Value is the
// handler's output, converted to a bool using strconv.ParseBool. The
// conversion is done by makeStrconvToken, which panics when
// strconv.ParseBool returns an error.
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
	toBool := func(s string) (interface{}, error) {
		// strconv.ParseBool already yields a bool, so its results can be
		// handed over as-is, both on success and on failure.
		return strconv.ParseBool(s)
	}
	return makeStrconvToken(toktype, handler, toBool)
}
// makeStrconvToken is the shared implementation behind the token makers that
// convert the handler output into a specific Go type. When the provided
// handler applies, the convert function is called with the string output of
// the handler, and the value that it returns is used as the Token.Value of a
// new Token of type toktype.
//
// When convert returns an error, this function panics with a message that
// includes the name of the calling function. That name is looked up using
// runtime.Caller(1), so makeStrconvToken must be called directly from the
// exported token maker function, not through an intermediate helper.
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
	// Resolve the unqualified name of our caller: everything after the last
	// dot in the fully qualified function name.
	callerPC, _, _, _ := runtime.Caller(1)
	qualified := runtime.FuncForPC(callerPC).Name()
	makerName := qualified[strings.LastIndex(qualified, ".")+1:]

	return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
		converted, err := convert(t.Result().String())
		if err == nil {
			return &Token{Type: toktype, Runes: t.Result().Runes(), Value: converted}
		}
		// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
		panic(fmt.Sprintf(
			"TokenHandler error: %s cannot handle input %q: %s "+
				"(only use a type conversion token maker, when the input has been "+
				"validated on beforehand)", makerName, t.Result().String(), err))
	})
}
// MakeTokenByCallback builds a TokenHandler that adds a Token to the result
// when the provided handler applies. The handler is run against a fork of
// the TokenAPI. When it does not apply, the new handler reports false. When
// it does apply, the callback is called with the fork as its argument and it
// must return a complete Token. That Token is added to the result of the
// TokenAPI that was forked, after which the fork is merged back into it.
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
	return func(t *TokenAPI) bool {
		fork := t.Fork()
		if !handler(fork) {
			return false
		}
		// The new token goes into the parent's result before the fork (and
		// anything that the handler stored in it) is merged.
		parent := t.Result()
		parent.AddToken(callback(fork))
		fork.Merge()
		return true
	}
}