993 lines
34 KiB
Go
993 lines
34 KiB
Go
package parsekit
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// C provides convenient access to a range of parser/combinators that can be
|
|
// used to construct TokenHandler functions.
|
|
//
|
|
// Parser/combinators are so called higher order functions that take in one
|
|
// or more other TokenHandlers and output a new TokenHandler. They can be
|
|
// used to combine TokenHandlers in useful ways to create new more complex
|
|
// TokenHandlers.
|
|
//
|
|
// When using C in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var c = parsekit.C
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var C = struct {
|
|
Any func(...TokenHandler) TokenHandler
|
|
Not func(TokenHandler) TokenHandler
|
|
Opt func(TokenHandler) TokenHandler
|
|
Seq func(...TokenHandler) TokenHandler
|
|
Rep func(times int, handler TokenHandler) TokenHandler
|
|
Min func(min int, handler TokenHandler) TokenHandler
|
|
Max func(max int, handler TokenHandler) TokenHandler
|
|
ZeroOrMore func(TokenHandler) TokenHandler
|
|
OneOrMore func(TokenHandler) TokenHandler
|
|
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
|
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string?
|
|
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
|
}{
|
|
Opt: MatchOpt,
|
|
Any: MatchAny,
|
|
Not: MatchNot,
|
|
Seq: MatchSeq,
|
|
Rep: MatchRep,
|
|
Min: MatchMin,
|
|
Max: MatchMax,
|
|
ZeroOrMore: MatchZeroOrMore,
|
|
OneOrMore: MatchOneOrMore,
|
|
MinMax: MatchMinMax,
|
|
Separated: MatchSeparated,
|
|
Except: MatchExcept,
|
|
}
|
|
|
|
// A provides convenient access to a range of atoms or functions to build atoms.
|
|
//
|
|
// When using A in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var a = parsekit.A
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var A = struct {
|
|
Rune func(rune) TokenHandler
|
|
Runes func(...rune) TokenHandler
|
|
RuneRange func(rune, rune) TokenHandler
|
|
Str func(string) TokenHandler
|
|
StrNoCase func(string) TokenHandler
|
|
EndOfFile TokenHandler
|
|
AnyRune TokenHandler
|
|
Space TokenHandler
|
|
Tab TokenHandler
|
|
CR TokenHandler
|
|
LF TokenHandler
|
|
CRLF TokenHandler
|
|
Excl TokenHandler
|
|
DoubleQuote TokenHandler
|
|
Hash TokenHandler
|
|
Dollar TokenHandler
|
|
Percent TokenHandler
|
|
Amp TokenHandler
|
|
SingleQuote TokenHandler
|
|
RoundOpen TokenHandler
|
|
LeftParen TokenHandler
|
|
RoundClose TokenHandler
|
|
RightParen TokenHandler
|
|
Asterisk TokenHandler
|
|
Multiply TokenHandler
|
|
Plus TokenHandler
|
|
Add TokenHandler
|
|
Comma TokenHandler
|
|
Minus TokenHandler
|
|
Subtract TokenHandler
|
|
Dot TokenHandler
|
|
Slash TokenHandler
|
|
Divide TokenHandler
|
|
Colon TokenHandler
|
|
Semicolon TokenHandler
|
|
AngleOpen TokenHandler
|
|
LessThan TokenHandler
|
|
Equal TokenHandler
|
|
AngleClose TokenHandler
|
|
GreaterThan TokenHandler
|
|
Question TokenHandler
|
|
At TokenHandler
|
|
SquareOpen TokenHandler
|
|
Backslash TokenHandler
|
|
SquareClose TokenHandler
|
|
Caret TokenHandler
|
|
Underscore TokenHandler
|
|
Backquote TokenHandler
|
|
CurlyOpen TokenHandler
|
|
Pipe TokenHandler
|
|
CurlyClose TokenHandler
|
|
Tilde TokenHandler
|
|
Newline TokenHandler
|
|
Whitespace TokenHandler
|
|
WhitespaceAndNewlines TokenHandler
|
|
EndOfLine TokenHandler
|
|
Digit TokenHandler
|
|
DigitNotZero TokenHandler
|
|
Digits TokenHandler
|
|
Float TokenHandler
|
|
Boolean TokenHandler
|
|
Integer TokenHandler
|
|
Signed func(TokenHandler) TokenHandler
|
|
IntegerBetween func(min int64, max int64) TokenHandler
|
|
ASCII TokenHandler
|
|
ASCIILower TokenHandler
|
|
ASCIIUpper TokenHandler
|
|
HexDigit TokenHandler
|
|
Octet TokenHandler
|
|
IPv4 TokenHandler
|
|
IPv4MaskBits TokenHandler
|
|
}{
|
|
Rune: MatchRune,
|
|
Runes: MatchRunes,
|
|
RuneRange: MatchRuneRange,
|
|
Str: MatchStr,
|
|
StrNoCase: MatchStrNoCase,
|
|
EndOfFile: MatchEndOfFile(),
|
|
AnyRune: MatchAnyRune(),
|
|
Space: MatchRune(' '),
|
|
Tab: MatchRune('\t'),
|
|
CR: MatchRune('\r'),
|
|
LF: MatchRune('\n'),
|
|
CRLF: MatchStr("\r\n"),
|
|
Excl: MatchRune('!'),
|
|
DoubleQuote: MatchRune('"'),
|
|
Hash: MatchRune('#'),
|
|
Dollar: MatchRune('$'),
|
|
Percent: MatchRune('%'),
|
|
Amp: MatchRune('&'),
|
|
SingleQuote: MatchRune('\''),
|
|
RoundOpen: MatchRune('('),
|
|
LeftParen: MatchRune('('),
|
|
RoundClose: MatchRune(')'),
|
|
RightParen: MatchRune(')'),
|
|
Asterisk: MatchRune('*'),
|
|
Multiply: MatchRune('*'),
|
|
Plus: MatchRune('+'),
|
|
Add: MatchRune('+'),
|
|
Comma: MatchRune(','),
|
|
Minus: MatchRune('-'),
|
|
Subtract: MatchRune('-'),
|
|
Dot: MatchRune('.'),
|
|
Slash: MatchRune('/'),
|
|
Divide: MatchRune('/'),
|
|
Colon: MatchRune(':'),
|
|
Semicolon: MatchRune(';'),
|
|
AngleOpen: MatchRune('<'),
|
|
LessThan: MatchRune('<'),
|
|
Equal: MatchRune('='),
|
|
AngleClose: MatchRune('>'),
|
|
GreaterThan: MatchRune('>'),
|
|
Question: MatchRune('?'),
|
|
At: MatchRune('@'),
|
|
SquareOpen: MatchRune('['),
|
|
Backslash: MatchRune('\\'),
|
|
SquareClose: MatchRune(']'),
|
|
Caret: MatchRune('^'),
|
|
Underscore: MatchRune('_'),
|
|
Backquote: MatchRune('`'),
|
|
CurlyOpen: MatchRune('{'),
|
|
Pipe: MatchRune('|'),
|
|
CurlyClose: MatchRune('}'),
|
|
Tilde: MatchRune('~'),
|
|
Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
|
|
WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
|
|
EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
|
|
Digit: MatchDigit(),
|
|
DigitNotZero: MatchDigitNotZero(),
|
|
Digits: MatchDigits(),
|
|
Integer: MatchInteger(),
|
|
Signed: MatchSigned,
|
|
IntegerBetween: MatchIntegerBetween,
|
|
Float: MatchFloat(),
|
|
Boolean: MatchBoolean(),
|
|
ASCII: MatchRuneRange('\x00', '\x7F'),
|
|
ASCIILower: MatchRuneRange('a', 'z'),
|
|
ASCIIUpper: MatchRuneRange('A', 'Z'),
|
|
HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
|
|
Octet: MatchOctet(false),
|
|
IPv4: MatchIPv4(),
|
|
IPv4MaskBits: MatchIntegerBetween(0, 32),
|
|
}
|
|
|
|
// T provides convenient access to a range of Token producers (which in their
|
|
// nature are parser/combinators) that can be used when creating TokenHandler
|
|
// functions.
|
|
//
|
|
// When using T in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var t = parsekit.T
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var T = struct {
|
|
Str func(interface{}, TokenHandler) TokenHandler
|
|
StrInterpreted func(interface{}, TokenHandler) TokenHandler
|
|
Byte func(interface{}, TokenHandler) TokenHandler
|
|
Rune func(interface{}, TokenHandler) TokenHandler
|
|
Int func(interface{}, TokenHandler) TokenHandler
|
|
Int8 func(interface{}, TokenHandler) TokenHandler
|
|
Int16 func(interface{}, TokenHandler) TokenHandler
|
|
Int32 func(interface{}, TokenHandler) TokenHandler
|
|
Int64 func(interface{}, TokenHandler) TokenHandler
|
|
Uint func(interface{}, TokenHandler) TokenHandler
|
|
Uint8 func(interface{}, TokenHandler) TokenHandler
|
|
Uint16 func(interface{}, TokenHandler) TokenHandler
|
|
Uint32 func(interface{}, TokenHandler) TokenHandler
|
|
Uint64 func(interface{}, TokenHandler) TokenHandler
|
|
Float32 func(interface{}, TokenHandler) TokenHandler
|
|
Float64 func(interface{}, TokenHandler) TokenHandler
|
|
Boolean func(interface{}, TokenHandler) TokenHandler
|
|
ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
|
|
}{
|
|
Str: MakeStrLiteralToken,
|
|
StrInterpreted: MakeStrInterpretedToken,
|
|
Byte: MakeByteToken,
|
|
Rune: MakeRuneToken,
|
|
Int: MakeIntToken,
|
|
Int8: MakeInt8Token,
|
|
Int16: MakeInt16Token,
|
|
Int32: MakeInt32Token,
|
|
Int64: MakeInt64Token,
|
|
Uint: MakeUintToken,
|
|
Uint8: MakeUint8Token,
|
|
Uint16: MakeUint16Token,
|
|
Uint32: MakeUint32Token,
|
|
Uint64: MakeUint64Token,
|
|
Float32: MakeFloat32Token,
|
|
Float64: MakeFloat64Token,
|
|
Boolean: MakeBooleanToken,
|
|
ByCallback: MakeTokenByCallback,
|
|
}
|
|
|
|
// MatchRune creates a TokenHandler function that checks if the next rune from
|
|
// the input matches the provided rune.
|
|
func MatchRune(expected rune) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
input, err := t.NextRune()
|
|
if err == nil && input == expected {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRunes creates a TokenHandler function that that checks if the next rune
|
|
// from the input is one of the provided runes.
|
|
func MatchRunes(expected ...rune) TokenHandler {
|
|
s := string(expected)
|
|
return func(t *TokenAPI) bool {
|
|
input, err := t.NextRune()
|
|
if err == nil {
|
|
if strings.ContainsRune(s, input) {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRuneRange creates a TokenHandler function that that checks if the next rune
|
|
// from the input is contained by the provided rune range.
|
|
//
|
|
// The rune range is defined by a start and an end rune, inclusive, so:
|
|
//
|
|
// MatchRuneRange('g', 'k')
|
|
//
|
|
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
|
func MatchRuneRange(start rune, end rune) TokenHandler {
|
|
if end < start {
|
|
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
|
}
|
|
return func(t *TokenAPI) bool {
|
|
input, err := t.NextRune()
|
|
if err == nil && input >= start && input <= end {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchStr creates a TokenHandler that will check if the upcoming runes on the
|
|
// input match the provided string.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchStr(expected string) TokenHandler {
|
|
var handlers = []TokenHandler{}
|
|
for _, r := range expected {
|
|
handlers = append(handlers, MatchRune(r))
|
|
}
|
|
return MatchSeq(handlers...)
|
|
}
|
|
|
|
// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes
|
|
// on the input match the provided string in a case-insensitive manner.
|
|
// TODO make this a more efficient string-level match?
|
|
func MatchStrNoCase(expected string) TokenHandler {
|
|
var handlers = []TokenHandler{}
|
|
for _, r := range expected {
|
|
u := unicode.ToUpper(r)
|
|
l := unicode.ToLower(r)
|
|
handlers = append(handlers, MatchRunes(u, l))
|
|
}
|
|
return MatchSeq(handlers...)
|
|
}
|
|
|
|
// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
|
|
// When the provided TokenHandler applies, then its output is used, otherwise
|
|
// no output is generated but still a successful match is reported.
|
|
func MatchOpt(handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
|
|
// applied in their exact order. Only if all TokenHandlers apply, the sequence
|
|
// reports successful match.
|
|
func MatchSeq(handlers ...TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
for _, handler := range handlers {
|
|
if !handler(child) {
|
|
return false
|
|
}
|
|
}
|
|
child.Merge()
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers
|
|
// can be applied. They are applied in their provided order. The first TokenHandler
|
|
// that applies is used for reporting back a match.
|
|
func MatchAny(handlers ...TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
for _, handler := range handlers {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
child.Merge()
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to
|
|
// the current input. If it does, then a failed match will be reported. If it
|
|
// does not, then the next rune from the input will be reported as a match.
|
|
func MatchNot(handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
probe := t.Fork()
|
|
if handler(probe) {
|
|
return false
|
|
}
|
|
_, err := t.NextRune()
|
|
if err == nil {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied exactly the provided amount of times.
|
|
//
|
|
// Note that the input can contain more than the provided number of matches, e.g.:
|
|
//
|
|
// MatchRep(4, MatchRune('X'))
|
|
//
|
|
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
|
// In that last case, there will be a remainder "XX" on the input.
|
|
func MatchRep(times int, handler TokenHandler) TokenHandler {
|
|
return matchMinMax(times, times, handler, "MatchRep")
|
|
}
|
|
|
|
// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied at least the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
func MatchMin(min int, handler TokenHandler) TokenHandler {
|
|
if min < 0 {
|
|
panic("TokenHandler bug: MatchMin definition error: min must be >= 0")
|
|
}
|
|
return matchMinMax(min, -1, handler, "MatchMin")
|
|
}
|
|
|
|
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied at maximum the provided minimum number of times.
|
|
// When more matches are possible, these will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchMax(max int, handler TokenHandler) TokenHandler {
|
|
if max < 0 {
|
|
panic("TokenHandler bug: MatchMax definition error: max must be >= 0")
|
|
}
|
|
return matchMinMax(0, max, handler, "MatchMax")
|
|
}
|
|
|
|
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied zero or more times. All matches will be included in the output.
|
|
// Zero matches are considered a successful match.
|
|
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
|
|
return matchMinMax(0, -1, handler, "MatchZeroOfMore")
|
|
}
|
|
|
|
// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied one or more times. All matches will be included in the output.
|
|
func MatchOneOrMore(handler TokenHandler) TokenHandler {
|
|
return matchMinMax(1, -1, handler, "MatchOneOrMore")
|
|
}
|
|
|
|
// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
|
|
// be applied between the provided minimum and maximum number of times,
|
|
// inclusive. All matches will be included in the output.
|
|
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
|
if max < 0 {
|
|
panic("TokenHandler bug: MatchMinMax definition error: max must be >= 0")
|
|
}
|
|
if min < 0 {
|
|
panic("TokenHandler bug: MatchMinMax definition error: min must be >= 0")
|
|
}
|
|
return matchMinMax(min, max, handler, "MatchMinMax")
|
|
}
|
|
|
|
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
|
|
if max >= 0 && min > max {
|
|
panic(fmt.Sprintf("TokenHandler bug: %s definition error: max %d must not be < min %d", name, max, min))
|
|
}
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
total := 0
|
|
// Check for the minimum required amount of matches.
|
|
for total < min {
|
|
total++
|
|
if !handler(child) {
|
|
return false
|
|
}
|
|
}
|
|
// No specified max: include the rest of the available matches.
|
|
// Specified max: include the rest of the availble matches, up to the max.
|
|
child.Merge()
|
|
for max < 0 || total < max {
|
|
total++
|
|
if !handler(child) {
|
|
break
|
|
}
|
|
child.Merge()
|
|
}
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchSeparated creates a TokenHandler that checks for a pattern of one or more
|
|
// TokenHandlers of one type (the separated), separated by TokenHandler of another type
|
|
// (the separator). All matches (separated + separator) are included in the
|
|
// output.
|
|
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
|
|
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
|
}
|
|
|
|
// MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be
|
|
// applied to the upcoming input. It also checks if the except TokenHandler can be
|
|
// applied. If the handler applies, but the except TokenHandler as well, then the match
|
|
// as a whole will be treated as a mismatch.
|
|
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
if except(t.Fork()) {
|
|
return false
|
|
}
|
|
return handler(t)
|
|
}
|
|
}
|
|
|
|
// MatchSigned creates a TokenHandler that checks if the provided TokenHandler is
|
|
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
|
// atoms into a signed version, e.g.
|
|
//
|
|
// C.Signed(A.Integer)
|
|
func MatchSigned(handler TokenHandler) TokenHandler {
|
|
sign := MatchOpt(MatchAny(MatchRune('+'), MatchRune('-')))
|
|
return MatchSeq(sign, handler)
|
|
}
|
|
|
|
// MatchIntegerBetween creates a TokenHandler that checks for an integer
|
|
// value between the provided min and max boundaries (inclusive).
|
|
// It uses an int64 for checking internally, so you can check values
|
|
// ranging from -9223372036854775808 to 9223372036854775807.
|
|
func MatchIntegerBetween(min int64, max int64) TokenHandler {
|
|
digits := MatchSigned(MatchDigits())
|
|
return func(t *TokenAPI) bool {
|
|
fork := t.Fork()
|
|
if !digits(fork) {
|
|
return false
|
|
}
|
|
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64)
|
|
if value < min || value > max {
|
|
return false
|
|
}
|
|
fork.Merge()
|
|
return true
|
|
}
|
|
}
|
|
|
|
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
|
// has been reached. This TokenHandler will never produce output. It only reports
|
|
// a successful or a failing match through its boolean return value.
|
|
func MatchEndOfFile() TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
fork := t.Fork()
|
|
_, err := fork.NextRune()
|
|
return err == io.EOF
|
|
}
|
|
}
|
|
|
|
// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be
|
|
// read from the input. It reports back a successful match if the end of the
|
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
|
func MatchAnyRune() TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
_, err := t.NextRune()
|
|
if err == nil {
|
|
t.Accept()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchDigit creates a TokenHandler that checks if a single digit can be read
|
|
// from the input.
|
|
func MatchDigit() TokenHandler {
|
|
return MatchRuneRange('0', '9')
|
|
}
|
|
|
|
// MatchDigits creates a TokenHandler that checks if one or more digits can be read
|
|
// from the input.
|
|
func MatchDigits() TokenHandler {
|
|
return MatchOneOrMore(MatchRuneRange('0', '9'))
|
|
}
|
|
|
|
// MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal
|
|
// to zero '0' can be read from the input.
|
|
func MatchDigitNotZero() TokenHandler {
|
|
return MatchRuneRange('1', '9')
|
|
}
|
|
|
|
// MatchInteger creates a TokenHandler function that checks if a valid integer
|
|
// can be read from the input. In line with Go, a integer cannot start with
|
|
// a zero. Starting with a zero is used to indicate other bases, like octal or
|
|
// hexadecimal.
|
|
func MatchInteger() TokenHandler {
|
|
justZero := MatchRune('0')
|
|
integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
|
return MatchAny(integer, justZero)
|
|
}
|
|
|
|
// MatchFloat creates a TokenHandler function that checks if a valid float value
|
|
// can be read from the input. In case the fractional part is missing, this
|
|
// TokenHandler will report a match, so both "123" and "123.123" will match.
|
|
func MatchFloat() TokenHandler {
|
|
digits := MatchDigits()
|
|
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
|
}
|
|
|
|
// MatchBoolean creates a TokenHandler function that checks if a valid boolean
|
|
// value can be read from the input. It supports the boolean values as understood
|
|
// by Go's strconv.ParseBool() function.
|
|
func MatchBoolean() TokenHandler {
|
|
trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T'))
|
|
falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F'))
|
|
return MatchAny(trues, falses)
|
|
}
|
|
|
|
// MatchOctet creates a TokenHandler function that checks if a valid octet value
|
|
// can be read from the input (octet = byte value representation, with a value
|
|
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
|
|
// digits, not if there's a non-digit after it, meaning that "123255" would be
|
|
// a valid sequence of two octets.
|
|
//
|
|
// When the normalize parameter is set to true, then leading zeroes will be
|
|
// stripped from the octet.
|
|
func MatchOctet(normalize bool) TokenHandler {
|
|
digits := MatchMinMax(1, 3, MatchDigit())
|
|
return func(t *TokenAPI) bool {
|
|
fork := t.Fork()
|
|
if !digits(fork) {
|
|
return false
|
|
}
|
|
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
|
|
if value <= 255 {
|
|
if normalize {
|
|
runes := fork.Result().Runes()
|
|
for len(runes) > 1 && runes[0] == '0' {
|
|
runes = runes[1:]
|
|
}
|
|
fork.Result().SetRunes(runes)
|
|
}
|
|
fork.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
|
|
// IP address value can be read from the input.
|
|
// It will normalize IP-addresses that look like "192.168.001.012" to
|
|
// "192.168.1.12".
|
|
func MatchIPv4() TokenHandler {
|
|
octet := MatchOctet(true)
|
|
dot := MatchRune('.')
|
|
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
|
}
|
|
|
|
// M provides convenient access to a range of modifiers (which in their nature are
|
|
// parser/combinators) that can be used when creating TokenHandler functions.
|
|
//
|
|
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
|
|
// resulting output of another TokenHandler in some way. It does not do any matching
|
|
// against input of its own.
|
|
//
|
|
// When using M in your own parser, then it is advised to create a variable
|
|
// to reference it:
|
|
//
|
|
// var m = parsekit.M
|
|
//
|
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
var M = struct {
|
|
Drop func(TokenHandler) TokenHandler
|
|
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
|
TrimSpace func(handler TokenHandler) TokenHandler
|
|
ToLower func(TokenHandler) TokenHandler
|
|
ToUpper func(TokenHandler) TokenHandler
|
|
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
|
ByCallback func(TokenHandler, func(string) string) TokenHandler
|
|
}{
|
|
Drop: ModifyDrop,
|
|
Trim: ModifyTrim,
|
|
TrimLeft: ModifyTrimLeft,
|
|
TrimRight: ModifyTrimRight,
|
|
TrimSpace: ModifyTrimSpace,
|
|
ToLower: ModifyToLower,
|
|
ToUpper: ModifyToUpper,
|
|
Replace: ModifyReplace,
|
|
ByCallback: ModifyByCallback,
|
|
}
|
|
|
|
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is discarded completely.
|
|
//
|
|
// Note that if the TokenHandler does not apply, a mismatch will be reported back,
|
|
// even though we would have dropped the output anyway. So if you would like
|
|
// to drop optional whitespace, then use something like:
|
|
//
|
|
// M.Drop(C.Opt(A.Whitespace))
|
|
//
|
|
// instead of:
|
|
//
|
|
// M.Drop(A.Whitespace)
|
|
//
|
|
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
|
// string "bork" would not match against the second form, but " bork" would.
|
|
// In both cases, it would match the first form.
|
|
func ModifyDrop(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, func(s string) string {
|
|
return ""
|
|
})
|
|
}
|
|
|
|
// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from both the left and the right of the output.
|
|
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, true, true)
|
|
}
|
|
|
|
// ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the left of the output.
|
|
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, true, false)
|
|
}
|
|
|
|
// ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are trimmed from the right of the output.
|
|
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
|
|
return modifyTrim(handler, cutset, false, true)
|
|
}
|
|
|
|
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
|
|
modfunc := func(s string) string {
|
|
if trimLeft {
|
|
s = strings.TrimLeft(s, cutset)
|
|
}
|
|
if trimRight {
|
|
s = strings.TrimRight(s, cutset)
|
|
}
|
|
return s
|
|
}
|
|
return ModifyByCallback(handler, modfunc)
|
|
}
|
|
|
|
// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and all leading and trailing whitespace charcters,
|
|
// as defined by Unicode (spaces, tabs, carriage returns and newlines) are removed from it.
|
|
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.TrimSpace)
|
|
}
|
|
|
|
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are converted into upper case.
|
|
func ModifyToUpper(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.ToUpper)
|
|
}
|
|
|
|
// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and characters from the provided
|
|
// cutset are converted into lower case.
|
|
func ModifyToLower(handler TokenHandler) TokenHandler {
|
|
return ModifyByCallback(handler, strings.ToLower)
|
|
}
|
|
|
|
// ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is replaced by the provided string.
|
|
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
|
|
return ModifyByCallback(handler, func(string) string {
|
|
return replaceWith
|
|
})
|
|
}
|
|
|
|
// ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies.
|
|
// If it does, then its output is taken and it is fed to the provided modfunc.
|
|
// This is a simple function that takes a string on input and returns a possibly
|
|
// modified string on output. The return value of the modfunc will replace the
|
|
// resulting output.
|
|
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
child := t.Fork()
|
|
if handler(child) {
|
|
s := modfunc(child.Result().String())
|
|
child.Result().SetRunes(s)
|
|
child.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|
|
|
|
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
|
literal := t.Result().String()
|
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal}
|
|
})
|
|
}
|
|
|
|
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
|
// TODO ERROR HANDLING
|
|
interpreted, _ := interpretString(t.Result().String())
|
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted}
|
|
})
|
|
}
|
|
|
|
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
|
// TODO ERROR HANDLING --- not a 1 rune input
|
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
|
|
})
|
|
}
|
|
|
|
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
|
// TODO ERROR HANDLING --- not a 1 byte input
|
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
|
|
})
|
|
}
|
|
|
|
// interpretString resolves the escape sequences in str, following the rules
// for the contents of a Go double quoted string literal (as implemented by
// strconv.UnquoteChar with '"' as the quote character). So `\t` becomes a
// tab, `\u00e9` becomes 'é', `\"` becomes a double quote, and `\xff` becomes
// the single byte 0xff.
//
// When an invalid escape sequence or an unescaped double quote is found, the
// error from strconv.UnquoteChar is returned, together with the part of the
// string that was interpreted successfully up to that point.
func interpretString(str string) (string, error) {
	var sb strings.Builder
	// The interpreted output is never longer than the input, since every
	// escape sequence is at least as long as the bytes that it stands for.
	sb.Grow(len(str))
	for len(str) > 0 {
		r, multibyte, remainder, err := strconv.UnquoteChar(str, '"')
		if err != nil {
			return sb.String(), err
		}
		str = remainder
		if multibyte {
			sb.WriteRune(r)
		} else {
			// Not multibyte: r is an ASCII character or a byte value from a
			// \x or octal escape. It must be written as one raw byte, since
			// WriteRune would UTF-8 encode values 0x80 through 0xff as two
			// bytes.
			sb.WriteByte(byte(r))
		}
	}
	return sb.String(), nil
}
|
|
|
|
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
|
|
return strconv.Atoi(s)
|
|
})
|
|
}
|
|
|
|
// TODO allow other Go types for oct and hex too.
|
|
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseInt(s, 10, 8)
|
|
if err == nil {
|
|
return int8(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseInt(s, 10, 16)
|
|
if err == nil {
|
|
return int16(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseInt(s, 10, 32)
|
|
if err == nil {
|
|
return int32(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseInt(s, 10, 64)
|
|
if err == nil {
|
|
return int64(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseUint(s, 10, 0)
|
|
if err == nil {
|
|
return uint(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
// TODO allow other Go types for oct and hex too.
|
|
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseUint(s, 10, 8)
|
|
if err == nil {
|
|
return uint8(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseUint(s, 10, 16)
|
|
if err == nil {
|
|
return uint16(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseUint(s, 10, 32)
|
|
if err == nil {
|
|
return uint32(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseUint(s, 10, 64)
|
|
if err == nil {
|
|
return uint64(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseFloat(s, 32)
|
|
if err == nil {
|
|
return float32(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseFloat(s, 64)
|
|
if err == nil {
|
|
return float64(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
|
return makeStrconvToken(toktype, handler,
|
|
func(s string) (interface{}, error) {
|
|
value, err := strconv.ParseBool(s)
|
|
if err == nil {
|
|
return bool(value), err
|
|
}
|
|
return value, err
|
|
})
|
|
}
|
|
|
|
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
|
|
pc, _, _, _ := runtime.Caller(1)
|
|
fullName := runtime.FuncForPC(pc).Name()
|
|
parts := strings.Split(fullName, ".")
|
|
name := parts[len(parts)-1]
|
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
|
value, err := convert(t.Result().String())
|
|
if err != nil {
|
|
panic(fmt.Sprintf(
|
|
"TokenHandler error: %s cannot handle input %q: %s "+
|
|
"(only use a type conversion token maker, when the input has been "+
|
|
"validated on beforehand)", name, t.Result().String(), err))
|
|
}
|
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
|
|
})
|
|
}
|
|
|
|
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
|
|
return func(t *TokenAPI) bool {
|
|
fork := t.Fork()
|
|
if handler(fork) {
|
|
t.Result().AddToken(callback(fork))
|
|
fork.Merge()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
}
|