2051 lines
61 KiB
Go
2051 lines
61 KiB
Go
package tokenize
|
||
|
||
import (
|
||
"fmt"
|
||
"io"
|
||
"net"
|
||
"strconv"
|
||
"strings"
|
||
"unicode"
|
||
"unicode/utf8"
|
||
)
|
||
|
||
// C provides convenient access to a range of parser/combinators that can be
|
||
// used to construct Handler functions.
|
||
//
|
||
// Parser/combinators are so called higher order functions that take in one
|
||
// or more other Handler functions and output a new Handler. They can be
|
||
// used to combine Handler functions in useful ways to create new more complex
|
||
// Handler functions.
|
||
//
|
||
// When using C in your own parser, then it is advised to create a variable
|
||
// to reference it, for example:
|
||
//
|
||
// c := tokenize.C
|
||
//
|
||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||
var C = struct {
|
||
Any func(...Handler) Handler
|
||
Not func(Handler) Handler
|
||
Seq func(...Handler) Handler
|
||
Min func(min int, handler Handler) Handler
|
||
Max func(max int, handler Handler) Handler
|
||
Repeated func(times int, handler Handler) Handler
|
||
Optional func(Handler) Handler
|
||
ZeroOrMore func(Handler) Handler
|
||
OneOrMore func(Handler) Handler
|
||
MinMax func(min int, max int, handler Handler) Handler
|
||
Separated func(separator Handler, separated Handler) Handler
|
||
Except func(except Handler, handler Handler) Handler
|
||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||
InOptionalBlanks func(handler Handler) Handler
|
||
FlushInput func(Handler) Handler
|
||
}{
|
||
Any: MatchAny,
|
||
Not: MatchNot,
|
||
Seq: MatchSeq,
|
||
Min: MatchMin,
|
||
Max: MatchMax,
|
||
Repeated: MatchRep,
|
||
Optional: MatchOptional,
|
||
ZeroOrMore: MatchZeroOrMore,
|
||
OneOrMore: MatchOneOrMore,
|
||
MinMax: MatchMinMax,
|
||
Separated: MatchSeparated,
|
||
Except: MatchExcept,
|
||
FollowedBy: MatchFollowedBy,
|
||
NotFollowedBy: MatchNotFollowedBy,
|
||
InOptionalBlanks: MatchInOptionalBlanks,
|
||
FlushInput: MakeInputFlusher,
|
||
}
|
||
|
||
// A provides convenient access to a range of atoms or functions to build atoms.
|
||
//
|
||
// When using A in your own parser, then it is advised to create a variable
|
||
// to reference it:
|
||
//
|
||
// a := tokenize.A
|
||
//
|
||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||
var A = struct {
|
||
Char func(...rune) Handler
|
||
CharRange func(...rune) Handler
|
||
ByteByCallback func(func(byte) bool) Handler
|
||
BytesByCallback func(func(byte) bool) Handler
|
||
RuneByCallback func(func(rune) bool) Handler
|
||
AnyByte Handler
|
||
AnyRune Handler
|
||
ValidRune Handler
|
||
InvalidRune Handler
|
||
Str func(string) Handler
|
||
StrNoCase func(string) Handler
|
||
EndOfLine Handler
|
||
EndOfFile Handler
|
||
UntilEndOfLine Handler
|
||
Space Handler
|
||
Tab Handler
|
||
CR Handler
|
||
LF Handler
|
||
CRLF Handler
|
||
Excl Handler
|
||
DoubleQuote Handler
|
||
Hash Handler
|
||
Dollar Handler
|
||
Percent Handler
|
||
Amp Handler
|
||
SingleQuote Handler
|
||
RoundOpen Handler
|
||
LeftParen Handler
|
||
RoundClose Handler
|
||
RightParen Handler
|
||
Asterisk Handler
|
||
Multiply Handler
|
||
Plus Handler
|
||
Add Handler
|
||
Comma Handler
|
||
Minus Handler
|
||
Subtract Handler
|
||
Dot Handler
|
||
Slash Handler
|
||
Divide Handler
|
||
Colon Handler
|
||
Semicolon Handler
|
||
AngleOpen Handler
|
||
LessThan Handler
|
||
Equal Handler
|
||
AngleClose Handler
|
||
GreaterThan Handler
|
||
Question Handler
|
||
At Handler
|
||
SquareOpen Handler
|
||
Backslash Handler
|
||
SquareClose Handler
|
||
Caret Handler
|
||
Underscore Handler
|
||
Backquote Handler
|
||
CurlyOpen Handler
|
||
Pipe Handler
|
||
CurlyClose Handler
|
||
Tilde Handler
|
||
Newline Handler
|
||
Blank Handler
|
||
Blanks Handler
|
||
Whitespace Handler
|
||
UnicodeSpace Handler
|
||
Digit Handler
|
||
DigitNotZero Handler
|
||
Digits Handler
|
||
Zero Handler
|
||
Boolean Handler
|
||
Signed func(Handler) Handler
|
||
Integer Handler
|
||
IntegerBetween func(min int64, max int64) Handler
|
||
Decimal Handler
|
||
ASCII Handler
|
||
ASCIILower Handler
|
||
ASCIIUpper Handler
|
||
Letter Handler
|
||
Lower Handler
|
||
Upper Handler
|
||
HexDigit Handler
|
||
Octet Handler
|
||
IPv4 Handler
|
||
IPv4CIDRMask Handler
|
||
IPv4Netmask Handler
|
||
IPv4Net Handler
|
||
IPv6 Handler
|
||
IPv6CIDRMask Handler
|
||
IPv6Net Handler
|
||
}{
|
||
Char: MatchChar,
|
||
CharRange: MatchCharRange,
|
||
ByteByCallback: MatchByteByCallback,
|
||
BytesByCallback: MatchBytesByCallback,
|
||
RuneByCallback: MatchRuneByCallback,
|
||
AnyByte: MatchAnyByte(),
|
||
AnyRune: MatchAnyRune(),
|
||
ValidRune: MatchValidRune(),
|
||
InvalidRune: MatchInvalidRune(),
|
||
Str: MatchStr,
|
||
StrNoCase: MatchStrNoCase,
|
||
EndOfFile: MatchEndOfFile(),
|
||
EndOfLine: MatchEndOfLine(),
|
||
UntilEndOfLine: MatchUntilEndOfLine(),
|
||
Space: MatchChar(' '),
|
||
Tab: MatchChar('\t'),
|
||
CR: MatchChar('\r'),
|
||
LF: MatchChar('\n'),
|
||
CRLF: MatchStr("\r\n"),
|
||
Excl: MatchChar('!'),
|
||
DoubleQuote: MatchChar('"'),
|
||
Hash: MatchChar('#'),
|
||
Dollar: MatchChar('$'),
|
||
Percent: MatchChar('%'),
|
||
Amp: MatchChar('&'),
|
||
SingleQuote: MatchChar('\''),
|
||
RoundOpen: MatchChar('('),
|
||
LeftParen: MatchChar('('),
|
||
RoundClose: MatchChar(')'),
|
||
RightParen: MatchChar(')'),
|
||
Asterisk: MatchChar('*'),
|
||
Multiply: MatchChar('*'),
|
||
Plus: MatchChar('+'),
|
||
Add: MatchChar('+'),
|
||
Comma: MatchChar(','),
|
||
Minus: MatchChar('-'),
|
||
Subtract: MatchChar('-'),
|
||
Dot: MatchChar('.'),
|
||
Slash: MatchChar('/'),
|
||
Divide: MatchChar('/'),
|
||
Colon: MatchChar(':'),
|
||
Semicolon: MatchChar(';'),
|
||
AngleOpen: MatchChar('<'),
|
||
LessThan: MatchChar('<'),
|
||
Equal: MatchChar('='),
|
||
AngleClose: MatchChar('>'),
|
||
GreaterThan: MatchChar('>'),
|
||
Question: MatchChar('?'),
|
||
At: MatchChar('@'),
|
||
SquareOpen: MatchChar('['),
|
||
Backslash: MatchChar('\\'),
|
||
SquareClose: MatchChar(']'),
|
||
Caret: MatchChar('^'),
|
||
Underscore: MatchChar('_'),
|
||
Backquote: MatchChar('`'),
|
||
CurlyOpen: MatchChar('{'),
|
||
Pipe: MatchChar('|'),
|
||
CurlyClose: MatchChar('}'),
|
||
Tilde: MatchChar('~'),
|
||
Newline: MatchNewline(),
|
||
Blank: MatchBlank(),
|
||
Blanks: MatchBlanks(),
|
||
Whitespace: MatchWhitespace(),
|
||
UnicodeSpace: MatchUnicodeSpace(),
|
||
Digit: MatchDigit(),
|
||
DigitNotZero: MatchDigitNotZero(),
|
||
Digits: MatchDigits(),
|
||
Zero: MatchChar('0'),
|
||
Signed: MatchSigned,
|
||
Integer: MatchInteger(true),
|
||
IntegerBetween: MatchIntegerBetween,
|
||
Decimal: MatchDecimal(true),
|
||
Boolean: MatchBoolean(),
|
||
ASCII: MatchASCII(),
|
||
ASCIILower: MatchASCIILower(),
|
||
ASCIIUpper: MatchASCIIUpper(),
|
||
Letter: MatchUnicodeLetter(),
|
||
Lower: MatchUnicodeLower(),
|
||
Upper: MatchUnicodeUpper(),
|
||
HexDigit: MatchHexDigit(),
|
||
Octet: MatchOctet(true),
|
||
IPv4: MatchIPv4(true),
|
||
IPv4CIDRMask: MatchIPv4CIDRMask(true),
|
||
IPv4Netmask: MatchIPv4Netmask(true),
|
||
IPv4Net: MatchIPv4Net(true),
|
||
IPv6: MatchIPv6(true),
|
||
IPv6CIDRMask: MatchIPv6CIDRMask(true),
|
||
IPv6Net: MatchIPv6Net(true),
|
||
}
|
||
|
||
// M provides convenient access to a range of modifiers (which in their nature are
|
||
// parser/combinators) that can be used when creating Handler functions.
|
||
//
|
||
// In parsekit, a modifier is defined as a Handler function that modifies the
|
||
// resulting output of another Handler in some way. It does not do any matching
|
||
// against input of its own.
|
||
//
|
||
// When using M in your own parser, then it is advised to create a variable
|
||
// to reference it:
|
||
//
|
||
// m := tokenize.M
|
||
//
|
||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||
var M = struct {
|
||
Drop func(Handler) Handler
|
||
Trim func(handler Handler, cutset string) Handler
|
||
TrimLeft func(handler Handler, cutset string) Handler
|
||
TrimRight func(handler Handler, cutset string) Handler
|
||
TrimSpace func(handler Handler) Handler
|
||
ToLower func(Handler) Handler
|
||
ToUpper func(Handler) Handler
|
||
Replace func(handler Handler, replaceWith string) Handler
|
||
ByCallback func(Handler, func(string) string) Handler
|
||
}{
|
||
Drop: ModifyDrop,
|
||
Trim: ModifyTrim,
|
||
TrimLeft: ModifyTrimLeft,
|
||
TrimRight: ModifyTrimRight,
|
||
TrimSpace: ModifyTrimSpace,
|
||
ToLower: ModifyToLower,
|
||
ToUpper: ModifyToUpper,
|
||
Replace: ModifyReplace,
|
||
ByCallback: ModifyByCallback,
|
||
}
|
||
|
||
// T provides convenient access to a range of Token producers (which in their
|
||
// nature are parser/combinators) that can be used when creating Handler
|
||
// functions.
|
||
//
|
||
// When using T in your own parser, then it is advised to create a variable
|
||
// to reference it:
|
||
//
|
||
// t := tokenize.T
|
||
//
|
||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||
var T = struct {
|
||
Str func(interface{}, Handler) Handler
|
||
StrInterpreted func(interface{}, Handler) Handler
|
||
Byte func(interface{}, Handler) Handler
|
||
Rune func(interface{}, Handler) Handler
|
||
Int func(interface{}, Handler) Handler
|
||
Int8 func(interface{}, Handler) Handler
|
||
Int16 func(interface{}, Handler) Handler
|
||
Int32 func(interface{}, Handler) Handler
|
||
Int64 func(interface{}, Handler) Handler
|
||
Int64Base func(interface{}, int, Handler) Handler
|
||
Uint func(interface{}, Handler) Handler
|
||
Uint8 func(interface{}, Handler) Handler
|
||
Uint16 func(interface{}, Handler) Handler
|
||
Uint32 func(interface{}, Handler) Handler
|
||
Uint64 func(interface{}, Handler) Handler
|
||
Uint64Base func(interface{}, int, Handler) Handler
|
||
Float32 func(interface{}, Handler) Handler
|
||
Float64 func(interface{}, Handler) Handler
|
||
Boolean func(interface{}, Handler) Handler
|
||
ByValue func(toktype interface{}, handler Handler, value interface{}) Handler
|
||
ByCallback func(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler
|
||
Group func(interface{}, Handler) Handler
|
||
}{
|
||
Str: MakeStrLiteralToken,
|
||
StrInterpreted: MakeStrInterpretedToken,
|
||
Byte: MakeByteToken,
|
||
Rune: MakeRuneToken,
|
||
Int: MakeIntToken,
|
||
Int8: MakeInt8Token,
|
||
Int16: MakeInt16Token,
|
||
Int32: MakeInt32Token,
|
||
Int64: MakeInt64Token,
|
||
Int64Base: MakeInt64BaseToken,
|
||
Uint: MakeUintToken,
|
||
Uint8: MakeUint8Token,
|
||
Uint16: MakeUint16Token,
|
||
Uint32: MakeUint32Token,
|
||
Uint64: MakeUint64Token,
|
||
Uint64Base: MakeUint64BaseToken,
|
||
Float32: MakeFloat32Token,
|
||
Float64: MakeFloat64Token,
|
||
Boolean: MakeBooleanToken,
|
||
ByValue: MakeTokenByValue,
|
||
ByCallback: MakeTokenByCallback,
|
||
Group: MakeTokenGroup,
|
||
}
|
||
|
||
func MatchChar(expected ...rune) Handler {
|
||
if len(expected) == 0 {
|
||
callerPanic("MatchChar", "Handler: {name} definition error at {caller}: at least one character must be provided")
|
||
}
|
||
if len(expected) == 1 {
|
||
return matchAgainstSingleChar(expected[0])
|
||
}
|
||
return matchAgainstMultipleChars(expected)
|
||
}
|
||
|
||
func matchAgainstSingleChar(expected rune) Handler {
|
||
// Handle an ASCII character.
|
||
if expected <= '\x7F' {
|
||
expectedByte := byte(expected)
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err == nil && b == expectedByte {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// Handle an UTF8 character.
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil && r == expected {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func matchAgainstMultipleChars(expected []rune) Handler {
|
||
// Check if all characters are ASCII characters.
|
||
onlyBytes := true
|
||
expectedBytes := make([]byte, len(expected))
|
||
for i, r := range expected {
|
||
if r > '\x7F' {
|
||
onlyBytes = false
|
||
break
|
||
}
|
||
expectedBytes[i] = byte(r)
|
||
}
|
||
|
||
// Handle ASCII characters.
|
||
if onlyBytes {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
for _, e := range expectedBytes {
|
||
if b == e {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// Handle UTF8 characters.
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
for _, e := range expected {
|
||
if r == e {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func MatchCharRange(expected ...rune) Handler {
|
||
if len(expected) == 0 {
|
||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: at least one character range pair must be provided")
|
||
}
|
||
if len(expected)%2 != 0 {
|
||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: an even number of character range pairs must be provided")
|
||
}
|
||
starts := make([]rune, len(expected))
|
||
ends := make([]rune, len(expected))
|
||
for i := 0; i < len(expected); i += 2 {
|
||
start := expected[i]
|
||
end := expected[i+1]
|
||
if start > end {
|
||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: start %q must be <= end %q", start, end)
|
||
}
|
||
starts[i/2] = start
|
||
ends[i/2] = end
|
||
}
|
||
|
||
if len(expected) == 1 {
|
||
return matchAgainstSingleCharRange(starts[0], ends[0])
|
||
}
|
||
return matchAgainstMultipleCharRanges(starts, ends)
|
||
}
|
||
|
||
func matchAgainstSingleCharRange(start rune, end rune) Handler {
|
||
if end <= '\x7F' {
|
||
start := byte(start)
|
||
end := byte(end)
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err == nil && b >= start && b <= end {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil && r >= start && r <= end {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler {
|
||
// Check if all characters are ASCII characters.
|
||
onlyBytes := true
|
||
expectedStarts := make([]byte, len(starts))
|
||
expectedEnds := make([]byte, len(ends))
|
||
for i, start := range starts {
|
||
end := ends[i]
|
||
if end > '\x7F' {
|
||
onlyBytes = false
|
||
break
|
||
}
|
||
expectedStarts[i] = byte(start)
|
||
expectedEnds[i] = byte(end)
|
||
}
|
||
|
||
if onlyBytes {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
for i := range expectedStarts {
|
||
if err == nil && b >= expectedStarts[i] && b <= expectedEnds[i] {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
for i := range starts {
|
||
if err == nil && r >= starts[i] && r <= ends[i] {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchNewline creates a handler that matches a newline, which is either
|
||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||
func MatchNewline() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
if b1 == '\n' {
|
||
tokenAPI.Input.Byte.Accept(b1)
|
||
return true
|
||
}
|
||
if b1 == '\r' {
|
||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||
if err == nil && b2 == '\n' {
|
||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchBlank creates a Handler that matches one rune from the input
|
||
// against blank characters, meaning tabs and spaces.
|
||
//
|
||
// When you need whitespace matching, which also includes characters like
|
||
// newlines, then take a look at MatchWhitespace().
|
||
func MatchBlank() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err == nil && (b == ' ' || b == '\t') {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchBlanks creates a Handler that matches the input against one
|
||
// or more blank characters, meaning tabs and spaces.
|
||
//
|
||
// When you need whitespace matching, which also includes characters like
|
||
// newlines, then make use of MatchWhitespace().
|
||
// When you need unicode whitespace matching, which also includes characters
|
||
// like a vertical tab, then make use of MatchUnicodeSpace().
|
||
func MatchBlanks() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
f := tokenAPI.Input.Byte.AcceptMulti
|
||
if tokenAPI.Output.suspended > 0 {
|
||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||
}
|
||
ok := false
|
||
for {
|
||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||
for i, b := range chunk {
|
||
if b != ' ' && b != '\t' {
|
||
if i > 0 {
|
||
f(chunk[:i]...)
|
||
}
|
||
return ok
|
||
}
|
||
ok = true
|
||
}
|
||
if err != nil {
|
||
if err == io.EOF {
|
||
if len(chunk) > 0 {
|
||
f(chunk...)
|
||
}
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
f(chunk...)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchWhitespace creates a Handler that matches the input against one or more
|
||
// whitespace characters, defined as space ' ', tab, ' ', newline '\n' (LF) and
|
||
// carriage return '\r' followed by a newline '\n' (CRLF).
|
||
func MatchWhitespace() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
f := tokenAPI.Input.Byte.AcceptMulti
|
||
if tokenAPI.Output.suspended > 0 {
|
||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||
}
|
||
ok := false
|
||
for {
|
||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||
for i, b := range chunk {
|
||
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
||
if i > 0 {
|
||
f(chunk[:i]...)
|
||
}
|
||
return ok
|
||
}
|
||
ok = true
|
||
}
|
||
if err != nil {
|
||
if err == io.EOF {
|
||
if len(chunk) > 0 {
|
||
f(chunk...)
|
||
}
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
f(chunk...)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchUnicodeSpace creates a Handler that matches the input against one or more
|
||
// whitespace characters, as defined by unicode.
|
||
func MatchUnicodeSpace() Handler {
|
||
return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
|
||
}
|
||
|
||
// MatchByteByCallback creates a Handler that matches a single byte from the
|
||
// input against the provided callback function. When the callback returns true,
|
||
// it is considered a match.
|
||
func MatchByteByCallback(callback func(byte) bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err == nil && callback(b) {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchBytesByCallback creates a Handler that matches one or more bytes from the
|
||
// input against the provided callback function. As long as the callback returns true,
|
||
// it is considered a match.
|
||
func MatchBytesByCallback(callback func(byte) bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
f := tokenAPI.Input.Byte.AcceptMulti
|
||
if tokenAPI.Output.suspended > 0 {
|
||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||
}
|
||
ok := false
|
||
for {
|
||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||
for i, b := range chunk {
|
||
if !callback(b) {
|
||
if i > 0 {
|
||
f(chunk[:i]...)
|
||
}
|
||
return ok
|
||
}
|
||
ok = true
|
||
}
|
||
if err != nil {
|
||
if err == io.EOF {
|
||
if len(chunk) > 0 {
|
||
f(chunk...)
|
||
}
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
f(chunk...)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
||
// input against the provided callback function. When the callback returns true,
|
||
// it is considered a match.
|
||
//
|
||
// Note that the callback function matches the signature of the unicode.Is* functions,
|
||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||
func MatchRuneByCallback(callback func(rune) bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil && callback(r) {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
|
||
func MatchEndOfLine() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil {
|
||
return err == io.EOF
|
||
}
|
||
if b1 == '\n' {
|
||
tokenAPI.Input.Byte.Accept(b1)
|
||
return true
|
||
}
|
||
if b1 == '\r' {
|
||
b2, _ := tokenAPI.Input.Byte.Peek(1)
|
||
if b2 == '\n' {
|
||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchStr creates a Handler that matches the input against the provided string.
|
||
func MatchStr(expected string) Handler {
|
||
expectedBytes := []byte(expected)
|
||
expectedLength := len(expectedBytes)
|
||
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.PeekMulti(0, expectedLength)
|
||
if err != nil || len(b) < expectedLength {
|
||
return false
|
||
}
|
||
for i, bExpected := range expectedBytes {
|
||
if b[i] != bExpected {
|
||
return false
|
||
}
|
||
}
|
||
tokenAPI.Input.Byte.AcceptMulti(expectedBytes...)
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchStrNoCase creates a Handler that matches the input against the
|
||
// provided string in a case-insensitive manner.
|
||
func MatchStrNoCase(expected string) Handler {
|
||
l := utf8.RuneCountInString(expected)
|
||
|
||
return func(tokenAPI *API) bool {
|
||
matches := make([]rune, l)
|
||
offset := 0
|
||
i := 0
|
||
for _, e := range expected {
|
||
if e <= '\x7F' {
|
||
b, err := tokenAPI.Input.Byte.Peek(offset)
|
||
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
|
||
return false
|
||
}
|
||
matches[i] = rune(b)
|
||
offset++
|
||
} else {
|
||
r, w, err := tokenAPI.Input.Rune.Peek(offset)
|
||
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
|
||
return false
|
||
}
|
||
matches[i] = r
|
||
offset += w
|
||
}
|
||
i++
|
||
}
|
||
tokenAPI.Input.Rune.AcceptMulti(matches...)
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchOptional creates a Handler that makes the provided Handler optional.
|
||
// When the provided Handler applies, then its output is used, otherwise
|
||
// no output is generated but still a successful match is reported (but the
|
||
// result will be empty).
|
||
func MatchOptional(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
if !handler(tokenAPI) {
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchSeq creates a Handler that checks if the provided Handlers can be
|
||
// applied in their exact order. Only if all Handlers apply, the sequence
|
||
// reports successful match.
|
||
func MatchSeq(handlers ...Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
for _, handler := range handlers {
|
||
split := tokenAPI.SplitOutput()
|
||
if !handler(tokenAPI) {
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
tokenAPI.MergeSplitOutput(split)
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchAny creates a Handler that checks if any of the provided Handlers
|
||
// can be applied. They are applied in their provided order. The first Handler
|
||
// that applies is used for reporting back a match.
|
||
func MatchAny(handlers ...Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
for _, handler := range handlers {
|
||
if handler(tokenAPI) {
|
||
return true
|
||
}
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchNot creates a Handler that checks if the provided Handler applies to
|
||
// the current input. If it does, then a failed match will be reported. If it
|
||
// does not, then the next rune from the input will be reported as a match.
|
||
func MatchNot(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
if handler(tokenAPI) {
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchRep creates a Handler that checks if the provided Handler can be
|
||
// applied exactly the provided amount of times.
|
||
//
|
||
// Note that the input can contain more than the provided number of matches, e.g.:
|
||
//
|
||
// MatchRep(4, MatchChar('X'))
|
||
//
|
||
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
||
// In that last case, there will be a remainder "XX" on the input.
|
||
//
|
||
// Another way to use this method, is by applying the following syntactic sugar:
|
||
//
|
||
// MatchChar('X').Times(4)
|
||
func MatchRep(times int, handler Handler) Handler {
|
||
return matchMinMax(times, times, handler, "MatchRep")
|
||
}
|
||
|
||
// MatchMin creates a Handler that checks if the provided Handler can be
|
||
// applied at least the provided minimum number of times.
|
||
// When more matches are possible, these will be included in the output.
|
||
func MatchMin(min int, handler Handler) Handler {
|
||
if min < 0 {
|
||
callerPanic("MatchMin", "Handler: {name} definition error at {caller}: min must be >= 0")
|
||
}
|
||
return matchMinMax(min, -1, handler, "MatchMin")
|
||
}
|
||
|
||
// MatchMax creates a Handler that checks if the provided Handler can be
|
||
// applied at maximum the provided minimum number of times.
|
||
// When more matches are possible, thhandler(ese will be included in the output.
|
||
// Zero matches are considered a successful match.
|
||
func MatchMax(max int, handler Handler) Handler {
|
||
if max < 0 {
|
||
callerPanic("MatchMax", "Handler: {name} definition error at {caller}: max must be >= 0")
|
||
}
|
||
return matchMinMax(0, max, handler, "MatchMax")
|
||
}
|
||
|
||
// MatchZeroOrMore creates a Handler that checks if the provided Handler can
|
||
// be applied zero or more times. All matches will be included in the output.
|
||
// Zero matches are considered a successful match.
|
||
func MatchZeroOrMore(handler Handler) Handler {
|
||
return matchMinMax(0, -1, handler, "MatchZeroOfMore")
|
||
}
|
||
|
||
// MatchOneOrMore creates a Handler that checks if the provided Handler can
|
||
// be applied one or more times. All matches will be included in the output.
|
||
func MatchOneOrMore(handler Handler) Handler {
|
||
return matchMinMax(1, -1, handler, "MatchOneOrMore")
|
||
}
|
||
|
||
// MatchMinMax creates a Handler that checks if the provided Handler can
|
||
// be applied between the provided minimum and maximum number of times,
|
||
// inclusive. All matches will be included in the output.
|
||
func MatchMinMax(min int, max int, handler Handler) Handler {
|
||
if max < 0 {
|
||
callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: max must be >= 0")
|
||
}
|
||
if min < 0 {
|
||
callerPanic("MatchMinMax", "Handler: {name} definition error at {caller}: min must be >= 0")
|
||
}
|
||
return matchMinMax(min, max, handler, "MatchMinMax")
|
||
}
|
||
|
||
func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
||
if max >= 0 && min > max {
|
||
callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
|
||
}
|
||
return func(tokenAPI *API) bool {
|
||
total := 0
|
||
|
||
// Check for the minimum required amount of matches.
|
||
snap := tokenAPI.MakeSnapshot()
|
||
for total < min {
|
||
total++
|
||
split := tokenAPI.SplitOutput()
|
||
ok := handler(tokenAPI)
|
||
tokenAPI.MergeSplitOutput(split)
|
||
if !ok {
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
}
|
||
|
||
// No specified max: include the rest of the available matches.
|
||
// Specified max: include the rest of the availble matches, up to the max.
|
||
//child.Merge()
|
||
for max < 0 || total < max {
|
||
total++
|
||
split := tokenAPI.SplitOutput()
|
||
ok := handler(tokenAPI)
|
||
tokenAPI.MergeSplitOutput(split)
|
||
if !ok {
|
||
break
|
||
}
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchSeparated creates a Handler that checks for a pattern of one or more
|
||
// Handlers of one type (the separated), separated by Handler of another type
|
||
// (the separator). All matches (separated + separator) are included in the
|
||
// output.
|
||
func MatchSeparated(separator Handler, separated Handler) Handler {
|
||
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
||
}
|
||
|
||
// MatchExcept creates a Handler that checks if the provided Handler can be
|
||
// applied to the upcoming input. It also checks if the except Handler can be
|
||
// applied. If the handler applies, but the except Handler as well, then the match
|
||
// as a whole will be treated as a mismatch.
|
||
func MatchExcept(handler Handler, except Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
if except(tokenAPI) {
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
return handler(tokenAPI)
|
||
}
|
||
}
|
||
|
||
// MatchFollowedBy creates a Handler that checks if the provided handler matches
|
||
// and if the provided lookAhead handler matches after the handler.
|
||
// When both handlers match, the match for the handler is accepted and the match
|
||
// for the lookAhead handler is ignored.
|
||
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
if handler(tokenAPI) {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
ok := lookAhead(tokenAPI)
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchNotFollowedBy creates a Handler that checks if the provided handler matches
|
||
// and if the provided lookAhead handler does not match after the handler.
|
||
// If the handler matches and the lookAhead handler doesn't, then the match for
|
||
// the handler is accepted.
|
||
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
if handler(tokenAPI) {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
ok := !lookAhead(tokenAPI)
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
func MatchInOptionalBlanks(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
skipBlanks(tokenAPI)
|
||
if !handler(tokenAPI) {
|
||
return false
|
||
}
|
||
skipBlanks(tokenAPI)
|
||
return true
|
||
}
|
||
}
|
||
|
||
func skipBlanks(tokenAPI *API) {
|
||
for {
|
||
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||
for i, b := range bs {
|
||
if b != ' ' && b != '\t' {
|
||
if i > 0 {
|
||
tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...)
|
||
}
|
||
return
|
||
}
|
||
}
|
||
if err != nil {
|
||
if len(bs) > 0 {
|
||
tokenAPI.Input.Byte.MoveCursorMulti(bs...)
|
||
}
|
||
return
|
||
}
|
||
}
|
||
}
|
||
|
||
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
||
// provided handler matches.
|
||
//
|
||
// This is useful when constructing a grammar using only parsekit.tokenize
|
||
// functionality (parsekit.parse will automatically flush the input for you)
|
||
// that has to process large input data.
|
||
//
|
||
// Without flushing the input, the input reader will allocate memory
|
||
// during the parsing process, eventually enough to hold the full input
|
||
// in memory. By wrapping Handlers with an input flusher, you can tell parsekit
|
||
// that the accumulated input so far will no longer be needed, allowing
|
||
// this input to be flushed from memory.
|
||
//
|
||
// Rule of thumb is: only use it when you have to actually fix a memory
|
||
// hogging issue for your use case.
|
||
func MakeInputFlusher(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
if handler(tokenAPI) {
|
||
tokenAPI.Input.Flush()
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchSigned creates a Handler that checks if the provided Handler is
|
||
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
||
// atoms into a signed version, e.g.
|
||
//
|
||
// C.Signed(A.Integer)
|
||
func MatchSigned(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
snap := tokenAPI.MakeSnapshot()
|
||
if b == '-' || b == '+' {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
}
|
||
if handler(tokenAPI) {
|
||
return true
|
||
}
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchIntegerBetween creates a Handler that checks for an integer
|
||
// value between the provided min and max boundaries (inclusive).
|
||
// It uses an int64 for checking internally, so you can check values
|
||
// ranging from -9223372036854775808 to 9223372036854775807.
|
||
func MatchIntegerBetween(min int64, max int64) Handler {
|
||
if max < min {
|
||
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
|
||
}
|
||
digits := MatchSigned(MatchDigits())
|
||
|
||
return func(tokenAPI *API) bool {
|
||
if !digits(tokenAPI) {
|
||
return false
|
||
}
|
||
value, _ := strconv.ParseInt(tokenAPI.Output.String(), 10, 64)
|
||
if value < min || value > max {
|
||
return false
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchEndOfFile creates a Handler that checks if the end of the input data
|
||
// has been reached. This Handler will never produce output. It only reports
|
||
// a successful or a failing match through its boolean return value.
|
||
func MatchEndOfFile() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
_, err := tokenAPI.Input.Byte.Peek(0)
|
||
return err == io.EOF
|
||
}
|
||
}
|
||
|
||
// MatchUntilEndOfLine creates a Handler function that accepts one or
|
||
// more runes until the end of the line (or file when that's the case).
|
||
// The newline itself is not included in the match.
|
||
func MatchUntilEndOfLine() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
f := tokenAPI.Input.Byte.AcceptMulti
|
||
if tokenAPI.Output.suspended > 0 {
|
||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||
}
|
||
for {
|
||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||
state := 0
|
||
ok := false
|
||
for i, b := range chunk {
|
||
if b == '\r' {
|
||
state = 1
|
||
continue
|
||
}
|
||
if b == '\n' {
|
||
if state == 1 {
|
||
f(chunk[:i+1]...)
|
||
} else if i > 0 {
|
||
f(chunk[:i]...)
|
||
}
|
||
return ok
|
||
}
|
||
state = 0
|
||
ok = true
|
||
}
|
||
if err != nil {
|
||
if err == io.EOF {
|
||
if len(chunk) > 0 {
|
||
f(chunk...)
|
||
}
|
||
return ok
|
||
}
|
||
return false
|
||
}
|
||
f(chunk...)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchAnyByte creates a Handler function that accepts any byte from the input.
|
||
func MatchAnyByte() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err == nil {
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchAnyRune creates a Handler function that checks if a rune can be
|
||
// read from the input. Invalid runes on the input are replaced with the UTF8
|
||
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
||
func MatchAnyRune() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchValidRune creates a Handler function that checks if a valid
|
||
// UTF8 rune can be read from the input.
|
||
func MatchValidRune() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil && r != utf8.RuneError {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchInvalidRune creates a Handler function that checks if an invalid
|
||
// UTF8 rune can be read from the input.
|
||
func MatchInvalidRune() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||
if err == nil && r == utf8.RuneError {
|
||
tokenAPI.Input.Rune.Accept(r)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MatchDigit creates a Handler that checks if a single digit can be read
|
||
// from the input.
|
||
func MatchDigit() Handler {
|
||
return MatchCharRange('0', '9')
|
||
}
|
||
|
||
// MatchDigits creates a Handler that checks if one or more digits can be read
|
||
// from the input.
|
||
func MatchDigits() Handler {
|
||
return func(tokenAPI *API) bool {
|
||
// Check if the first character is a digit.
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return false
|
||
}
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
|
||
// Continue accepting bytes as long as they are digits.
|
||
for {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return true
|
||
}
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchDigitNotZero creates a Handler that checks if a single digit not equal
|
||
// to zero '0' can be read from the input.
|
||
func MatchDigitNotZero() Handler {
|
||
return MatchCharRange('1', '9')
|
||
}
|
||
|
||
// MatchInteger creates a Handler function that checks if a valid integer
|
||
// can be read from the input.
|
||
//
|
||
// Leading zeroes are allowed. When the normalize parameter is true, these
|
||
// will be stripped from the input.
|
||
func MatchInteger(normalize bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
// Check if the first character is a digit.
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return false
|
||
}
|
||
|
||
// When normalization is requested, drop leading zeroes.
|
||
if normalize && b == '0' {
|
||
for {
|
||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||
|
||
// The next character is a zero, skip the leading zero and check again.
|
||
if err == nil && b2 == b {
|
||
tokenAPI.Input.Byte.MoveCursor('0')
|
||
continue
|
||
}
|
||
// The next character is not a zero, nor a digit at all.
|
||
// We're looking at a zero on its own here.
|
||
if err != nil || b2 < '1' || b2 > '9' {
|
||
tokenAPI.Input.Byte.Accept('0')
|
||
return true
|
||
}
|
||
// The next character is a digit. SKip the leading zero and go with the digit.
|
||
tokenAPI.Input.Byte.MoveCursor('0')
|
||
tokenAPI.Input.Byte.Accept(b2)
|
||
break
|
||
}
|
||
}
|
||
|
||
// Continue accepting bytes as long as they are digits.
|
||
for {
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return true
|
||
}
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
}
|
||
}
|
||
}
|
||
|
||
// MatchDecimal creates a Handler function that checks if a valid decimal value
|
||
// can be read from the input. In case the fractional part is missing (which is
|
||
// a valid decimal number), this Handler will report a match, so both "123" and
|
||
// "123.123" will match.
|
||
//
|
||
// Leading zeroes are allowed. When the normalize parameter is true, these
|
||
// will be stripped from the input.
|
||
func MatchDecimal(normalize bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
// Check if the first character is a digit.
|
||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return false
|
||
}
|
||
|
||
// When normalization is requested, drop leading zeroes.
|
||
if normalize && b == '0' {
|
||
for {
|
||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||
|
||
// The next character is a zero, skip the leading zero and check again.
|
||
if err == nil && b2 == b {
|
||
tokenAPI.Input.Byte.MoveCursor('0')
|
||
continue
|
||
}
|
||
// The next character is a dot, go with the zero before the dot and
|
||
// let the upcoming code handle the dot.
|
||
if err == nil && b2 == '.' {
|
||
tokenAPI.Input.Byte.Accept('0')
|
||
break
|
||
}
|
||
// The next character is not a zero, nor a digit at all.
|
||
// We're looking at a zero on its own here.
|
||
if err != nil || b2 < '1' || b2 > '9' {
|
||
tokenAPI.Input.Byte.Accept('0')
|
||
return true
|
||
}
|
||
// The next character is a digit. SKip the leading zero and go with the digit.
|
||
tokenAPI.Input.Byte.MoveCursor('0')
|
||
tokenAPI.Input.Byte.Accept(b2)
|
||
break
|
||
}
|
||
}
|
||
|
||
// Continue accepting bytes as long as they are digits.
|
||
for {
|
||
b, err = tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
break
|
||
}
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
}
|
||
|
||
// No dot or no digit after a dot? Then we're done.
|
||
if b != '.' {
|
||
return true
|
||
}
|
||
b, err = tokenAPI.Input.Byte.Peek(1)
|
||
if err != nil || b < '0' || b > '9' {
|
||
return true
|
||
}
|
||
|
||
// Continue accepting bytes as long as they are digits.
|
||
tokenAPI.Input.Byte.AcceptMulti('.', b)
|
||
for {
|
||
b, err = tokenAPI.Input.Byte.Peek(0)
|
||
if err != nil || b < '0' || b > '9' {
|
||
break
|
||
}
|
||
tokenAPI.Input.Byte.Accept(b)
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchBoolean creates a Handler function that checks if a boolean
|
||
// value can be read from the input. It supports the boolean values as understood
|
||
// by Go's strconv.ParseBool() function.
|
||
//
|
||
// True values: true, TRUE, True, 1, t, T
|
||
//
|
||
// False falues: false, FALSE, False, 0, f, F
|
||
func MatchBoolean() Handler {
|
||
return MatchAny(
|
||
MatchStr("true"),
|
||
MatchStr("TRUE"),
|
||
MatchStr("True"),
|
||
MatchChar('t'),
|
||
MatchChar('T'),
|
||
MatchChar('1'),
|
||
MatchStr("false"),
|
||
MatchStr("FALSE"),
|
||
MatchStr("False"),
|
||
MatchChar('f'),
|
||
MatchChar('F'),
|
||
MatchChar('0'),
|
||
)
|
||
}
|
||
|
||
// MatchASCII creates a Handler function that matches against any
|
||
// ASCII value on the input.
|
||
func MatchASCII() Handler {
|
||
return MatchCharRange('\x00', '\x7F')
|
||
}
|
||
|
||
// MatchASCIILower creates a Handler function that matches against any
|
||
// lower case ASCII letter on the input (a - z).
|
||
func MatchASCIILower() Handler {
|
||
return MatchCharRange('a', 'z')
|
||
}
|
||
|
||
// MatchASCIIUpper creates a Handler function that matches against any
|
||
// upper case ASCII letter on the input (a - z).
|
||
func MatchASCIIUpper() Handler {
|
||
return MatchCharRange('A', 'Z')
|
||
}
|
||
|
||
// MatchUnicodeLetter creates a Handler function that matches against any
|
||
// unicode letter on the input (see unicode.IsLetter(rune)).
|
||
func MatchUnicodeLetter() Handler {
|
||
return MatchRuneByCallback(unicode.IsLetter)
|
||
}
|
||
|
||
// MatchUnicodeUpper creates a Handler function that matches against any
|
||
// upper case unicode letter on the input (see unicode.IsUpper(rune)).
|
||
func MatchUnicodeUpper() Handler {
|
||
return MatchRuneByCallback(unicode.IsUpper)
|
||
}
|
||
|
||
// MatchUnicodeLower creates a Handler function that matches against any
|
||
// lower case unicode letter on the input (see unicode.IsLower(rune)).
|
||
func MatchUnicodeLower() Handler {
|
||
return MatchRuneByCallback(unicode.IsLower)
|
||
}
|
||
|
||
// MatchHexDigit creates a Handler function that check if a single hexadecimal
|
||
// digit can be read from the input.
|
||
func MatchHexDigit() Handler {
|
||
return MatchCharRange('0', '9', 'a', 'f', 'A', 'F')
|
||
}
|
||
|
||
// MatchOctet creates a Handler function that checks if a valid octet value
|
||
// can be read from the input (octet = byte value representation, with a value
|
||
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
|
||
// digits, not if there's a non-digit after it, meaning that "123255" would be
|
||
// a valid sequence of two octets.
|
||
//
|
||
// When the normalize parameter is set to true, then leading zeroes will be
|
||
// stripped from the octet.
|
||
func MatchOctet(normalize bool) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
|
||
value := 0
|
||
start := 0
|
||
end := 0
|
||
for i, b := range chunk {
|
||
if b < '0' || b > '9' {
|
||
if i == 0 {
|
||
return false
|
||
}
|
||
break
|
||
}
|
||
if b == '0' && value == 0 {
|
||
start++
|
||
} else {
|
||
value = value*10 + int(b-'0')
|
||
}
|
||
end++
|
||
}
|
||
|
||
if value > 255 {
|
||
return false
|
||
}
|
||
|
||
if normalize {
|
||
if value == 0 {
|
||
start--
|
||
}
|
||
if start > 0 {
|
||
tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
|
||
}
|
||
tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
|
||
} else {
|
||
tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
|
||
}
|
||
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchIPv4 creates a Handler function that checks if a valid IPv4
|
||
// IP address value can be read from the input.
|
||
//
|
||
// When the normalize parameter is true, IP-addresses that look like
|
||
// "192.168.001.012" will be normalize to "192.168.1.12".
|
||
func MatchIPv4(normalize bool) Handler {
|
||
octet := MatchOctet(normalize)
|
||
dot := MatchChar('.')
|
||
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||
}
|
||
|
||
// MatchIPv4CIDRMask creates a Handler function that checks if a
|
||
// valid IPv4 CIDR mask (0 - 32) value can be read from the input.
|
||
func MatchIPv4CIDRMask(normalize bool) Handler {
|
||
return matchCIDRMask(32, normalize)
|
||
}
|
||
|
||
// MatchIPv4Netmask creates a Handler function that checks if a valid
|
||
// IPv4 netmask can be read from input (e.g. 255.255.255.0).
|
||
// Only a netmask in canonical form is accepted (meaning that in binary form
|
||
// it start with zero or more 1-bits, followed by only 0-bits up to the
|
||
// 32 bit length).
|
||
//
|
||
// When the normalize parameter is true, netmasks that look like
|
||
// "255.255.192.000" will be normalized to "255.255.192.0".
|
||
func MatchIPv4Netmask(normalize bool) Handler {
|
||
octet := MakeUint8Token(nil, MatchOctet(normalize))
|
||
dot := MatchChar('.')
|
||
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||
|
||
return func(tokenAPI *API) bool {
|
||
if !netmask(tokenAPI) {
|
||
return false
|
||
}
|
||
|
||
// Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes).
|
||
val := tokenAPI.Output.TokenValue
|
||
mask := net.IPv4Mask(val(0).(byte), val(1).(byte), val(2).(byte), val(3).(byte))
|
||
ones, bits := mask.Size()
|
||
if ones == 0 && bits == 0 {
|
||
return false
|
||
}
|
||
|
||
tokenAPI.Output.ClearTokens()
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchIPv4Net creates a Handler function that checks the input for an
|
||
// IPv4 + mask input. Both <ip>/<cidr> (e.g. 192.168.0.1/24) and <ip>/<netmask>
|
||
// (e.g. 172.16.10.254/255.255.192.0) are acceptable.
|
||
//
|
||
// When the normalize parameter is true, then the IP address and the mask are
|
||
// normalized. The mask will be normalized to cidr, so the above example would
|
||
// be normalized to 172.16.10.254/18.
|
||
func MatchIPv4Net(normalize bool) Handler {
|
||
ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
|
||
slash := MatchChar('/')
|
||
mask := MatchAny(
|
||
MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
|
||
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
|
||
ipnet := MatchSeq(ip, slash, mask)
|
||
|
||
return func(tokenAPI *API) bool {
|
||
if !ipnet(tokenAPI) {
|
||
return false
|
||
}
|
||
|
||
if !normalize {
|
||
return true
|
||
}
|
||
|
||
maskToken := tokenAPI.Output.Token(1)
|
||
val := tokenAPI.Output.TokenValue
|
||
if maskToken.Type == "cidr" {
|
||
tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), val(1).(uint8)))
|
||
} else {
|
||
o := strings.Split(val(1).(string), ".")
|
||
b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) }
|
||
mask := net.IPv4Mask(b(0), b(1), b(2), b(3))
|
||
bits, _ := mask.Size()
|
||
tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), bits))
|
||
}
|
||
|
||
tokenAPI.Output.ClearTokens()
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchIPv6 creates a Handler function that checks if an IPv6 address
|
||
// can be read from the input.
|
||
func MatchIPv6(normalize bool) Handler {
|
||
hextet := MatchMinMax(1, 4, MatchHexDigit())
|
||
colon := MatchChar(':')
|
||
empty := MatchSeq(colon, colon)
|
||
|
||
return func(tokenAPI *API) bool {
|
||
nrOfHextets := 0
|
||
for nrOfHextets < 8 {
|
||
if hextet(tokenAPI) {
|
||
nrOfHextets++
|
||
} else if empty(tokenAPI) {
|
||
nrOfHextets += 2
|
||
} else if !colon(tokenAPI) {
|
||
break
|
||
}
|
||
}
|
||
// No hextets or too many hextets (e.g. 1:1:1:1:1:1:1:: <-- since :: is 2 or more hextets).
|
||
if nrOfHextets == 0 || nrOfHextets > 8 {
|
||
return false
|
||
}
|
||
|
||
// Invalid IPv6, when net.ParseIP() cannot handle it.
|
||
input := tokenAPI.Output.String()
|
||
parsed := net.ParseIP(input)
|
||
if parsed == nil {
|
||
return false
|
||
}
|
||
|
||
if normalize {
|
||
tokenAPI.Output.SetString(parsed.String())
|
||
}
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchIPv6CIDRMask creates a Handler function that checks if a
|
||
// valid IPv6 CIDR mask (0 - 128) value can be read from the input.
|
||
func MatchIPv6CIDRMask(normalize bool) Handler {
|
||
return matchCIDRMask(128, normalize)
|
||
}
|
||
|
||
func matchCIDRMask(bits int64, normalize bool) Handler {
|
||
mask := MatchIntegerBetween(0, bits)
|
||
|
||
if !normalize {
|
||
return mask
|
||
}
|
||
|
||
return func(tokenAPI *API) bool {
|
||
if !mask(tokenAPI) {
|
||
return false
|
||
}
|
||
maskStr := tokenAPI.Output.String()
|
||
bits, _ := strconv.Atoi(maskStr)
|
||
tokenAPI.Output.SetString(fmt.Sprintf("%d", bits))
|
||
return true
|
||
}
|
||
}
|
||
|
||
// MatchIPv6Net creates a Handler function that checks the input for an
|
||
// IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
|
||
//
|
||
// When the normalize parameter is true, then the IP address and the mask are
|
||
// normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64.
|
||
func MatchIPv6Net(normalize bool) Handler {
|
||
ip := MatchIPv6(normalize)
|
||
slash := MatchChar('/')
|
||
mask := MatchIPv6CIDRMask(normalize)
|
||
return MatchSeq(ip, slash, mask)
|
||
}
|
||
|
||
// ModifyDrop creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is disposed completely.
|
||
//
|
||
// Note that if the Handler does not apply, a mismatch will be reported back,
|
||
// even though we would have dropped the output anyway. So if you would like
|
||
// to drop optional blanks (spaces and tabs), then use something like:
|
||
//
|
||
// M.Drop(C.Optional(A.Blanks))
|
||
//
|
||
// instead of:
|
||
//
|
||
// M.Drop(A.Blanks)
|
||
//
|
||
// Since A.Blanks is defined as "1 or more spaces and/or tabs", the input
|
||
// string "bork" would not match against the second form, but " bork" would.
|
||
// In both cases, it would match the first form.
|
||
func ModifyDrop(handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
tokenAPI.Output.Suspend()
|
||
ok := handler(tokenAPI)
|
||
tokenAPI.Output.Resume()
|
||
return ok
|
||
}
|
||
}
|
||
|
||
// ModifyTrim creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and characters from the provided
|
||
// cutset are trimmed from both the left and the right of the output.
|
||
func ModifyTrim(handler Handler, cutset string) Handler {
|
||
return modifyTrim(handler, cutset, true, true)
|
||
}
|
||
|
||
// ModifyTrimLeft creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and characters from the provided
|
||
// cutset are trimmed from the left of the output.
|
||
func ModifyTrimLeft(handler Handler, cutset string) Handler {
|
||
return modifyTrim(handler, cutset, true, false)
|
||
}
|
||
|
||
// ModifyTrimRight creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and characters from the provided
|
||
// cutset are trimmed from the right of the output.
|
||
func ModifyTrimRight(handler Handler, cutset string) Handler {
|
||
return modifyTrim(handler, cutset, false, true)
|
||
}
|
||
|
||
func modifyTrim(handler Handler, cutset string, trimLeft bool, trimRight bool) Handler {
|
||
modfunc := func(s string) string {
|
||
if trimLeft {
|
||
s = strings.TrimLeft(s, cutset)
|
||
}
|
||
if trimRight {
|
||
s = strings.TrimRight(s, cutset)
|
||
}
|
||
return s
|
||
}
|
||
return ModifyByCallback(handler, modfunc)
|
||
}
|
||
|
||
// ModifyTrimSpace creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and all leading and trailing whitespace characters,
|
||
// as defined by Unicode are removed from it.
|
||
func ModifyTrimSpace(handler Handler) Handler {
|
||
return ModifyByCallback(handler, strings.TrimSpace)
|
||
}
|
||
|
||
// ModifyToUpper creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and characters from the provided
|
||
// cutset are converted into upper case.
|
||
func ModifyToUpper(handler Handler) Handler {
|
||
return ModifyByCallback(handler, strings.ToUpper)
|
||
}
|
||
|
||
// ModifyToLower creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and characters from the provided
|
||
// cutset are converted into lower case.
|
||
func ModifyToLower(handler Handler) Handler {
|
||
return ModifyByCallback(handler, strings.ToLower)
|
||
}
|
||
|
||
// ModifyReplace creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is replaced by the provided string.
|
||
func ModifyReplace(handler Handler, replaceWith string) Handler {
|
||
return ModifyByCallback(handler, func(string) string {
|
||
return replaceWith
|
||
})
|
||
}
|
||
|
||
// ModifyByCallback creates a Handler that checks if the provided Handler applies.
|
||
// If it does, then its output is taken and it is fed to the provided modfunc.
|
||
// This is a simple function that takes a string on input and returns a possibly
|
||
// modified string on output. The return value of the modfunc will replace the
|
||
// resulting output.
|
||
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
snap := tokenAPI.MakeSnapshot()
|
||
split := tokenAPI.SplitOutput()
|
||
if handler(tokenAPI) {
|
||
origS := tokenAPI.Output.String()
|
||
s := modfunc(origS)
|
||
if s != origS {
|
||
tokenAPI.Output.SetString(s)
|
||
}
|
||
tokenAPI.MergeSplitOutput(split)
|
||
return true
|
||
}
|
||
tokenAPI.RestoreSnapshot(snap)
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MakeStrLiteralToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to a string-typed
|
||
// representation of the read Runes. This string is literal, meaning that an
|
||
// escape sequence like "\n" is kept as-is (a backslash character, followed by
|
||
// an 'n'-character).
|
||
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} {
|
||
literal := tokenAPI.Output.String()
|
||
return literal
|
||
})
|
||
}
|
||
|
||
// MakeStrInterpretedToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to a string-typed
|
||
// representation of the read Runes. This string is interpreted, meaning that an
|
||
// escape sequence like "\n" is translated to an actual newline control character
|
||
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} {
|
||
// TODO ERROR HANDLING
|
||
interpreted, _ := interpretString(tokenAPI.Output.String())
|
||
return interpreted
|
||
})
|
||
}
|
||
|
||
// interpretString translates the escape sequences in str (as understood by
// strconv.UnquoteChar for double-quoted strings) into the characters that
// they represent.
//
// On an invalid sequence, the part that was interpreted so far is returned,
// together with the error.
//
// TODO I think here I can win some speed by using the methods from, I think, the parse2 solution.
func interpretString(str string) (string, error) {
	var out strings.Builder
	rest := str
	for rest != "" {
		char, _, tail, err := strconv.UnquoteChar(rest, '"')
		if err != nil {
			return out.String(), err
		}
		out.WriteRune(char)
		rest = tail
	}
	return out.String(), nil
}
|
||
|
||
// MakeRuneToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to a Rune-representation
|
||
// of the read Rune.
|
||
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} {
|
||
// TODO ERROR HANDLING --- not a 1 rune input
|
||
return tokenAPI.Output.Rune(0)
|
||
})
|
||
}
|
||
|
||
// MakeByteToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to a Byte-representation
|
||
// of the read Rune.
|
||
func MakeByteToken(toktype interface{}, handler Handler) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} {
|
||
// TODO ERROR HANDLING --- not a 1 byte input
|
||
return byte(tokenAPI.Output.Rune(0))
|
||
})
|
||
}
|
||
|
||
// MakeIntToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int-representation
|
||
// of the read Rune.
|
||
func MakeIntToken(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("int", toktype, handler, func(s string) (interface{}, error) {
|
||
return strconv.Atoi(s)
|
||
})
|
||
}
|
||
|
||
// MakeInt8Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int8-representation
|
||
// of the read Rune.
|
||
// TODO allow other Go types for oct and hex too.
|
||
func MakeInt8Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("int8", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseInt(s, 10, 8)
|
||
if err == nil {
|
||
return int8(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeInt16Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int16-representation
|
||
// of the read Rune.
|
||
func MakeInt16Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("int16", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseInt(s, 10, 16)
|
||
if err == nil {
|
||
return int16(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeInt32Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int32-representation
|
||
// of the read Rune.
|
||
func MakeInt32Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("int32", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseInt(s, 10, 32)
|
||
if err == nil {
|
||
return int32(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeInt64BaseToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int64-representation
|
||
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
|
||
// 10 = decimal, 16 = hexadecimal).
|
||
func MakeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
|
||
return makeInt64BaseToken(toktype, base, handler)
|
||
}
|
||
|
||
func makeInt64BaseToken(toktype interface{}, base int, handler Handler) Handler {
|
||
return makeStrconvToken("int64", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseInt(s, base, 64)
|
||
if err == nil {
|
||
return int64(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeInt64Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an int64-representation
|
||
// of the read Rune.
|
||
func MakeInt64Token(toktype interface{}, handler Handler) Handler {
|
||
return MakeInt64BaseToken(toktype, 10, handler)
|
||
}
|
||
|
||
// MakeUintToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint-representation
|
||
// of the read Rune.
|
||
func MakeUintToken(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("uint", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseUint(s, 10, 0)
|
||
if err == nil {
|
||
return uint(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeUint8Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint8-representation
|
||
// of the read Rune.
|
||
// TODO allow other Go types for oct and hex too.
|
||
func MakeUint8Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("uint8", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseUint(s, 10, 8)
|
||
if err == nil {
|
||
return uint8(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeUint16Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint16-representation
|
||
// of the read Rune.
|
||
func MakeUint16Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("uint16", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseUint(s, 10, 16)
|
||
if err == nil {
|
||
return uint16(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeUint32Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint32-representation
|
||
// of the read Rune.
|
||
func MakeUint32Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("unit32", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseUint(s, 10, 32)
|
||
if err == nil {
|
||
return uint32(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeUint64BaseToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint64-representation
|
||
// of the read Rune, using the provided base (e.g. 2 = binary, 8 = octal,
|
||
// 10 = decimal, 16 = hexadecimal).
|
||
func MakeUint64BaseToken(toktype interface{}, base int, handler Handler) Handler {
|
||
return makeStrconvToken("uint64", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseUint(s, base, 64)
|
||
if err == nil {
|
||
return uint64(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeUint64Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an uint64-representation
|
||
// of the read Rune.
|
||
func MakeUint64Token(toktype interface{}, handler Handler) Handler {
|
||
return MakeUint64BaseToken(toktype, 10, handler)
|
||
}
|
||
|
||
// MakeFloat32Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an float32-representation
|
||
// of the read Rune.
|
||
func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("float32", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseFloat(s, 32)
|
||
if err == nil {
|
||
return float32(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeFloat64Token creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an float64-representation
|
||
// of the read Rune.
|
||
func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("float64", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseFloat(s, 64)
|
||
if err == nil {
|
||
return float64(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
// MakeBooleanToken creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is set to an bool-representation
|
||
// of the read Rune.
|
||
func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
|
||
return makeStrconvToken("boolean", toktype, handler,
|
||
func(s string) (interface{}, error) {
|
||
value, err := strconv.ParseBool(s)
|
||
if err == nil {
|
||
return bool(value), err
|
||
}
|
||
return value, err
|
||
})
|
||
}
|
||
|
||
func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} {
|
||
value, err := convert(tokenAPI.Output.String())
|
||
if err != nil {
|
||
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
|
||
panic(fmt.Sprintf("%s token invalid (%s)", name, err))
|
||
}
|
||
return value
|
||
})
|
||
}
|
||
|
||
// MakeTokenByValue creates a Handler that will add a static Token value
|
||
// to the Result.
|
||
func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler {
|
||
return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { return value })
|
||
}
|
||
|
||
// MakeTokenByCallback creates a Handler that will add a Token to the
|
||
// Result, for which the Token.Value is to be generated by the provided
|
||
// makeValue() callback function. The function gets the current API as
|
||
// its input and must return the token value.
|
||
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
if handler(tokenAPI) {
|
||
// When a parsing hierarchy looks like ("date" ("year", "month" "day")), the
|
||
// tokens must end up in the order "date", "year", "month", "day" and not
|
||
// "year", "month", "day", "date". Therefore (since the inner tokens have already
|
||
// been produced at this point) we have to insert this token before any tokens
|
||
// that were already created by the handler call.
|
||
token := Token{Type: toktype, Value: makeValue(tokenAPI)}
|
||
tokenAPI.Output.InsertTokenAtStart(token)
|
||
return true
|
||
}
|
||
|
||
return false
|
||
}
|
||
}
|
||
|
||
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
|
||
// take the tokens as produced by the handler and group them together in a single token.
|
||
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
|
||
return func(tokenAPI *API) bool {
|
||
if handler(tokenAPI) {
|
||
tokens := tokenAPI.Output.Tokens()
|
||
tokensCopy := make([]Token, len(tokens))
|
||
copy(tokensCopy, tokens)
|
||
tokenAPI.Output.SetTokens(Token{Type: toktype, Value: tokensCopy})
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
}
|