Switching from various Byte and Rune handlers to single Char handlers.
The Char handlers determine on their own if they should handle things in byte or rune mode.
This commit is contained in:
parent
e0b1039abd
commit
8ef9aed096
|
@ -83,12 +83,13 @@ func ExampleAPI_Accept_inIfStatement() {
|
|||
|
||||
func ExampleAPI_Accept_inSwitchStatement() {
|
||||
var result string
|
||||
a := tokenize.A
|
||||
parser := parse.New(func(p *parse.API) {
|
||||
for loop := true; loop; {
|
||||
switch {
|
||||
case p.Accept(tokenize.A.Rune('X')):
|
||||
case p.Accept(a.Char('X')):
|
||||
// NOOP, skip this rune
|
||||
case p.Accept(tokenize.A.AnyRune):
|
||||
case p.Accept(a.AnyRune):
|
||||
result += p.Result.String()
|
||||
default:
|
||||
loop = false
|
||||
|
|
|
@ -204,7 +204,7 @@ func ExampleAPI_modifyingResults() {
|
|||
// a := tokenize.A
|
||||
// for _, r := range []rune{'a', 'b', 'c'} {
|
||||
// child := t.Fork() // fork, so we won't change parent t
|
||||
// if a.Rune(r)(t) {
|
||||
// if a.Char(r)(t) {
|
||||
// t.Merge(child) // accept results into parent of child
|
||||
// t.Dispose(child) // return to the parent level
|
||||
// return true // and report a successful match
|
||||
|
@ -220,7 +220,7 @@ func ExampleAPI_modifyingResults() {
|
|||
// // You can make use of the parser/combinator tooling to make the
|
||||
// // implementation a lot simpler and to take care of forking at
|
||||
// // the appropriate places. The handler from above can be replaced with:
|
||||
// simpler := tokenize.A.RuneRange('a', 'c')
|
||||
// simpler := tokenize.A.CharRange('a', 'c')
|
||||
|
||||
// result, err := tokenize.New(abcHandler)("another test")
|
||||
// fmt.Println(result, err)
|
||||
|
|
|
@ -10,12 +10,12 @@ import (
|
|||
func TestSyntacticSugar(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
|
||||
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
|
||||
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
|
||||
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
|
||||
{"cccccc", a.Rune('c').Optional(), true, "c"},
|
||||
{"dddddd", a.Rune('c').Optional(), true, ""},
|
||||
{"aaaaaa", a.Char('a').Times(4), true, "aaaa"},
|
||||
{"ababab", a.Char('a').Or(a.Char('b')).Times(4), true, "abab"},
|
||||
{"ababab", a.Char('a').Then(a.Char('b')), true, "ab"},
|
||||
{"bababa", a.Char('a').Then(a.Char('b')), false, ""},
|
||||
{"cccccc", a.Char('c').Optional(), true, "c"},
|
||||
{"dddddd", a.Char('c').Optional(), true, ""},
|
||||
{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
|
||||
{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
|
||||
{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
|
||||
|
@ -26,7 +26,7 @@ func TestSyntacticSugar(t *testing.T) {
|
|||
|
||||
func ExampleHandler_Times() {
|
||||
c, a := tokenize.C, tokenize.A
|
||||
phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))
|
||||
phoneNumber := c.Seq(a.Char('0'), a.Digit.Times(9))
|
||||
|
||||
fmt.Println(phoneNumber.Match("0201234567"))
|
||||
// Output:
|
||||
|
@ -35,7 +35,7 @@ func ExampleHandler_Times() {
|
|||
|
||||
func ExampleHandler_Then() {
|
||||
c, a := tokenize.C, tokenize.A
|
||||
phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))
|
||||
phoneNumber := a.Char('0').Then(c.Repeated(9, a.Digit))
|
||||
|
||||
fmt.Println(phoneNumber.Match("0208888888"))
|
||||
// Output:
|
||||
|
@ -78,9 +78,9 @@ func ExampleHandler_Optional() {
|
|||
c, a := tokenize.C, tokenize.A
|
||||
|
||||
spanish := c.Seq(
|
||||
a.Rune('¿').Optional(),
|
||||
a.Char('¿').Optional(),
|
||||
c.OneOrMore(a.AnyRune.Except(a.Question)),
|
||||
a.Rune('?').Optional())
|
||||
a.Char('?').Optional())
|
||||
|
||||
fmt.Println(spanish.Match("¿Habla español María?"))
|
||||
fmt.Println(spanish.Match("Sí, María habla español."))
|
||||
|
|
|
@ -67,23 +67,19 @@ var C = struct {
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
Byte func(byte) Handler
|
||||
Bytes func(...byte) Handler
|
||||
ByteRange func(byte, byte) Handler
|
||||
Char func(...rune) Handler
|
||||
CharRange func(...rune) Handler
|
||||
ByteByCallback func(func(byte) bool) Handler
|
||||
Rune func(rune) Handler
|
||||
Runes func(...rune) Handler
|
||||
RuneRange func(rune, rune) Handler
|
||||
RuneByCallback func(func(rune) bool) Handler
|
||||
AnyByte Handler
|
||||
AnyRune Handler
|
||||
ValidRune Handler
|
||||
InvalidRune Handler
|
||||
Str func(string) Handler
|
||||
StrNoCase func(string) Handler
|
||||
EndOfLine Handler
|
||||
EndOfFile Handler
|
||||
UntilEndOfLine Handler
|
||||
AnyByte Handler
|
||||
AnyRune Handler
|
||||
ValidRune Handler
|
||||
InvalidRune Handler
|
||||
Space Handler
|
||||
Tab Handler
|
||||
CR Handler
|
||||
|
@ -159,68 +155,64 @@ var A = struct {
|
|||
IPv6CIDRMask Handler
|
||||
IPv6Net Handler
|
||||
}{
|
||||
Byte: MatchByte,
|
||||
Bytes: MatchBytes,
|
||||
ByteRange: MatchByteRange,
|
||||
Char: MatchChar,
|
||||
CharRange: MatchCharRange,
|
||||
ByteByCallback: MatchByteByCallback,
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
RuneByCallback: MatchRuneByCallback,
|
||||
AnyByte: MatchAnyByte(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
ValidRune: MatchValidRune(),
|
||||
InvalidRune: MatchInvalidRune(),
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
EndOfLine: MatchEndOfLine(),
|
||||
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||
AnyByte: MatchAnyByte(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
ValidRune: MatchValidRune(),
|
||||
InvalidRune: MatchInvalidRune(),
|
||||
Space: MatchByte(' '),
|
||||
Tab: MatchByte('\t'),
|
||||
CR: MatchByte('\r'),
|
||||
LF: MatchByte('\n'),
|
||||
Space: MatchChar(' '),
|
||||
Tab: MatchChar('\t'),
|
||||
CR: MatchChar('\r'),
|
||||
LF: MatchChar('\n'),
|
||||
CRLF: MatchStr("\r\n"),
|
||||
Excl: MatchByte('!'),
|
||||
DoubleQuote: MatchByte('"'),
|
||||
Hash: MatchByte('#'),
|
||||
Dollar: MatchByte('$'),
|
||||
Percent: MatchByte('%'),
|
||||
Amp: MatchByte('&'),
|
||||
SingleQuote: MatchByte('\''),
|
||||
RoundOpen: MatchByte('('),
|
||||
LeftParen: MatchByte('('),
|
||||
RoundClose: MatchByte(')'),
|
||||
RightParen: MatchByte(')'),
|
||||
Asterisk: MatchByte('*'),
|
||||
Multiply: MatchByte('*'),
|
||||
Plus: MatchByte('+'),
|
||||
Add: MatchByte('+'),
|
||||
Comma: MatchByte(','),
|
||||
Minus: MatchByte('-'),
|
||||
Subtract: MatchByte('-'),
|
||||
Dot: MatchByte('.'),
|
||||
Slash: MatchByte('/'),
|
||||
Divide: MatchByte('/'),
|
||||
Colon: MatchByte(':'),
|
||||
Semicolon: MatchByte(';'),
|
||||
AngleOpen: MatchByte('<'),
|
||||
LessThan: MatchByte('<'),
|
||||
Equal: MatchByte('='),
|
||||
AngleClose: MatchByte('>'),
|
||||
GreaterThan: MatchByte('>'),
|
||||
Question: MatchByte('?'),
|
||||
At: MatchByte('@'),
|
||||
SquareOpen: MatchByte('['),
|
||||
Backslash: MatchByte('\\'),
|
||||
SquareClose: MatchByte(']'),
|
||||
Caret: MatchByte('^'),
|
||||
Underscore: MatchByte('_'),
|
||||
Backquote: MatchByte('`'),
|
||||
CurlyOpen: MatchByte('{'),
|
||||
Pipe: MatchByte('|'),
|
||||
CurlyClose: MatchByte('}'),
|
||||
Tilde: MatchByte('~'),
|
||||
Excl: MatchChar('!'),
|
||||
DoubleQuote: MatchChar('"'),
|
||||
Hash: MatchChar('#'),
|
||||
Dollar: MatchChar('$'),
|
||||
Percent: MatchChar('%'),
|
||||
Amp: MatchChar('&'),
|
||||
SingleQuote: MatchChar('\''),
|
||||
RoundOpen: MatchChar('('),
|
||||
LeftParen: MatchChar('('),
|
||||
RoundClose: MatchChar(')'),
|
||||
RightParen: MatchChar(')'),
|
||||
Asterisk: MatchChar('*'),
|
||||
Multiply: MatchChar('*'),
|
||||
Plus: MatchChar('+'),
|
||||
Add: MatchChar('+'),
|
||||
Comma: MatchChar(','),
|
||||
Minus: MatchChar('-'),
|
||||
Subtract: MatchChar('-'),
|
||||
Dot: MatchChar('.'),
|
||||
Slash: MatchChar('/'),
|
||||
Divide: MatchChar('/'),
|
||||
Colon: MatchChar(':'),
|
||||
Semicolon: MatchChar(';'),
|
||||
AngleOpen: MatchChar('<'),
|
||||
LessThan: MatchChar('<'),
|
||||
Equal: MatchChar('='),
|
||||
AngleClose: MatchChar('>'),
|
||||
GreaterThan: MatchChar('>'),
|
||||
Question: MatchChar('?'),
|
||||
At: MatchChar('@'),
|
||||
SquareOpen: MatchChar('['),
|
||||
Backslash: MatchChar('\\'),
|
||||
SquareClose: MatchChar(']'),
|
||||
Caret: MatchChar('^'),
|
||||
Underscore: MatchChar('_'),
|
||||
Backquote: MatchChar('`'),
|
||||
CurlyOpen: MatchChar('{'),
|
||||
Pipe: MatchChar('|'),
|
||||
CurlyClose: MatchChar('}'),
|
||||
Tilde: MatchChar('~'),
|
||||
Newline: MatchNewline(),
|
||||
Blank: MatchBlank(),
|
||||
Blanks: MatchBlanks(),
|
||||
|
@ -229,7 +221,7 @@ var A = struct {
|
|||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
Zero: MatchByte('0'),
|
||||
Zero: MatchChar('0'),
|
||||
Signed: MatchSigned,
|
||||
Integer: MatchInteger(true),
|
||||
IntegerBetween: MatchIntegerBetween,
|
||||
|
@ -345,23 +337,31 @@ var T = struct {
|
|||
Group: MakeTokenGroup,
|
||||
}
|
||||
|
||||
// MatchByte creates a Handler function that matches against the provided byte.
|
||||
func MatchByte(expected byte) Handler {
|
||||
func MatchChar(expected ...rune) Handler {
|
||||
if len(expected) == 0 {
|
||||
callerPanic("MatchChar", "Handler: {name} definition error at {caller}: at least one character must be provided")
|
||||
}
|
||||
if len(expected) == 1 {
|
||||
return matchAgainstSingleChar(expected[0])
|
||||
}
|
||||
return matchAgainstMultipleChars(expected)
|
||||
}
|
||||
|
||||
func matchAgainstSingleChar(expected rune) Handler {
|
||||
// Handle an ASCII character.
|
||||
if expected <= '\x7F' {
|
||||
expectedByte := byte(expected)
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && b == expected {
|
||||
if err == nil && b == expectedByte {
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRune creates a Handler function that matches against the provided rune.
|
||||
func MatchRune(expected rune) Handler {
|
||||
if expected <= '\x7F' {
|
||||
return MatchByte(byte(expected))
|
||||
}
|
||||
|
||||
// Handle an UTF8 character.
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err == nil && r == expected {
|
||||
|
@ -372,27 +372,8 @@ func MatchRune(expected rune) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// MatchBytes creates a Handler function that checks if the input matches
|
||||
// one of the provided bytes. The first match counts.
|
||||
func MatchBytes(expected ...byte) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range expected {
|
||||
if b == e {
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRunes creates a Handler function that checks if the input matches
|
||||
// one of the provided runes. The first match counts.
|
||||
func MatchRunes(expected ...rune) Handler {
|
||||
func matchAgainstMultipleChars(expected []rune) Handler {
|
||||
// Check if all characters are ASCII characters.
|
||||
onlyBytes := true
|
||||
expectedBytes := make([]byte, len(expected))
|
||||
for i, r := range expected {
|
||||
|
@ -402,9 +383,25 @@ func MatchRunes(expected ...rune) Handler {
|
|||
}
|
||||
expectedBytes[i] = byte(r)
|
||||
}
|
||||
|
||||
// Handle ASCII characters.
|
||||
if onlyBytes {
|
||||
return MatchBytes(expectedBytes...)
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, e := range expectedBytes {
|
||||
if b == e {
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Handle UTF8 characters.
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
if err != nil {
|
||||
|
@ -420,17 +417,35 @@ func MatchRunes(expected ...rune) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// MatchByteRange creates a Handler function that checks if the input
|
||||
// matches the provided byte range. The byte range is defined by a start and
|
||||
// an end byte, inclusive, so:
|
||||
//
|
||||
// MatchByteRange('5', '9')
|
||||
//
|
||||
// creates a Handler that will match any of '5', '6', '7', '8' or '9'.
|
||||
func MatchByteRange(start byte, end byte) Handler {
|
||||
if end < start {
|
||||
callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
func MatchCharRange(expected ...rune) Handler {
|
||||
if len(expected) == 0 {
|
||||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: at least one character range pair must be provided")
|
||||
}
|
||||
if len(expected)%2 != 0 {
|
||||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: an even number of character range pairs must be provided")
|
||||
}
|
||||
starts := make([]rune, len(expected))
|
||||
ends := make([]rune, len(expected))
|
||||
for i := 0; i < len(expected); i += 2 {
|
||||
start := expected[i]
|
||||
end := expected[i+1]
|
||||
if start > end {
|
||||
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: start %q must be <= end %q", start, end)
|
||||
}
|
||||
starts[i/2] = start
|
||||
ends[i/2] = end
|
||||
}
|
||||
|
||||
if len(expected) == 1 {
|
||||
return matchAgainstSingleCharRange(starts[0], ends[0])
|
||||
}
|
||||
return matchAgainstMultipleCharRanges(starts, ends)
|
||||
}
|
||||
|
||||
func matchAgainstSingleCharRange(start rune, end rune) Handler {
|
||||
if end <= '\x7F' {
|
||||
start := byte(start)
|
||||
end := byte(end)
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && b >= start && b <= end {
|
||||
|
@ -439,21 +454,6 @@ func MatchByteRange(start byte, end byte) Handler {
|
|||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRuneRange creates a Handler function that checks if the input
|
||||
// matches the provided rune range. The rune range is defined by a start and
|
||||
// an end rune, inclusive, so:
|
||||
//
|
||||
// MatchRuneRange('g', 'k')
|
||||
//
|
||||
// creates a Handler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||
func MatchRuneRange(start rune, end rune) Handler {
|
||||
if end < start {
|
||||
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
}
|
||||
if end <= '\x7F' {
|
||||
return MatchByteRange(byte(start), byte(end))
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
|
@ -465,6 +465,45 @@ func MatchRuneRange(start rune, end rune) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler {
|
||||
// Check if all characters are ASCII characters.
|
||||
onlyBytes := true
|
||||
expectedStarts := make([]byte, len(starts))
|
||||
expectedEnds := make([]byte, len(ends))
|
||||
for i, start := range starts {
|
||||
end := ends[i]
|
||||
if end > '\x7F' {
|
||||
onlyBytes = false
|
||||
break
|
||||
}
|
||||
expectedStarts[i] = byte(start)
|
||||
expectedEnds[i] = byte(end)
|
||||
}
|
||||
|
||||
if onlyBytes {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
for i := range expectedStarts {
|
||||
if err == nil && b >= expectedStarts[i] && b <= expectedEnds[i] {
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
return func(tokenAPI *API) bool {
|
||||
r, _, err := tokenAPI.Input.Rune.Peek(0)
|
||||
for i := range starts {
|
||||
if err == nil && r >= starts[i] && r <= ends[i] {
|
||||
tokenAPI.Input.Rune.Accept(r)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchNewline creates a handler that matches a newline, which is either
|
||||
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
|
||||
func MatchNewline() Handler {
|
||||
|
@ -758,14 +797,14 @@ func MatchNot(handler Handler) Handler {
|
|||
//
|
||||
// Note that the input can contain more than the provided number of matches, e.g.:
|
||||
//
|
||||
// MatchRep(4, MatchRune('X'))
|
||||
// MatchRep(4, MatchChar('X'))
|
||||
//
|
||||
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
||||
// In that last case, there will be a remainder "XX" on the input.
|
||||
//
|
||||
// Another way to use this method, is by applying the following syntactic sugar:
|
||||
//
|
||||
// MatchRune('X').Times(4)
|
||||
// MatchChar('X').Times(4)
|
||||
func MatchRep(times int, handler Handler) Handler {
|
||||
return matchMinMax(times, times, handler, "MatchRep")
|
||||
}
|
||||
|
@ -1082,7 +1121,7 @@ func MatchInvalidRune() Handler {
|
|||
// MatchDigit creates a Handler that checks if a single digit can be read
|
||||
// from the input.
|
||||
func MatchDigit() Handler {
|
||||
return MatchByteRange('0', '9')
|
||||
return MatchCharRange('0', '9')
|
||||
}
|
||||
|
||||
// MatchDigits creates a Handler that checks if one or more digits can be read
|
||||
|
@ -1110,7 +1149,7 @@ func MatchDigits() Handler {
|
|||
// MatchDigitNotZero creates a Handler that checks if a single digit not equal
|
||||
// to zero '0' can be read from the input.
|
||||
func MatchDigitNotZero() Handler {
|
||||
return MatchByteRange('1', '9')
|
||||
return MatchCharRange('1', '9')
|
||||
}
|
||||
|
||||
// MatchInteger creates a Handler function that checks if a valid integer
|
||||
|
@ -1247,34 +1286,34 @@ func MatchBoolean() Handler {
|
|||
MatchStr("true"),
|
||||
MatchStr("TRUE"),
|
||||
MatchStr("True"),
|
||||
MatchByte('t'),
|
||||
MatchByte('T'),
|
||||
MatchByte('1'),
|
||||
MatchChar('t'),
|
||||
MatchChar('T'),
|
||||
MatchChar('1'),
|
||||
MatchStr("false"),
|
||||
MatchStr("FALSE"),
|
||||
MatchStr("False"),
|
||||
MatchByte('f'),
|
||||
MatchByte('F'),
|
||||
MatchByte('0'),
|
||||
MatchChar('f'),
|
||||
MatchChar('F'),
|
||||
MatchChar('0'),
|
||||
)
|
||||
}
|
||||
|
||||
// MatchASCII creates a Handler function that matches against any
|
||||
// ASCII value on the input.
|
||||
func MatchASCII() Handler {
|
||||
return MatchByteRange('\x00', '\x7F')
|
||||
return MatchCharRange('\x00', '\x7F')
|
||||
}
|
||||
|
||||
// MatchASCIILower creates a Handler function that matches against any
|
||||
// lower case ASCII letter on the input (a - z).
|
||||
func MatchASCIILower() Handler {
|
||||
return MatchByteRange('a', 'z')
|
||||
return MatchCharRange('a', 'z')
|
||||
}
|
||||
|
||||
// MatchASCIIUpper creates a Handler function that matches against any
|
||||
// upper case ASCII letter on the input (A - Z).
|
||||
func MatchASCIIUpper() Handler {
|
||||
return MatchByteRange('A', 'Z')
|
||||
return MatchCharRange('A', 'Z')
|
||||
}
|
||||
|
||||
// MatchUnicodeLetter creates a Handler function that matches against any
|
||||
|
@ -1298,14 +1337,7 @@ func MatchUnicodeLower() Handler {
|
|||
// MatchHexDigit creates a Handler function that checks if a single hexadecimal
|
||||
// digit can be read from the input.
|
||||
func MatchHexDigit() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return MatchCharRange('0', '9', 'a', 'f', 'A', 'F')
|
||||
}
|
||||
|
||||
// MatchOctet creates a Handler function that checks if a valid octet value
|
||||
|
@ -1373,7 +1405,7 @@ func MatchOctet(normalize bool) Handler {
|
|||
// "192.168.001.012" will be normalized to "192.168.1.12".
|
||||
func MatchIPv4(normalize bool) Handler {
|
||||
octet := MatchOctet(normalize)
|
||||
dot := MatchRune('.')
|
||||
dot := MatchChar('.')
|
||||
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||||
}
|
||||
|
||||
|
@ -1393,7 +1425,7 @@ func MatchIPv4CIDRMask(normalize bool) Handler {
|
|||
// "255.255.192.000" will be normalized to "255.255.192.0".
|
||||
func MatchIPv4Netmask(normalize bool) Handler {
|
||||
octet := MakeUint8Token(nil, MatchOctet(normalize))
|
||||
dot := MatchRune('.')
|
||||
dot := MatchChar('.')
|
||||
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||||
|
||||
return func(tokenAPI *API) bool {
|
||||
|
@ -1423,7 +1455,7 @@ func MatchIPv4Netmask(normalize bool) Handler {
|
|||
// be normalized to 172.16.10.254/18.
|
||||
func MatchIPv4Net(normalize bool) Handler {
|
||||
ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
|
||||
slash := MatchRune('/')
|
||||
slash := MatchChar('/')
|
||||
mask := MatchAny(
|
||||
MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
|
||||
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
|
||||
|
@ -1459,7 +1491,7 @@ func MatchIPv4Net(normalize bool) Handler {
|
|||
// can be read from the input.
|
||||
func MatchIPv6(normalize bool) Handler {
|
||||
hextet := MatchMinMax(1, 4, MatchHexDigit())
|
||||
colon := MatchRune(':')
|
||||
colon := MatchChar(':')
|
||||
empty := MatchSeq(colon, colon)
|
||||
|
||||
return func(tokenAPI *API) bool {
|
||||
|
@ -1523,7 +1555,7 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
|
|||
// normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64.
|
||||
func MatchIPv6Net(normalize bool) Handler {
|
||||
ip := MatchIPv6(normalize)
|
||||
slash := MatchRune('/')
|
||||
slash := MatchChar('/')
|
||||
mask := MatchIPv6CIDRMask(normalize)
|
||||
return MatchSeq(ip, slash, mask)
|
||||
}
|
||||
|
|
|
@ -10,82 +10,82 @@ import (
|
|||
func TestCombinators(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"", c.Not(a.Rune('b')), false, ""},
|
||||
{"abc not", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd not", c.Not(a.Rune('b')), false, ""},
|
||||
{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
|
||||
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
|
||||
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
|
||||
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
|
||||
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
|
||||
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
|
||||
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, a.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, a.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
|
||||
{"", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
|
||||
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
|
||||
{"", c.Not(a.Char('b')), false, ""},
|
||||
{"abc not", c.Not(a.Char('b')), true, "a"},
|
||||
{"bcd not", c.Not(a.Char('b')), false, ""},
|
||||
{"aaaxxxb", c.OneOrMore(c.Not(a.Char('b'))), true, "aaaxxx"},
|
||||
{"1010 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), true, "1"},
|
||||
{"2020 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), false, ""},
|
||||
{"abc any", c.Any(a.Char('a'), a.Char('b')), true, "a"},
|
||||
{"bcd any", c.Any(a.Char('a'), a.Char('b')), true, "b"},
|
||||
{"cde any", c.Any(a.Char('a'), a.Char('b')), false, ""},
|
||||
{"ababc repeated", c.Repeated(4, a.Char('a', 'b')), true, "abab"},
|
||||
{"ababc repeated", c.Repeated(5, a.Char('a', 'b')), false, ""},
|
||||
{"", c.Min(0, a.Char('a')), true, ""},
|
||||
{"a", c.Min(0, a.Char('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, a.Char('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, a.Char('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, a.Char('a')), false, ""},
|
||||
{"", c.Max(4, a.Char('b')), true, ""},
|
||||
{"X", c.Max(4, a.Char('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, a.Char('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, a.Char('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, a.Char('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, a.Char('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, a.Char('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, a.Char('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, a.Char('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, a.Char('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, a.Char('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, a.Char('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, a.Char('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, a.Char('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, a.Char('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, a.Char('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, a.Char('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(a.Char('d')), false, ""},
|
||||
{"X", c.OneOrMore(a.Char('d')), false, ""},
|
||||
{"dX", c.OneOrMore(a.Char('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(a.Char('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(a.Char('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(a.Char('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(a.Char('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(a.Char('e')), true, "eeeee"},
|
||||
{"HI!", c.Seq(a.Char('H'), a.Char('I'), a.Char('!')), true, "HI!"},
|
||||
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
|
||||
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
|
||||
{"101010123", c.OneOrMore(c.Seq(a.Char('1'), a.Char('0'))), true, "101010"},
|
||||
{"", c.Optional(c.OneOrMore(a.Char('f'))), true, ""},
|
||||
{"ghijkl", c.Optional(a.Char('h')), true, ""},
|
||||
{"ghijkl", c.Optional(a.Char('g')), true, "g"},
|
||||
{"fffffX", c.Optional(c.OneOrMore(a.Char('f'))), true, "fffff"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Char('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
|
||||
{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
|
||||
{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
|
||||
{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
|
||||
{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
|
||||
{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
|
||||
{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
|
||||
{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
|
||||
{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
|
||||
{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
|
||||
{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
|
||||
{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
|
||||
{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
|
||||
{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
|
||||
{"ab", c.FollowedBy(a.Char('b'), a.Char('a')), true, "a"},
|
||||
{"ba", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""},
|
||||
{"aa", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""},
|
||||
{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), true, "aaabbbccc"},
|
||||
{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), false, ""},
|
||||
{"xy", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"},
|
||||
{"yx", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""},
|
||||
{"xx", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"},
|
||||
{"xa", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""},
|
||||
{"xxxyyyzzzaaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), false, ""},
|
||||
{"xxxyyyzzzbaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), true, "xxxyyyzzz"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
AssertPanics(t, []PanicT{
|
||||
{func() { a.RuneRange('z', 'a') }, true,
|
||||
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
|
||||
{func() { a.CharRange('z', 'a') }, true,
|
||||
`Handler: MatchCharRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must be <= end 'a'`},
|
||||
{func() { c.MinMax(-1, 1, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.MinMax(1, -1, a.Space) }, true,
|
||||
|
@ -102,25 +102,27 @@ func TestCombinatorPanics(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
var a, c = tokenize.A, tokenize.C
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
{"dd", a.CharRange('b', 'e'), true, "d"},
|
||||
{"ee", a.CharRange('b', 'e'), true, "e"},
|
||||
{"ff", a.CharRange('b', 'e'), false, ""},
|
||||
{"ff", a.CharRange('b', 'c', 'f', 'g'), true, "f"},
|
||||
{"abc123_-,other", c.OneOrMore(a.CharRange('a', 'z', '0', '9', '_', '_', '-', '-')), true, "abc123_-"},
|
||||
{"Hello, world 1!", a.Str("Hello"), true, "Hello"},
|
||||
{"Hello, world 2!", a.StrNoCase("hElLo"), true, "Hello"},
|
||||
{"H♥llÖ, wÖrld 3!", a.Str("H♥llÖ"), true, "H♥llÖ"},
|
||||
{"H♥llÖ, world 4!", a.StrNoCase("h♥llö"), true, "H♥llÖ"},
|
||||
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"xxx", a.Rune('x'), true, "x"},
|
||||
{"x ", a.Rune(' '), false, ""},
|
||||
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||
{"+X", a.Char('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", a.Char('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", a.Char('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", a.Char('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", a.Char('+', '-', '*', '/'), false, ""},
|
||||
{"xxx", a.Char('x'), true, "x"},
|
||||
{"x ", a.Char(' '), false, ""},
|
||||
{"aa", a.CharRange('b', 'e'), false, ""},
|
||||
{"bb", a.CharRange('b', 'e'), true, "b"},
|
||||
{"cc", a.CharRange('b', 'e'), true, "c"},
|
||||
{"", a.EndOfFile, true, ""},
|
||||
{"😂", a.AnyRune, true, "😂"},
|
||||
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
|
||||
|
@ -350,8 +352,8 @@ func TestIPv6Atoms(t *testing.T) {
|
|||
func TestModifiers(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"missed me!", m.Drop(a.Rune('w')), false, ""},
|
||||
{"where are you?", m.Drop(a.Rune('w')), true, ""},
|
||||
{"missed me!", m.Drop(a.Char('w')), false, ""},
|
||||
{"where are you?", m.Drop(a.Char('w')), true, ""},
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{"cool", a.Str("cool"), true, "cool"},
|
||||
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
|
||||
|
|
Loading…
Reference in New Issue