Switching from various Byte and Rune handlers to single Char handlers.

The Char handlers determine on their own if they should handle things
in byte or rune mode.
This commit is contained in:
Maurice Makaay 2019-07-29 09:45:25 +00:00
parent e0b1039abd
commit 8ef9aed096
5 changed files with 283 additions and 248 deletions

View File

@ -83,12 +83,13 @@ func ExampleAPI_Accept_inIfStatement() {
func ExampleAPI_Accept_inSwitchStatement() { func ExampleAPI_Accept_inSwitchStatement() {
var result string var result string
a := tokenize.A
parser := parse.New(func(p *parse.API) { parser := parse.New(func(p *parse.API) {
for loop := true; loop; { for loop := true; loop; {
switch { switch {
case p.Accept(tokenize.A.Rune('X')): case p.Accept(a.Char('X')):
// NOOP, skip this rune // NOOP, skip this rune
case p.Accept(tokenize.A.AnyRune): case p.Accept(a.AnyRune):
result += p.Result.String() result += p.Result.String()
default: default:
loop = false loop = false

View File

@ -204,7 +204,7 @@ func ExampleAPI_modifyingResults() {
// a := tokenize.A // a := tokenize.A
// for _, r := range []rune{'a', 'b', 'c'} { // for _, r := range []rune{'a', 'b', 'c'} {
// child := t.Fork() // fork, so we won't change parent t // child := t.Fork() // fork, so we won't change parent t
// if a.Rune(r)(t) { // if a.Char(r)(t) {
// t.Merge(child) // accept results into parent of child // t.Merge(child) // accept results into parent of child
// t.Dispose(child) // return to the parent level // t.Dispose(child) // return to the parent level
// return true // and report a successful match // return true // and report a successful match
@ -220,7 +220,7 @@ func ExampleAPI_modifyingResults() {
// // You can make use of the parser/combinator tooling to make the // // You can make use of the parser/combinator tooling to make the
// // implementation a lot simpler and to take care of forking at // // implementation a lot simpler and to take care of forking at
// // the appropriate places. The handler from above can be replaced with: // // the appropriate places. The handler from above can be replaced with:
// simpler := tokenize.A.RuneRange('a', 'c') // simpler := tokenize.A.CharRange('a', 'c')
// result, err := tokenize.New(abcHandler)("another test") // result, err := tokenize.New(abcHandler)("another test")
// fmt.Println(result, err) // fmt.Println(result, err)
@ -368,7 +368,7 @@ func TestClearData(t *testing.T) {
tokenAPI.Input.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a'
tokenAPI.Input.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start. tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start.
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p'
tokenAPI.Input.Rune.Accept(r) // Add to runes tokenAPI.Input.Rune.Accept(r) // Add to runes
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r' r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r'

View File

@ -10,12 +10,12 @@ import (
func TestSyntacticSugar(t *testing.T) { func TestSyntacticSugar(t *testing.T) {
var a = tokenize.A var a = tokenize.A
AssertHandlers(t, []HandlerT{ AssertHandlers(t, []HandlerT{
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"}, {"aaaaaa", a.Char('a').Times(4), true, "aaaa"},
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"}, {"ababab", a.Char('a').Or(a.Char('b')).Times(4), true, "abab"},
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"}, {"ababab", a.Char('a').Then(a.Char('b')), true, "ab"},
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""}, {"bababa", a.Char('a').Then(a.Char('b')), false, ""},
{"cccccc", a.Rune('c').Optional(), true, "c"}, {"cccccc", a.Char('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""}, {"dddddd", a.Char('c').Optional(), true, ""},
{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"}, {"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"}, {"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"}, {"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
@ -26,7 +26,7 @@ func TestSyntacticSugar(t *testing.T) {
func ExampleHandler_Times() { func ExampleHandler_Times() {
c, a := tokenize.C, tokenize.A c, a := tokenize.C, tokenize.A
phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9)) phoneNumber := c.Seq(a.Char('0'), a.Digit.Times(9))
fmt.Println(phoneNumber.Match("0201234567")) fmt.Println(phoneNumber.Match("0201234567"))
// Output: // Output:
@ -35,7 +35,7 @@ func ExampleHandler_Times() {
func ExampleHandler_Then() { func ExampleHandler_Then() {
c, a := tokenize.C, tokenize.A c, a := tokenize.C, tokenize.A
phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit)) phoneNumber := a.Char('0').Then(c.Repeated(9, a.Digit))
fmt.Println(phoneNumber.Match("0208888888")) fmt.Println(phoneNumber.Match("0208888888"))
// Output: // Output:
@ -78,9 +78,9 @@ func ExampleHandler_Optional() {
c, a := tokenize.C, tokenize.A c, a := tokenize.C, tokenize.A
spanish := c.Seq( spanish := c.Seq(
a.Rune('¿').Optional(), a.Char('¿').Optional(),
c.OneOrMore(a.AnyRune.Except(a.Question)), c.OneOrMore(a.AnyRune.Except(a.Question)),
a.Rune('?').Optional()) a.Char('?').Optional())
fmt.Println(spanish.Match("¿Habla español María?")) fmt.Println(spanish.Match("¿Habla español María?"))
fmt.Println(spanish.Match("Sí, María habla español.")) fmt.Println(spanish.Match("Sí, María habla español."))

View File

@ -67,23 +67,19 @@ var C = struct {
// //
// Doing so saves you a lot of typing, and it makes your code a lot cleaner. // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct { var A = struct {
Byte func(byte) Handler Char func(...rune) Handler
Bytes func(...byte) Handler CharRange func(...rune) Handler
ByteRange func(byte, byte) Handler
ByteByCallback func(func(byte) bool) Handler ByteByCallback func(func(byte) bool) Handler
Rune func(rune) Handler
Runes func(...rune) Handler
RuneRange func(rune, rune) Handler
RuneByCallback func(func(rune) bool) Handler RuneByCallback func(func(rune) bool) Handler
AnyByte Handler
AnyRune Handler
ValidRune Handler
InvalidRune Handler
Str func(string) Handler Str func(string) Handler
StrNoCase func(string) Handler StrNoCase func(string) Handler
EndOfLine Handler EndOfLine Handler
EndOfFile Handler EndOfFile Handler
UntilEndOfLine Handler UntilEndOfLine Handler
AnyByte Handler
AnyRune Handler
ValidRune Handler
InvalidRune Handler
Space Handler Space Handler
Tab Handler Tab Handler
CR Handler CR Handler
@ -159,68 +155,64 @@ var A = struct {
IPv6CIDRMask Handler IPv6CIDRMask Handler
IPv6Net Handler IPv6Net Handler
}{ }{
Byte: MatchByte, Char: MatchChar,
Bytes: MatchBytes, CharRange: MatchCharRange,
ByteRange: MatchByteRange,
ByteByCallback: MatchByteByCallback, ByteByCallback: MatchByteByCallback,
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
RuneByCallback: MatchRuneByCallback, RuneByCallback: MatchRuneByCallback,
AnyByte: MatchAnyByte(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
InvalidRune: MatchInvalidRune(),
Str: MatchStr, Str: MatchStr,
StrNoCase: MatchStrNoCase, StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(), EndOfFile: MatchEndOfFile(),
EndOfLine: MatchEndOfLine(), EndOfLine: MatchEndOfLine(),
UntilEndOfLine: MatchUntilEndOfLine(), UntilEndOfLine: MatchUntilEndOfLine(),
AnyByte: MatchAnyByte(), Space: MatchChar(' '),
AnyRune: MatchAnyRune(), Tab: MatchChar('\t'),
ValidRune: MatchValidRune(), CR: MatchChar('\r'),
InvalidRune: MatchInvalidRune(), LF: MatchChar('\n'),
Space: MatchByte(' '),
Tab: MatchByte('\t'),
CR: MatchByte('\r'),
LF: MatchByte('\n'),
CRLF: MatchStr("\r\n"), CRLF: MatchStr("\r\n"),
Excl: MatchByte('!'), Excl: MatchChar('!'),
DoubleQuote: MatchByte('"'), DoubleQuote: MatchChar('"'),
Hash: MatchByte('#'), Hash: MatchChar('#'),
Dollar: MatchByte('$'), Dollar: MatchChar('$'),
Percent: MatchByte('%'), Percent: MatchChar('%'),
Amp: MatchByte('&'), Amp: MatchChar('&'),
SingleQuote: MatchByte('\''), SingleQuote: MatchChar('\''),
RoundOpen: MatchByte('('), RoundOpen: MatchChar('('),
LeftParen: MatchByte('('), LeftParen: MatchChar('('),
RoundClose: MatchByte(')'), RoundClose: MatchChar(')'),
RightParen: MatchByte(')'), RightParen: MatchChar(')'),
Asterisk: MatchByte('*'), Asterisk: MatchChar('*'),
Multiply: MatchByte('*'), Multiply: MatchChar('*'),
Plus: MatchByte('+'), Plus: MatchChar('+'),
Add: MatchByte('+'), Add: MatchChar('+'),
Comma: MatchByte(','), Comma: MatchChar(','),
Minus: MatchByte('-'), Minus: MatchChar('-'),
Subtract: MatchByte('-'), Subtract: MatchChar('-'),
Dot: MatchByte('.'), Dot: MatchChar('.'),
Slash: MatchByte('/'), Slash: MatchChar('/'),
Divide: MatchByte('/'), Divide: MatchChar('/'),
Colon: MatchByte(':'), Colon: MatchChar(':'),
Semicolon: MatchByte(';'), Semicolon: MatchChar(';'),
AngleOpen: MatchByte('<'), AngleOpen: MatchChar('<'),
LessThan: MatchByte('<'), LessThan: MatchChar('<'),
Equal: MatchByte('='), Equal: MatchChar('='),
AngleClose: MatchByte('>'), AngleClose: MatchChar('>'),
GreaterThan: MatchByte('>'), GreaterThan: MatchChar('>'),
Question: MatchByte('?'), Question: MatchChar('?'),
At: MatchByte('@'), At: MatchChar('@'),
SquareOpen: MatchByte('['), SquareOpen: MatchChar('['),
Backslash: MatchByte('\\'), Backslash: MatchChar('\\'),
SquareClose: MatchByte(']'), SquareClose: MatchChar(']'),
Caret: MatchByte('^'), Caret: MatchChar('^'),
Underscore: MatchByte('_'), Underscore: MatchChar('_'),
Backquote: MatchByte('`'), Backquote: MatchChar('`'),
CurlyOpen: MatchByte('{'), CurlyOpen: MatchChar('{'),
Pipe: MatchByte('|'), Pipe: MatchChar('|'),
CurlyClose: MatchByte('}'), CurlyClose: MatchChar('}'),
Tilde: MatchByte('~'), Tilde: MatchChar('~'),
Newline: MatchNewline(), Newline: MatchNewline(),
Blank: MatchBlank(), Blank: MatchBlank(),
Blanks: MatchBlanks(), Blanks: MatchBlanks(),
@ -229,7 +221,7 @@ var A = struct {
Digit: MatchDigit(), Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(), DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(), Digits: MatchDigits(),
Zero: MatchByte('0'), Zero: MatchChar('0'),
Signed: MatchSigned, Signed: MatchSigned,
Integer: MatchInteger(true), Integer: MatchInteger(true),
IntegerBetween: MatchIntegerBetween, IntegerBetween: MatchIntegerBetween,
@ -345,23 +337,31 @@ var T = struct {
Group: MakeTokenGroup, Group: MakeTokenGroup,
} }
// MatchByte creates a Handler function that matches against the provided byte. func MatchChar(expected ...rune) Handler {
func MatchByte(expected byte) Handler { if len(expected) == 0 {
return func(tokenAPI *API) bool { callerPanic("MatchChar", "Handler: {name} definition error at {caller}: at least one character must be provided")
b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && b == expected {
tokenAPI.Input.Byte.Accept(b)
return true
}
return false
} }
if len(expected) == 1 {
return matchAgainstSingleChar(expected[0])
}
return matchAgainstMultipleChars(expected)
} }
// MatchRune creates a Handler function that matches against the provided rune. func matchAgainstSingleChar(expected rune) Handler {
func MatchRune(expected rune) Handler { // Handle an ASCII character.
if expected <= '\x7F' { if expected <= '\x7F' {
return MatchByte(byte(expected)) expectedByte := byte(expected)
return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && b == expectedByte {
tokenAPI.Input.Byte.Accept(b)
return true
}
return false
}
} }
// Handle an UTF8 character.
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Input.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err == nil && r == expected { if err == nil && r == expected {
@ -372,27 +372,8 @@ func MatchRune(expected rune) Handler {
} }
} }
// MatchBytes creates a Handler function that checks if the input matches func matchAgainstMultipleChars(expected []rune) Handler {
// one of the provided bytes. The first match counts. // Check if all characters are ASCII characters.
func MatchBytes(expected ...byte) Handler {
return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil {
return false
}
for _, e := range expected {
if b == e {
tokenAPI.Input.Byte.Accept(b)
return true
}
}
return false
}
}
// MatchRunes creates a Handler function that checks if the input matches
// one of the provided runes. The first match counts.
func MatchRunes(expected ...rune) Handler {
onlyBytes := true onlyBytes := true
expectedBytes := make([]byte, len(expected)) expectedBytes := make([]byte, len(expected))
for i, r := range expected { for i, r := range expected {
@ -402,9 +383,25 @@ func MatchRunes(expected ...rune) Handler {
} }
expectedBytes[i] = byte(r) expectedBytes[i] = byte(r)
} }
// Handle ASCII characters.
if onlyBytes { if onlyBytes {
return MatchBytes(expectedBytes...) return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil {
return false
}
for _, e := range expectedBytes {
if b == e {
tokenAPI.Input.Byte.Accept(b)
return true
}
}
return false
}
} }
// Handle UTF8 characters.
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Input.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
if err != nil { if err != nil {
@ -420,40 +417,43 @@ func MatchRunes(expected ...rune) Handler {
} }
} }
// MatchByteRange creates a Handler function that checks if the input func MatchCharRange(expected ...rune) Handler {
// matches the provided byte range. The byte range is defined by a start and if len(expected) == 0 {
// an end byte, inclusive, so: callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: at least one character range pair must be provided")
//
// MatchByteRange('5', '9')
//
// creates a Handler that will match any of '5', '6', '7', '8' or '9'.
func MatchByteRange(start byte, end byte) Handler {
if end < start {
callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
} }
return func(tokenAPI *API) bool { if len(expected)%2 != 0 {
b, err := tokenAPI.Input.Byte.Peek(0) callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: an even number of character range pairs must be provided")
if err == nil && b >= start && b <= end { }
tokenAPI.Input.Byte.Accept(b) starts := make([]rune, len(expected))
return true ends := make([]rune, len(expected))
for i := 0; i < len(expected); i += 2 {
start := expected[i]
end := expected[i+1]
if start > end {
callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: start %q must be <= end %q", start, end)
} }
return false starts[i/2] = start
ends[i/2] = end
} }
if len(expected) == 1 {
return matchAgainstSingleCharRange(starts[0], ends[0])
}
return matchAgainstMultipleCharRanges(starts, ends)
} }
// MatchRuneRange creates a Handler function that checks if the input func matchAgainstSingleCharRange(start rune, end rune) Handler {
// matches the provided rune range. The rune range is defined by a start and
// an end rune, inclusive, so:
//
// MatchRuneRange('g', 'k')
//
// creates a Handler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRuneRange(start rune, end rune) Handler {
if end < start {
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
}
if end <= '\x7F' { if end <= '\x7F' {
return MatchByteRange(byte(start), byte(end)) start := byte(start)
end := byte(end)
return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && b >= start && b <= end {
tokenAPI.Input.Byte.Accept(b)
return true
}
return false
}
} }
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
r, _, err := tokenAPI.Input.Rune.Peek(0) r, _, err := tokenAPI.Input.Rune.Peek(0)
@ -465,6 +465,45 @@ func MatchRuneRange(start rune, end rune) Handler {
} }
} }
// matchAgainstMultipleCharRanges creates a Handler that accepts one character
// from the input when it lies within at least one of the provided ranges.
// The range at index i runs from starts[i] up to and including ends[i].
//
// When the end of every range is an ASCII character, the input is inspected
// in byte mode. Otherwise, it is inspected in rune mode.
func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler {
	// Build byte versions of the ranges, for as long as only ASCII range
	// ends are encountered.
	byteStarts := make([]byte, len(starts))
	byteEnds := make([]byte, len(ends))
	asciiOnly := true
	for i := range starts {
		if ends[i] > '\x7F' {
			asciiOnly = false
			break
		}
		byteStarts[i] = byte(starts[i])
		byteEnds[i] = byte(ends[i])
	}

	// Handle UTF8 characters.
	if !asciiOnly {
		return func(tokenAPI *API) bool {
			r, _, err := tokenAPI.Input.Rune.Peek(0)
			if err != nil {
				return false
			}
			for i := range starts {
				if r < starts[i] || r > ends[i] {
					continue
				}
				tokenAPI.Input.Rune.Accept(r)
				return true
			}
			return false
		}
	}

	// Handle ASCII characters.
	return func(tokenAPI *API) bool {
		b, err := tokenAPI.Input.Byte.Peek(0)
		if err != nil {
			return false
		}
		for i := range byteStarts {
			if b < byteStarts[i] || b > byteEnds[i] {
				continue
			}
			tokenAPI.Input.Byte.Accept(b)
			return true
		}
		return false
	}
}
// MatchNewline creates a handler that matches a newline, which is either // MatchNewline creates a handler that matches a newline, which is either
// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n).
func MatchNewline() Handler { func MatchNewline() Handler {
@ -758,14 +797,14 @@ func MatchNot(handler Handler) Handler {
// //
// Note that the input can contain more than the provided number of matches, e.g.: // Note that the input can contain more than the provided number of matches, e.g.:
// //
// MatchRep(4, MatchRune('X')) // MatchRep(4, MatchChar('X'))
// //
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX". // will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input. // In that last case, there will be a remainder "XX" on the input.
// //
// Another way to use this method, is by applying the following syntactic sugar: // Another way to use this method, is by applying the following syntactic sugar:
// //
// MatchRune('X').Times(4) // MatchChar('X').Times(4)
func MatchRep(times int, handler Handler) Handler { func MatchRep(times int, handler Handler) Handler {
return matchMinMax(times, times, handler, "MatchRep") return matchMinMax(times, times, handler, "MatchRep")
} }
@ -1082,7 +1121,7 @@ func MatchInvalidRune() Handler {
// MatchDigit creates a Handler that checks if a single digit can be read // MatchDigit creates a Handler that checks if a single digit can be read
// from the input. // from the input.
func MatchDigit() Handler { func MatchDigit() Handler {
return MatchByteRange('0', '9') return MatchCharRange('0', '9')
} }
// MatchDigits creates a Handler that checks if one or more digits can be read // MatchDigits creates a Handler that checks if one or more digits can be read
@ -1110,7 +1149,7 @@ func MatchDigits() Handler {
// MatchDigitNotZero creates a Handler that checks if a single digit not equal // MatchDigitNotZero creates a Handler that checks if a single digit not equal
// to zero '0' can be read from the input. // to zero '0' can be read from the input.
func MatchDigitNotZero() Handler { func MatchDigitNotZero() Handler {
return MatchByteRange('1', '9') return MatchCharRange('1', '9')
} }
// MatchInteger creates a Handler function that checks if a valid integer // MatchInteger creates a Handler function that checks if a valid integer
@ -1247,34 +1286,34 @@ func MatchBoolean() Handler {
MatchStr("true"), MatchStr("true"),
MatchStr("TRUE"), MatchStr("TRUE"),
MatchStr("True"), MatchStr("True"),
MatchByte('t'), MatchChar('t'),
MatchByte('T'), MatchChar('T'),
MatchByte('1'), MatchChar('1'),
MatchStr("false"), MatchStr("false"),
MatchStr("FALSE"), MatchStr("FALSE"),
MatchStr("False"), MatchStr("False"),
MatchByte('f'), MatchChar('f'),
MatchByte('F'), MatchChar('F'),
MatchByte('0'), MatchChar('0'),
) )
} }
// MatchASCII creates a Handler function that matches against any // MatchASCII creates a Handler function that matches against any
// ASCII value on the input. // ASCII value on the input.
func MatchASCII() Handler { func MatchASCII() Handler {
return MatchByteRange('\x00', '\x7F') return MatchCharRange('\x00', '\x7F')
} }
// MatchASCIILower creates a Handler function that matches against any // MatchASCIILower creates a Handler function that matches against any
// lower case ASCII letter on the input (a - z). // lower case ASCII letter on the input (a - z).
func MatchASCIILower() Handler { func MatchASCIILower() Handler {
return MatchByteRange('a', 'z') return MatchCharRange('a', 'z')
} }
// MatchASCIIUpper creates a Handler function that matches against any // MatchASCIIUpper creates a Handler function that matches against any
// upper case ASCII letter on the input (a - z). // upper case ASCII letter on the input (a - z).
func MatchASCIIUpper() Handler { func MatchASCIIUpper() Handler {
return MatchByteRange('A', 'Z') return MatchCharRange('A', 'Z')
} }
// MatchUnicodeLetter creates a Handler function that matches against any // MatchUnicodeLetter creates a Handler function that matches against any
@ -1298,14 +1337,7 @@ func MatchUnicodeLower() Handler {
// MatchHexDigit creates a Handler function that check if a single hexadecimal // MatchHexDigit creates a Handler function that check if a single hexadecimal
// digit can be read from the input. // digit can be read from the input.
func MatchHexDigit() Handler { func MatchHexDigit() Handler {
return func(tokenAPI *API) bool { return MatchCharRange('0', '9', 'a', 'f', 'A', 'F')
b, err := tokenAPI.Input.Byte.Peek(0)
if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) {
tokenAPI.Input.Byte.Accept(b)
return true
}
return false
}
} }
// MatchOctet creates a Handler function that checks if a valid octet value // MatchOctet creates a Handler function that checks if a valid octet value
@ -1373,7 +1405,7 @@ func MatchOctet(normalize bool) Handler {
// "192.168.001.012" will be normalize to "192.168.1.12". // "192.168.001.012" will be normalize to "192.168.1.12".
func MatchIPv4(normalize bool) Handler { func MatchIPv4(normalize bool) Handler {
octet := MatchOctet(normalize) octet := MatchOctet(normalize)
dot := MatchRune('.') dot := MatchChar('.')
return MatchSeq(octet, dot, octet, dot, octet, dot, octet) return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
} }
@ -1393,7 +1425,7 @@ func MatchIPv4CIDRMask(normalize bool) Handler {
// "255.255.192.000" will be normalized to "255.255.192.0". // "255.255.192.000" will be normalized to "255.255.192.0".
func MatchIPv4Netmask(normalize bool) Handler { func MatchIPv4Netmask(normalize bool) Handler {
octet := MakeUint8Token(nil, MatchOctet(normalize)) octet := MakeUint8Token(nil, MatchOctet(normalize))
dot := MatchRune('.') dot := MatchChar('.')
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
@ -1423,7 +1455,7 @@ func MatchIPv4Netmask(normalize bool) Handler {
// be normalized to 172.16.10.254/18. // be normalized to 172.16.10.254/18.
func MatchIPv4Net(normalize bool) Handler { func MatchIPv4Net(normalize bool) Handler {
ip := MakeStrLiteralToken("ip", MatchIPv4(normalize)) ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
slash := MatchRune('/') slash := MatchChar('/')
mask := MatchAny( mask := MatchAny(
MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)), MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)),
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize))) MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
@ -1459,7 +1491,7 @@ func MatchIPv4Net(normalize bool) Handler {
// can be read from the input. // can be read from the input.
func MatchIPv6(normalize bool) Handler { func MatchIPv6(normalize bool) Handler {
hextet := MatchMinMax(1, 4, MatchHexDigit()) hextet := MatchMinMax(1, 4, MatchHexDigit())
colon := MatchRune(':') colon := MatchChar(':')
empty := MatchSeq(colon, colon) empty := MatchSeq(colon, colon)
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
@ -1523,7 +1555,7 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
// normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64. // normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64.
func MatchIPv6Net(normalize bool) Handler { func MatchIPv6Net(normalize bool) Handler {
ip := MatchIPv6(normalize) ip := MatchIPv6(normalize)
slash := MatchRune('/') slash := MatchChar('/')
mask := MatchIPv6CIDRMask(normalize) mask := MatchIPv6CIDRMask(normalize)
return MatchSeq(ip, slash, mask) return MatchSeq(ip, slash, mask)
} }

View File

@ -10,82 +10,82 @@ import (
func TestCombinators(t *testing.T) { func TestCombinators(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{ AssertHandlers(t, []HandlerT{
{"", c.Not(a.Rune('b')), false, ""}, {"", c.Not(a.Char('b')), false, ""},
{"abc not", c.Not(a.Rune('b')), true, "a"}, {"abc not", c.Not(a.Char('b')), true, "a"},
{"bcd not", c.Not(a.Rune('b')), false, ""}, {"bcd not", c.Not(a.Char('b')), false, ""},
{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"}, {"aaaxxxb", c.OneOrMore(c.Not(a.Char('b'))), true, "aaaxxx"},
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"}, {"1010 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), true, "1"},
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""}, {"2020 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), false, ""},
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"}, {"abc any", c.Any(a.Char('a'), a.Char('b')), true, "a"},
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"}, {"bcd any", c.Any(a.Char('a'), a.Char('b')), true, "b"},
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""}, {"cde any", c.Any(a.Char('a'), a.Char('b')), false, ""},
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"}, {"ababc repeated", c.Repeated(4, a.Char('a', 'b')), true, "abab"},
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""}, {"ababc repeated", c.Repeated(5, a.Char('a', 'b')), false, ""},
{"", c.Min(0, a.Rune('a')), true, ""}, {"", c.Min(0, a.Char('a')), true, ""},
{"a", c.Min(0, a.Rune('a')), true, "a"}, {"a", c.Min(0, a.Char('a')), true, "a"},
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"}, {"aaaaa", c.Min(4, a.Char('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"}, {"aaaaa", c.Min(5, a.Char('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, a.Rune('a')), false, ""}, {"aaaaa", c.Min(6, a.Char('a')), false, ""},
{"", c.Max(4, a.Rune('b')), true, ""}, {"", c.Max(4, a.Char('b')), true, ""},
{"X", c.Max(4, a.Rune('b')), true, ""}, {"X", c.Max(4, a.Char('b')), true, ""},
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"}, {"bbbbbX", c.Max(4, a.Char('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"}, {"bbbbbX", c.Max(5, a.Char('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"}, {"bbbbbX", c.Max(6, a.Char('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, a.Rune('c')), true, ""}, {"", c.MinMax(0, 0, a.Char('c')), true, ""},
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""}, {"X", c.MinMax(0, 0, a.Char('c')), true, ""},
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"}, {"cccc", c.MinMax(0, 5, a.Char('c')), true, "cccc"},
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, {"ccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, {"cccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""}, {"cccccX", c.MinMax(0, 0, a.Char('c')), true, ""},
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"}, {"cccccX", c.MinMax(0, 1, a.Char('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, {"cccccX", c.MinMax(0, 5, a.Char('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"}, {"cccccX", c.MinMax(0, 6, a.Char('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"}, {"cccccX", c.MinMax(1, 1, a.Char('c')), true, "c"},
{"", c.MinMax(1, 1, a.Rune('c')), false, ""}, {"", c.MinMax(1, 1, a.Char('c')), false, ""},
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""}, {"X", c.MinMax(1, 1, a.Char('c')), false, ""},
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"}, {"cccccX", c.MinMax(1, 3, a.Char('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"}, {"cccccX", c.MinMax(1, 6, a.Char('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"}, {"cccccX", c.MinMax(3, 4, a.Char('c')), true, "cccc"},
{"", c.OneOrMore(a.Rune('d')), false, ""}, {"", c.OneOrMore(a.Char('d')), false, ""},
{"X", c.OneOrMore(a.Rune('d')), false, ""}, {"X", c.OneOrMore(a.Char('d')), false, ""},
{"dX", c.OneOrMore(a.Rune('d')), true, "d"}, {"dX", c.OneOrMore(a.Char('d')), true, "d"},
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"}, {"dddddX", c.OneOrMore(a.Char('d')), true, "ddddd"},
{"", c.ZeroOrMore(a.Rune('e')), true, ""}, {"", c.ZeroOrMore(a.Char('e')), true, ""},
{"X", c.ZeroOrMore(a.Rune('e')), true, ""}, {"X", c.ZeroOrMore(a.Char('e')), true, ""},
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"}, {"eX", c.ZeroOrMore(a.Char('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"}, {"eeeeeX", c.ZeroOrMore(a.Char('e')), true, "eeeee"},
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"}, {"HI!", c.Seq(a.Char('H'), a.Char('I'), a.Char('!')), true, "HI!"},
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"}, {"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"}, {"101010123", c.OneOrMore(c.Seq(a.Char('1'), a.Char('0'))), true, "101010"},
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""}, {"", c.Optional(c.OneOrMore(a.Char('f'))), true, ""},
{"ghijkl", c.Optional(a.Rune('h')), true, ""}, {"ghijkl", c.Optional(a.Char('h')), true, ""},
{"ghijkl", c.Optional(a.Rune('g')), true, "g"}, {"ghijkl", c.Optional(a.Char('g')), true, "g"},
{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"}, {"fffffX", c.Optional(c.OneOrMore(a.Char('f'))), true, "fffff"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"}, {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Char('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""}, {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"}, {" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"}, {"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"}, {" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"}, {"ab", c.FollowedBy(a.Char('b'), a.Char('a')), true, "a"},
{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""}, {"ba", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""},
{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""}, {"aa", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""},
{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"}, {"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), true, "aaabbbccc"},
{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""}, {"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), false, ""},
{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"}, {"xy", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"},
{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""}, {"yx", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""},
{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"}, {"xx", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"},
{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""}, {"xa", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""},
{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""}, {"xxxyyyzzzaaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), false, ""},
{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"}, {"xxxyyyzzzbaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), true, "xxxyyyzzz"},
}) })
} }
func TestCombinatorPanics(t *testing.T) { func TestCombinatorPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A var c, a = tokenize.C, tokenize.A
AssertPanics(t, []PanicT{ AssertPanics(t, []PanicT{
{func() { a.RuneRange('z', 'a') }, true, {func() { a.CharRange('z', 'a') }, true,
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, `Handler: MatchCharRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must be <= end 'a'`},
{func() { c.MinMax(-1, 1, a.Space) }, true, {func() { c.MinMax(-1, 1, a.Space) }, true,
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.MinMax(1, -1, a.Space) }, true, {func() { c.MinMax(1, -1, a.Space) }, true,
@ -102,25 +102,27 @@ func TestCombinatorPanics(t *testing.T) {
} }
func TestAtoms(t *testing.T) { func TestAtoms(t *testing.T) {
var a = tokenize.A var a, c = tokenize.A, tokenize.C
AssertHandlers(t, []HandlerT{ AssertHandlers(t, []HandlerT{
{"dd", a.RuneRange('b', 'e'), true, "d"}, {"dd", a.CharRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"}, {"ee", a.CharRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""}, {"ff", a.CharRange('b', 'e'), false, ""},
{"ff", a.CharRange('b', 'c', 'f', 'g'), true, "f"},
{"abc123_-,other", c.OneOrMore(a.CharRange('a', 'z', '0', '9', '_', '_', '-', '-')), true, "abc123_-"},
{"Hello, world 1!", a.Str("Hello"), true, "Hello"}, {"Hello, world 1!", a.Str("Hello"), true, "Hello"},
{"Hello, world 2!", a.StrNoCase("hElLo"), true, "Hello"}, {"Hello, world 2!", a.StrNoCase("hElLo"), true, "Hello"},
{"H♥llÖ, wÖrld 3!", a.Str("H♥llÖ"), true, "H♥llÖ"}, {"H♥llÖ, wÖrld 3!", a.Str("H♥llÖ"), true, "H♥llÖ"},
{"H♥llÖ, world 4!", a.StrNoCase("h♥llö"), true, "H♥llÖ"}, {"H♥llÖ, world 4!", a.StrNoCase("h♥llö"), true, "H♥llÖ"},
{"+X", a.Runes('+', '-', '*', '/'), true, "+"}, {"+X", a.Char('+', '-', '*', '/'), true, "+"},
{"-X", a.Runes('+', '-', '*', '/'), true, "-"}, {"-X", a.Char('+', '-', '*', '/'), true, "-"},
{"*X", a.Runes('+', '-', '*', '/'), true, "*"}, {"*X", a.Char('+', '-', '*', '/'), true, "*"},
{"/X", a.Runes('+', '-', '*', '/'), true, "/"}, {"/X", a.Char('+', '-', '*', '/'), true, "/"},
{"!X", a.Runes('+', '-', '*', '/'), false, ""}, {"!X", a.Char('+', '-', '*', '/'), false, ""},
{"xxx", a.Rune('x'), true, "x"}, {"xxx", a.Char('x'), true, "x"},
{"x ", a.Rune(' '), false, ""}, {"x ", a.Char(' '), false, ""},
{"aa", a.RuneRange('b', 'e'), false, ""}, {"aa", a.CharRange('b', 'e'), false, ""},
{"bb", a.RuneRange('b', 'e'), true, "b"}, {"bb", a.CharRange('b', 'e'), true, "b"},
{"cc", a.RuneRange('b', 'e'), true, "c"}, {"cc", a.CharRange('b', 'e'), true, "c"},
{"", a.EndOfFile, true, ""}, {"", a.EndOfFile, true, ""},
{"😂", a.AnyRune, true, "😂"}, {"😂", a.AnyRune, true, "😂"},
{"\xbc with AnyRune", a.AnyRune, true, "<22>"}, {"\xbc with AnyRune", a.AnyRune, true, "<22>"},
@ -350,8 +352,8 @@ func TestIPv6Atoms(t *testing.T) {
func TestModifiers(t *testing.T) { func TestModifiers(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{ AssertHandlers(t, []HandlerT{
{"missed me!", m.Drop(a.Rune('w')), false, ""}, {"missed me!", m.Drop(a.Char('w')), false, ""},
{"where are you?", m.Drop(a.Rune('w')), true, ""}, {"where are you?", m.Drop(a.Char('w')), true, ""},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{"cool", a.Str("cool"), true, "cool"}, {"cool", a.Str("cool"), true, "cool"},
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"}, {"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},