diff --git a/parse/parse_test.go b/parse/parse_test.go index 8809361..cce8cbc 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -83,12 +83,13 @@ func ExampleAPI_Accept_inIfStatement() { func ExampleAPI_Accept_inSwitchStatement() { var result string + a := tokenize.A parser := parse.New(func(p *parse.API) { for loop := true; loop; { switch { - case p.Accept(tokenize.A.Rune('X')): + case p.Accept(a.Char('X')): // NOOP, skip this rune - case p.Accept(tokenize.A.AnyRune): + case p.Accept(a.AnyRune): result += p.Result.String() default: loop = false diff --git a/tokenize/api_test.go b/tokenize/api_test.go index a105819..34728ed 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -204,7 +204,7 @@ func ExampleAPI_modifyingResults() { // a := tokenize.A // for _, r := range []rune{'a', 'b', 'c'} { // child := t.Fork() // fork, so we won't change parent t -// if a.Rune(r)(t) { +// if a.Char(r)(t) { // t.Merge(child) // accept results into parent of child // t.Dispose(child) // return to the parent level // return true // and report a successful match @@ -220,7 +220,7 @@ func ExampleAPI_modifyingResults() { // // You can make use of the parser/combinator tooling to make the // // implementation a lot simpler and to take care of forking at // // the appropriate places. The handler from above can be replaced with: -// simpler := tokenize.A.RuneRange('a', 'c') +// simpler := tokenize.A.CharRange('a', 'c') // result, err := tokenize.New(abcHandler)("another test") // fmt.Println(result, err) @@ -368,7 +368,7 @@ func TestClearData(t *testing.T) { tokenAPI.Input.Rune.Accept(r) // Add to runes r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a' tokenAPI.Input.Rune.Accept(r) // Add to runes - tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start. + tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start. 
r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p' tokenAPI.Input.Rune.Accept(r) // Add to runes r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r' diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go index 786c8f3..a69db07 100644 --- a/tokenize/handler_test.go +++ b/tokenize/handler_test.go @@ -10,12 +10,12 @@ import ( func TestSyntacticSugar(t *testing.T) { var a = tokenize.A AssertHandlers(t, []HandlerT{ - {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"}, - {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"}, - {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"}, - {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""}, - {"cccccc", a.Rune('c').Optional(), true, "c"}, - {"dddddd", a.Rune('c').Optional(), true, ""}, + {"aaaaaa", a.Char('a').Times(4), true, "aaaa"}, + {"ababab", a.Char('a').Or(a.Char('b')).Times(4), true, "abab"}, + {"ababab", a.Char('a').Then(a.Char('b')), true, "ab"}, + {"bababa", a.Char('a').Then(a.Char('b')), false, ""}, + {"cccccc", a.Char('c').Optional(), true, "c"}, + {"dddddd", a.Char('c').Optional(), true, ""}, {"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"}, {"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"}, {"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"}, @@ -26,7 +26,7 @@ func TestSyntacticSugar(t *testing.T) { func ExampleHandler_Times() { c, a := tokenize.C, tokenize.A - phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9)) + phoneNumber := c.Seq(a.Char('0'), a.Digit.Times(9)) fmt.Println(phoneNumber.Match("0201234567")) // Output: @@ -35,7 +35,7 @@ func ExampleHandler_Times() { func ExampleHandler_Then() { c, a := tokenize.C, tokenize.A - phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit)) + phoneNumber := a.Char('0').Then(c.Repeated(9, a.Digit)) fmt.Println(phoneNumber.Match("0208888888")) // Output: @@ -78,9 +78,9 @@ func ExampleHandler_Optional() { c, a := tokenize.C, tokenize.A spanish := c.Seq( - a.Rune('¿').Optional(), + 
a.Char('¿').Optional(), c.OneOrMore(a.AnyRune.Except(a.Question)), - a.Rune('?').Optional()) + a.Char('?').Optional()) fmt.Println(spanish.Match("¿Habla español María?")) fmt.Println(spanish.Match("Sí, María habla español.")) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 93b8618..8d8ff6a 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -67,23 +67,19 @@ var C = struct { // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var A = struct { - Byte func(byte) Handler - Bytes func(...byte) Handler - ByteRange func(byte, byte) Handler + Char func(...rune) Handler + CharRange func(...rune) Handler ByteByCallback func(func(byte) bool) Handler - Rune func(rune) Handler - Runes func(...rune) Handler - RuneRange func(rune, rune) Handler RuneByCallback func(func(rune) bool) Handler + AnyByte Handler + AnyRune Handler + ValidRune Handler + InvalidRune Handler Str func(string) Handler StrNoCase func(string) Handler EndOfLine Handler EndOfFile Handler UntilEndOfLine Handler - AnyByte Handler - AnyRune Handler - ValidRune Handler - InvalidRune Handler Space Handler Tab Handler CR Handler @@ -159,68 +155,64 @@ var A = struct { IPv6CIDRMask Handler IPv6Net Handler }{ - Byte: MatchByte, - Bytes: MatchBytes, - ByteRange: MatchByteRange, + Char: MatchChar, + CharRange: MatchCharRange, ByteByCallback: MatchByteByCallback, - Rune: MatchRune, - Runes: MatchRunes, - RuneRange: MatchRuneRange, RuneByCallback: MatchRuneByCallback, + AnyByte: MatchAnyByte(), + AnyRune: MatchAnyRune(), + ValidRune: MatchValidRune(), + InvalidRune: MatchInvalidRune(), Str: MatchStr, StrNoCase: MatchStrNoCase, EndOfFile: MatchEndOfFile(), EndOfLine: MatchEndOfLine(), UntilEndOfLine: MatchUntilEndOfLine(), - AnyByte: MatchAnyByte(), - AnyRune: MatchAnyRune(), - ValidRune: MatchValidRune(), - InvalidRune: MatchInvalidRune(), - Space: MatchByte(' '), - Tab: MatchByte('\t'), - CR: MatchByte('\r'), - LF: MatchByte('\n'), + 
Space: MatchChar(' '), + Tab: MatchChar('\t'), + CR: MatchChar('\r'), + LF: MatchChar('\n'), CRLF: MatchStr("\r\n"), - Excl: MatchByte('!'), - DoubleQuote: MatchByte('"'), - Hash: MatchByte('#'), - Dollar: MatchByte('$'), - Percent: MatchByte('%'), - Amp: MatchByte('&'), - SingleQuote: MatchByte('\''), - RoundOpen: MatchByte('('), - LeftParen: MatchByte('('), - RoundClose: MatchByte(')'), - RightParen: MatchByte(')'), - Asterisk: MatchByte('*'), - Multiply: MatchByte('*'), - Plus: MatchByte('+'), - Add: MatchByte('+'), - Comma: MatchByte(','), - Minus: MatchByte('-'), - Subtract: MatchByte('-'), - Dot: MatchByte('.'), - Slash: MatchByte('/'), - Divide: MatchByte('/'), - Colon: MatchByte(':'), - Semicolon: MatchByte(';'), - AngleOpen: MatchByte('<'), - LessThan: MatchByte('<'), - Equal: MatchByte('='), - AngleClose: MatchByte('>'), - GreaterThan: MatchByte('>'), - Question: MatchByte('?'), - At: MatchByte('@'), - SquareOpen: MatchByte('['), - Backslash: MatchByte('\\'), - SquareClose: MatchByte(']'), - Caret: MatchByte('^'), - Underscore: MatchByte('_'), - Backquote: MatchByte('`'), - CurlyOpen: MatchByte('{'), - Pipe: MatchByte('|'), - CurlyClose: MatchByte('}'), - Tilde: MatchByte('~'), + Excl: MatchChar('!'), + DoubleQuote: MatchChar('"'), + Hash: MatchChar('#'), + Dollar: MatchChar('$'), + Percent: MatchChar('%'), + Amp: MatchChar('&'), + SingleQuote: MatchChar('\''), + RoundOpen: MatchChar('('), + LeftParen: MatchChar('('), + RoundClose: MatchChar(')'), + RightParen: MatchChar(')'), + Asterisk: MatchChar('*'), + Multiply: MatchChar('*'), + Plus: MatchChar('+'), + Add: MatchChar('+'), + Comma: MatchChar(','), + Minus: MatchChar('-'), + Subtract: MatchChar('-'), + Dot: MatchChar('.'), + Slash: MatchChar('/'), + Divide: MatchChar('/'), + Colon: MatchChar(':'), + Semicolon: MatchChar(';'), + AngleOpen: MatchChar('<'), + LessThan: MatchChar('<'), + Equal: MatchChar('='), + AngleClose: MatchChar('>'), + GreaterThan: MatchChar('>'), + Question: MatchChar('?'), + At: 
MatchChar('@'), + SquareOpen: MatchChar('['), + Backslash: MatchChar('\\'), + SquareClose: MatchChar(']'), + Caret: MatchChar('^'), + Underscore: MatchChar('_'), + Backquote: MatchChar('`'), + CurlyOpen: MatchChar('{'), + Pipe: MatchChar('|'), + CurlyClose: MatchChar('}'), + Tilde: MatchChar('~'), Newline: MatchNewline(), Blank: MatchBlank(), Blanks: MatchBlanks(), @@ -229,7 +221,7 @@ var A = struct { Digit: MatchDigit(), DigitNotZero: MatchDigitNotZero(), Digits: MatchDigits(), - Zero: MatchByte('0'), + Zero: MatchChar('0'), Signed: MatchSigned, Integer: MatchInteger(true), IntegerBetween: MatchIntegerBetween, @@ -345,23 +337,31 @@ var T = struct { Group: MakeTokenGroup, } -// MatchByte creates a Handler function that matches against the provided byte. -func MatchByte(expected byte) Handler { - return func(tokenAPI *API) bool { - b, err := tokenAPI.Input.Byte.Peek(0) - if err == nil && b == expected { - tokenAPI.Input.Byte.Accept(b) - return true - } - return false +func MatchChar(expected ...rune) Handler { + if len(expected) == 0 { + callerPanic("MatchChar", "Handler: {name} definition error at {caller}: at least one character must be provided") } + if len(expected) == 1 { + return matchAgainstSingleChar(expected[0]) + } + return matchAgainstMultipleChars(expected) } -// MatchRune creates a Handler function that matches against the provided rune. -func MatchRune(expected rune) Handler { +func matchAgainstSingleChar(expected rune) Handler { + // Handle an ASCII character. if expected <= '\x7F' { - return MatchByte(byte(expected)) + expectedByte := byte(expected) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Input.Byte.Peek(0) + if err == nil && b == expectedByte { + tokenAPI.Input.Byte.Accept(b) + return true + } + return false + } } + + // Handle an UTF8 character. 
return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err == nil && r == expected { @@ -372,27 +372,8 @@ func MatchRune(expected rune) Handler { } } -// MatchBytes creates a Handler function that checks if the input matches -// one of the provided bytes. The first match counts. -func MatchBytes(expected ...byte) Handler { - return func(tokenAPI *API) bool { - b, err := tokenAPI.Input.Byte.Peek(0) - if err != nil { - return false - } - for _, e := range expected { - if b == e { - tokenAPI.Input.Byte.Accept(b) - return true - } - } - return false - } -} - -// MatchRunes creates a Handler function that checks if the input matches -// one of the provided runes. The first match counts. -func MatchRunes(expected ...rune) Handler { +func matchAgainstMultipleChars(expected []rune) Handler { + // Check if all characters are ASCII characters. onlyBytes := true expectedBytes := make([]byte, len(expected)) for i, r := range expected { @@ -402,9 +383,25 @@ func MatchRunes(expected ...rune) Handler { } expectedBytes[i] = byte(r) } + + // Handle ASCII characters. if onlyBytes { - return MatchBytes(expectedBytes...) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Input.Byte.Peek(0) + if err != nil { + return false + } + for _, e := range expectedBytes { + if b == e { + tokenAPI.Input.Byte.Accept(b) + return true + } + } + return false + } } + + // Handle UTF8 characters. return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) if err != nil { @@ -420,40 +417,43 @@ func MatchRunes(expected ...rune) Handler { } } -// MatchByteRange creates a Handler function that checks if the input -// matches the provided byte range. The byte range is defined by a start and -// an end byte, inclusive, so: -// -// MatchByteRange('5', '9') -// -// creates a Handler that will match any of '5', '6', '7', '8' or '9'. 
-func MatchByteRange(start byte, end byte) Handler { - if end < start { - callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) +func MatchCharRange(expected ...rune) Handler { + if len(expected) == 0 { + callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: at least one character range pair must be provided") } - return func(tokenAPI *API) bool { - b, err := tokenAPI.Input.Byte.Peek(0) - if err == nil && b >= start && b <= end { - tokenAPI.Input.Byte.Accept(b) - return true + if len(expected)%2 != 0 { + callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: an even number of character range pairs must be provided") + } + starts := make([]rune, len(expected)/2) // one slot per start/end pair; extra zero slots would match NUL + ends := make([]rune, len(expected)/2) + for i := 0; i < len(expected); i += 2 { + start := expected[i] + end := expected[i+1] + if start > end { + callerPanic("MatchCharRange", "Handler: {name} definition error at {caller}: start %q must be <= end %q", start, end) } - return false + starts[i/2] = start + ends[i/2] = end } + + if len(starts) == 1 { // exactly one range: use the single-range matcher + return matchAgainstSingleCharRange(starts[0], ends[0]) + } + return matchAgainstMultipleCharRanges(starts, ends) } -// MatchRuneRange creates a Handler function that checks if the input -// matches the provided rune range. The rune range is defined by a start and -// an end rune, inclusive, so: -// -// MatchRuneRange('g', 'k') -// -// creates a Handler that will match any of 'g', 'h', 'i', 'j' or 'k'. 
-func MatchRuneRange(start rune, end rune) Handler { - if end < start { - callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) - } +func matchAgainstSingleCharRange(start rune, end rune) Handler { if end <= '\x7F' { - return MatchByteRange(byte(start), byte(end)) + start := byte(start) + end := byte(end) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Input.Byte.Peek(0) + if err == nil && b >= start && b <= end { + tokenAPI.Input.Byte.Accept(b) + return true + } + return false + } } return func(tokenAPI *API) bool { r, _, err := tokenAPI.Input.Rune.Peek(0) @@ -465,6 +465,45 @@ func MatchRuneRange(start rune, end rune) Handler { } } +func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler { + // Check if all characters are ASCII characters. + onlyBytes := true + expectedStarts := make([]byte, len(starts)) + expectedEnds := make([]byte, len(ends)) + for i, start := range starts { + end := ends[i] + if end > '\x7F' { + onlyBytes = false + break + } + expectedStarts[i] = byte(start) + expectedEnds[i] = byte(end) + } + + if onlyBytes { + return func(tokenAPI *API) bool { + b, err := tokenAPI.Input.Byte.Peek(0) + for i := range expectedStarts { + if err == nil && b >= expectedStarts[i] && b <= expectedEnds[i] { + tokenAPI.Input.Byte.Accept(b) + return true + } + } + return false + } + } + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Input.Rune.Peek(0) + for i := range starts { + if err == nil && r >= starts[i] && r <= ends[i] { + tokenAPI.Input.Rune.Accept(r) + return true + } + } + return false + } +} + // MatchNewline creates a handler that matches a newline, which is either // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). 
func MatchNewline() Handler { @@ -758,14 +797,14 @@ func MatchNot(handler Handler) Handler { // // Note that the input can contain more than the provided number of matches, e.g.: // -// MatchRep(4, MatchRune('X')) +// MatchRep(4, MatchChar('X')) // // will not match input "XXX", it will match input "XXXX", but also "XXXXXX". // In that last case, there will be a remainder "XX" on the input. // // Another way to use this method, is by applying the following syntactic sugar: // -// MatchRune('X').Times(4) +// MatchChar('X').Times(4) func MatchRep(times int, handler Handler) Handler { return matchMinMax(times, times, handler, "MatchRep") } @@ -1082,7 +1121,7 @@ func MatchInvalidRune() Handler { // MatchDigit creates a Handler that checks if a single digit can be read // from the input. func MatchDigit() Handler { - return MatchByteRange('0', '9') + return MatchCharRange('0', '9') } // MatchDigits creates a Handler that checks if one or more digits can be read @@ -1110,7 +1149,7 @@ func MatchDigits() Handler { // MatchDigitNotZero creates a Handler that checks if a single digit not equal // to zero '0' can be read from the input. func MatchDigitNotZero() Handler { - return MatchByteRange('1', '9') + return MatchCharRange('1', '9') } // MatchInteger creates a Handler function that checks if a valid integer @@ -1247,34 +1286,34 @@ func MatchBoolean() Handler { MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), - MatchByte('t'), - MatchByte('T'), - MatchByte('1'), + MatchChar('t'), + MatchChar('T'), + MatchChar('1'), MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), - MatchByte('f'), - MatchByte('F'), - MatchByte('0'), + MatchChar('f'), + MatchChar('F'), + MatchChar('0'), ) } // MatchASCII creates a Handler function that matches against any // ASCII value on the input. 
func MatchASCII() Handler { - return MatchByteRange('\x00', '\x7F') + return MatchCharRange('\x00', '\x7F') } // MatchASCIILower creates a Handler function that matches against any // lower case ASCII letter on the input (a - z). func MatchASCIILower() Handler { - return MatchByteRange('a', 'z') + return MatchCharRange('a', 'z') } // MatchASCIIUpper creates a Handler function that matches against any // upper case ASCII letter on the input (a - z). func MatchASCIIUpper() Handler { - return MatchByteRange('A', 'Z') + return MatchCharRange('A', 'Z') } // MatchUnicodeLetter creates a Handler function that matches against any @@ -1298,14 +1337,7 @@ func MatchUnicodeLower() Handler { // MatchHexDigit creates a Handler function that check if a single hexadecimal // digit can be read from the input. func MatchHexDigit() Handler { - return func(tokenAPI *API) bool { - b, err := tokenAPI.Input.Byte.Peek(0) - if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { - tokenAPI.Input.Byte.Accept(b) - return true - } - return false - } + return MatchCharRange('0', '9', 'a', 'f', 'A', 'F') } // MatchOctet creates a Handler function that checks if a valid octet value @@ -1373,7 +1405,7 @@ func MatchOctet(normalize bool) Handler { // "192.168.001.012" will be normalize to "192.168.1.12". func MatchIPv4(normalize bool) Handler { octet := MatchOctet(normalize) - dot := MatchRune('.') + dot := MatchChar('.') return MatchSeq(octet, dot, octet, dot, octet, dot, octet) } @@ -1393,7 +1425,7 @@ func MatchIPv4CIDRMask(normalize bool) Handler { // "255.255.192.000" will be normalized to "255.255.192.0". func MatchIPv4Netmask(normalize bool) Handler { octet := MakeUint8Token(nil, MatchOctet(normalize)) - dot := MatchRune('.') + dot := MatchChar('.') netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) return func(tokenAPI *API) bool { @@ -1423,7 +1455,7 @@ func MatchIPv4Netmask(normalize bool) Handler { // be normalized to 172.16.10.254/18. 
func MatchIPv4Net(normalize bool) Handler { ip := MakeStrLiteralToken("ip", MatchIPv4(normalize)) - slash := MatchRune('/') + slash := MatchChar('/') mask := MatchAny( MakeStrLiteralToken("mask", MatchIPv4Netmask(normalize)), MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize))) @@ -1459,7 +1491,7 @@ func MatchIPv4Net(normalize bool) Handler { // can be read from the input. func MatchIPv6(normalize bool) Handler { hextet := MatchMinMax(1, 4, MatchHexDigit()) - colon := MatchRune(':') + colon := MatchChar(':') empty := MatchSeq(colon, colon) return func(tokenAPI *API) bool { @@ -1523,7 +1555,7 @@ func matchCIDRMask(bits int64, normalize bool) Handler { // normalized. The above example would be normalized to fe08::216:3eff:fe96:2/64. func MatchIPv6Net(normalize bool) Handler { ip := MatchIPv6(normalize) - slash := MatchRune('/') + slash := MatchChar('/') mask := MatchIPv6CIDRMask(normalize) return MatchSeq(ip, slash, mask) } diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go index 18c4e5c..1f95df6 100644 --- a/tokenize/handlers_builtin_test.go +++ b/tokenize/handlers_builtin_test.go @@ -10,82 +10,82 @@ import ( func TestCombinators(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M AssertHandlers(t, []HandlerT{ - {"", c.Not(a.Rune('b')), false, ""}, - {"abc not", c.Not(a.Rune('b')), true, "a"}, - {"bcd not", c.Not(a.Rune('b')), false, ""}, - {"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"}, - {"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"}, - {"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""}, - {"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"}, - {"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"}, - {"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""}, - {"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"}, - {"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""}, - {"", c.Min(0, a.Rune('a')), true, ""}, - {"a", c.Min(0, a.Rune('a')), true, 
"a"}, - {"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"}, - {"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"}, - {"aaaaa", c.Min(6, a.Rune('a')), false, ""}, - {"", c.Max(4, a.Rune('b')), true, ""}, - {"X", c.Max(4, a.Rune('b')), true, ""}, - {"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"}, - {"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"}, - {"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"}, - {"", c.MinMax(0, 0, a.Rune('c')), true, ""}, - {"X", c.MinMax(0, 0, a.Rune('c')), true, ""}, - {"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"}, - {"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, - {"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, - {"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""}, - {"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"}, - {"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"}, - {"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"}, - {"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"}, - {"", c.MinMax(1, 1, a.Rune('c')), false, ""}, - {"X", c.MinMax(1, 1, a.Rune('c')), false, ""}, - {"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"}, - {"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"}, - {"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"}, - {"", c.OneOrMore(a.Rune('d')), false, ""}, - {"X", c.OneOrMore(a.Rune('d')), false, ""}, - {"dX", c.OneOrMore(a.Rune('d')), true, "d"}, - {"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"}, - {"", c.ZeroOrMore(a.Rune('e')), true, ""}, - {"X", c.ZeroOrMore(a.Rune('e')), true, ""}, - {"eX", c.ZeroOrMore(a.Rune('e')), true, "e"}, - {"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"}, - {"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"}, + {"", c.Not(a.Char('b')), false, ""}, + {"abc not", c.Not(a.Char('b')), true, "a"}, + {"bcd not", c.Not(a.Char('b')), false, ""}, + {"aaaxxxb", c.OneOrMore(c.Not(a.Char('b'))), true, "aaaxxx"}, + {"1010 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), true, "1"}, + {"2020 not", c.Not(c.Seq(a.Char('2'), a.Char('0'))), false, ""}, + {"abc 
any", c.Any(a.Char('a'), a.Char('b')), true, "a"}, + {"bcd any", c.Any(a.Char('a'), a.Char('b')), true, "b"}, + {"cde any", c.Any(a.Char('a'), a.Char('b')), false, ""}, + {"ababc repeated", c.Repeated(4, a.Char('a', 'b')), true, "abab"}, + {"ababc repeated", c.Repeated(5, a.Char('a', 'b')), false, ""}, + {"", c.Min(0, a.Char('a')), true, ""}, + {"a", c.Min(0, a.Char('a')), true, "a"}, + {"aaaaa", c.Min(4, a.Char('a')), true, "aaaaa"}, + {"aaaaa", c.Min(5, a.Char('a')), true, "aaaaa"}, + {"aaaaa", c.Min(6, a.Char('a')), false, ""}, + {"", c.Max(4, a.Char('b')), true, ""}, + {"X", c.Max(4, a.Char('b')), true, ""}, + {"bbbbbX", c.Max(4, a.Char('b')), true, "bbbb"}, + {"bbbbbX", c.Max(5, a.Char('b')), true, "bbbbb"}, + {"bbbbbX", c.Max(6, a.Char('b')), true, "bbbbb"}, + {"", c.MinMax(0, 0, a.Char('c')), true, ""}, + {"X", c.MinMax(0, 0, a.Char('c')), true, ""}, + {"cccc", c.MinMax(0, 5, a.Char('c')), true, "cccc"}, + {"ccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, + {"cccccc", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, + {"cccccX", c.MinMax(0, 0, a.Char('c')), true, ""}, + {"cccccX", c.MinMax(0, 1, a.Char('c')), true, "c"}, + {"cccccX", c.MinMax(0, 5, a.Char('c')), true, "ccccc"}, + {"cccccX", c.MinMax(0, 6, a.Char('c')), true, "ccccc"}, + {"cccccX", c.MinMax(1, 1, a.Char('c')), true, "c"}, + {"", c.MinMax(1, 1, a.Char('c')), false, ""}, + {"X", c.MinMax(1, 1, a.Char('c')), false, ""}, + {"cccccX", c.MinMax(1, 3, a.Char('c')), true, "ccc"}, + {"cccccX", c.MinMax(1, 6, a.Char('c')), true, "ccccc"}, + {"cccccX", c.MinMax(3, 4, a.Char('c')), true, "cccc"}, + {"", c.OneOrMore(a.Char('d')), false, ""}, + {"X", c.OneOrMore(a.Char('d')), false, ""}, + {"dX", c.OneOrMore(a.Char('d')), true, "d"}, + {"dddddX", c.OneOrMore(a.Char('d')), true, "ddddd"}, + {"", c.ZeroOrMore(a.Char('e')), true, ""}, + {"X", c.ZeroOrMore(a.Char('e')), true, ""}, + {"eX", c.ZeroOrMore(a.Char('e')), true, "e"}, + {"eeeeeX", c.ZeroOrMore(a.Char('e')), true, "eeeee"}, + {"HI!", 
c.Seq(a.Char('H'), a.Char('I'), a.Char('!')), true, "HI!"}, {"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"}, - {"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"}, - {"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""}, - {"ghijkl", c.Optional(a.Rune('h')), true, ""}, - {"ghijkl", c.Optional(a.Rune('g')), true, "g"}, - {"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"}, + {"101010123", c.OneOrMore(c.Seq(a.Char('1'), a.Char('0'))), true, "101010"}, + {"", c.Optional(c.OneOrMore(a.Char('f'))), true, ""}, + {"ghijkl", c.Optional(a.Char('h')), true, ""}, + {"ghijkl", c.Optional(a.Char('g')), true, "g"}, + {"fffffX", c.Optional(c.OneOrMore(a.Char('f'))), true, "fffff"}, {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"}, - {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, + {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Char('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""}, {" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"}, {"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"}, {" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"}, - {"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"}, - {"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""}, - {"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""}, - {"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"}, - {"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""}, - {"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"}, - {"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""}, - {"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"}, - 
{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""}, - {"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""}, - {"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"}, + {"ab", c.FollowedBy(a.Char('b'), a.Char('a')), true, "a"}, + {"ba", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""}, + {"aa", c.FollowedBy(a.Char('b'), a.Char('a')), false, ""}, + {"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), true, "aaabbbccc"}, + {"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Char('d')), c.OneOrMore(a.Char('a')).Then(c.OneOrMore(c.Not(a.Char('d'))))), false, ""}, + {"xy", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"}, + {"yx", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""}, + {"xx", c.NotFollowedBy(a.Char('a'), a.Char('x')), true, "x"}, + {"xa", c.NotFollowedBy(a.Char('a'), a.Char('x')), false, ""}, + {"xxxyyyzzzaaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), false, ""}, + {"xxxyyyzzzbaa", c.NotFollowedBy(a.Char('a'), c.OneOrMore(a.Char('x', 'y', 'z'))), true, "xxxyyyzzz"}, }) } func TestCombinatorPanics(t *testing.T) { var c, a = tokenize.C, tokenize.A AssertPanics(t, []PanicT{ - {func() { a.RuneRange('z', 'a') }, true, - `Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, + {func() { a.CharRange('z', 'a') }, true, + `Handler: MatchCharRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must be <= end 'a'`}, {func() { c.MinMax(-1, 1, a.Space) }, true, `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, {func() { c.MinMax(1, -1, a.Space) }, true, @@ -102,25 +102,27 @@ func TestCombinatorPanics(t *testing.T) { } func TestAtoms(t *testing.T) { - var a = tokenize.A + var a, c = tokenize.A, tokenize.C AssertHandlers(t, []HandlerT{ - {"dd", 
a.RuneRange('b', 'e'), true, "d"}, - {"ee", a.RuneRange('b', 'e'), true, "e"}, - {"ff", a.RuneRange('b', 'e'), false, ""}, + {"dd", a.CharRange('b', 'e'), true, "d"}, + {"ee", a.CharRange('b', 'e'), true, "e"}, + {"ff", a.CharRange('b', 'e'), false, ""}, + {"ff", a.CharRange('b', 'c', 'f', 'g'), true, "f"}, + {"abc123_-,other", c.OneOrMore(a.CharRange('a', 'z', '0', '9', '_', '_', '-', '-')), true, "abc123_-"}, {"Hello, world 1!", a.Str("Hello"), true, "Hello"}, {"Hello, world 2!", a.StrNoCase("hElLo"), true, "Hello"}, {"H♥llÖ, wÖrld 3!", a.Str("H♥llÖ"), true, "H♥llÖ"}, {"H♥llÖ, world 4!", a.StrNoCase("h♥llö"), true, "H♥llÖ"}, - {"+X", a.Runes('+', '-', '*', '/'), true, "+"}, - {"-X", a.Runes('+', '-', '*', '/'), true, "-"}, - {"*X", a.Runes('+', '-', '*', '/'), true, "*"}, - {"/X", a.Runes('+', '-', '*', '/'), true, "/"}, - {"!X", a.Runes('+', '-', '*', '/'), false, ""}, - {"xxx", a.Rune('x'), true, "x"}, - {"x ", a.Rune(' '), false, ""}, - {"aa", a.RuneRange('b', 'e'), false, ""}, - {"bb", a.RuneRange('b', 'e'), true, "b"}, - {"cc", a.RuneRange('b', 'e'), true, "c"}, + {"+X", a.Char('+', '-', '*', '/'), true, "+"}, + {"-X", a.Char('+', '-', '*', '/'), true, "-"}, + {"*X", a.Char('+', '-', '*', '/'), true, "*"}, + {"/X", a.Char('+', '-', '*', '/'), true, "/"}, + {"!X", a.Char('+', '-', '*', '/'), false, ""}, + {"xxx", a.Char('x'), true, "x"}, + {"x ", a.Char(' '), false, ""}, + {"aa", a.CharRange('b', 'e'), false, ""}, + {"bb", a.CharRange('b', 'e'), true, "b"}, + {"cc", a.CharRange('b', 'e'), true, "c"}, {"", a.EndOfFile, true, ""}, {"😂", a.AnyRune, true, "😂"}, {"\xbc with AnyRune", a.AnyRune, true, "�"}, @@ -350,8 +352,8 @@ func TestIPv6Atoms(t *testing.T) { func TestModifiers(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M AssertHandlers(t, []HandlerT{ - {"missed me!", m.Drop(a.Rune('w')), false, ""}, - {"where are you?", m.Drop(a.Rune('w')), true, ""}, + {"missed me!", m.Drop(a.Char('w')), false, ""}, + {"where are you?", m.Drop(a.Char('w')), 
true, ""}, {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {"cool", a.Str("cool"), true, "cool"}, {"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},