diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index 6755cde..bcdef0a 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -71,7 +71,7 @@ type simpleCalculator struct { } // A definition of an int64, which conveniently drops surrounding blanks. -var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks)) +var dropBlank = tokenize.M.Drop(tokenize.C.Optional(tokenize.A.Blanks)) var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank) var int64Token = tokenize.T.Int64(nil, bareInteger) diff --git a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index 09daabc..191f77f 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -54,7 +54,7 @@ func createHelloTokenizer() tokenize.Func { // that does all the work. The 'greeting' Handler matches the whole input and // drops all but the name from it. hello := a.StrNoCase("hello") - comma := c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks)) + comma := c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks)) separator := c.Any(comma, a.Blanks) name := c.OneOrMore(c.Not(a.Excl)) greeting := m.Drop(hello). diff --git a/examples/example_helloSingleStateParser_test.go b/examples/example_helloSingleStateParser_test.go index 5a8b218..e862fe1 100644 --- a/examples/example_helloSingleStateParser_test.go +++ b/examples/example_helloSingleStateParser_test.go @@ -85,7 +85,7 @@ func (h *helloparser2) start(p *parse.API) { p.Error("the greeting is not being friendly") return } - if !p.Accept(c.Seq(c.Opt(a.Blanks), a.Comma, c.Opt(a.Blanks))) { + if !p.Accept(c.Seq(c.Optional(a.Blanks), a.Comma, c.Optional(a.Blanks))) { p.Error("the greeting is not properly separated") return } diff --git a/tokenize/handler.go b/tokenize/handler.go index f3e2b77..7d402cf 100644 --- a/tokenize/handler.go +++ b/tokenize/handler.go @@ -41,9 +41,9 @@ func (handler Handler) SeparatedBy(separatorHandler Handler) Handler { } // Optional is syntactic sugar that allows you to write a construction like -// MatchOpt(handler) as handler.Optional(). +// MatchOptional(handler) as handler.Optional(). func (handler Handler) Optional() Handler { - return MatchOpt(handler) + return MatchOptional(handler) } // Except is syntactic sugar that allows you to write a construction like diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go index 700ad64..c47737c 100644 --- a/tokenize/handler_test.go +++ b/tokenize/handler_test.go @@ -31,7 +31,7 @@ func ExampleHandler_Times() { func ExampleHandler_Then() { c, a := tokenize.C, tokenize.A - phoneNumber := a.Rune('0').Then(c.Rep(9, a.Digit)) + phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit)) fmt.Println(phoneNumber.Match("0208888888")) // Output: @@ -40,7 +40,7 @@ func ExampleHandler_Then() { func ExampleHandler_Or() { c, a := tokenize.C, tokenize.A - phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Rep(8, a.Digit)) + phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit)) fmt.Println(phoneNumber.Match("+31209876543")) fmt.Println(phoneNumber.Match("0031209876543")) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index cbf7ffc..d01e4dd 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -26,31 +26,35 @@ import ( // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var C = struct { - Any func(...Handler) Handler - Not func(Handler) Handler - Opt func(Handler) Handler - Seq func(...Handler) Handler - Rep func(times int, handler Handler) Handler - Min func(min int, handler Handler) Handler - Max func(max int, handler Handler) Handler - ZeroOrMore func(Handler) Handler - OneOrMore func(Handler) Handler - MinMax func(min int, max int, handler Handler) Handler - Separated func(separated Handler, separator Handler) Handler - Except func(except Handler, handler Handler) Handler + Any func(...Handler) Handler + Not func(Handler) Handler + Seq func(...Handler) Handler + Min func(min int, handler Handler) Handler + Max func(max int, handler Handler) Handler + Repeated func(times int, handler Handler) Handler + Optional func(Handler) Handler + ZeroOrMore func(Handler) Handler + OneOrMore func(Handler) Handler + MinMax func(min int, max int, handler Handler) Handler + Separated func(separated Handler, separator Handler) Handler + Except func(except Handler, handler Handler) Handler + FollowedBy func(lookAhead Handler, handler Handler) Handler + WhileFollowedBy func(lookahead Handler, handler Handler) Handler }{ - Opt: MatchOpt, - Any: MatchAny, - Not: MatchNot, - Seq: MatchSeq, - Rep: MatchRep, - Min: MatchMin, - Max: MatchMax, - ZeroOrMore: MatchZeroOrMore, - OneOrMore: MatchOneOrMore, - MinMax: MatchMinMax, - Separated: MatchSeparated, - Except: MatchExcept, + Any: MatchAny, + Not: MatchNot, + Seq: MatchSeq, + Min: MatchMin, + Max: MatchMax, + Repeated: MatchRep, + Optional: MatchOptional, + ZeroOrMore: MatchZeroOrMore, + OneOrMore: MatchOneOrMore, + MinMax: MatchMinMax, + Separated: MatchSeparated, + Except: MatchExcept, + FollowedBy: MatchFollowedBy, + WhileFollowedBy: MatchWhileFollowedBy, } // A provides convenient access to a range of atoms or functions to build atoms. @@ -199,6 +203,7 @@ var A = struct { Pipe: MatchRune('|'), CurlyClose: MatchRune('}'), Tilde: MatchRune('~'), + Newline: MatchNewline(), Blank: MatchBlank(), Blanks: MatchBlanks(), Whitespace: MatchWhitespace(), @@ -339,6 +344,12 @@ func MatchRuneRange(start rune, end rune) Handler { return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end }) } +// MatchNewline creates a handler that matches a newline, which is either +// a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). +func MatchNewline() Handler { + return MatchAny(MatchStr("\r\n"), MatchRune('\n')) +} + // MatchBlank creates a Handler that matches one rune from the input // against blank characters, meaning tabs and spaces. // @@ -382,7 +393,7 @@ func MatchRuneByCallback(callback func(rune) bool) Handler { // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. func MatchEndOfLine() Handler { - return MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()) + return MatchAny(MatchNewline(), MatchEndOfFile()) } // MatchStr creates a Handler that matches the input against the provided string. @@ -406,11 +417,11 @@ func MatchStrNoCase(expected string) Handler { return MatchSeq(handlers...) } -// MatchOpt creates a Handler that makes the provided Handler optional. +// MatchOptional creates a Handler that makes the provided Handler optional. // When the provided Handler applies, then its output is used, otherwise // no output is generated but still a successful match is reported (but the // result will be empty). -func MatchOpt(handler Handler) Handler { +func MatchOptional(handler Handler) Handler { return MatchMinMax(0, 1, handler) } @@ -580,13 +591,38 @@ func MatchExcept(handler Handler, except Handler) Handler { } } +// TODO keep this? +func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { + return func(t *API) bool { + child := t.Fork() + if handler(child) && lookAhead(child.Fork()) { + child.Merge() + return true + } + return false + } +} + +// TODO keep this? Make some useful tests first. +func MatchWhileFollowedBy(lookAhead Handler, handler Handler) Handler { + followedBy := MatchFollowedBy(lookAhead, handler) + return func(t *API) bool { + matches := 0 + for followedBy(t) { + fmt.Printf("Matches so far: %q\n", t.Result().String()) + matches++ + } + return matches > 0 + } +} + // MatchSigned creates a Handler that checks if the provided Handler is // prefixed by an optional '+' or '-' sign. This can be used to turn numeric // atoms into a signed version, e.g. // // C.Signed(A.Integer) func MatchSigned(handler Handler) Handler { - sign := MatchOpt(MatchAny(MatchRune('+'), MatchRune('-'))) + sign := MatchOptional(MatchAny(MatchRune('+'), MatchRune('-'))) return MatchSeq(sign, handler) } @@ -695,7 +731,7 @@ func MatchInteger() Handler { // Handler will report a match, so both "123" and "123.123" will match. func MatchFloat() Handler { digits := MatchDigits() - return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits))) + return MatchSeq(digits, MatchOptional(MatchSeq(MatchRune('.'), digits))) } // MatchBoolean creates a Handler function that checks if a boolean @@ -950,7 +986,7 @@ func MatchIPv6Net(normalize bool) Handler { // even though we would have dropped the output anyway. So if you would like // to drop optional blanks (spaces and tabs), then use something like: // -// M.Drop(C.Opt(A.Blanks)) +// M.Drop(C.Optional(A.Blanks)) // // instead of: // diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go index 520920d..525d44e 100644 --- a/tokenize/handlers_builtin_test.go +++ b/tokenize/handlers_builtin_test.go @@ -18,8 +18,8 @@ func TestCombinators(t *testing.T) { {"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"}, {"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"}, {"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""}, - {"ababc", c.Rep(4, a.Runes('a', 'b')), true, "abab"}, - {"ababc", c.Rep(5, a.Runes('a', 'b')), false, ""}, + {"ababc", c.Repeated(4, a.Runes('a', 'b')), true, "abab"}, + {"ababc", c.Repeated(5, a.Runes('a', 'b')), false, ""}, {"", c.Min(0, a.Rune('a')), true, ""}, {"a", c.Min(0, a.Rune('a')), true, "a"}, {"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"}, @@ -55,12 +55,12 @@ func TestCombinators(t *testing.T) { {"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"}, {"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"}, {"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"}, - {"", c.Opt(c.OneOrMore(a.Rune('f'))), true, ""}, - {"ghijkl", c.Opt(a.Rune('h')), true, ""}, - {"ghijkl", c.Opt(a.Rune('g')), true, "g"}, - {"fffffX", c.Opt(c.OneOrMore(a.Rune('f'))), true, "fffff"}, + {"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""}, + {"ghijkl", c.Optional(a.Rune('h')), true, ""}, + {"ghijkl", c.Optional(a.Rune('g')), true, "g"}, + {"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"}, {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"}, - {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, + {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""}, {" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""}, {" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""}, @@ -382,23 +382,23 @@ func TestTokenMakers(t *testing.T) { func TestCombination(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M demonic := c.Seq( - c.Opt(a.SquareOpen), + c.Optional(a.SquareOpen), m.Trim( c.Seq( - c.Opt(a.Blanks), - c.Rep(3, a.AngleClose), + c.Optional(a.Blanks), + c.Repeated(3, a.AngleClose), m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string { return fmt.Sprintf("%d", len(s)) }), - m.Replace(c.Separated(a.Comma, c.Opt(a.Blanks)), ", "), + m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "), m.ToUpper(c.Min(1, a.ASCIILower)), m.Drop(a.Excl), - c.Rep(3, a.AngleOpen), - c.Opt(a.Blanks), + c.Repeated(3, a.AngleOpen), + c.Optional(a.Blanks), ), " \t", ), - c.Opt(a.SquareClose), + c.Optional(a.SquareClose), ) AssertHandlers(t, []HandlerT{