diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile index 8c2ddee..fdbafb7 100644 --- a/cmd/burntsushi-tester/Makefile +++ b/cmd/burntsushi-tester/Makefile @@ -1,4 +1,4 @@ -PROFILE_COUNT=100 +PROFILE_COUNT=500 PROFILE_COUNT2=1000 TIME=time diff --git a/parse/document.go b/parse/document.go index 1ee201f..08fefd0 100644 --- a/parse/document.go +++ b/parse/document.go @@ -27,7 +27,7 @@ var ( // A '#' hash symbol marks the rest of the line as a comment. // All characters up to the end of the line are included in the comment. - comment = c.Seq(a.Hash, a.UntilEndOfLine) + comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional()) optionalComment = comment.Optional() endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) @@ -35,7 +35,9 @@ var ( whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment) // Keys may be either bare or quoted. - detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote) + detectKey = a.ByteByCallback(func(b byte) bool { + return isBareKeyRune(b) || b == '\'' || b == '"' + }) // Both [tables] and [[arrays of tables]] start with a square open bracket. 
detectTable = a.SquareOpen diff --git a/parse/document_test.go b/parse/document_test.go index 2c98dc5..9c86cf4 100644 --- a/parse/document_test.go +++ b/parse/document_test.go @@ -14,7 +14,7 @@ func TestComment(t *testing.T) { {`# \xxx/ \u can't escape/`, `{}`, ``}, {"# \tlexe\r accepts embedded ca\r\riage \returns\r\n", `{}`, ``}, {" # multiple\n#lines\n \t\n\n\t#with\n ### comments!", `{}`, ``}, - {"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 5`}, + {"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 6`}, } { p := newParser() testParse(t, p, p.startDocument, test) diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index d2dfb64..4465221 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -14,7 +14,7 @@ var ( // sign, and value must be on the same line (though some values can be // broken over multiple lines). - keyAssignment = c.Seq(whitespace, a.Equal, whitespace) + keyAssignment = c.InOptionalBlanks(a.Equal) // A key may be either bare, quoted or dotted. Bare keys may only // contain ASCII letters, ASCII digits, underscores, and dashes @@ -32,14 +32,14 @@ var ( (b >= '0' && b <= '9') || b == '_' || b == '-') } bareKeyRune = a.ByteByCallback(isBareKeyRune) - bareKey = c.OneOrMore(bareKeyRune) + bareKey = a.BytesByCallback(isBareKeyRune) // Dotted keys are a sequence of bare or quoted keys joined with a dot. // This allows for grouping similar properties together. Whitespace // around dot-separated parts is ignored, however, best practice is to // not use any extraneous whitespace. 
- keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace) + keySeparatorDot = c.InOptionalBlanks(a.Dot) ) func (t *parser) startKeyValuePair(p *parse.API) { diff --git a/parse/keyvaluepair_test.go b/parse/keyvaluepair_test.go index 479a12e..a72262d 100644 --- a/parse/keyvaluepair_test.go +++ b/parse/keyvaluepair_test.go @@ -39,7 +39,7 @@ func TestKeyValuePair(t *testing.T) { {``, `{}`, ``}, {` `, `{}`, ``}, {" \t ", `{}`, ``}, - {" key ", `{}`, `unexpected input (expected a value assignment) at line 1, column 5`}, + {" key ", `{}`, `unexpected end of file (expected a value assignment) at line 1, column 6`}, {" key \t=", `{}`, `unexpected end of file (expected a value) at line 1, column 8`}, {"key = # INVALID", `{}`, `unexpected input (expected a value) at line 1, column 7`}, {" key \t =\t \"The Value\" \r\n", `{"key": "The Value"}`, ``}, diff --git a/parse/value_datetime.go b/parse/value_datetime.go index 3fc010d..c85b62e 100644 --- a/parse/value_datetime.go +++ b/parse/value_datetime.go @@ -53,9 +53,9 @@ var ( // odt4 = 1979-05-27 07:32:00Z tdelimTok = c.Any( - tok.Str("T", a.Rune('T')), - tok.Str("t", a.Rune('t')), - tok.Str(" ", a.Rune(' '))) + tok.Str("T", a.Char('T')), + tok.Str("t", a.Char('t')), + tok.Str(" ", a.Char(' '))) // If you omit the offset from an RFC 3339 formatted date-time, it will // represent the given date-time without any relation to an offset or @@ -71,8 +71,8 @@ var ( // Note that RFC 3339 also allows the use of a lower case 'z'. // Here we replace it with a capital 'Z' to make the Go date parser work. - zulu = m.Replace(a.Runes('Z', 'z'), "Z") - offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute) + zulu = m.Replace(a.Char('Z', 'z'), "Z") + offset = c.Seq(a.Char('+', '-'), hour, a.Colon, minute) tzTok = tok.Str("Z07:00", zulu.Or(offset)) // The full date/time parse format, based on the above definitions. 
diff --git a/parse/value_number.go b/parse/value_number.go index 592dfcd..348cac9 100644 --- a/parse/value_number.go +++ b/parse/value_number.go @@ -34,15 +34,15 @@ var ( // Hexadecimal with prefix `0x`. hexDigits = c.OneOrMore(a.HexDigit) underscoreHexDigits = m.Drop(a.Underscore).Then(hexDigits) - hexadecimal = a.Rune('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits)))) + hexadecimal = a.Char('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits)))) // Octal with prefix `0o`. - octalDigits = c.OneOrMore(a.RuneRange('0', '7')) + octalDigits = c.OneOrMore(a.CharRange('0', '7')) underscoreOctalDigits = m.Drop(a.Underscore).Then(octalDigits) - octal = a.Rune('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits)))) + octal = a.Char('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits)))) // Binary with prefix `0b`. - binaryDigits = c.OneOrMore(a.RuneRange('0', '1')) + binaryDigits = c.OneOrMore(a.CharRange('0', '1')) underscoreBinaryDigits = m.Drop(a.Underscore).Then(binaryDigits) - binary = a.Rune('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits)))) + binary = a.Char('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits)))) // A fractional part is a decimal point followed by one or more digits. // Similar to integers, you may use underscores to enhance readability. @@ -51,7 +51,7 @@ var ( // An exponent part is an E (upper or lower case) followed by an integer // part (which follows the same rules as decimal integer values). - exponentPart = a.Runes('e', 'E').Then(integer) + exponentPart = a.Char('e', 'E').Then(integer) // Floats should be implemented as IEEE 754 binary64 values. 
// A float consists of an integer part (which follows the same rules as diff --git a/parse/value_string.go b/parse/value_string.go index 7e14b7b..46bb2b1 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -25,10 +25,10 @@ var ( closingMultiLineLiteralString = m.Drop(multiLineLiteralStringDelimiter) // Opening and closing character for basic strings. - basicStringDelimiter = m.Drop(a.DoubleQuote) + basicStringDelimiter = a.DoubleQuote - // Opening and losing character for literal strings. - literalStringDelimiter = m.Drop(a.SingleQuote) + // Opening and closing character for literal strings. + literalStringDelimiter = a.SingleQuote // Control characters as defined by TOML (U+0000 to U+001F, U+007F) @@ -47,10 +47,10 @@ var ( // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) - validEscapeChar = a.Bytes('b', 't', 'n', 'f', 'r', '"', '\\') + validEscapeChar = a.Char('b', 't', 'n', 'f', 'r', '"', '\\') shortEscape = c.Seq(a.Backslash, validEscapeChar) - shortUTF8Escape = c.Seq(a.Backslash, a.Byte('u'), a.HexDigit.Times(4)) - longUTF8Escape = c.Seq(a.Backslash, a.Byte('U'), a.HexDigit.Times(8)) + shortUTF8Escape = c.Seq(a.Backslash, a.Char('u'), a.HexDigit.Times(4)) + longUTF8Escape = c.Seq(a.Backslash, a.Char('U'), a.HexDigit.Times(8)) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) // For writing long strings without introducing extraneous whitespace, use a diff --git a/parse/value_table.go b/parse/value_table.go index 346ef81..7303c63 100644 --- a/parse/value_table.go +++ b/parse/value_table.go @@ -7,17 +7,17 @@ var ( // Opener and closer for [table]. - tableOpen = c.Seq(whitespace, a.SquareOpen, whitespace) - tableClose = c.Seq(whitespace, a.SquareClose, whitespace) + tableOpen = c.InOptionalBlanks(a.SquareOpen) + tableClose = c.InOptionalBlanks(a.SquareClose) // Opener and closer for [[array.of.tables]].
- tableArrayOpen = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace) - tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace) + tableArrayOpen = c.InOptionalBlanks(c.Seq(a.SquareOpen, a.SquareOpen)) + tableArrayClose = c.InOptionalBlanks(c.Seq(a.SquareClose, a.SquareClose)) // Opener, separator and closer for { inline: "tables" }. - inlineTableOpen = c.Seq(whitespace, a.CurlyOpen, whitespace) - inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace) - inlineTableClose = c.Seq(whitespace, a.CurlyClose, whitespace) + inlineTableOpen = c.InOptionalBlanks(a.CurlyOpen) + inlineTableSeparator = c.InOptionalBlanks(a.Comma) + inlineTableClose = c.InOptionalBlanks(a.CurlyClose) ) func (t *parser) startTable(p *parse.API) { diff --git a/parse2/grammar.go b/parse2/grammar.go index ede3ff7..9d42906 100644 --- a/parse2/grammar.go +++ b/parse2/grammar.go @@ -120,25 +120,25 @@ func BuildGrammar() tokenize.Handler { // Comment R("comment-start-symbol", a.Hash) - R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF))) - R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii"))) + R("non-ascii", a.CharRange(0x80, 0xD7FF).Or(a.CharRange(0xE000, 0x10FFFF))) + R("non-eol", c.Any(a.Char(0x09), a.CharRange(0x20, 0x7E), G("non-ascii"))) R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol")))) // Basic String R("escape-seq-char", c.Any( - a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), - a.Rune('u').Then(G("hex-digit").Times(4)), - a.Rune('U').Then(G("hex-digit").Times(8)))) + a.Char('"', '\\', 'b', 'f', 'n', 'r', 't'), + a.Char('u').Then(G("hex-digit").Times(4)), + a.Char('U').Then(G("hex-digit").Times(8)))) R("escaped", G("escape").Then(G("escape-seq-char"))) - R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) + R("basic-unescaped", c.Any(a.CharRange(0x20, 0x21), a.CharRange(0x23, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii"))) 
R("basic-char", G("escaped").Or(G("basic-unescaped"))) R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark")))) // Multiline Basic String R("ml-basic-string-delim", G("quotation-mark").Times(3)) - R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) + R("ml-basic-unescaped", c.Any(a.CharRange(0x20, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii"))) R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped"))) R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline")))) R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat")))) @@ -151,7 +151,7 @@ func BuildGrammar() tokenize.Handler { // Literal String - R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii"))) + R("literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x26), a.CharRange(0x28, 0x7E), G("non-ascii"))) R("literal-string", c.Seq( m.Drop(G("apostrophe")), c.ZeroOrMore(G("literal-char")), @@ -160,7 +160,7 @@ func BuildGrammar() tokenize.Handler { // Multiline Literal String R("ml-literal-string-delim", G("apostrophe").Times(3)) - R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii"))) + R("ml-literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x7E), G("non-ascii"))) R("ml-literal-body-content", G("ml-literal-char").Or(G("newline"))) R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim")))) R("ml-literal-string", c.Seq( @@ -183,17 +183,17 @@ func BuildGrammar() tokenize.Handler { R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit"))) R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int"))) - R("hex-prefix", a.Zero.Then(a.Rune('x'))) + R("hex-prefix", a.Zero.Then(a.Char('x'))) R("underscore-hex-digit", c.Any(G("hex-digit"), 
m.Drop(G("underscore")).Then(G("hex-digit")))) R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit")))) - R("oct-prefix", a.Zero.Then(a.Rune('o'))) - R("digit0-7", a.RuneRange('0', '7')) + R("oct-prefix", a.Zero.Then(a.Char('o'))) + R("digit0-7", a.CharRange('0', '7')) R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7")))) R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit")))) - R("bin-prefix", a.Zero.Then(a.Rune('b'))) - R("digit0-1", a.Runes('0', '1')) + R("bin-prefix", a.Zero.Then(a.Char('b'))) + R("digit0-1", a.Char('0', '1')) R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1")))) R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit")))) @@ -241,9 +241,9 @@ func BuildGrammar() tokenize.Handler { R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday")))) R("time-delim", c.Any( - tok.Str("T", a.Rune('T')), - tok.Str("t", a.Rune('t')), - tok.Str(" ", a.Rune(' ')))) + tok.Str("T", a.Char('T')), + tok.Str("t", a.Char('t')), + tok.Str(" ", a.Char(' ')))) R("time-hour", G("digit").Times(2)) R("time-minute", G("digit").Times(2)) @@ -252,7 +252,7 @@ func BuildGrammar() tokenize.Handler { R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit))))) - R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z")) + R("time-zulu", m.Replace(a.Char('Z', 'z'), "Z")) R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute"))) R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset")))) diff --git a/parse2/profile-sushi.sh b/parse2/profile-sushi.sh index 130fc44..e68896f 100755 --- a/parse2/profile-sushi.sh +++ b/parse2/profile-sushi.sh @@ -1,9 +1,12 @@ #!/bin/bash #FILE=short.toml + FILE=normal.toml 
+ITER=1500 + #FILE=long.toml -ITER=10000 +#ITER=500 cd ../cmd/burntsushi-tester go build