Speed improvements

This commit is contained in:
Maurice Makaay 2019-07-29 23:51:03 +00:00
parent 74274e04fb
commit 5ff6f20ab7
11 changed files with 55 additions and 50 deletions

View File

@ -1,4 +1,4 @@
PROFILE_COUNT=100
PROFILE_COUNT=500
PROFILE_COUNT2=1000
TIME=time

View File

@ -27,7 +27,7 @@ var (
// A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment.
comment = c.Seq(a.Hash, a.UntilEndOfLine)
comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional())
optionalComment = comment.Optional()
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
@ -35,7 +35,9 @@ var (
whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)
// Keys may be either bare or quoted.
detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
detectKey = a.ByteByCallback(func(b byte) bool {
return isBareKeyRune(b) || b == '\'' || b == '"'
})
// Both [tables] and [[arrays of tables]] start with a square open bracket.
detectTable = a.SquareOpen

View File

@ -14,7 +14,7 @@ func TestComment(t *testing.T) {
{`# \xxx/ \u can't escape/`, `{}`, ``},
{"# \tlexe\r accepts embedded ca\r\riage \returns\r\n", `{}`, ``},
{" # multiple\n#lines\n \t\n\n\t#with\n ### comments!", `{}`, ``},
{"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 5`},
{"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 6`},
} {
p := newParser()
testParse(t, p, p.startDocument, test)

View File

@ -14,7 +14,7 @@ var (
// sign, and value must be on the same line (though some values can be
// broken over multiple lines).
keyAssignment = c.Seq(whitespace, a.Equal, whitespace)
keyAssignment = c.InOptionalBlanks(a.Equal)
// A key may be either bare, quoted or dotted. Bare keys may only
// contain ASCII letters, ASCII digits, underscores, and dashes
@ -32,14 +32,14 @@ var (
(b >= '0' && b <= '9') || b == '_' || b == '-')
}
bareKeyRune = a.ByteByCallback(isBareKeyRune)
bareKey = c.OneOrMore(bareKeyRune)
bareKey = a.BytesByCallback(isBareKeyRune)
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together. Whitespace
// around dot-separated parts is ignored, however, best practice is to
// not use any extraneous whitespace.
keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace)
keySeparatorDot = c.InOptionalBlanks(a.Dot)
)
func (t *parser) startKeyValuePair(p *parse.API) {

View File

@ -39,7 +39,7 @@ func TestKeyValuePair(t *testing.T) {
{``, `{}`, ``},
{` `, `{}`, ``},
{" \t ", `{}`, ``},
{" key ", `{}`, `unexpected input (expected a value assignment) at line 1, column 5`},
{" key ", `{}`, `unexpected end of file (expected a value assignment) at line 1, column 6`},
{" key \t=", `{}`, `unexpected end of file (expected a value) at line 1, column 8`},
{"key = # INVALID", `{}`, `unexpected input (expected a value) at line 1, column 7`},
{" key \t =\t \"The Value\" \r\n", `{"key": "The Value"}`, ``},

View File

@ -53,9 +53,9 @@ var (
// odt4 = 1979-05-27 07:32:00Z
tdelimTok = c.Any(
tok.Str("T", a.Rune('T')),
tok.Str("t", a.Rune('t')),
tok.Str(" ", a.Rune(' ')))
tok.Str("T", a.Char('T')),
tok.Str("t", a.Char('t')),
tok.Str(" ", a.Char(' ')))
// If you omit the offset from an RFC 3339 formatted date-time, it will
// represent the given date-time without any relation to an offset or
@ -71,8 +71,8 @@ var (
// Note that RFC 3339 also allows the use of a lower case 'z'.
// Here we replace it with a capital 'Z' to make the Go date parser work.
zulu = m.Replace(a.Runes('Z', 'z'), "Z")
offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute)
zulu = m.Replace(a.Char('Z', 'z'), "Z")
offset = c.Seq(a.Char('+', '-'), hour, a.Colon, minute)
tzTok = tok.Str("Z07:00", zulu.Or(offset))
// The full date/time parse format, based on the above definitions.

View File

@ -34,15 +34,15 @@ var (
// Hexadecimal with prefix `0x`.
hexDigits = c.OneOrMore(a.HexDigit)
underscoreHexDigits = m.Drop(a.Underscore).Then(hexDigits)
hexadecimal = a.Rune('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits))))
hexadecimal = a.Char('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits))))
// Octal with prefix `0o`.
octalDigits = c.OneOrMore(a.RuneRange('0', '7'))
octalDigits = c.OneOrMore(a.CharRange('0', '7'))
underscoreOctalDigits = m.Drop(a.Underscore).Then(octalDigits)
octal = a.Rune('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits))))
octal = a.Char('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits))))
// Binary with prefix `0b`.
binaryDigits = c.OneOrMore(a.RuneRange('0', '1'))
binaryDigits = c.OneOrMore(a.CharRange('0', '1'))
underscoreBinaryDigits = m.Drop(a.Underscore).Then(binaryDigits)
binary = a.Rune('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits))))
binary = a.Char('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits))))
// A fractional part is a decimal point followed by one or more digits.
// Similar to integers, you may use underscores to enhance readability.
@ -51,7 +51,7 @@ var (
// An exponent part is an E (upper or lower case) followed by an integer
// part (which follows the same rules as decimal integer values).
exponentPart = a.Runes('e', 'E').Then(integer)
exponentPart = a.Char('e', 'E').Then(integer)
// Floats should be implemented as IEEE 754 binary64 values.
// A float consists of an integer part (which follows the same rules as

View File

@ -25,10 +25,10 @@ var (
closingMultiLineLiteralString = m.Drop(multiLineLiteralStringDelimiter)
// Opening and closing character for basic strings.
basicStringDelimiter = m.Drop(a.DoubleQuote)
basicStringDelimiter = a.DoubleQuote
// Opening and closing character for literal strings.
literalStringDelimiter = m.Drop(a.SingleQuote)
literalStringDelimiter = a.SingleQuote
// Control characters as defined by TOML (U+0000 to U+001F, U+007F)
@ -47,10 +47,10 @@ var (
// \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX)
validEscapeChar = a.Bytes('b', 't', 'n', 'f', 'r', '"', '\\')
validEscapeChar = a.Char('b', 't', 'n', 'f', 'r', '"', '\\')
shortEscape = c.Seq(a.Backslash, validEscapeChar)
shortUTF8Escape = c.Seq(a.Backslash, a.Byte('u'), a.HexDigit.Times(4))
longUTF8Escape = c.Seq(a.Backslash, a.Byte('U'), a.HexDigit.Times(8))
shortUTF8Escape = c.Seq(a.Backslash, a.Char('u'), a.HexDigit.Times(4))
longUTF8Escape = c.Seq(a.Backslash, a.Char('U'), a.HexDigit.Times(8))
validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)
// For writing long strings without introducing extraneous whitespace, use a

View File

@ -7,17 +7,17 @@ import (
var (
// Opener and closer for [table].
tableOpen = c.Seq(whitespace, a.SquareOpen, whitespace)
tableClose = c.Seq(whitespace, a.SquareClose, whitespace)
tableOpen = c.InOptionalBlanks(a.SquareOpen)
tableClose = c.InOptionalBlanks(a.SquareClose)
// Opener and closer for [[array.of.tables]].
tableArrayOpen = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace)
tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace)
tableArrayOpen = c.InOptionalBlanks(c.Seq(a.SquareOpen, a.SquareOpen))
tableArrayClose = c.InOptionalBlanks(c.Seq(a.SquareClose, a.SquareClose))
// Opener, separator and closer for { inline: "tables" }.
inlineTableOpen = c.Seq(whitespace, a.CurlyOpen, whitespace)
inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace)
inlineTableClose = c.Seq(whitespace, a.CurlyClose, whitespace)
inlineTableOpen = c.InOptionalBlanks(a.CurlyOpen)
inlineTableSeparator = c.InOptionalBlanks(a.Comma)
inlineTableClose = c.InOptionalBlanks(a.CurlyClose)
)
func (t *parser) startTable(p *parse.API) {

View File

@ -120,25 +120,25 @@ func BuildGrammar() tokenize.Handler {
// Comment
R("comment-start-symbol", a.Hash)
R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)))
R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii")))
R("non-ascii", a.CharRange(0x80, 0xD7FF).Or(a.CharRange(0xE000, 0x10FFFF)))
R("non-eol", c.Any(a.Char(0x09), a.CharRange(0x20, 0x7E), G("non-ascii")))
R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol"))))
// Basic String
R("escape-seq-char", c.Any(
a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
a.Rune('u').Then(G("hex-digit").Times(4)),
a.Rune('U').Then(G("hex-digit").Times(8))))
a.Char('"', '\\', 'b', 'f', 'n', 'r', 't'),
a.Char('u').Then(G("hex-digit").Times(4)),
a.Char('U').Then(G("hex-digit").Times(8))))
R("escaped", G("escape").Then(G("escape-seq-char")))
R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
R("basic-unescaped", c.Any(a.CharRange(0x20, 0x21), a.CharRange(0x23, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii")))
R("basic-char", G("escaped").Or(G("basic-unescaped")))
R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark"))))
// Multiline Basic String
R("ml-basic-string-delim", G("quotation-mark").Times(3))
R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
R("ml-basic-unescaped", c.Any(a.CharRange(0x20, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii")))
R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
@ -151,7 +151,7 @@ func BuildGrammar() tokenize.Handler {
// Literal String
R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii")))
R("literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x26), a.CharRange(0x28, 0x7E), G("non-ascii")))
R("literal-string", c.Seq(
m.Drop(G("apostrophe")),
c.ZeroOrMore(G("literal-char")),
@ -160,7 +160,7 @@ func BuildGrammar() tokenize.Handler {
// Multiline Literal String
R("ml-literal-string-delim", G("apostrophe").Times(3))
R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii")))
R("ml-literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x7E), G("non-ascii")))
R("ml-literal-body-content", G("ml-literal-char").Or(G("newline")))
R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim"))))
R("ml-literal-string", c.Seq(
@ -183,17 +183,17 @@ func BuildGrammar() tokenize.Handler {
R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit")))
R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int")))
R("hex-prefix", a.Zero.Then(a.Rune('x')))
R("hex-prefix", a.Zero.Then(a.Char('x')))
R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit"))))
R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit"))))
R("oct-prefix", a.Zero.Then(a.Rune('o')))
R("digit0-7", a.RuneRange('0', '7'))
R("oct-prefix", a.Zero.Then(a.Char('o')))
R("digit0-7", a.CharRange('0', '7'))
R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7"))))
R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit"))))
R("bin-prefix", a.Zero.Then(a.Rune('b')))
R("digit0-1", a.Runes('0', '1'))
R("bin-prefix", a.Zero.Then(a.Char('b')))
R("digit0-1", a.Char('0', '1'))
R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1"))))
R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit"))))
@ -241,9 +241,9 @@ func BuildGrammar() tokenize.Handler {
R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))
R("time-delim", c.Any(
tok.Str("T", a.Rune('T')),
tok.Str("t", a.Rune('t')),
tok.Str(" ", a.Rune(' '))))
tok.Str("T", a.Char('T')),
tok.Str("t", a.Char('t')),
tok.Str(" ", a.Char(' '))))
R("time-hour", G("digit").Times(2))
R("time-minute", G("digit").Times(2))
@ -252,7 +252,7 @@ func BuildGrammar() tokenize.Handler {
R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))
R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
R("time-zulu", m.Replace(a.Char('Z', 'z'), "Z"))
R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))

View File

@ -1,9 +1,12 @@
#!/bin/bash
#FILE=short.toml
FILE=normal.toml
ITER=1500
#FILE=long.toml
ITER=10000
#ITER=500
cd ../cmd/burntsushi-tester
go build