Speed improvements

2019-07-29 23:51:03 +00:00 · 2019-07-29 23:51:03 +00:00 · 5ff6f20ab7
parent 74274e04fb
commit 5ff6f20ab7
11 changed files with 55 additions and 50 deletions
--- a/cmd/burntsushi-tester/Makefile
+++ b/cmd/burntsushi-tester/Makefile
@ -1,4 +1,4 @@
-PROFILE_COUNT=100
+PROFILE_COUNT=500
 PROFILE_COUNT2=1000
 TIME=time

--- a/parse/document.go
+++ b/parse/document.go
@ -27,7 +27,7 @@ var (
 	// A '#' hash symbol marks the rest of the line as a comment.
 	// All characters up to the end of the line are included in the comment.

-	comment         = c.Seq(a.Hash, a.UntilEndOfLine)
+	comment         = c.Seq(a.Hash, a.UntilEndOfLine.Optional())
 	optionalComment = comment.Optional()

 	endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
@ -35,7 +35,9 @@ var (
 	whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)

 	// Keys may be either bare or quoted.
-	detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
+	detectKey = a.ByteByCallback(func(b byte) bool {
+		return isBareKeyRune(b) || b == '\'' || b == '"'
+	})

 	// Both [tables] and [[arrays of tables]] start with a square open bracket.
 	detectTable = a.SquareOpen
--- a/parse/document_test.go
+++ b/parse/document_test.go
@ -14,7 +14,7 @@ func TestComment(t *testing.T) {
 		{`# \xxx/ \u can't escape/`, `{}`, ``},
 		{"# \tlexe\r accepts embedded ca\r\riage \returns\r\n", `{}`, ``},
 		{"   # multiple\n#lines\n \t\n\n\t#with\n ### comments!", `{}`, ``},
-		{"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 5`},
+		{"# with data and newline\ncode continues here", `{}`, `unexpected input (expected a value assignment) at line 2, column 6`},
 	} {
 		p := newParser()
 		testParse(t, p, p.startDocument, test)
--- a/parse/keyvaluepair.go
+++ b/parse/keyvaluepair.go
@ -14,7 +14,7 @@ var (
 	// sign, and value must be on the same line (though some values can be
 	// broken over multiple lines).

-	keyAssignment = c.Seq(whitespace, a.Equal, whitespace)
+	keyAssignment = c.InOptionalBlanks(a.Equal)

 	// A key may be either bare, quoted or dotted. Bare keys may only
 	// contain ASCII letters, ASCII digits, underscores, and dashes
@ -32,14 +32,14 @@ var (
 			(b >= '0' && b <= '9') || b == '_' || b == '-')
 	}
 	bareKeyRune = a.ByteByCallback(isBareKeyRune)
-	bareKey     = c.OneOrMore(bareKeyRune)
+	bareKey     = a.BytesByCallback(isBareKeyRune)

 	// Dotted keys are a sequence of bare or quoted keys joined with a dot.
 	// This allows for grouping similar properties together. Whitespace
 	// around dot-separated parts is ignored, however, best practice is to
 	// not use any extraneous whitespace.

-	keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace)
+	keySeparatorDot = c.InOptionalBlanks(a.Dot)
 )

 func (t *parser) startKeyValuePair(p *parse.API) {
--- a/parse/keyvaluepair_test.go
+++ b/parse/keyvaluepair_test.go
@ -39,7 +39,7 @@ func TestKeyValuePair(t *testing.T) {
 		{``, `{}`, ``},
 		{` `, `{}`, ``},
 		{" \t ", `{}`, ``},
-		{" key ", `{}`, `unexpected input (expected a value assignment) at line 1, column 5`},
+		{" key ", `{}`, `unexpected end of file (expected a value assignment) at line 1, column 6`},
 		{" key \t=", `{}`, `unexpected end of file (expected a value) at line 1, column 8`},
 		{"key = # INVALID", `{}`, `unexpected input (expected a value) at line 1, column 7`},
 		{" key \t =\t \"The Value\"  \r\n", `{"key": "The Value"}`, ``},
--- a/parse/value_datetime.go
+++ b/parse/value_datetime.go
@ -53,9 +53,9 @@ var (
 	//    odt4 = 1979-05-27 07:32:00Z

 	tdelimTok = c.Any(
-		tok.Str("T", a.Rune('T')),
-		tok.Str("t", a.Rune('t')),
-		tok.Str(" ", a.Rune(' ')))
+		tok.Str("T", a.Char('T')),
+		tok.Str("t", a.Char('t')),
+		tok.Str(" ", a.Char(' ')))

 	// If you omit the offset from an RFC 3339 formatted date-time, it will
 	// represent the given date-time without any relation to an offset or
@ -71,8 +71,8 @@ var (
 	// Note that RFC 3339 also allows the use of a lower case 'z'.
 	// Here we replace it with a capital 'Z' to make the Go date parser work.

-	zulu   = m.Replace(a.Runes('Z', 'z'), "Z")
-	offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute)
+	zulu   = m.Replace(a.Char('Z', 'z'), "Z")
+	offset = c.Seq(a.Char('+', '-'), hour, a.Colon, minute)
 	tzTok  = tok.Str("Z07:00", zulu.Or(offset))

 	// The full date/time parse format, based on the above definitions.
--- a/parse/value_number.go
+++ b/parse/value_number.go
@ -34,15 +34,15 @@ var (
 	// Hexadecimal with prefix `0x`.
 	hexDigits           = c.OneOrMore(a.HexDigit)
 	underscoreHexDigits = m.Drop(a.Underscore).Then(hexDigits)
-	hexadecimal         = a.Rune('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits))))
+	hexadecimal         = a.Char('x').Then(tok.Str("x", hexDigits.Then(c.ZeroOrMore(underscoreHexDigits))))
 	// Octal with prefix `0o`.
-	octalDigits           = c.OneOrMore(a.RuneRange('0', '7'))
+	octalDigits           = c.OneOrMore(a.CharRange('0', '7'))
 	underscoreOctalDigits = m.Drop(a.Underscore).Then(octalDigits)
-	octal                 = a.Rune('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits))))
+	octal                 = a.Char('o').Then(tok.Str("o", octalDigits.Then(c.ZeroOrMore(underscoreOctalDigits))))
 	// Binary with prefix `0b`.
-	binaryDigits           = c.OneOrMore(a.RuneRange('0', '1'))
+	binaryDigits           = c.OneOrMore(a.CharRange('0', '1'))
 	underscoreBinaryDigits = m.Drop(a.Underscore).Then(binaryDigits)
-	binary                 = a.Rune('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits))))
+	binary                 = a.Char('b').Then(tok.Str("b", binaryDigits.Then(c.ZeroOrMore(underscoreBinaryDigits))))

 	// A fractional part is a decimal point followed by one or more digits.
 	// Similar to integers, you may use underscores to enhance readability.
@ -51,7 +51,7 @@ var (

 	// An exponent part is an E (upper or lower case) followed by an integer
 	// part (which follows the same rules as decimal integer values).
-	exponentPart = a.Runes('e', 'E').Then(integer)
+	exponentPart = a.Char('e', 'E').Then(integer)

 	// Floats should be implemented as IEEE 754 binary64 values.
 	// A float consists of an integer part (which follows the same rules as
--- a/parse/value_string.go
+++ b/parse/value_string.go
@ -25,10 +25,10 @@ var (
 	closingMultiLineLiteralString   = m.Drop(multiLineLiteralStringDelimiter)

 	// Opening and closing character for basic strings.
-	basicStringDelimiter = m.Drop(a.DoubleQuote)
+	basicStringDelimiter = a.DoubleQuote

 	// Opening and closing character for literal strings.
-	literalStringDelimiter = m.Drop(a.SingleQuote)
+	literalStringDelimiter = a.SingleQuote

 	// Control characters as defined by TOML (U+0000 to U+001F, U+007F)

@ -47,10 +47,10 @@ var (
 	// \uXXXX     - unicode         (U+XXXX)
 	// \UXXXXXXXX - unicode         (U+XXXXXXXX)

-	validEscapeChar = a.Bytes('b', 't', 'n', 'f', 'r', '"', '\\')
+	validEscapeChar = a.Char('b', 't', 'n', 'f', 'r', '"', '\\')
 	shortEscape     = c.Seq(a.Backslash, validEscapeChar)
-	shortUTF8Escape = c.Seq(a.Backslash, a.Byte('u'), a.HexDigit.Times(4))
-	longUTF8Escape  = c.Seq(a.Backslash, a.Byte('U'), a.HexDigit.Times(8))
+	shortUTF8Escape = c.Seq(a.Backslash, a.Char('u'), a.HexDigit.Times(4))
+	longUTF8Escape  = c.Seq(a.Backslash, a.Char('U'), a.HexDigit.Times(8))
 	validEscape     = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)

 	// For writing long strings without introducing extraneous whitespace, use a
--- a/parse/value_table.go
+++ b/parse/value_table.go
@ -7,17 +7,17 @@ import (

 var (
 	// Opener and closer for [table].
-	tableOpen  = c.Seq(whitespace, a.SquareOpen, whitespace)
-	tableClose = c.Seq(whitespace, a.SquareClose, whitespace)
+	tableOpen  = c.InOptionalBlanks(a.SquareOpen)
+	tableClose = c.InOptionalBlanks(a.SquareClose)

 	// Opener and closer for [[array.of.tables]].
-	tableArrayOpen  = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace)
-	tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace)
+	tableArrayOpen  = c.InOptionalBlanks(c.Seq(a.SquareOpen, a.SquareOpen))
+	tableArrayClose = c.InOptionalBlanks(c.Seq(a.SquareClose, a.SquareClose))

 	// Opener, separator and closer for { inline: "tables" }.
-	inlineTableOpen      = c.Seq(whitespace, a.CurlyOpen, whitespace)
-	inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace)
-	inlineTableClose     = c.Seq(whitespace, a.CurlyClose, whitespace)
+	inlineTableOpen      = c.InOptionalBlanks(a.CurlyOpen)
+	inlineTableSeparator = c.InOptionalBlanks(a.Comma)
+	inlineTableClose     = c.InOptionalBlanks(a.CurlyClose)
 )

 func (t *parser) startTable(p *parse.API) {
--- a/parse2/grammar.go
+++ b/parse2/grammar.go
@ -120,25 +120,25 @@ func BuildGrammar() tokenize.Handler {
 	// Comment

 	R("comment-start-symbol", a.Hash)
-	R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)))
-	R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii")))
+	R("non-ascii", a.CharRange(0x80, 0xD7FF).Or(a.CharRange(0xE000, 0x10FFFF)))
+	R("non-eol", c.Any(a.Char(0x09), a.CharRange(0x20, 0x7E), G("non-ascii")))
 	R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol"))))

 	// Basic String

 	R("escape-seq-char", c.Any(
-		a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
-		a.Rune('u').Then(G("hex-digit").Times(4)),
-		a.Rune('U').Then(G("hex-digit").Times(8))))
+		a.Char('"', '\\', 'b', 'f', 'n', 'r', 't'),
+		a.Char('u').Then(G("hex-digit").Times(4)),
+		a.Char('U').Then(G("hex-digit").Times(8))))
 	R("escaped", G("escape").Then(G("escape-seq-char")))
-	R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
+	R("basic-unescaped", c.Any(a.CharRange(0x20, 0x21), a.CharRange(0x23, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii")))
 	R("basic-char", G("escaped").Or(G("basic-unescaped")))
 	R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark"))))

 	// Multiline Basic String

 	R("ml-basic-string-delim", G("quotation-mark").Times(3))
-	R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
+	R("ml-basic-unescaped", c.Any(a.CharRange(0x20, 0x5B), a.CharRange(0x5D, 0x7E), G("non-ascii")))
 	R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
 	R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
 	R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
@ -151,7 +151,7 @@ func BuildGrammar() tokenize.Handler {

 	// Literal String

-	R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii")))
+	R("literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x26), a.CharRange(0x28, 0x7E), G("non-ascii")))
 	R("literal-string", c.Seq(
 		m.Drop(G("apostrophe")),
 		c.ZeroOrMore(G("literal-char")),
@ -160,7 +160,7 @@ func BuildGrammar() tokenize.Handler {
 	// Multiline Literal String

 	R("ml-literal-string-delim", G("apostrophe").Times(3))
-	R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii")))
+	R("ml-literal-char", c.Any(G("tab"), a.CharRange(0x20, 0x7E), G("non-ascii")))
 	R("ml-literal-body-content", G("ml-literal-char").Or(G("newline")))
 	R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim"))))
 	R("ml-literal-string", c.Seq(
@ -183,17 +183,17 @@ func BuildGrammar() tokenize.Handler {
 	R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit")))
 	R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int")))

-	R("hex-prefix", a.Zero.Then(a.Rune('x')))
+	R("hex-prefix", a.Zero.Then(a.Char('x')))
 	R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit"))))
 	R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit"))))

-	R("oct-prefix", a.Zero.Then(a.Rune('o')))
-	R("digit0-7", a.RuneRange('0', '7'))
+	R("oct-prefix", a.Zero.Then(a.Char('o')))
+	R("digit0-7", a.CharRange('0', '7'))
 	R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7"))))
 	R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit"))))

-	R("bin-prefix", a.Zero.Then(a.Rune('b')))
-	R("digit0-1", a.Runes('0', '1'))
+	R("bin-prefix", a.Zero.Then(a.Char('b')))
+	R("digit0-1", a.Char('0', '1'))
 	R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1"))))
 	R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit"))))

@ -241,9 +241,9 @@ func BuildGrammar() tokenize.Handler {
 	R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))

 	R("time-delim", c.Any(
-		tok.Str("T", a.Rune('T')),
-		tok.Str("t", a.Rune('t')),
-		tok.Str(" ", a.Rune(' '))))
+		tok.Str("T", a.Char('T')),
+		tok.Str("t", a.Char('t')),
+		tok.Str(" ", a.Char(' '))))

 	R("time-hour", G("digit").Times(2))
 	R("time-minute", G("digit").Times(2))
@ -252,7 +252,7 @@ func BuildGrammar() tokenize.Handler {

 	R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))

-	R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
+	R("time-zulu", m.Replace(a.Char('Z', 'z'), "Z"))
 	R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
 	R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))

--- a/parse2/profile-sushi.sh
+++ b/parse2/profile-sushi.sh
@ -1,9 +1,12 @@
 #!/bin/bash

 #FILE=short.toml
+
 FILE=normal.toml
+ITER=1500
+
 #FILE=long.toml
-ITER=10000
+#ITER=500

 cd ../cmd/burntsushi-tester
 go build