String parser replaced with a low-level parser for performance.

2019-08-01 13:25:21 +00:00 · 2019-08-01 13:25:21 +00:00 · 22144487f0
parent ed846c7e53
commit 22144487f0
12 changed files with 815 additions and 724 deletions
--- a/cmd/burntsushi-tester/test.toml
+++ b/cmd/burntsushi-tester/test.toml
@ -1,7 +0,0 @@
-regex2 = '''I [dw]on't need \d{2} apples'''
-lines  = '''
-The first newline is
-trimmed in raw strings.
-   All other whitespace
-   is preserved.
-'''
--- a/parse/benchmark_test.go
+++ b/parse/benchmark_test.go
@ -4,73 +4,29 @@ import (
 	"testing"
 )

-func A(b byte) (byte, bool) {
-	if b > 'b' {
-		switch b {
-		case 't':
-			return '\t', true
-		case 'n':
-			return '\n', true
-		case 'r':
-			return '\r', true
-		case 'f':
-			return '\f', true
-		}
-	} else {
-		switch b {
-		case '"':
-			return '"', true
-		case '\\':
-			return '\\', true
-		case 'b':
-			return '\b', true
-		}
-	}
-	return 0x00, false
-}
-
-func B(b byte) (byte, bool) {
-	switch b {
-	case 'r':
-		return '\r', true
-	case 'n':
-		return '\n', true
-	case 't':
-		return '\t', true
-	case 'b':
-		return '\b', true
-	case 'f':
-		return '\f', true
-	case '"':
-		return '"', true
-	case '\\':
-		return '\\', true
-	}
-	return 0x00, false
-}
-
-// TODO cleanup unused benchmark.
-func Benchmark_A(b *testing.B) {
-	for i := 0; i < b.N; i++ {
-		A('b')
-		A('t')
-		A('n')
-		A('f')
-		A('r')
-		A('"')
-		A('\\')
-	}
-}
-
-// TODO cleanup unused benchmark.
 func Benchmark_B(b *testing.B) {
+	f := func(i int) int { i = i + 1; return i }
 	for i := 0; i < b.N; i++ {
-		B('b')
-		B('t')
-		B('n')
-		B('f')
-		B('r')
-		B('"')
-		B('\\')
+		f(i)
+	}
+}
+func Benchmark_C(b *testing.B) {
+	f := func(i int) int { i += 1; return i }
+	for i := 0; i < b.N; i++ {
+		f(i)
+	}
+}
+
+func Benchmark_D(b *testing.B) {
+	f := func(i int) int { i++; return i }
+	for i := 0; i < b.N; i++ {
+		f(i)
+	}
+}
+
+func Benchmark_A(b *testing.B) {
+	f := func(i int) int { i = 2; return i }
+	for i := 0; i < b.N; i++ {
+		f(i)
 	}
 }
--- a/parse/document.go
+++ b/parse/document.go
@ -27,11 +27,9 @@ var (
 	// A '#' hash symbol marks the rest of the line as a comment.
 	// All characters up to the end of the line are included in the comment.

-	comment         = c.Seq(a.Hash, a.UntilEndOfLine.Optional())
-	optionalComment = comment.Optional()
-
-	endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
-
+	comment                      = c.Seq(a.Hash, a.UntilEndOfLine.Optional())
+	optionalComment              = comment.Optional()
+	endOfLineOrComment           = c.Seq(whitespace, optionalComment, a.EndOfLine)
 	whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)

 	// Keys may be either bare or quoted.
--- a/parse/document_test.go
+++ b/parse/document_test.go
@ -1,6 +1,7 @@
 package parse

 import (
+	"strings"
 	"testing"
 )

@ -29,3 +30,18 @@ func TestInvalidDocument(t *testing.T) {
 		testParse(t, p, p.startDocument, test)
 	}
 }
+
+func TestBufferRelatedBug(t *testing.T) {
+	text := strings.Repeat("#", 2040) + "\n# a bug\n"
+	ast, err := Run(text)
+
+	if len(text) != 2049 {
+		t.Fatalf("Test input is not 2049 bytes, but %d", len(text))
+	}
+	if err != nil {
+		t.Fatalf("Unexpected error from parser: %s", err)
+	}
+	if ast.String() != "{}" {
+		t.Fatalf("Unexpected TOML document structure returned:\nexpected: {}\nactual: %s", ast)
+	}
+}
--- a/parse/keyvaluepair.go
+++ b/parse/keyvaluepair.go
@ -53,7 +53,11 @@ func (t *parser) startKeyValuePair(p *parse.API) {
 				p.Expected("end of line")
 			}
 		} else {
-			p.Expected("a value")
+			// A failed value parse should already have been reported as an
+			// error by the value parsing code. This check is only a safety net.
+			if !p.IsStoppedOrInError() {
+				panic("Bug: value parsing did not return a successful value, neither an error")
+			}
 		}
 	}
 }
@ -70,14 +74,16 @@ func (t *parser) startKeyValuePair(p *parse.API) {

 func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
 	var keyPart string
+	var strType stringType
 	var ok bool
 	switch {
 	case p.Accept(bareKey):
 		keyPart, ok = p.Result.String(), true
-	case p.Peek(a.SingleQuote):
-		keyPart, ok = t.parseLiteralString("key", p)
-	case p.Peek(a.DoubleQuote):
-		keyPart, ok = t.parseBasicString("key", p)
+	case p.Peek(detectString):
+		keyPart, strType, ok = t.parseString(p)
+		if strType != strTypeBasic && strType != strTypeLiteral {
+			p.Expected("a key name") // TODO: report a more specific error when a multi-line string is used as a key?
+		}
 	default:
 		p.Expected("a key name")
 		return nil, false
--- a/parse/parse.go
+++ b/parse/parse.go
@ -6,12 +6,13 @@ import (
 )

 type parser struct {
-	doc *ast.Document
+	doc      *ast.Document
+	strFlags byte // A helper field used for string parsing.
 }

 func newParser() *parser {
 	doc := ast.NewDocument()
-	return &parser{doc}
+	return &parser{doc: doc}
 }

 // Run the TOML parser against the provided input data.
--- a/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.json
+++ b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.json
@ -0,0 +1 @@
+{}
--- a/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.toml
+++ b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.toml
@ -0,0 +1,2 @@
+# This line is 2040 long, to make the read buffer end at the 'g' of the second line, leaving only the newline at the end of the file for the next read operation. There was a bug that resulted in the word 'a' of the second line being seen as a key, because the comment skipping did not work as it should with the buffer filling operation between '# a bug' and the final '\n'. ########################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################
#########################################
+# a bug
--- a/parse/value.go
+++ b/parse/value.go
@ -20,7 +20,8 @@ var (
 func (t *parser) parseValue(p *parse.API) (*ast.Value, bool) {
 	switch {
 	case p.Peek(detectString):
-		return t.parseString(p)
+		str, _, ok := t.parseString(p)
+		return ast.NewValue(ast.TypeString, str), ok
 	case p.Peek(detectBoolean):
 		return t.parseBoolean(p)
 	case p.Peek(detectNumberSpecials):
--- a/parse/value_string.go
+++ b/parse/value_string.go
--- a/parse/value_string_test.go
+++ b/parse/value_string_test.go
@ -13,30 +13,6 @@ func TestStartString(t *testing.T) {
 	testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected a string value) at start of file"})
 }

-func TestStartBasicString(t *testing.T) {
-	parser := newParser()
-	wrapper := func(p *parse.API) { parser.parseBasicString("xyz", p) }
-	testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening quotation marks) at start of file"})
-}
-
-func TestStartLiteralString(t *testing.T) {
-	parser := newParser()
-	wrapper := func(p *parse.API) { parser.parseLiteralString("xyz", p) }
-	testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening single quote) at start of file"})
-}
-
-func TestStartMultiLineBasicString(t *testing.T) {
-	parser := newParser()
-	wrapper := func(p *parse.API) { parser.parseMultiLineBasicString(p) }
-	testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three quotation marks) at start of file"})
-}
-
-func TestStartMultiLineLiteralString(t *testing.T) {
-	parser := newParser()
-	wrapper := func(p *parse.API) { parser.parseMultiLineLiteralString(p) }
-	testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three single quotes) at start of file"})
-}
-
 func TestString(t *testing.T) {
 	for _, test := range []parseTest{
 		{`x=no start quote"`, `{}`, `unexpected input (expected a value) at line 1, column 3`},
--- a/parse2/profile-qa.sh
+++ b/parse2/profile-qa.sh
@ -4,10 +4,16 @@ go build
 $(cd ../cmd/burntsushi-tester/; go build)

 DURATION=`./parse2 -p 10 < long.toml 2>&1 | grep Duration | awk '{print $2}'`
-echo "$DURATION parse2 10 iteration profiling of long.toml"
+echo "$DURATION ./parse2 -p 10 < long.toml"
+
+DURATION=`./parse2 -p 100 < long.toml 2>&1 | grep Duration | awk '{print $2}'`
+echo "$DURATION ./parse2 -p 100 < long.toml"

 DURATION=`./parse2 -p 1000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'`
-echo "$DURATION parse2 1000 iteration profiling of normal.toml"
+echo "$DURATION ./parse2 -p 1000 < normal.toml"
+
+DURATION=`./parse2 -p 10000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'`
+echo "$DURATION ./parse2 -p 10000 < normal.toml"

 echo ""