From 74274e04fb2f58232846f14f1f746bc7754d1f0d Mon Sep 17 00:00:00 2001
From: Maurice Makaay <maurice@makaay.nl>
Date: Sun, 28 Jul 2019 23:50:30 +0000
Subject: [PATCH] Made a big jump in performance on big files with lots of
 comments, by reading in chunks till end of line, instead of byte-by-byte.

---
 burntsushi/burntsushi.go       |  2 ++
 cmd/burntsushi-tester/Makefile | 19 +++++++++--------
 parse/document.go              |  2 +-
 parse/value_datetime.go        |  2 +-
 parse/value_number.go          | 12 +++++------
 parse/value_string.go          |  8 +++----
 parse2/grammar.go              |  4 ++--
 parse2/performance_timings.txt | 39 +++++++++++++++-------------------
 8 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/burntsushi/burntsushi.go b/burntsushi/burntsushi.go
index 1f7ca9d..415eb4a 100644
--- a/burntsushi/burntsushi.go
+++ b/burntsushi/burntsushi.go
@@ -122,6 +122,8 @@ func writeSushi(w *bufio.Writer, value *ast.Value) {
 	default:
 		panic(fmt.Sprintf("Unhandled data type: %s", value.Type))
 	}
+
+	w.Flush()
 }
 
 func renderValue(w *bufio.Writer, t string, v string) {
diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile
index b644064..8c2ddee 100644
--- a/cmd/burntsushi-tester/Makefile
+++ b/cmd/burntsushi-tester/Makefile
@@ -1,5 +1,6 @@
 PROFILE_COUNT=100
 PROFILE_COUNT2=1000
+TIME=time
 
 b:
 	go build
@@ -22,10 +23,10 @@ test: test-a test-b test-sushi
 profile: profile-a profile-b
 
 test-a:
-	numactl --physcpubind=+1 bash -c "time ./A < testfile.toml"
+	numactl --physcpubind=+1 bash -c "${TIME} ./A < testfile.toml"
 
 test2-a:
-	numactl --physcpubind=+1 bash -c "time ./A < testfile2.toml"
+	numactl --physcpubind=+1 bash -c "${TIME} ./A < testfile2.toml"
 
 profile-a:
 	numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT} < testfile.toml"
@@ -34,10 +35,10 @@ profile2-a:
 	numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT2} < testfile2.toml"
 
 test-b:
-	numactl --physcpubind=+2 bash -c "time ./B < testfile.toml"
+	numactl --physcpubind=+2 bash -c "${TIME} ./B < testfile.toml"
 
 test2-b:
-	numactl --physcpubind=+2 bash -c "time ./B < testfile2.toml"
+	numactl --physcpubind=+2 bash -c "${TIME} ./B < testfile2.toml"
 
 profile-b:
 	numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT} < testfile.toml"
@@ -47,22 +48,22 @@ profile2-b:
 
 test-sushi:
 	
-	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml"
+	numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test-decoder < testfile.toml"
 
 test2-sushi:
 	
-	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile2.toml"
+	numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test-decoder < testfile2.toml"
 
 
 test-sushi-a:
 	
-	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./A"
+	numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ./A"
 
 test-sushi-b:
 	
-	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./B"
+	numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ./B"
 
 test-sushi-sushi:
 	
-	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ${GOPATH}/bin/toml-test-decoder"
+	numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ${GOPATH}/bin/toml-test-decoder"
 
diff --git a/parse/document.go b/parse/document.go
index 3561737..1ee201f 100644
--- a/parse/document.go
+++ b/parse/document.go
@@ -27,7 +27,7 @@ var (
 	// A '#' hash symbol marks the rest of the line as a comment.
 	// All characters up to the end of the line are included in the comment.
 
-	comment         = c.Seq(a.Hash, m.DropUntilEndOfLine)
+	comment         = c.Seq(a.Hash, a.UntilEndOfLine)
 	optionalComment = comment.Optional()
 
 	endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
diff --git a/parse/value_datetime.go b/parse/value_datetime.go
index a767187..3fc010d 100644
--- a/parse/value_datetime.go
+++ b/parse/value_datetime.go
@@ -91,7 +91,7 @@ func (t *parser) parseDateTime(p *parse.API) (*ast.Value, bool) {
 		p.Expected("a date and/or time")
 		return nil, false
 	}
-	token := p.Result.Tokens[0]
+	token := p.Result.Token(0)
 
 	layout := ""
 	for _, l := range token.Value.([]tokenize.Token) {
diff --git a/parse/value_number.go b/parse/value_number.go
index 5189192..592dfcd 100644
--- a/parse/value_number.go
+++ b/parse/value_number.go
@@ -73,18 +73,18 @@ var (
 func (t *parser) parseNumber(p *parse.API) (*ast.Value, bool) {
 	switch {
 	case p.Accept(floatToken):
-		return ast.NewValue(ast.TypeFloat, p.Result.Tokens[0].Value.(float64)), true
+		return ast.NewValue(ast.TypeFloat, p.Result.Token(0).Value.(float64)), true
 	case p.Skip(nan):
 		return ast.NewValue(ast.TypeFloat, math.NaN()), true
 	case p.Accept(inf):
-		if p.Result.Bytes[0] == '-' {
+		if p.Result.Byte(0) == '-' {
 			return ast.NewValue(ast.TypeFloat, math.Inf(-1)), true
 		}
 		return ast.NewValue(ast.TypeFloat, math.Inf(+1)), true
 	case p.Accept(a.Zero):
 		return t.parseIntegerStartingWithZero(p)
 	case p.Accept(integerToken):
-		return ast.NewValue(ast.TypeInteger, p.Result.Tokens[0].Value.(int64)), true
+		return ast.NewValue(ast.TypeInteger, p.Result.Token(0).Value.(int64)), true
 	default:
 		p.Expected("a number")
 		return nil, false
@@ -96,11 +96,11 @@ func (t *parser) parseIntegerStartingWithZero(p *parse.API) (*ast.Value, bool) {
 	var err error
 	switch {
 	case p.Accept(hexadecimal):
-		value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 16, 64)
+		value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 16, 64)
 	case p.Accept(octal):
-		value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 8, 64)
+		value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 8, 64)
 	case p.Accept(binary):
-		value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 2, 64)
+		value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 2, 64)
 	default:
 		return ast.NewValue(ast.TypeInteger, int64(0)), true
 	}
diff --git a/parse/value_string.go b/parse/value_string.go
index 930682b..7e14b7b 100644
--- a/parse/value_string.go
+++ b/parse/value_string.go
@@ -104,7 +104,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 	for {
 		switch {
 		case p.Peek(controlCharacter):
-			p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Bytes[0])
+			p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Byte(0))
 			return sb.String(), false
 		case p.Accept(validEscape):
 			if !appendEscapedRune(p, sb) {
@@ -147,7 +147,7 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
 		case p.Skip(a.Tab):
 			sb.WriteString("\t")
 		case p.Peek(controlCharacter):
-			p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Bytes[0])
+			p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Byte(0))
 			return sb.String(), false
 		case p.Peek(a.InvalidRune):
 			p.SetError("invalid UTF8 rune")
@@ -195,7 +195,7 @@ func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) {
 		case p.Skip(newline):
 			sb.WriteString("\n")
 		case p.Peek(controlCharacter):
-			p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Bytes[0])
+			p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Byte(0))
 			return sb.String(), false
 		case p.Accept(validEscape):
 			if !appendEscapedRune(p, sb) {
@@ -279,7 +279,7 @@ func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) {
 		case p.Skip(newline):
 			sb.WriteString("\n")
 		case p.Peek(controlCharacter):
-			p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Bytes[0])
+			p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Byte(0))
 			return sb.String(), false
 		case p.Accept(a.ValidRune):
 			sb.WriteString(p.Result.String())
diff --git a/parse2/grammar.go b/parse2/grammar.go
index a3fdb04..ede3ff7 100644
--- a/parse2/grammar.go
+++ b/parse2/grammar.go
@@ -47,7 +47,7 @@ func main() {
 			if err != nil {
 				panic("Cannot profile, parsing input failed: " + err.Error())
 			}
-			fmt.Printf("cycle %d / %d, tokens=%d\r", i+1, *doProfile, len(result.Tokens))
+			fmt.Printf("cycle %d / %d, tokens=%d\r", i+1, *doProfile, len(result.Tokens()))
 		}
 
 		duration := time.Since(start)
@@ -62,7 +62,7 @@ func main() {
 	if err != nil {
 		log.Fatalf("Error in parsing TOML: %s\n", err)
 	} else {
-		for i, t := range result.Tokens {
+		for i, t := range result.Tokens() {
 			fmt.Printf("[%d] %v\n", i, t)
 		}
 	}
diff --git a/parse2/performance_timings.txt b/parse2/performance_timings.txt
index a439361..5fea263 100644
--- a/parse2/performance_timings.txt
+++ b/parse2/performance_timings.txt
@@ -1,25 +1,20 @@
-16.750      ./parse2 -p 100 < long.toml
-19.403      ./parse2 -p 10000 < x
+9.2767      ./parse2 -p 100 < long.toml
+9.2569      ./parse2 -p 10000 < x
 
-1.508262093s parse2 10 iteration profiling of long.toml
-1.278056375s parse2 1000 iteration profiling of normal.toml
+917.916965ms parse2 10 iteration profiling of long.toml
+895.376882ms parse2 1000 iteration profiling of normal.toml
 
-207.402484ms burntsushi-tester 10 iteration profiling of long.toml
-782.128156ms burntsushi-tester 1000 iteration profiling of normal.toml
+71.794061ms burntsushi-tester 10 iteration profiling of long.toml
+472.762136ms burntsushi-tester 1000 iteration profiling of normal.toml
 
-0.002s git.makaay.nl/mauricem/go-toml/ast (unit tests)
-0.236s git.makaay.nl/mauricem/go-toml/parse (unit tests)
-
-0m0.254s BurntSushi test set
-
-3.500633ms qa-array-inline-1000.toml
-5.844964ms qa-array-inline-nested-1000.toml
-4.164484ms qa-key-literal-40kb.toml
-6.965205ms qa-key-string-40kb.toml
-4.514677ms qa-scalar-literal-40kb.toml
-8.53826ms qa-scalar-literal-multiline-40kb.toml
-7.819157ms qa-scalar-string-40kb.toml
-6.569182ms qa-scalar-string-multiline-40kb.toml
-5.64134ms qa-table-inline-1000.toml
-11.501451ms qa-table-inline-nested-1000.toml
-149.369957ms qa-long-loads-of-comments.toml
\ No newline at end of file
+2.15562ms qa-array-inline-1000.toml
+5.625499ms qa-array-inline-nested-1000.toml
+2.791934ms qa-key-literal-40kb.toml
+5.096704ms qa-key-string-40kb.toml
+3.232853ms qa-scalar-literal-40kb.toml
+5.398462ms qa-scalar-literal-multiline-40kb.toml
+4.710443ms qa-scalar-string-40kb.toml
+3.948024ms qa-scalar-string-multiline-40kb.toml
+2.803367ms qa-table-inline-1000.toml
+6.076693ms qa-table-inline-nested-1000.toml
+95.646345ms qa-long-loads-of-comments.toml
\ No newline at end of file