From dea3eb987b4e4f51832da177225176658867e788 Mon Sep 17 00:00:00 2001
From: Maurice Makaay <maurice@makaay.nl>
Date: Fri, 26 Jul 2019 22:56:24 +0000
Subject: [PATCH] Changes for compatibility with latest parsekit, and some
 speed improvements.

---
 cmd/burntsushi-tester/Makefile |  6 ++++++
 parse/document.go              | 39 ++++++++++++++++++++++++++++------
 parse/keyvaluepair.go          |  6 +++---
 parse/parse.go                 | 29 -------------------------
 parse/value_array.go           |  8 +++----
 parse/value_boolean.go         |  4 ++--
 parse/value_number.go          |  4 ++--
 parse/value_string.go          | 36 +++++++++++++++----------------
 parse/value_table.go           | 20 ++++++++---------
 parse2/profile-sushi.sh        |  2 +-
 10 files changed, 79 insertions(+), 75 deletions(-)

diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile
index 983acc2..b644064 100644
--- a/cmd/burntsushi-tester/Makefile
+++ b/cmd/burntsushi-tester/Makefile
@@ -1,4 +1,5 @@
 PROFILE_COUNT=100
+PROFILE_COUNT2=1000
 
 b:
 	go build
@@ -29,6 +30,8 @@ test2-a:
 profile-a:
 	numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT} < testfile.toml"
 	
+profile2-a:
+	numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT2} < testfile2.toml"
 
 test-b:
 	numactl --physcpubind=+2 bash -c "time ./B < testfile.toml"
@@ -39,6 +42,9 @@ test2-b:
 profile-b:
 	numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT} < testfile.toml"
 
+profile2-b:
+	numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT2} < testfile2.toml"
+
 test-sushi:
 	
 	numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml"
diff --git a/parse/document.go b/parse/document.go
index eb3db01..3561737 100644
--- a/parse/document.go
+++ b/parse/document.go
@@ -2,28 +2,55 @@ package parse
 
 import (
 	"git.makaay.nl/mauricem/go-parsekit/parse"
+	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
+// Some globally useful tokenizer definitions.
 var (
+	c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
+
+	// Whitespace means tab (0x09) or space (0x20).
+	// This matches the blanks as defined by parsekit.
+
+	whitespace = a.Blanks.Optional()
+
+	// Newline means LF (0x0A) or CRLF (0x0D0A).
+	// This matches the newline as defined by parsekit.
+
+	newline = a.Newline
+
+	// Whitespace + newlines.
+	// This matches the whitespace as defined by parsekit.
+
+	whitespaceInclNewlines = a.Whitespace
+
+	// A '#' hash symbol marks the rest of the line as a comment.
+	// All characters up to the end of the line are included in the comment.
+
+	comment         = c.Seq(a.Hash, m.DropUntilEndOfLine)
+	optionalComment = comment.Optional()
+
+	endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
+
+	whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)
+
 	// Keys may be either bare or quoted.
 	detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
 
 	// Both [tables] and [[arrays of tables]] start with a square open bracket.
 	detectTable = a.SquareOpen
-
-	whitespaceNewlinesAndComments = whitespaceInclNewlines.Or(comment)
 )
 
 func (t *parser) startDocument(p *parse.API) {
 	for {
 		switch {
-		case p.Accept(whitespaceNewlinesAndComments):
+		case p.Skip(whitespaceNewlinesOrComments):
 			// NOOP
-		case p.Peek(detectTable):
-			p.Handle(t.startTable)
 		case p.Peek(detectKey):
 			p.Handle(t.startKeyValuePair)
-		case p.Accept(a.EndOfFile):
+		case p.Peek(detectTable):
+			p.Handle(t.startTable)
+		case p.Skip(a.EndOfFile):
 			p.Stop()
 		default:
 			p.Expected("key/value pair, table or array of tables")
diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go
index 345d3e7..d2dfb64 100644
--- a/parse/keyvaluepair.go
+++ b/parse/keyvaluepair.go
@@ -49,7 +49,7 @@ func (t *parser) startKeyValuePair(p *parse.API) {
 			err := t.doc.SetKeyValuePair(key, value)
 			if err != nil {
 				p.SetError("%s", err)
-			} else if !p.Accept(endOfLineOrComment) {
+			} else if !p.Skip(endOfLineOrComment) {
 				p.Expected("end of line")
 			}
 		}
@@ -93,14 +93,14 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
 // practice is to not use any extraneous whitespace.
 
 func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) {
-	if p.Accept(keySeparatorDot) {
+	if p.Skip(keySeparatorDot) {
 		return t.parseKey(p, key)
 	}
 	return key, true
 }
 
 func (t *parser) startAssignment(p *parse.API) {
-	if !p.Accept(keyAssignment) {
+	if !p.Skip(keyAssignment) {
 		p.Expected("a value assignment")
 	}
 }
diff --git a/parse/parse.go b/parse/parse.go
index 458d95c..b35ce2a 100644
--- a/parse/parse.go
+++ b/parse/parse.go
@@ -2,38 +2,9 @@ package parse
 
 import (
 	"git.makaay.nl/mauricem/go-parsekit/parse"
-	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 	"git.makaay.nl/mauricem/go-toml/ast"
 )
 
-// Some globally useful tokenizer definitions.
-var (
-	c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
-
-	// Whitespace means tab (0x09) or space (0x20).
-	// This matches the blanks as defined by parsekit.
-
-	whitespace = a.Blanks.Optional()
-
-	// Newline means LF (0x0A) or CRLF (0x0D0A).
-	// This matches the newline as defined by parsekit.
-
-	newline = a.Newline
-
-	// Whitespace + newlines.
-	// This matches the whitespace as defined by parsekit.
-
-	whitespaceInclNewlines = a.Whitespace
-
-	// A '#' hash symbol marks the rest of the line as a comment.
-	// All characters up to the end of the line are included in the comment.
-
-	comment         = c.Seq(a.Hash, m.DropUntilEndOfLine)
-	optionalComment = comment.Optional()
-
-	endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
-)
-
 type parser struct {
 	doc *ast.Document
 }
diff --git a/parse/value_array.go b/parse/value_array.go
index 2ee1055..fb0a451 100644
--- a/parse/value_array.go
+++ b/parse/value_array.go
@@ -39,7 +39,7 @@ var (
 
 func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
 	// Check for the start of the array.
-	if !p.Accept(arrayOpen) {
+	if !p.Skip(arrayOpen) {
 		p.Expected("an array")
 		return nil, false
 	}
@@ -47,7 +47,7 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
 	a := ast.NewArray()
 
 	// Check for an empty array.
-	if p.Accept(arrayClose) {
+	if p.Skip(arrayClose) {
 		return ast.NewValue(ast.TypeArray, a), true
 	}
 
@@ -68,12 +68,12 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
 		}
 
 		// Check for the end of the array.
-		if p.Accept(arrayClose) {
+		if p.Skip(arrayClose) {
 			return ast.NewValue(ast.TypeArray, a), true
 		}
 
 		// Not the end of the array? Then we should find an array separator.
-		if !p.Accept(arraySeparator) {
+		if !p.Skip(arraySeparator) {
 			p.Expected("an array separator")
 			return nil, false
 		}
diff --git a/parse/value_boolean.go b/parse/value_boolean.go
index f576004..d4383f2 100644
--- a/parse/value_boolean.go
+++ b/parse/value_boolean.go
@@ -15,9 +15,9 @@ var (
 // Booleans are just the tokens you're used to. Always lowercase.
 func (t *parser) parseBoolean(p *parse.API) (*ast.Value, bool) {
 	switch {
-	case p.Accept(trueStr):
+	case p.Skip(trueStr):
 		return trueValue, true
-	case p.Accept(falseStr):
+	case p.Skip(falseStr):
 		return falseValue, true
 	default:
 		p.Expected("true or false")
diff --git a/parse/value_number.go b/parse/value_number.go
index 602d949..5189192 100644
--- a/parse/value_number.go
+++ b/parse/value_number.go
@@ -74,10 +74,10 @@ func (t *parser) parseNumber(p *parse.API) (*ast.Value, bool) {
 	switch {
 	case p.Accept(floatToken):
 		return ast.NewValue(ast.TypeFloat, p.Result.Tokens[0].Value.(float64)), true
-	case p.Accept(nan):
+	case p.Skip(nan):
 		return ast.NewValue(ast.TypeFloat, math.NaN()), true
 	case p.Accept(inf):
-		if p.Result.Runes[0] == '-' {
+		if p.Result.Bytes[0] == '-' {
 			return ast.NewValue(ast.TypeFloat, math.Inf(-1)), true
 		}
 		return ast.NewValue(ast.TypeFloat, math.Inf(+1)), true
diff --git a/parse/value_string.go b/parse/value_string.go
index 5a2860e..436838e 100644
--- a/parse/value_string.go
+++ b/parse/value_string.go
@@ -96,7 +96,7 @@ func (t *parser) parseString(p *parse.API) (*ast.Value, bool) {
 // "All other escape sequences [..] are reserved and, if used, TOML should
 // produce an error.""
 func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
-	if !p.Accept(a.DoubleQuote) {
+	if !p.Skip(a.DoubleQuote) {
 		p.Expected(`opening quotation marks`)
 		return "", false
 	}
@@ -104,7 +104,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 	for {
 		switch {
 		case p.PeekWithResult(controlCharacter):
-			p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Runes[0])
+			p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Bytes[0])
 			return sb.String(), false
 		case p.Accept(validEscape):
 			if !appendEscapedRune(p, sb) {
@@ -113,7 +113,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 		case p.Peek(a.Backslash):
 			p.SetError("invalid escape sequence")
 			return sb.String(), false
-		case p.Accept(basicStringDelimiter):
+		case p.Skip(basicStringDelimiter):
 			return sb.String(), true
 		case p.Peek(a.InvalidRune):
 			p.SetError("invalid UTF8 rune")
@@ -135,19 +135,19 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 //
 // • Control characters other than tab are not permitted in a literal string.
 func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
-	if !p.Accept(a.SingleQuote) {
+	if !p.Skip(a.SingleQuote) {
 		p.Expected("opening single quote")
 		return "", false
 	}
 	sb := &strings.Builder{}
 	for {
 		switch {
-		case p.Accept(literalStringDelimiter):
+		case p.Skip(literalStringDelimiter):
 			return sb.String(), true
-		case p.Accept(a.Tab):
+		case p.Skip(a.Tab):
 			sb.WriteString("\t")
 		case p.PeekWithResult(controlCharacter):
-			p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Runes[0])
+			p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Bytes[0])
 			return sb.String(), false
 		case p.Peek(a.InvalidRune):
 			p.SetError("invalid UTF8 rune")
@@ -185,28 +185,28 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
 // a \, it will be trimmed along with all whitespace (including newlines) up to
 // the next non-whitespace character or closing delimiter.
 func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) {
-	if !p.Accept(openingMultiLineBasicString) {
+	if !p.Skip(openingMultiLineBasicString) {
 		p.Expected("opening three quotation marks")
 		return "", false
 	}
 	sb := &strings.Builder{}
 	for {
 		switch {
-		case p.Accept(newline):
+		case p.Skip(newline):
 			sb.WriteString("\n")
 		case p.PeekWithResult(controlCharacter):
-			p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Runes[0])
+			p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Bytes[0])
 			return sb.String(), false
 		case p.Accept(validEscape):
 			if !appendEscapedRune(p, sb) {
 				return sb.String(), false
 			}
-		case p.Accept(lineEndingBackslash):
-			// NOOP, the line-ending backslash sequence is skipped.
+		case p.Skip(lineEndingBackslash):
+			// NOOP
 		case p.Peek(a.Backslash):
 			p.SetError("invalid escape sequence")
 			return sb.String(), false
-		case p.Accept(closingMultiLineBasicString):
+		case p.Skip(closingMultiLineBasicString):
 			return sb.String(), true
 		case p.Accept(a.ValidRune):
 			sb.WriteString(p.Result.String())
@@ -265,21 +265,21 @@ func appendEscapedRune(p *parse.API, sb *strings.Builder) bool {
 //
 // • Control characters other than tab and newline are not permitted in a multi-line literal string.
 func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) {
-	if !p.Accept(openingMultiLineLiteralString) {
+	if !p.Skip(openingMultiLineLiteralString) {
 		p.Expected("opening three single quotes")
 		return "", false
 	}
 	sb := &strings.Builder{}
 	for {
 		switch {
-		case p.Accept(closingMultiLineLiteralString):
+		case p.Skip(closingMultiLineLiteralString):
 			return sb.String(), true
-		case p.Accept(a.Tab):
+		case p.Skip(a.Tab):
 			sb.WriteString("\t")
-		case p.Accept(newline):
+		case p.Skip(newline):
 			sb.WriteString("\n")
 		case p.PeekWithResult(controlCharacter):
-			p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Runes[0])
+			p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Bytes[0])
 			return sb.String(), false
 		case p.Accept(a.ValidRune):
 			sb.WriteString(p.Result.String())
diff --git a/parse/value_table.go b/parse/value_table.go
index d24cd27..346ef81 100644
--- a/parse/value_table.go
+++ b/parse/value_table.go
@@ -22,9 +22,9 @@ var (
 
 func (t *parser) startTable(p *parse.API) {
 	switch {
-	case p.Accept(tableArrayOpen):
+	case p.Skip(tableArrayOpen):
 		p.Handle(t.startArrayOfTables)
-	case p.Accept(tableOpen):
+	case p.Skip(tableOpen):
 		p.Handle(t.startPlainTable)
 	default:
 		p.Expected("a table")
@@ -71,11 +71,11 @@ func (t *parser) startTable(p *parse.API) {
 // 	      name = "plantain"
 func (t *parser) startArrayOfTables(p *parse.API) {
 	if key, ok := t.parseKey(p, []string{}); ok {
-		if !p.Accept(tableArrayClose) {
+		if !p.Skip(tableArrayClose) {
 			p.Expected("closing ']]' for array of tables name")
 			return
 		}
-		if !p.Accept(endOfLineOrComment) {
+		if !p.Skip(endOfLineOrComment) {
 			p.Expected("end of line or comment")
 			return
 		}
@@ -127,11 +127,11 @@ func (t *parser) startArrayOfTables(p *parse.API) {
 // Empty tables are allowed and simply have no key/value pairs within them.
 func (t *parser) startPlainTable(p *parse.API) {
 	if key, ok := t.parseKey(p, []string{}); ok {
-		if !p.Accept(tableClose) {
+		if !p.Skip(tableClose) {
 			p.Expected("closing ']' for table name")
 			return
 		}
-		if !p.Accept(endOfLineOrComment) {
+		if !p.Skip(endOfLineOrComment) {
 			p.Expected("end of line or comment")
 			return
 		}
@@ -161,7 +161,7 @@ func (t *parser) startPlainTable(p *parse.API) {
 //     animal = { type.name = "pug" }
 func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
 	// Check for the start of the array.
-	if !p.Accept(inlineTableOpen) {
+	if !p.Skip(inlineTableOpen) {
 		p.Expected("an inline table")
 		return nil, false
 	}
@@ -169,7 +169,7 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
 	subdoc := newParser()
 
 	// Check for an empty inline table.
-	if p.Accept(inlineTableClose) {
+	if p.Skip(inlineTableClose) {
 		return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true
 	}
 
@@ -193,12 +193,12 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
 		}
 
 		// Check for the end of the inline table.
-		if p.Accept(inlineTableClose) {
+		if p.Skip(inlineTableClose) {
 			return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true
 		}
 
 		// Not the end of the inline table? Then we should find a key/value pair separator.
-		if !p.Accept(inlineTableSeparator) {
+		if !p.Skip(inlineTableSeparator) {
 			p.Expected("an array separator")
 			return nil, false
 		}
diff --git a/parse2/profile-sushi.sh b/parse2/profile-sushi.sh
index 986a988..130fc44 100755
--- a/parse2/profile-sushi.sh
+++ b/parse2/profile-sushi.sh
@@ -6,7 +6,7 @@ FILE=normal.toml
 ITER=10000
 
 cd ../cmd/burntsushi-tester
-go build -gcflags=all=-l
+go build
 cd ../../parse2
 ppfile=`cat $FILE | ../cmd/burntsushi-tester/burntsushi-tester -p $ITER 2>&1 | grep "profiling enabled" | cut -d, -f2`
 go tool pprof -http 0.0.0.0:8888 ../cmd/burntsushi-tester/burntsushi-tester $ppfile