From dea3eb987b4e4f51832da177225176658867e788 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 26 Jul 2019 22:56:24 +0000 Subject: [PATCH] Changes for compatibility with latest parsekit, and some speed improvements. --- cmd/burntsushi-tester/Makefile | 6 ++++++ parse/document.go | 39 ++++++++++++++++++++++++++++------ parse/keyvaluepair.go | 6 +++--- parse/parse.go | 29 ------------------------- parse/value_array.go | 8 +++---- parse/value_boolean.go | 4 ++-- parse/value_number.go | 4 ++-- parse/value_string.go | 36 +++++++++++++++---------------- parse/value_table.go | 20 ++++++++--------- parse2/profile-sushi.sh | 2 +- 10 files changed, 79 insertions(+), 75 deletions(-) diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile index 983acc2..b644064 100644 --- a/cmd/burntsushi-tester/Makefile +++ b/cmd/burntsushi-tester/Makefile @@ -1,4 +1,5 @@ PROFILE_COUNT=100 +PROFILE_COUNT2=1000 b: go build @@ -29,6 +30,8 @@ test2-a: profile-a: numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT} < testfile.toml" +profile2-a: + numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT2} < testfile2.toml" test-b: numactl --physcpubind=+2 bash -c "time ./B < testfile.toml" @@ -39,6 +42,9 @@ test2-b: profile-b: numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT} < testfile.toml" +profile2-b: + numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT2} < testfile2.toml" + test-sushi: numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml" diff --git a/parse/document.go b/parse/document.go index eb3db01..3561737 100644 --- a/parse/document.go +++ b/parse/document.go @@ -2,28 +2,55 @@ package parse import ( "git.makaay.nl/mauricem/go-parsekit/parse" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) +// Some globally useful tokenizer definitions. var ( + c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T + + // Whitespace means tab (0x09) or space (0x20). 
+ // This matches the blanks as defined by parsekit. + + whitespace = a.Blanks.Optional() + + // Newline means LF (0x0A) or CRLF (0x0D0A). + // This matches the newline as defined by parsekit. + + newline = a.Newline + + // Whitespace + newlines. + // This matches the whitespace as defined by parsekit. + + whitespaceInclNewlines = a.Whitespace + + // A '#' hash symbol marks the rest of the line as a comment. + // All characters up to the end of the line are included in the comment. + + comment = c.Seq(a.Hash, m.DropUntilEndOfLine) + optionalComment = comment.Optional() + + endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) + + whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment) + // Keys may be either bare or quoted. detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote) // Both [tables] and [[arrays of tables]] start with a square open bracket. detectTable = a.SquareOpen - - whitespaceNewlinesAndComments = whitespaceInclNewlines.Or(comment) ) func (t *parser) startDocument(p *parse.API) { for { switch { - case p.Accept(whitespaceNewlinesAndComments): + case p.Skip(whitespaceNewlinesOrComments): // NOOP - case p.Peek(detectTable): - p.Handle(t.startTable) case p.Peek(detectKey): p.Handle(t.startKeyValuePair) - case p.Accept(a.EndOfFile): + case p.Peek(detectTable): + p.Handle(t.startTable) + case p.Skip(a.EndOfFile): p.Stop() default: p.Expected("key/value pair, table or array of tables") diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index 345d3e7..d2dfb64 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -49,7 +49,7 @@ func (t *parser) startKeyValuePair(p *parse.API) { err := t.doc.SetKeyValuePair(key, value) if err != nil { p.SetError("%s", err) - } else if !p.Accept(endOfLineOrComment) { + } else if !p.Skip(endOfLineOrComment) { p.Expected("end of line") } } @@ -93,14 +93,14 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) { // practice is to not use any extraneous 
whitespace. func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) { - if p.Accept(keySeparatorDot) { + if p.Skip(keySeparatorDot) { return t.parseKey(p, key) } return key, true } func (t *parser) startAssignment(p *parse.API) { - if !p.Accept(keyAssignment) { + if !p.Skip(keyAssignment) { p.Expected("a value assignment") } } diff --git a/parse/parse.go b/parse/parse.go index 458d95c..b35ce2a 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -2,38 +2,9 @@ package parse import ( "git.makaay.nl/mauricem/go-parsekit/parse" - "git.makaay.nl/mauricem/go-parsekit/tokenize" "git.makaay.nl/mauricem/go-toml/ast" ) -// Some globally useful tokenizer definitions. -var ( - c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T - - // Whitespace means tab (0x09) or space (0x20). - // This matches the blanks as defined by parsekit. - - whitespace = a.Blanks.Optional() - - // Newline means LF (0x0A) or CRLF (0x0D0A). - // This matches the newline as defined by parsekit. - - newline = a.Newline - - // Whitespace + newlines. - // This matches the whitespace as defined by parsekit. - - whitespaceInclNewlines = a.Whitespace - - // A '#' hash symbol marks the rest of the line as a comment. - // All characters up to the end of the line are included in the comment. - - comment = c.Seq(a.Hash, m.DropUntilEndOfLine) - optionalComment = comment.Optional() - - endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) -) - type parser struct { doc *ast.Document } diff --git a/parse/value_array.go b/parse/value_array.go index 2ee1055..fb0a451 100644 --- a/parse/value_array.go +++ b/parse/value_array.go @@ -39,7 +39,7 @@ var ( func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) { // Check for the start of the array. - if !p.Accept(arrayOpen) { + if !p.Skip(arrayOpen) { p.Expected("an array") return nil, false } @@ -47,7 +47,7 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) { a := ast.NewArray() // Check for an empty array. 
- if p.Accept(arrayClose) { + if p.Skip(arrayClose) { return ast.NewValue(ast.TypeArray, a), true } @@ -68,12 +68,12 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) { } // Check for the end of the array. - if p.Accept(arrayClose) { + if p.Skip(arrayClose) { return ast.NewValue(ast.TypeArray, a), true } // Not the end of the array? Then we should find an array separator. - if !p.Accept(arraySeparator) { + if !p.Skip(arraySeparator) { p.Expected("an array separator") return nil, false } diff --git a/parse/value_boolean.go b/parse/value_boolean.go index f576004..d4383f2 100644 --- a/parse/value_boolean.go +++ b/parse/value_boolean.go @@ -15,9 +15,9 @@ var ( // Booleans are just the tokens you're used to. Always lowercase. func (t *parser) parseBoolean(p *parse.API) (*ast.Value, bool) { switch { - case p.Accept(trueStr): + case p.Skip(trueStr): return trueValue, true - case p.Accept(falseStr): + case p.Skip(falseStr): return falseValue, true default: p.Expected("true or false") diff --git a/parse/value_number.go b/parse/value_number.go index 602d949..5189192 100644 --- a/parse/value_number.go +++ b/parse/value_number.go @@ -74,10 +74,10 @@ func (t *parser) parseNumber(p *parse.API) (*ast.Value, bool) { switch { case p.Accept(floatToken): return ast.NewValue(ast.TypeFloat, p.Result.Tokens[0].Value.(float64)), true - case p.Accept(nan): + case p.Skip(nan): return ast.NewValue(ast.TypeFloat, math.NaN()), true case p.Accept(inf): - if p.Result.Runes[0] == '-' { + if p.Result.Bytes[0] == '-' { return ast.NewValue(ast.TypeFloat, math.Inf(-1)), true } return ast.NewValue(ast.TypeFloat, math.Inf(+1)), true diff --git a/parse/value_string.go b/parse/value_string.go index 5a2860e..436838e 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -96,7 +96,7 @@ func (t *parser) parseString(p *parse.API) (*ast.Value, bool) { // "All other escape sequences [..] 
are reserved and, if used, TOML should // produce an error."" func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { - if !p.Accept(a.DoubleQuote) { + if !p.Skip(a.DoubleQuote) { p.Expected(`opening quotation marks`) return "", false } @@ -104,7 +104,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { for { switch { case p.PeekWithResult(controlCharacter): - p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Runes[0]) + p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Bytes[0]) return sb.String(), false case p.Accept(validEscape): if !appendEscapedRune(p, sb) { @@ -113,7 +113,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { case p.Peek(a.Backslash): p.SetError("invalid escape sequence") return sb.String(), false - case p.Accept(basicStringDelimiter): + case p.Skip(basicStringDelimiter): return sb.String(), true case p.Peek(a.InvalidRune): p.SetError("invalid UTF8 rune") @@ -135,19 +135,19 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { // // • Control characters other than tab are not permitted in a literal string. 
func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) { - if !p.Accept(a.SingleQuote) { + if !p.Skip(a.SingleQuote) { p.Expected("opening single quote") return "", false } sb := &strings.Builder{} for { switch { - case p.Accept(literalStringDelimiter): + case p.Skip(literalStringDelimiter): return sb.String(), true - case p.Accept(a.Tab): + case p.Skip(a.Tab): sb.WriteString("\t") case p.PeekWithResult(controlCharacter): - p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Runes[0]) + p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Bytes[0]) return sb.String(), false case p.Peek(a.InvalidRune): p.SetError("invalid UTF8 rune") @@ -185,28 +185,28 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) { // a \, it will be trimmed along with all whitespace (including newlines) up to // the next non-whitespace character or closing delimiter. func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) { - if !p.Accept(openingMultiLineBasicString) { + if !p.Skip(openingMultiLineBasicString) { p.Expected("opening three quotation marks") return "", false } sb := &strings.Builder{} for { switch { - case p.Accept(newline): + case p.Skip(newline): sb.WriteString("\n") case p.PeekWithResult(controlCharacter): - p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Runes[0]) + p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Bytes[0]) return sb.String(), false case p.Accept(validEscape): if !appendEscapedRune(p, sb) { return sb.String(), false } - case p.Accept(lineEndingBackslash): - // NOOP, the line-ending backslash sequence is skipped. 
+ case p.Skip(lineEndingBackslash): + // NOOP case p.Peek(a.Backslash): p.SetError("invalid escape sequence") return sb.String(), false - case p.Accept(closingMultiLineBasicString): + case p.Skip(closingMultiLineBasicString): return sb.String(), true case p.Accept(a.ValidRune): sb.WriteString(p.Result.String()) @@ -265,21 +265,21 @@ func appendEscapedRune(p *parse.API, sb *strings.Builder) bool { // // • Control characters other than tab and newline are not permitted in a multi-line literal string. func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) { - if !p.Accept(openingMultiLineLiteralString) { + if !p.Skip(openingMultiLineLiteralString) { p.Expected("opening three single quotes") return "", false } sb := &strings.Builder{} for { switch { - case p.Accept(closingMultiLineLiteralString): + case p.Skip(closingMultiLineLiteralString): return sb.String(), true - case p.Accept(a.Tab): + case p.Skip(a.Tab): sb.WriteString("\t") - case p.Accept(newline): + case p.Skip(newline): sb.WriteString("\n") case p.PeekWithResult(controlCharacter): - p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Runes[0]) + p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Bytes[0]) return sb.String(), false case p.Accept(a.ValidRune): sb.WriteString(p.Result.String()) diff --git a/parse/value_table.go b/parse/value_table.go index d24cd27..346ef81 100644 --- a/parse/value_table.go +++ b/parse/value_table.go @@ -22,9 +22,9 @@ var ( func (t *parser) startTable(p *parse.API) { switch { - case p.Accept(tableArrayOpen): + case p.Skip(tableArrayOpen): p.Handle(t.startArrayOfTables) - case p.Accept(tableOpen): + case p.Skip(tableOpen): p.Handle(t.startPlainTable) default: p.Expected("a table") @@ -71,11 +71,11 @@ func (t *parser) startTable(p *parse.API) { // name = "plantain" func (t *parser) startArrayOfTables(p *parse.API) { if key, ok := 
t.parseKey(p, []string{}); ok { - if !p.Accept(tableArrayClose) { + if !p.Skip(tableArrayClose) { p.Expected("closing ']]' for array of tables name") return } - if !p.Accept(endOfLineOrComment) { + if !p.Skip(endOfLineOrComment) { p.Expected("end of line or comment") return } @@ -127,11 +127,11 @@ func (t *parser) startArrayOfTables(p *parse.API) { // Empty tables are allowed and simply have no key/value pairs within them. func (t *parser) startPlainTable(p *parse.API) { if key, ok := t.parseKey(p, []string{}); ok { - if !p.Accept(tableClose) { + if !p.Skip(tableClose) { p.Expected("closing ']' for table name") return } - if !p.Accept(endOfLineOrComment) { + if !p.Skip(endOfLineOrComment) { p.Expected("end of line or comment") return } @@ -161,7 +161,7 @@ func (t *parser) startPlainTable(p *parse.API) { // animal = { type.name = "pug" } func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) { // Check for the start of the array. - if !p.Accept(inlineTableOpen) { + if !p.Skip(inlineTableOpen) { p.Expected("an inline table") return nil, false } @@ -169,7 +169,7 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) { subdoc := newParser() // Check for an empty inline table. - if p.Accept(inlineTableClose) { + if p.Skip(inlineTableClose) { return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true } @@ -193,12 +193,12 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) { } // Check for the end of the inline table. - if p.Accept(inlineTableClose) { + if p.Skip(inlineTableClose) { return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true } // Not the end of the inline table? Then we should find a key/value pair separator. 
- if !p.Accept(inlineTableSeparator) { + if !p.Skip(inlineTableSeparator) { p.Expected("an array separator") return nil, false } diff --git a/parse2/profile-sushi.sh b/parse2/profile-sushi.sh index 986a988..130fc44 100755 --- a/parse2/profile-sushi.sh +++ b/parse2/profile-sushi.sh @@ -6,7 +6,7 @@ FILE=normal.toml ITER=10000 cd ../cmd/burntsushi-tester -go build -gcflags=all=-l +go build cd ../../parse2 ppfile=`cat $FILE | ../cmd/burntsushi-tester/burntsushi-tester -p $ITER 2>&1 | grep "profiling enabled" | cut -d, -f2` go tool pprof -http 0.0.0.0:8888 ../cmd/burntsushi-tester/burntsushi-tester $ppfile