From 74274e04fb2f58232846f14f1f746bc7754d1f0d Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Sun, 28 Jul 2019 23:50:30 +0000 Subject: [PATCH] Made a big jump in performance on big files with lots of comments, by reading in chunks till end of line, instead of byte-by-byte. --- burntsushi/burntsushi.go | 2 ++ cmd/burntsushi-tester/Makefile | 19 +++++++++-------- parse/document.go | 2 +- parse/value_datetime.go | 2 +- parse/value_number.go | 12 +++++------ parse/value_string.go | 8 +++---- parse2/grammar.go | 4 ++-- parse2/performance_timings.txt | 39 +++++++++++++++------------------- 8 files changed, 43 insertions(+), 45 deletions(-) diff --git a/burntsushi/burntsushi.go b/burntsushi/burntsushi.go index 1f7ca9d..415eb4a 100644 --- a/burntsushi/burntsushi.go +++ b/burntsushi/burntsushi.go @@ -122,6 +122,8 @@ func writeSushi(w *bufio.Writer, value *ast.Value) { default: panic(fmt.Sprintf("Unhandled data type: %s", value.Type)) } + + w.Flush() } func renderValue(w *bufio.Writer, t string, v string) { diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile index b644064..8c2ddee 100644 --- a/cmd/burntsushi-tester/Makefile +++ b/cmd/burntsushi-tester/Makefile @@ -1,5 +1,6 @@ PROFILE_COUNT=100 PROFILE_COUNT2=1000 +TIME=time b: go build @@ -22,10 +23,10 @@ test: test-a test-b test-sushi profile: profile-a profile-b test-a: - numactl --physcpubind=+1 bash -c "time ./A < testfile.toml" + numactl --physcpubind=+1 bash -c "${TIME} ./A < testfile.toml" test2-a: - numactl --physcpubind=+1 bash -c "time ./A < testfile2.toml" + numactl --physcpubind=+1 bash -c "${TIME} ./A < testfile2.toml" profile-a: numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT} < testfile.toml" @@ -34,10 +35,10 @@ profile2-a: numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT2} < testfile2.toml" test-b: - numactl --physcpubind=+2 bash -c "time ./B < testfile.toml" + numactl --physcpubind=+2 bash -c "${TIME} ./B < testfile.toml" test2-b: - numactl --physcpubind=+2 bash -c "time ./B < testfile2.toml" + numactl --physcpubind=+2 bash -c "${TIME} ./B < testfile2.toml" profile-b: numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT} < testfile.toml" @@ -47,22 +48,22 @@ profile2-b: test-sushi: - numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml" + numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test-decoder < testfile.toml" test2-sushi: - numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile2.toml" + numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test-decoder < testfile2.toml" test-sushi-a: - numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./A" + numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ./A" test-sushi-b: - numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./B" + numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ./B" test-sushi-sushi: - numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ${GOPATH}/bin/toml-test-decoder" + numactl --physcpubind=+3 bash -c "${TIME} ${GOPATH}/bin/toml-test ${GOPATH}/bin/toml-test-decoder" diff --git a/parse/document.go b/parse/document.go index 3561737..1ee201f 100644 --- a/parse/document.go +++ b/parse/document.go @@ -27,7 +27,7 @@ var ( // A '#' hash symbol marks the rest of the line as a comment. // All characters up to the end of the line are included in the comment. - comment = c.Seq(a.Hash, m.DropUntilEndOfLine) + comment = c.Seq(a.Hash, a.UntilEndOfLine) optionalComment = comment.Optional() endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) diff --git a/parse/value_datetime.go b/parse/value_datetime.go index a767187..3fc010d 100644 --- a/parse/value_datetime.go +++ b/parse/value_datetime.go @@ -91,7 +91,7 @@ func (t *parser) parseDateTime(p *parse.API) (*ast.Value, bool) { p.Expected("a date and/or time") return nil, false } - token := p.Result.Tokens[0] + token := p.Result.Token(0) layout := "" for _, l := range token.Value.([]tokenize.Token) { diff --git a/parse/value_number.go b/parse/value_number.go index 5189192..592dfcd 100644 --- a/parse/value_number.go +++ b/parse/value_number.go @@ -73,18 +73,18 @@ var ( func (t *parser) parseNumber(p *parse.API) (*ast.Value, bool) { switch { case p.Accept(floatToken): - return ast.NewValue(ast.TypeFloat, p.Result.Tokens[0].Value.(float64)), true + return ast.NewValue(ast.TypeFloat, p.Result.Token(0).Value.(float64)), true case p.Skip(nan): return ast.NewValue(ast.TypeFloat, math.NaN()), true case p.Accept(inf): - if p.Result.Bytes[0] == '-' { + if p.Result.Byte(0) == '-' { return ast.NewValue(ast.TypeFloat, math.Inf(-1)), true } return ast.NewValue(ast.TypeFloat, math.Inf(+1)), true case p.Accept(a.Zero): return t.parseIntegerStartingWithZero(p) case p.Accept(integerToken): - return ast.NewValue(ast.TypeInteger, p.Result.Tokens[0].Value.(int64)), true + return ast.NewValue(ast.TypeInteger, p.Result.Token(0).Value.(int64)), true default: p.Expected("a number") return nil, false @@ -96,11 +96,11 @@ func (t *parser) parseIntegerStartingWithZero(p *parse.API) (*ast.Value, bool) { var err error switch { case p.Accept(hexadecimal): - value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 16, 64) + value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 16, 64) case p.Accept(octal): - value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 8, 64) + value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 8, 64) case p.Accept(binary): - value, err = strconv.ParseInt(p.Result.Tokens[0].Value.(string), 2, 64) + value, err = strconv.ParseInt(p.Result.Token(0).Value.(string), 2, 64) default: return ast.NewValue(ast.TypeInteger, int64(0)), true } diff --git a/parse/value_string.go b/parse/value_string.go index 930682b..7e14b7b 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -104,7 +104,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { for { switch { case p.Peek(controlCharacter): - p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Bytes[0]) + p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Byte(0)) return sb.String(), false case p.Accept(validEscape): if !appendEscapedRune(p, sb) { @@ -147,7 +147,7 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) { case p.Skip(a.Tab): sb.WriteString("\t") case p.Peek(controlCharacter): - p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Bytes[0]) + p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Byte(0)) return sb.String(), false case p.Peek(a.InvalidRune): p.SetError("invalid UTF8 rune") @@ -195,7 +195,7 @@ func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) { case p.Skip(newline): sb.WriteString("\n") case p.Peek(controlCharacter): - p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Bytes[0]) + p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Byte(0)) return sb.String(), false case p.Accept(validEscape): if !appendEscapedRune(p, sb) { @@ -279,7 +279,7 @@ func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) { case p.Skip(newline): sb.WriteString("\n") case p.Peek(controlCharacter): - p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Bytes[0]) + p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Byte(0)) return sb.String(), false case p.Accept(a.ValidRune): sb.WriteString(p.Result.String()) diff --git a/parse2/grammar.go b/parse2/grammar.go index a3fdb04..ede3ff7 100644 --- a/parse2/grammar.go +++ b/parse2/grammar.go @@ -47,7 +47,7 @@ func main() { if err != nil { panic("Cannot profile, parsing input failed: " + err.Error()) } - fmt.Printf("cycle %d / %d, tokens=%d\r", i+1, *doProfile, len(result.Tokens)) + fmt.Printf("cycle %d / %d, tokens=%d\r", i+1, *doProfile, len(result.Tokens())) } duration := time.Since(start) @@ -62,7 +62,7 @@ func main() { if err != nil { log.Fatalf("Error in parsing TOML: %s\n", err) } else { - for i, t := range result.Tokens { + for i, t := range result.Tokens() { fmt.Printf("[%d] %v\n", i, t) } } diff --git a/parse2/performance_timings.txt b/parse2/performance_timings.txt index a439361..5fea263 100644 --- a/parse2/performance_timings.txt +++ b/parse2/performance_timings.txt @@ -1,25 +1,20 @@ -16.750 ./parse2 -p 100 < long.toml -19.403 ./parse2 -p 10000 < x +9.2767 ./parse2 -p 100 < long.toml +9.2569 ./parse2 -p 10000 < x -1.508262093s parse2 10 iteration profiling of long.toml -1.278056375s parse2 1000 iteration profiling of normal.toml +917.916965ms parse2 10 iteration profiling of long.toml +895.376882ms parse2 1000 iteration profiling of normal.toml -207.402484ms burntsushi-tester 10 iteration profiling of long.toml -782.128156ms burntsushi-tester 1000 iteration profiling of normal.toml +71.794061ms burntsushi-tester 10 iteration profiling of long.toml +472.762136ms burntsushi-tester 1000 iteration profiling of normal.toml -0.002s git.makaay.nl/mauricem/go-toml/ast (unit tests) -0.236s git.makaay.nl/mauricem/go-toml/parse (unit tests) - -0m0.254s BurntSushi test set - -3.500633ms qa-array-inline-1000.toml -5.844964ms qa-array-inline-nested-1000.toml -4.164484ms qa-key-literal-40kb.toml -6.965205ms qa-key-string-40kb.toml -4.514677ms qa-scalar-literal-40kb.toml -8.53826ms qa-scalar-literal-multiline-40kb.toml -7.819157ms qa-scalar-string-40kb.toml -6.569182ms qa-scalar-string-multiline-40kb.toml -5.64134ms qa-table-inline-1000.toml -11.501451ms qa-table-inline-nested-1000.toml -149.369957ms qa-long-loads-of-comments.toml \ No newline at end of file +2.15562ms qa-array-inline-1000.toml +5.625499ms qa-array-inline-nested-1000.toml +2.791934ms qa-key-literal-40kb.toml +5.096704ms qa-key-string-40kb.toml +3.232853ms qa-scalar-literal-40kb.toml +5.398462ms qa-scalar-literal-multiline-40kb.toml +4.710443ms qa-scalar-string-40kb.toml +3.948024ms qa-scalar-string-multiline-40kb.toml +2.803367ms qa-table-inline-1000.toml +6.076693ms qa-table-inline-nested-1000.toml +95.646345ms qa-long-loads-of-comments.toml \ No newline at end of file