diff --git a/cmd/burntsushi-tester/Makefile b/cmd/burntsushi-tester/Makefile index 1892154..adabeb2 100644 --- a/cmd/burntsushi-tester/Makefile +++ b/cmd/burntsushi-tester/Makefile @@ -1,22 +1,40 @@ -a: - go build - mv burntsushi-tester A - b: go build mv burntsushi-tester B +a: + go build + mv burntsushi-tester A + test: test-a test-b test-sushi test-a: numactl --physcpubind=+1 bash -c "time ./A < testfile.toml" + +profile-a: + numactl --physcpubind=+1 bash -c "./A -p 100 < testfile.toml" test-b: numactl --physcpubind=+2 bash -c "time ./B < testfile.toml" +profile-b: + numactl --physcpubind=+2 bash -c "./B -p 100 < testfile.toml" + test-sushi: - numactl --physcpubind=+3 bash -c "time ~/go-workspace/bin/toml-test-decoder < testfile.toml" + numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml" +test-sushi-a: + + numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./A" + +test-sushi-b: + + numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ./B" + +test-sushi-sushi: + + numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test ${GOPATH}/bin/toml-test-decoder" + diff --git a/cmd/burntsushi-tester/testfile.toml b/cmd/burntsushi-tester/testfile.toml index c0a3277..95812d4 100644 --- a/cmd/burntsushi-tester/testfile.toml +++ b/cmd/burntsushi-tester/testfile.toml @@ -10013,7 +10013,7 @@ # Tables (also known as hash tables or dictionaries) are collections of # key/value pairs. They appear in square brackets on a line by themselves. -[table] +[tablex] key = "value" # Yeah, you can do this. @@ -21812,6 +21812,251 @@ key = "value" # Yeah, you can do this. 
# -------------------------------------------------------------------------------- # -------------------------------------------------------------------------------- # -------------------------------------------------------------------------------- +# +################################################################################ +## Comment + +# Speak your mind with the hash symbol. They go from the symbol to the end of +# the line. + + +################################################################################ +## Table + +# Tables (also known as hash tables or dictionaries) are collections of +# key/value pairs. They appear in square brackets on a line by themselves. + +[table] + +key = "value" # Yeah, you can do this. + +# Nested tables are denoted by table names with dots in them. Name your tables +# whatever crap you please, just don't use #, ., [ or ]. + +[table.subtable] + +key = "another value" + +# You don't need to specify all the super-tables if you don't want to. TOML +# knows how to do it for you. + +# [x] you +# [x.y] don't +# [x.y.z] need these +[x.y.z.w] # for this to work + + +################################################################################ +## Inline Table + +# Inline tables provide a more compact syntax for expressing tables. They are +# especially useful for grouped data that can otherwise quickly become verbose. +# Inline tables are enclosed in curly braces `{` and `}`. No newlines are +# allowed between the curly braces unless they are valid within a value. + +[table.inline] + +name = { first = "Tom", last = "Preston-Werner" } +point = { x = 1, y = 2 } + + +################################################################################ +## String + +# There are four ways to express strings: basic, multi-line basic, literal, and +# multi-line literal. All strings must contain only valid UTF-8 characters. + +[string.basic] + +basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 
+ +[string.multiline] + +# The following strings are byte-for-byte equivalent: +key1 = "One\nTwo" +key2 = """One\nTwo""" +key3 = """ +One +Two""" + +[string.multiline.continued] + +# The following strings are byte-for-byte equivalent: +key1 = "The quick brown fox jumps over the lazy dog." + +key2 = """ +The quick brown \ + + + fox jumps over \ + the lazy dog.""" + +key3 = """\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """ + +[string.literal] + +# What you see is what you get. +winpath = 'C:\Users\nodejs\templates' +winpath2 = '\\ServerX\admin$\system32\' +quoted = 'Tom "Dubs" Preston-Werner' +regex = '<\i\c*\s*>' + + +[string.literal.multiline] + +regex2 = '''I [dw]on't need \d{2} apples''' +lines = ''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +''' + + +################################################################################ +## Integer + +# Integers are whole numbers. Positive numbers may be prefixed with a plus sign. +# Negative numbers are prefixed with a minus sign. + +[integer] + +key1 = +99 +key2 = 42 +key3 = 0 +key4 = -17 + +[integer.underscores] + +# For large numbers, you may use underscores to enhance readability. Each +# underscore must be surrounded by at least one digit. +key1 = 1_000 +key2 = 5_349_221 +key3 = 1_2_3_4_5 # valid but inadvisable + + +################################################################################ +## Float + +# A float consists of an integer part (which may be prefixed with a plus or +# minus sign) followed by a fractional part and/or an exponent part. + +[float.fractional] + +key1 = +1.0 +key2 = 3.1415 +key3 = -0.01 + +[float.exponent] + +key1 = 5e+22 +key2 = 1e6 +key3 = -2E-2 + +[float.both] + +key = 6.626e-34 + +[float.underscores] + +key1 = 9_224_617.445_991_228_313 +key2 = 1e0_1_0 + + +################################################################################ +## Boolean + +# Booleans are just the tokens you're used to. Always lowercase. 
+ +[boolean] + +True = true +False = false + + +################################################################################ +## Datetime + +# Datetimes are RFC 3339 dates. + +[datetime] + +key1 = 1979-05-27T07:32:00Z +key2 = 1979-05-27T00:32:00-07:00 +key3 = 1979-05-27T00:32:00.999999-07:00 + + +################################################################################ +## Array + +# Arrays are square brackets with other primitives inside. Whitespace is +# ignored. Elements are separated by commas. Data types may not be mixed. + +[array] + +key1 = [ 1, 2, 3 ] +key2 = [ "red", "yellow", "green" ] +key3 = [ [ 1, 2 ], [3, 4, 5] ] +key4 = [ [ 1, 2 ], ["a", "b", "c"] ] # this is ok + +# Arrays can also be multiline. So in addition to ignoring whitespace, arrays +# also ignore newlines between the brackets. Terminating commas are ok before +# the closing bracket. + +key5 = [ + 1, 2, 3 +] +key6 = [ + 1, + 2, # this is ok +] + + +################################################################################ +## Array of Tables + +# These can be expressed by using a table name in double brackets. Each table +# with the same double bracketed name will be an element in the array. The +# tables are inserted in the order encountered. + +[[products]] + +name = "Hammer" +sku = 738594937 + +[[products]] + +[[products]] + +name = "Nail" +sku = 284758393 +color = "gray" + + +# You can create nested arrays of tables as well. 
+ +[[fruit]] + name = "apple" + + [fruit.physical] + color = "red" + shape = "round" + + [[fruit.variety]] + name = "red delicious" + + [[fruit.variety]] + name = "granny smith" + +[[fruit]] + name = "banana" + + [[fruit.variety]] + name = "plantain" # -------------------------------------------------------------------------------- # -------------------------------------------------------------------------------- diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index 7fcc350..cd842e6 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -27,7 +27,11 @@ var ( // A bare key must be non-empty, but an empty quoted key is allowed // (though discouraged). - bareKeyRune = c.Any(a.ASCIILower, a.ASCIIUpper, a.Digit, a.Underscore, a.Minus) + isBareKeyRune = func(b byte) bool { + return ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || + (b >= '0' && b <= '9') || b == '_' || b == '-') + } + bareKeyRune = a.ByteByCallback(isBareKeyRune) bareKey = c.OneOrMore(bareKeyRune) // Dotted keys are a sequence of bare or quoted keys joined with a dot. diff --git a/parse/value_string.go b/parse/value_string.go index d8ed7b7..1bf3bf6 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -22,7 +22,8 @@ var ( // Control characters as defined by TOML (U+0000 to U+001F, U+007F) - controlCharacter = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F')) + isControlCharacter = func(b byte) bool { return (b >= 0x00 && b <= 0x1F) || b == 0x7F } + controlCharacter = a.ByteByCallback(isControlCharacter) // For convenience, some popular characters have a compact escape sequence. 
// @@ -36,10 +37,10 @@ var ( // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) - validEscapeChar = a.Runes('b', 't', 'n', 'f', 'r', '"', '\\') + validEscapeChar = a.Bytes('b', 't', 'n', 'f', 'r', '"', '\\') shortEscape = c.Seq(a.Backslash, validEscapeChar) - shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4)) - longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8)) + shortUTF8Escape = c.Seq(a.Backslash, a.Byte('u'), a.HexDigit.Times(4)) + longUTF8Escape = c.Seq(a.Backslash, a.Byte('U'), a.HexDigit.Times(8)) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) // For writing long strings without introducing extraneous whitespace, use a diff --git a/parse2/performance_timings.txt b/parse2/performance_timings.txt index f6ab3c2..a439361 100644 --- a/parse2/performance_timings.txt +++ b/parse2/performance_timings.txt @@ -1,37 +1,25 @@ -19.251 ./parse2 -p 100 < long.toml -22.386 ./parse2 -p 10000 < x +16.750 ./parse2 -p 100 < long.toml +19.403 ./parse2 -p 10000 < x -1.84591043s parse2 10 iteration profiling of long.toml -1.566912075s parse2 1000 iteration profiling of x +1.508262093s parse2 10 iteration profiling of long.toml +1.278056375s parse2 1000 iteration profiling of normal.toml -984.177401ms burntsushi-tester 10 iteration profiling of long.toml -1.108335844s burntsushi-tester 1000 iteration profiling of x +207.402484ms burntsushi-tester 10 iteration profiling of long.toml +782.128156ms burntsushi-tester 1000 iteration profiling of normal.toml 0.002s git.makaay.nl/mauricem/go-toml/ast (unit tests) -0.324s git.makaay.nl/mauricem/go-toml/parse (unit tests) - -0m0.246s BurntSushi test set - -4.073716ms qa-array-inline-1000.toml -6.881482ms qa-array-inline-nested-1000.toml -5.271393ms qa-key-literal-40kb.toml -8.577264ms qa-key-string-40kb.toml -5.168562ms qa-scalar-literal-40kb.toml -8.401454ms qa-scalar-literal-multiline-40kb.toml -8.874881ms qa-scalar-string-40kb.toml -7.596715ms 
qa-scalar-string-multiline-40kb.toml -5.716824ms qa-table-inline-1000.toml -14.342319ms qa-table-inline-nested-1000.toml -185.826552ms qa-long-loads-of-comments.toml - -0.190 time ./parse2 < long.toml -0.005 time ./parse2 < x - ---- new benchmark tests -NO CURSOR UPDATES BASELINE - 0.264 (test-sushi) -274544444 256706465 -2259506 2118384 -23911 22624 +0.236s git.makaay.nl/mauricem/go-toml/parse (unit tests) +0m0.254s BurntSushi test set +3.500633ms qa-array-inline-1000.toml +5.844964ms qa-array-inline-nested-1000.toml +4.164484ms qa-key-literal-40kb.toml +6.965205ms qa-key-string-40kb.toml +4.514677ms qa-scalar-literal-40kb.toml +8.53826ms qa-scalar-literal-multiline-40kb.toml +7.819157ms qa-scalar-string-40kb.toml +6.569182ms qa-scalar-string-multiline-40kb.toml +5.64134ms qa-table-inline-1000.toml +11.501451ms qa-table-inline-nested-1000.toml +149.369957ms qa-long-loads-of-comments.toml \ No newline at end of file