String parser replaced with a low-level parser for performance.

This commit is contained in:
Maurice Makaay 2019-08-01 13:25:21 +00:00
parent ed846c7e53
commit 22144487f0
12 changed files with 815 additions and 724 deletions

View File

@ -1,7 +0,0 @@
regex2 = '''I [dw]on't need \d{2} apples'''
lines = '''
The first newline is
trimmed in raw strings.
All other whitespace
is preserved.
'''

View File

@ -4,73 +4,29 @@ import (
"testing" "testing"
) )
// A translates an escape code byte into the byte that it denotes.
//
// The lookup is done in two tiers, split on the pivot 'b': bytes up to and
// including 'b' are matched against b, the backslash and the double quote,
// all higher bytes are matched against f, r, n and t.
//
// The boolean result reports whether the input was one of these seven
// escape codes. When it was not, the returned byte is 0x00.
func A(b byte) (byte, bool) {
	// Lower tier: everything up to and including the pivot.
	if b <= 'b' {
		switch b {
		case 'b':
			return '\b', true
		case '\\':
			return '\\', true
		case '"':
			return '"', true
		}
		return 0x00, false
	}

	// Upper tier: everything above the pivot.
	switch b {
	case 'f':
		return '\f', true
	case 'r':
		return '\r', true
	case 'n':
		return '\n', true
	case 't':
		return '\t', true
	}
	return 0x00, false
}
// B translates an escape code byte into the byte that it denotes, using
// one flat switch over all supported escape codes (r, n, t, b, f, the
// double quote and the backslash).
//
// The boolean result reports whether the input was one of these seven
// escape codes. When it was not, the returned byte is 0x00.
func B(b byte) (byte, bool) {
	var unescaped byte
	switch b {
	case 'r':
		unescaped = '\r'
	case 'n':
		unescaped = '\n'
	case 't':
		unescaped = '\t'
	case 'b':
		unescaped = '\b'
	case 'f':
		unescaped = '\f'
	case '"', '\\':
		// These two codes stand for themselves.
		unescaped = b
	default:
		return 0x00, false
	}
	return unescaped, true
}
// Benchmark_A measures the cost of calling A, by invoking it once per
// iteration for each of the seven inputs b, t, n, f, r, double quote and
// backslash. The results of the calls are discarded.
//
// TODO cleanup unused benchmark.
func Benchmark_A(b *testing.B) {
for i := 0; i < b.N; i++ {
A('b')
A('t')
A('n')
A('f')
A('r')
A('"')
A('\\')
}
}
// TODO cleanup unused benchmark.
func Benchmark_B(b *testing.B) { func Benchmark_B(b *testing.B) {
f := func(i int) int { i = i + 1; return i }
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
B('b') f(i)
B('t') }
B('n') }
B('f') func Benchmark_C(b *testing.B) {
B('r') f := func(i int) int { i += 1; return i }
B('"') for i := 0; i < b.N; i++ {
B('\\') f(i)
}
}
func Benchmark_D(b *testing.B) {
f := func(i int) int { i++; return i }
for i := 0; i < b.N; i++ {
f(i)
}
}
func Benchmark_A(b *testing.B) {
f := func(i int) int { i = 2; return i }
for i := 0; i < b.N; i++ {
f(i)
} }
} }

View File

@ -27,11 +27,9 @@ var (
// A '#' hash symbol marks the rest of the line as a comment. // A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment. // All characters up to the end of the line are included in the comment.
comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional()) comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional())
optionalComment = comment.Optional() optionalComment = comment.Optional()
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment) whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)
// Keys may be either bare or quoted. // Keys may be either bare or quoted.

View File

@ -1,6 +1,7 @@
package parse package parse
import ( import (
"strings"
"testing" "testing"
) )
@ -29,3 +30,18 @@ func TestInvalidDocument(t *testing.T) {
testParse(t, p, p.startDocument, test) testParse(t, p, p.startDocument, test)
} }
} }
// TestBufferRelatedBug is a regression test that feeds the parser a line of
// 2040 '#' characters, followed by the comment line "# a bug". The input
// consists of comments only, so parsing must succeed and must yield an
// empty document ("{}").
//
// NOTE(review): the 2049 byte input size is presumably chosen in relation
// to the size of the parser's read buffer -- confirm against the reader.
func TestBufferRelatedBug(t *testing.T) {
	text := strings.Repeat("#", 2040) + "\n# a bug\n"

	// Validate the fixture before handing it to the parser. When the input
	// does not have the intended size, the outcome of the parser run says
	// nothing about the bug under test, so fail on that first.
	if len(text) != 2049 {
		t.Fatalf("Test input is not 2049 bytes, but %d", len(text))
	}

	ast, err := Run(text)
	if err != nil {
		t.Fatalf("Unexpected error from parser: %s", err)
	}
	if ast.String() != "{}" {
		t.Fatalf("Unexpected TOML document structure returned:\nexpected: {}\nactual: %s", ast)
	}
}

View File

@ -53,7 +53,11 @@ func (t *parser) startKeyValuePair(p *parse.API) {
p.Expected("end of line") p.Expected("end of line")
} }
} else { } else {
p.Expected("a value") // Should have been handled by the value parsing code.
// This is a safety net.
if !p.IsStoppedOrInError() {
panic("Bug: value parsing did not return a successful value, neither an error")
}
} }
} }
} }
@ -70,14 +74,16 @@ func (t *parser) startKeyValuePair(p *parse.API) {
func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) { func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
var keyPart string var keyPart string
var strType stringType
var ok bool var ok bool
switch { switch {
case p.Accept(bareKey): case p.Accept(bareKey):
keyPart, ok = p.Result.String(), true keyPart, ok = p.Result.String(), true
case p.Peek(a.SingleQuote): case p.Peek(detectString):
keyPart, ok = t.parseLiteralString("key", p) keyPart, strType, ok = t.parseString(p)
case p.Peek(a.DoubleQuote): if strType != strTypeBasic && strType != strTypeLiteral {
keyPart, ok = t.parseBasicString("key", p) p.Expected("a key name") // TODO more specific error telling about the abuse of multi-line string?
}
default: default:
p.Expected("a key name") p.Expected("a key name")
return nil, false return nil, false

View File

@ -6,12 +6,13 @@ import (
) )
type parser struct { type parser struct {
doc *ast.Document doc *ast.Document
strFlags byte // A helper field used for string parsing.
} }
func newParser() *parser { func newParser() *parser {
doc := ast.NewDocument() doc := ast.NewDocument()
return &parser{doc} return &parser{doc: doc}
} }
// Run the TOML parser against the provided input data. // Run the TOML parser against the provided input data.

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,2 @@
# This line is 2040 long, to make the read buffer end at the 'g' of the second line, leaving only the newline at the end of the file for the next read operation. There was a bug that resulted in the word 'a' of the second line being seen as a key, because the comment skipping did not work as it should with the buffer filling operation between '# a bug' and the final '\n'. #########################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################
########################################
# a bug

View File

@ -20,7 +20,8 @@ var (
func (t *parser) parseValue(p *parse.API) (*ast.Value, bool) { func (t *parser) parseValue(p *parse.API) (*ast.Value, bool) {
switch { switch {
case p.Peek(detectString): case p.Peek(detectString):
return t.parseString(p) str, _, ok := t.parseString(p)
return ast.NewValue(ast.TypeString, str), ok
case p.Peek(detectBoolean): case p.Peek(detectBoolean):
return t.parseBoolean(p) return t.parseBoolean(p)
case p.Peek(detectNumberSpecials): case p.Peek(detectNumberSpecials):

File diff suppressed because it is too large Load Diff

View File

@ -13,30 +13,6 @@ func TestStartString(t *testing.T) {
testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected a string value) at start of file"}) testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected a string value) at start of file"})
} }
func TestStartBasicString(t *testing.T) {
parser := newParser()
wrapper := func(p *parse.API) { parser.parseBasicString("xyz", p) }
testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening quotation marks) at start of file"})
}
func TestStartLiteralString(t *testing.T) {
parser := newParser()
wrapper := func(p *parse.API) { parser.parseLiteralString("xyz", p) }
testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening single quote) at start of file"})
}
func TestStartMultiLineBasicString(t *testing.T) {
parser := newParser()
wrapper := func(p *parse.API) { parser.parseMultiLineBasicString(p) }
testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three quotation marks) at start of file"})
}
func TestStartMultiLineLiteralString(t *testing.T) {
parser := newParser()
wrapper := func(p *parse.API) { parser.parseMultiLineLiteralString(p) }
testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three single quotes) at start of file"})
}
func TestString(t *testing.T) { func TestString(t *testing.T) {
for _, test := range []parseTest{ for _, test := range []parseTest{
{`x=no start quote"`, `{}`, `unexpected input (expected a value) at line 1, column 3`}, {`x=no start quote"`, `{}`, `unexpected input (expected a value) at line 1, column 3`},

View File

@ -4,10 +4,16 @@ go build
$(cd ../cmd/burntsushi-tester/; go build) $(cd ../cmd/burntsushi-tester/; go build)
DURATION=`./parse2 -p 10 < long.toml 2>&1 | grep Duration | awk '{print $2}'` DURATION=`./parse2 -p 10 < long.toml 2>&1 | grep Duration | awk '{print $2}'`
echo "$DURATION parse2 10 iteration profiling of long.toml" echo "$DURATION ./parse2 -p 10 < long.toml"
DURATION=`./parse2 -p 100 < long.toml 2>&1 | grep Duration | awk '{print $2}'`
echo "$DURATION ./parse2 -p 100 < long.toml"
DURATION=`./parse2 -p 1000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'` DURATION=`./parse2 -p 1000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'`
echo "$DURATION parse2 1000 iteration profiling of normal.toml" echo "$DURATION ./parse2 -p 1000 < normal.toml"
DURATION=`./parse2 -p 10000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'`
echo "$DURATION ./parse2 -p 10000 < normal.toml"
echo "" echo ""