Changes for compatibility with latest parsekit, and some speed improvements.

This commit is contained in:
Maurice Makaay 2019-07-26 22:56:24 +00:00
parent bf7b693cb8
commit dea3eb987b
10 changed files with 79 additions and 75 deletions

View File

@ -1,4 +1,5 @@
PROFILE_COUNT=100
PROFILE_COUNT2=1000
b:
go build
@ -29,6 +30,8 @@ test2-a:
profile-a:
numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT} < testfile.toml"
profile2-a:
numactl --physcpubind=+1 bash -c "./A -p ${PROFILE_COUNT2} < testfile2.toml"
test-b:
numactl --physcpubind=+2 bash -c "time ./B < testfile.toml"
@ -39,6 +42,9 @@ test2-b:
profile-b:
numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT} < testfile.toml"
profile2-b:
numactl --physcpubind=+2 bash -c "./B -p ${PROFILE_COUNT2} < testfile2.toml"
test-sushi:
numactl --physcpubind=+3 bash -c "time ${GOPATH}/bin/toml-test-decoder < testfile.toml"

View File

@ -2,28 +2,55 @@ package parse
import (
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Some globally useful tokenizer definitions.
var (
c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
// Whitespace means tab (0x09) or space (0x20).
// This matches the blanks as defined by parsekit.
whitespace = a.Blanks.Optional()
// Newline means LF (0x0A) or CRLF (0x0D0A).
// This matches the newline as defined by parsekit.
newline = a.Newline
// Whitespace + newlines.
// This matches the whitespace as defined by parsekit.
whitespaceInclNewlines = a.Whitespace
// A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment.
comment = c.Seq(a.Hash, m.DropUntilEndOfLine)
optionalComment = comment.Optional()
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment)
// Keys may be either bare or quoted.
detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
// Both [tables] and [[arrays of tables]] start with a square open bracket.
detectTable = a.SquareOpen
whitespaceNewlinesAndComments = whitespaceInclNewlines.Or(comment)
)
func (t *parser) startDocument(p *parse.API) {
for {
switch {
case p.Accept(whitespaceNewlinesAndComments):
case p.Skip(whitespaceNewlinesOrComments):
// NOOP
case p.Peek(detectTable):
p.Handle(t.startTable)
case p.Peek(detectKey):
p.Handle(t.startKeyValuePair)
case p.Accept(a.EndOfFile):
case p.Peek(detectTable):
p.Handle(t.startTable)
case p.Skip(a.EndOfFile):
p.Stop()
default:
p.Expected("key/value pair, table or array of tables")

View File

@ -49,7 +49,7 @@ func (t *parser) startKeyValuePair(p *parse.API) {
err := t.doc.SetKeyValuePair(key, value)
if err != nil {
p.SetError("%s", err)
} else if !p.Accept(endOfLineOrComment) {
} else if !p.Skip(endOfLineOrComment) {
p.Expected("end of line")
}
}
@ -93,14 +93,14 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
// practice is to not use any extraneous whitespace.
func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) {
if p.Accept(keySeparatorDot) {
if p.Skip(keySeparatorDot) {
return t.parseKey(p, key)
}
return key, true
}
func (t *parser) startAssignment(p *parse.API) {
if !p.Accept(keyAssignment) {
if !p.Skip(keyAssignment) {
p.Expected("a value assignment")
}
}

View File

@ -2,38 +2,9 @@ package parse
import (
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
"git.makaay.nl/mauricem/go-toml/ast"
)
// Some globally useful tokenizer definitions.
var (
c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
// Whitespace means tab (0x09) or space (0x20).
// This matches the blanks as defined by parsekit.
whitespace = a.Blanks.Optional()
// Newline means LF (0x0A) or CRLF (0x0D0A).
// This matches the newline as defined by parsekit.
newline = a.Newline
// Whitespace + newlines.
// This matches the whitespace as defined by parsekit.
whitespaceInclNewlines = a.Whitespace
// A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment.
comment = c.Seq(a.Hash, m.DropUntilEndOfLine)
optionalComment = comment.Optional()
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
)
type parser struct {
doc *ast.Document
}

View File

@ -39,7 +39,7 @@ var (
func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
// Check for the start of the array.
if !p.Accept(arrayOpen) {
if !p.Skip(arrayOpen) {
p.Expected("an array")
return nil, false
}
@ -47,7 +47,7 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
a := ast.NewArray()
// Check for an empty array.
if p.Accept(arrayClose) {
if p.Skip(arrayClose) {
return ast.NewValue(ast.TypeArray, a), true
}
@ -68,12 +68,12 @@ func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
}
// Check for the end of the array.
if p.Accept(arrayClose) {
if p.Skip(arrayClose) {
return ast.NewValue(ast.TypeArray, a), true
}
// Not the end of the array? Then we should find an array separator.
if !p.Accept(arraySeparator) {
if !p.Skip(arraySeparator) {
p.Expected("an array separator")
return nil, false
}

View File

@ -15,9 +15,9 @@ var (
// Booleans are just the tokens you're used to. Always lowercase.
func (t *parser) parseBoolean(p *parse.API) (*ast.Value, bool) {
switch {
case p.Accept(trueStr):
case p.Skip(trueStr):
return trueValue, true
case p.Accept(falseStr):
case p.Skip(falseStr):
return falseValue, true
default:
p.Expected("true or false")

View File

@ -74,10 +74,10 @@ func (t *parser) parseNumber(p *parse.API) (*ast.Value, bool) {
switch {
case p.Accept(floatToken):
return ast.NewValue(ast.TypeFloat, p.Result.Tokens[0].Value.(float64)), true
case p.Accept(nan):
case p.Skip(nan):
return ast.NewValue(ast.TypeFloat, math.NaN()), true
case p.Accept(inf):
if p.Result.Runes[0] == '-' {
if p.Result.Bytes[0] == '-' {
return ast.NewValue(ast.TypeFloat, math.Inf(-1)), true
}
return ast.NewValue(ast.TypeFloat, math.Inf(+1)), true

View File

@ -96,7 +96,7 @@ func (t *parser) parseString(p *parse.API) (*ast.Value, bool) {
// "All other escape sequences [..] are reserved and, if used, TOML should
// produce an error."
func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
if !p.Accept(a.DoubleQuote) {
if !p.Skip(a.DoubleQuote) {
p.Expected(`opening quotation marks`)
return "", false
}
@ -104,7 +104,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
for {
switch {
case p.PeekWithResult(controlCharacter):
p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Runes[0])
p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Bytes[0])
return sb.String(), false
case p.Accept(validEscape):
if !appendEscapedRune(p, sb) {
@ -113,7 +113,7 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
case p.Peek(a.Backslash):
p.SetError("invalid escape sequence")
return sb.String(), false
case p.Accept(basicStringDelimiter):
case p.Skip(basicStringDelimiter):
return sb.String(), true
case p.Peek(a.InvalidRune):
p.SetError("invalid UTF8 rune")
@ -135,19 +135,19 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
//
// • Control characters other than tab are not permitted in a literal string.
func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
if !p.Accept(a.SingleQuote) {
if !p.Skip(a.SingleQuote) {
p.Expected("opening single quote")
return "", false
}
sb := &strings.Builder{}
for {
switch {
case p.Accept(literalStringDelimiter):
case p.Skip(literalStringDelimiter):
return sb.String(), true
case p.Accept(a.Tab):
case p.Skip(a.Tab):
sb.WriteString("\t")
case p.PeekWithResult(controlCharacter):
p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Runes[0])
p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Bytes[0])
return sb.String(), false
case p.Peek(a.InvalidRune):
p.SetError("invalid UTF8 rune")
@ -185,28 +185,28 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
// a \, it will be trimmed along with all whitespace (including newlines) up to
// the next non-whitespace character or closing delimiter.
func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) {
if !p.Accept(openingMultiLineBasicString) {
if !p.Skip(openingMultiLineBasicString) {
p.Expected("opening three quotation marks")
return "", false
}
sb := &strings.Builder{}
for {
switch {
case p.Accept(newline):
case p.Skip(newline):
sb.WriteString("\n")
case p.PeekWithResult(controlCharacter):
p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Runes[0])
p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Bytes[0])
return sb.String(), false
case p.Accept(validEscape):
if !appendEscapedRune(p, sb) {
return sb.String(), false
}
case p.Accept(lineEndingBackslash):
// NOOP, the line-ending backslash sequence is skipped.
case p.Skip(lineEndingBackslash):
// NOOP
case p.Peek(a.Backslash):
p.SetError("invalid escape sequence")
return sb.String(), false
case p.Accept(closingMultiLineBasicString):
case p.Skip(closingMultiLineBasicString):
return sb.String(), true
case p.Accept(a.ValidRune):
sb.WriteString(p.Result.String())
@ -265,21 +265,21 @@ func appendEscapedRune(p *parse.API, sb *strings.Builder) bool {
//
// • Control characters other than tab and newline are not permitted in a multi-line literal string.
func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) {
if !p.Accept(openingMultiLineLiteralString) {
if !p.Skip(openingMultiLineLiteralString) {
p.Expected("opening three single quotes")
return "", false
}
sb := &strings.Builder{}
for {
switch {
case p.Accept(closingMultiLineLiteralString):
case p.Skip(closingMultiLineLiteralString):
return sb.String(), true
case p.Accept(a.Tab):
case p.Skip(a.Tab):
sb.WriteString("\t")
case p.Accept(newline):
case p.Skip(newline):
sb.WriteString("\n")
case p.PeekWithResult(controlCharacter):
p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Runes[0])
p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Bytes[0])
return sb.String(), false
case p.Accept(a.ValidRune):
sb.WriteString(p.Result.String())

View File

@ -22,9 +22,9 @@ var (
func (t *parser) startTable(p *parse.API) {
switch {
case p.Accept(tableArrayOpen):
case p.Skip(tableArrayOpen):
p.Handle(t.startArrayOfTables)
case p.Accept(tableOpen):
case p.Skip(tableOpen):
p.Handle(t.startPlainTable)
default:
p.Expected("a table")
@ -71,11 +71,11 @@ func (t *parser) startTable(p *parse.API) {
// name = "plantain"
func (t *parser) startArrayOfTables(p *parse.API) {
if key, ok := t.parseKey(p, []string{}); ok {
if !p.Accept(tableArrayClose) {
if !p.Skip(tableArrayClose) {
p.Expected("closing ']]' for array of tables name")
return
}
if !p.Accept(endOfLineOrComment) {
if !p.Skip(endOfLineOrComment) {
p.Expected("end of line or comment")
return
}
@ -127,11 +127,11 @@ func (t *parser) startArrayOfTables(p *parse.API) {
// Empty tables are allowed and simply have no key/value pairs within them.
func (t *parser) startPlainTable(p *parse.API) {
if key, ok := t.parseKey(p, []string{}); ok {
if !p.Accept(tableClose) {
if !p.Skip(tableClose) {
p.Expected("closing ']' for table name")
return
}
if !p.Accept(endOfLineOrComment) {
if !p.Skip(endOfLineOrComment) {
p.Expected("end of line or comment")
return
}
@ -161,7 +161,7 @@ func (t *parser) startPlainTable(p *parse.API) {
// animal = { type.name = "pug" }
func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
// Check for the start of the inline table.
if !p.Accept(inlineTableOpen) {
if !p.Skip(inlineTableOpen) {
p.Expected("an inline table")
return nil, false
}
@ -169,7 +169,7 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
subdoc := newParser()
// Check for an empty inline table.
if p.Accept(inlineTableClose) {
if p.Skip(inlineTableClose) {
return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true
}
@ -193,12 +193,12 @@ func (t *parser) parseInlineTable(p *parse.API) (*ast.Value, bool) {
}
// Check for the end of the inline table.
if p.Accept(inlineTableClose) {
if p.Skip(inlineTableClose) {
return ast.NewValue(ast.TypeTable, subdoc.doc.Root), true
}
// Not the end of the inline table? Then we should find a key/value pair separator.
if !p.Accept(inlineTableSeparator) {
if !p.Skip(inlineTableSeparator) {
p.Expected("an array separator")
return nil, false
}

View File

@ -6,7 +6,7 @@ FILE=normal.toml
ITER=10000
cd ../cmd/burntsushi-tester
go build -gcflags=all=-l
go build
cd ../../parse2
ppfile=`cat $FILE | ../cmd/burntsushi-tester/burntsushi-tester -p $ITER 2>&1 | grep "profiling enabled" | cut -d, -f2`
go tool pprof -http 0.0.0.0:8888 ../cmd/burntsushi-tester/burntsushi-tester $ppfile