Cleaning up some logic around whitespace / newlines / comments.

This commit is contained in:
Maurice Makaay 2019-07-04 11:31:28 +00:00
parent 019dd35d83
commit af5d35ecee
16 changed files with 200 additions and 130 deletions

View File

@ -9,15 +9,15 @@ var (
detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
// Both [tables] and [[arrays of tables]] start with a square open bracket.
detectTableOrArrayOfTables = a.SquareOpen
detectTable = a.SquareOpen
)
func (t *parser) startDocument(p *parse.API) {
for {
switch {
case p.Accept(whitespaceOrComment):
// NOOP, skip these
case p.Peek(detectTableOrArrayOfTables):
case p.Accept(whitespaceInclNewlines.Or(comment)):
// NOOP
case p.Peek(detectTable):
p.Handle(t.startTable)
case p.Peek(detectKey):
p.Handle(t.startKeyValuePair)

View File

@ -13,7 +13,8 @@ var (
// Blank is ignored around key names and values. The key, equals
// sign, and value must be on the same line (though some values can be
// broken over multiple lines).
keyAssignment = c.Seq(optionalBlanks, a.Equal, optionalBlanks)
keyAssignment = c.Seq(whitespace, a.Equal, whitespace)
// A key may be either bare, quoted or dotted. Bare keys may only
// contain ASCII letters, ASCII digits, underscores, and dashes
@ -25,14 +26,16 @@ var (
// Best practice is to use bare keys except when absolutely necessary.
// A bare key must be non-empty, but an empty quoted key is allowed
// (though discouraged).
bareKeyRune = c.Any(a.ASCIILower, a.ASCIIUpper, a.Digit, a.Underscore, a.Minus)
bareKey = c.OneOrMore(bareKeyRune)
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together. Blanks
// around dot-separated parts are ignored, however, best practice is to
// not use any extraneous blanks.
keySeparatorDot = c.Seq(optionalBlanks, a.Dot, optionalBlanks)
// This allows for grouping similar properties together. Whitespace
// around dot-separated parts is ignored, however, best practice is to
// not use any extraneous whitespace.
keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace)
)
func (t *parser) startKeyValuePair(p *parse.API) {
@ -58,6 +61,7 @@ func (t *parser) startKeyValuePair(p *parse.API) {
// is to use bare keys except when absolutely necessary.
// A bare key must be non-empty, but an empty quoted key is allowed (though
// discouraged).
func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
var keyPart string
var ok bool
@ -83,6 +87,7 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
// This allows for grouping similar properties together.
// Whitespace around dot-separated parts is ignored, however, best
// practice is to not use any extraneous whitespace.
func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) {
if p.Accept(keySeparatorDot) {
return t.parseKey(p, key)

View File

@ -10,29 +10,29 @@ import (
var (
c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
// From the specs: "Whitespace means tab (0x09) or space (0x20)."
// In this package, we name this a blank, to be in line with the
// terminology as used in parsekit.
blank = a.Runes('\t', ' ')
blanks = c.OneOrMore(blank)
optionalBlanks = c.ZeroOrMore(blank)
// Whitespace means tab (0x09) or space (0x20).
// This matches the blanks as defined by parsekit.
whitespace = a.Blanks.Optional()
// Newline means LF (0x0A) or CRLF (0x0D0A).
// This matches the default newline as defined by parsekit.
// This matches the newline as defined by parsekit.
newline = a.Newline
// Whitespace is defined as blanks + newlines.
whitespace = c.OneOrMore(blank.Or(newline))
optionalWhitespace = c.ZeroOrMore(blank.Or(newline))
// Whitespace + newlines.
// This matches the whitespace as defined by parsekit.
whitespaceInclNewlines = a.Whitespace
// A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment.
comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)))
optionalComment = c.Optional(comment)
endOfLineOrComment = c.Seq(optionalBlanks, optionalComment, a.EndOfLine)
whitespaceOrComment = whitespace.Or(comment)
optionalWhitespaceOrComment = c.ZeroOrMore(whitespaceOrComment)
comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)))
optionalComment = comment.Optional()
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
whitespaceOrComment = c.ZeroOrMore(whitespaceInclNewlines.Or(comment))
)
type parser struct {

View File

@ -31,9 +31,9 @@ import (
// 2, # this is ok
// ]
var (
arrayOpen = a.SquareOpen.Then(optionalWhitespaceOrComment)
arraySeparator = c.Seq(optionalWhitespaceOrComment, a.Comma, optionalWhitespaceOrComment)
arrayClose = c.Seq(optionalWhitespaceOrComment, a.Comma.Optional(), optionalWhitespaceOrComment, a.SquareClose)
arrayOpen = a.SquareOpen.Then(whitespaceOrComment)
arraySeparator = c.Seq(whitespace, a.Comma, whitespaceOrComment)
arrayClose = c.Seq(whitespace, a.Comma.Optional(), whitespaceOrComment, a.SquareClose)
)
func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {

View File

@ -30,7 +30,7 @@ func TestArray(t *testing.T) {
{"x=[#comment before value\n1]", `{"x": [1]}`, ``},
{"x=[1#comment after value\n]", `{"x": [1]}`, ``},
{"x=[1\n#comment on its own line after value\n]", `{"x": [1]}`, ``},
{"x=[1#comment 1\n#comment 2\n#comment 3\n , \n2]", `{"x": [1, 2]}`, ``},
{"x=[1#comment 1\n,\n2]", `{}`, `unexpected input (expected an array separator) at line 1, column 5`},
{"x=[1]", `{"x": [1]}`, ``},
{"x=[1,0x2, 0b11, 0o4]", `{"x": [1, 2, 3, 4]}`, ``},
{"x=[0.1,0.2,3e-1,0.04e+1, nan, inf]", `{"x": [0.1, 0.2, 0.3, 0.4, NaN, +Inf]}`, ``},

View File

@ -25,21 +25,24 @@ var (
//
// lt1 = 07:32:00
// lt2 = 00:32:00.999999
year = a.Digit.Times(4)
month = a.Digit.Times(2)
day = a.Digit.Times(2)
yyyymmdd = c.Seq(year, a.Minus, month, a.Minus, day)
dateTok = tok.Str("2006-01-02", yyyymmdd)
hour = a.Digit.Times(2)
minute = a.Digit.Times(2)
seconds = a.Digit.Times(2)
hhmmss = c.Seq(hour, a.Colon, minute, a.Colon, seconds)
timeTok = tok.Str("15:04:05", hhmmss)
hour = a.Digit.Times(2)
minute = a.Digit.Times(2)
seconds = a.Digit.Times(2)
hhmmss = c.Seq(hour, a.Colon, minute, a.Colon, seconds)
timeTok = tok.Str("15:04:05", hhmmss)
// The precision of fractional seconds is implementation-specific, but at
// least millisecond precision is expected. If the value contains greater
// precision than the implementation can support, the additional precision
// must be truncated, not rounded.
micro = a.Dot.Then(c.MinMax(1, 9, a.Digit).Then(m.Drop(c.ZeroOrMore(a.Digit))))
microTok = c.Optional(tok.Str(".999999999", micro))
@ -48,6 +51,7 @@ var (
// Note that RFC 3339 also allows the use of a lower case delimiter.
//
// odt4 = 1979-05-27 07:32:00Z
tdelimTok = c.Any(
tok.Str("T", a.Rune('T')),
tok.Str("t", a.Rune('t')),
@ -66,18 +70,19 @@ var (
//
// Note that RFC 3339 also allows the use of a lower case 'z'.
// Here we replace it with a capital 'Z' to make the Go date parser work.
zulu = m.Replace(a.Runes('Z', 'z'), "Z")
offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute)
tz = zulu.Or(offset)
tzTok = tok.Str("Z07:00", tz)
tzTok = tok.Str("Z07:00", zulu.Or(offset))
// The full date/time parse format, based on the above definitions.
// The first token denotes the type of date/time value.
// The rest of the tokens contain layout fragments for time.Parse().
offsetDateTime = tok.Str(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok))
localDateTime = tok.Str(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok))
localDate = tok.Str(ast.TypeLocalDate, dateTok)
localTime = tok.Str(ast.TypeLocalTime, c.Seq(timeTok, microTok))
// The token denotes the type of date/time value.
// The contained tokens contain layout fragments for time.Parse().
offsetDateTime = tok.Group(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok))
localDateTime = tok.Group(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok))
localDate = tok.Group(ast.TypeLocalDate, dateTok)
localTime = tok.Group(ast.TypeLocalTime, c.Seq(timeTok, microTok))
datetime = c.Any(offsetDateTime, localDateTime, localDate, localTime)
)
@ -86,32 +91,17 @@ func (t *parser) parseDateTime(p *parse.API) (*ast.Value, bool) {
p.Expected("a date and/or time")
return nil, false
}
tokens := p.Result().Tokens()
valueType := getDateTimeValueType(&tokens)
input, value, err := getDateTimeValue(&tokens)
if err == nil {
return ast.NewValue(valueType, value), true
}
p.Error("invalid date/time value %s: %s", input, err)
return nil, false
}
token := p.Result().Token(0)
// The first token is a token that wraps the complete date/time input.
// Its type denotes the type of date/time value that it wraps.
func getDateTimeValueType(tokens *[]*tokenize.Token) ast.ValueType {
return (*tokens)[0].Type.(ast.ValueType)
}
// The rest of the tokens contain fragments that can be used with
// time.Parse() to parse the provided date/time input. Here, these fragments
// are combined into a layout string, which is then used to parse
// the input string.
func getDateTimeValue(tokens *[]*tokenize.Token) (string, time.Time, error) {
layout := ""
for _, l := range (*tokens)[1:] {
for _, l := range token.Value.([]*tokenize.Token) {
layout += l.Type.(string)
}
input := string((*tokens)[0].Runes)
value, err := time.Parse(layout, input)
return input, value, err
value, err := time.Parse(layout, string(token.Runes))
if err != nil {
p.Error("invalid date/time value %s: %s", string(token.Runes), err)
return nil, false
}
return ast.NewValue(token.Type.(ast.ValueType), value), true
}

View File

@ -15,6 +15,7 @@ func TestStartNumber(t *testing.T) {
func TestInteger(t *testing.T) {
for _, test := range []parseTest{
// Decimal
{`x=0`, `{"x": 0}`, ``},
{`x=+0`, `{"x": 0}`, ``},
{`x=-0`, `{"x": 0}`, ``},
@ -29,17 +30,12 @@ func TestInteger(t *testing.T) {
{`x=5_349_221`, `{"x": 5349221}`, ``},
{`x=1_2_3_4_5`, `{"x": 12345}`, ``},
{`x=9_223_372_036_854_775_807`, `{"x": 9223372036854775807}`, ``},
{`x=9_223_372_036_854_775_808`, `{}`,
`Panic: Handler error: MakeInt64Token cannot handle input "9223372036854775808": ` +
`strconv.ParseInt: parsing "9223372036854775808": value out of range ` +
`(only use a type conversion token maker, when the input has been validated on beforehand)`},
{`x=-9_223_372_036_854_775_808`, `{"x": -9223372036854775808}`, ``},
{`x=9_223_372_036_854_775_808`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "9223372036854775808": value out of range)`},
// TODO: use the same kind of handling for panics and for errors in both parsekit and TOML.
{`x=-9_223_372_036_854_775_809`, `{}`,
`Panic: Handler error: MakeInt64Token cannot handle input "-9223372036854775809": ` +
`strconv.ParseInt: parsing "-9223372036854775809": value out of range ` +
`(only use a type conversion token maker, when the input has been validated on beforehand)`},
{`x=-9_223_372_036_854_775_809`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "-9223372036854775809": value out of range)`},
// Hexadecimal
{`x=0x0`, `{"x": 0}`, ``},
{`x=0x1`, `{"x": 1}`, ``},
{`x=0x01`, `{"x": 1}`, ``},
@ -50,7 +46,9 @@ func TestInteger(t *testing.T) {
{`x=0xgood_beef`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`},
{`x=0x7FFFFFFFFFFFFFFF`, `{"x": 9223372036854775807}`, ``},
{`x=0x8000000000000000`, `{}`, `invalid integer value 0x8000000000000000: strconv.ParseInt: parsing "8000000000000000": value out of range at line 1, column 21`},
// Octal
{`x=0o0`, `{"x": 0}`, ``},
{`x=0o1`, `{"x": 1}`, ``},
{`x=0o01`, `{"x": 1}`, ``},
@ -60,7 +58,9 @@ func TestInteger(t *testing.T) {
{`x=0o9`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`},
{`x=0o777777777777777777777`, `{"x": 9223372036854775807}`, ``},
{`x=0o1000000000000000000000`, `{}`, `invalid integer value 0o1000000000000000000000: strconv.ParseInt: parsing "1000000000000000000000": value out of range at line 1, column 27`},
// Binary
{`x=0b0`, `{"x": 0}`, ``},
{`x=0b1`, `{"x": 1}`, ``},
{`x=0b01`, `{"x": 1}`, ``},

View File

@ -13,12 +13,15 @@ import (
var (
// Multi-line basic strings are surrounded by three quotation marks on each
// side and allow newlines.
doubleQuote3 = a.Str(`"""`)
// Multi-line literal strings are surrounded by three single quotes on each side and allow newlines.
singleQuote3 = a.Str(`'''`)
// Control characters as defined by TOML (U+0000 to U+001F, U+007F)
controlCharacter = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence.
@ -32,6 +35,7 @@ var (
// \\ - backslash (U+005C)
// \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX)
validEscapeChar = a.Runes('b', 't', 'n', 'f', 'r', '"', '\\')
shortEscape = c.Seq(a.Backslash, validEscapeChar)
shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
@ -42,7 +46,8 @@ var (
// "line ending backslash". When the last non-whitespace character on a line is
// a \, it will be trimmed along with all whitespace (including newlines) up to
// the next non-whitespace character or closing delimiter.
lineEndingBackslash = c.Seq(a.Backslash, optionalBlanks, newline, optionalWhitespace)
lineEndingBackslash = c.Seq(a.Backslash, whitespace, newline, whitespaceInclNewlines.Optional())
)
// There are four ways to express strings: basic, multi-line basic, literal and

View File

@ -7,17 +7,17 @@ import (
var (
// Opener and closer for [table].
tableOpen = c.Seq(optionalBlanks, a.SquareOpen, optionalBlanks)
tableClose = c.Seq(optionalBlanks, a.SquareClose, optionalBlanks)
tableOpen = c.Seq(whitespace, a.SquareOpen, whitespace)
tableClose = c.Seq(whitespace, a.SquareClose, whitespace)
// Opener and closer for [[array.of.tables]].
tableArrayOpen = c.Seq(optionalBlanks, a.SquareOpen, a.SquareOpen, optionalBlanks)
tableArrayClose = c.Seq(optionalBlanks, a.SquareClose, a.SquareClose, optionalBlanks)
tableArrayOpen = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace)
tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace)
// Opener, separator and closer for { inline: "tables" }.
inlineTableOpen = c.Seq(optionalBlanks, a.CurlyOpen, optionalBlanks)
inlineTableSeparator = c.Seq(optionalBlanks, a.Comma, optionalBlanks)
inlineTableClose = c.Seq(optionalBlanks, a.CurlyClose, optionalBlanks)
inlineTableOpen = c.Seq(whitespace, a.CurlyOpen, whitespace)
inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace)
inlineTableClose = c.Seq(whitespace, a.CurlyClose, whitespace)
)
func (t *parser) startTable(p *parse.API) {

View File

@ -47,8 +47,8 @@ func BuildGrammar() tokenize.Handler {
g["tab"] = a.Tab
g["space"] = a.Space
g["wschar"] = g["tab"].Or(g["space"])
g["ws"] = c.ZeroOrMore(g["wschar"])
g["whitespaceChar"] = g["tab"].Or(g["space"])
g["ws"] = c.ZeroOrMore(g["whitespaceChar"])
g["newline"] = a.Newline
g["ws-or-newline"] = g["ws"].Or(g["newline"])
@ -168,7 +168,7 @@ func BuildGrammar() tokenize.Handler {
g["array-open"] = a.SquareOpen
g["array-close"] = a.SquareClose
g["ws-comment-newline"] = c.ZeroOrMore(g["wschar"].Or(g["comment"].Optional().Then(g["newline"])))
g["ws-comment-newline"] = c.ZeroOrMore(g["whitespaceChar"].Or(g["comment"].Optional().Then(g["newline"])))
g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val"))
g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional())
g["array-sep"] = g["ws"].Then(a.Comma)

View File

@ -5,14 +5,18 @@ import (
"log"
"math"
"os"
"time"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
"github.com/pkg/profile"
)
func main() {
toml := BuildGrammar()
fmt.Printf("Reading TOML document from STDIN ...\n")
t := profile.Start()
result, err := toml.Match(os.Stdin)
t.Stop()
fmt.Printf("Completed reading document.\n")
if err != nil {
log.Fatalf("Error in parsing TOML: %s\n", err)
@ -68,10 +72,10 @@ func BuildGrammar() tokenize.Handler {
R("tab", a.Tab)
R("space", a.Space)
R("wschar", G("tab").Or(G("space")))
R("ws", c.ZeroOrMore(G("wschar")))
R("whitespaceChar", G("tab").Or(G("space")))
R("ws", c.ZeroOrMore(G("whitespaceChar")))
R("newline", a.Newline)
R("wschar-or-newline", G("wschar").Or(G("newline")))
R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline")))
// Comment
@ -96,7 +100,7 @@ func BuildGrammar() tokenize.Handler {
R("ml-basic-string-delim", G("quotation-mark").Times(3))
R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("wschar-or-newline"))))
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim"))))
R("ml-basic-string", c.Seq(
@ -190,35 +194,59 @@ func BuildGrammar() tokenize.Handler {
// Date and time (as defined in RFC 3339)
R("date-full-year", G("digit").Times(4))
R("date-year", G("digit").Times(4))
R("date-month", G("digit").Times(2))
R("date-mday", G("digit").Times(2))
R("time-delim", a.Runes('T', 't', ' '))
R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))
R("time-delim", c.Any(
tok.Str("T", a.Rune('T')),
tok.Str("t", a.Rune('t')),
tok.Str(" ", a.Rune(' '))))
R("time-hour", G("digit").Times(2))
R("time-minute", G("digit").Times(2))
R("time-second", G("digit").Times(2))
R("time-sec-frac", G("decimal-point").Then(c.OneOrMore(G("digit"))))
R("time-zulu", a.Runes('Z', 'z'))
R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"))))
R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))
R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
R("time-offset", c.Any(G("time-zulu"), G("time-num-offset")))
R("partial-time", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"), G("time-sec-frac").Optional()))
R("full-time", c.Seq(G("partial-time"), G("time-offset")))
R("full-date", c.Seq(G("date-full-year"), G("minus"), G("date-month"), G("minus"), G("date-mday")))
R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))
R("offset-date-time", c.Seq(G("full-date"), G("time-delim"), G("full-time")))
R("local-date-time", c.Seq(G("full-date"), G("time-delim"), G("partial-time")))
R("local-date", G("full-date"))
R("local-time", G("partial-time"))
R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset")))
R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time")))
R("local-date", G("date"))
R("local-time", G("time"))
R("date-time", c.Any(G("offset-date-time"), G("local-date-time"), G("local-date"), G("local-time")))
makeDateTimeValue := func(t *tokenize.API) interface{} {
layout := ""
input := ""
for _, t := range t.Result().Tokens() {
layout += t.Type.(string)
input += t.Value.(string)
}
t.Result().ClearTokens()
value, err := time.Parse(layout, input)
if err != nil {
panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err))
}
return value
}
R("date-time", c.Any(
tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue),
tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue),
tok.ByCallback("local-date", G("local-date"), makeDateTimeValue),
tok.ByCallback("local-time", G("local-time"), makeDateTimeValue)))
// Inline Table
R("inline-table-open", a.CurlyOpen.Then(G("ws")))
R("inline-table-close", G("ws").Then(a.CurlyClose))
R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws")))
R("inline-table-keyval", tok.Group("inline-table-keyval", c.Seq(G("key"), G("keyval-sep"), G("val"))))
R("inline-table-keyvals", c.Seq(G("inline-table-keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("inline-table-keyval")))))
R("inline-table-keyvals", c.Seq(G("keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("keyval")))))
R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close"))))
@ -227,12 +255,14 @@ func BuildGrammar() tokenize.Handler {
R("array-open", a.SquareOpen)
R("array-close", a.SquareClose)
R("array-sep", G("ws").Then(a.Comma))
R("ws-comment-newline", c.ZeroOrMore(G("wschar").Or(G("comment").Optional().Then(G("newline")))))
R("array-values", c.Any(
c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep"), G("array-values")),
c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep").Optional())))
R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline")))))
R("array-values", c.Seq(
G("ws-comment-newline"),
G("val"),
c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))),
G("array-sep").Optional()))
R("inline-array", tok.Group("inline-array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))
R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))
// Standard Table
@ -270,14 +300,9 @@ func BuildGrammar() tokenize.Handler {
// Overall Structure
R("expression", c.Any(
c.Seq(G("ws"), G("table"), G("ws"), G("comment").Optional()),
c.Seq(G("ws"), G("keyval"), G("ws"), G("comment").Optional()),
c.Seq(G("ws"), G("comment").Optional()),
))
R("expression", c.Seq(G("ws"), c.Optional(G("table").Or(G("keyval"))), G("ws"), G("comment").Optional()))
//R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))
R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression")))))
R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))
return G("toml")
}

View File

@ -21,14 +21,14 @@ var (
// Whitespace, Newline
ws = c.ZeroOrMore(wschar)
ws = c.ZeroOrMore(whitespaceChar)
tab = a.Tab
space = a.Space
wschar = tab.Or(space)
whitespaceChar = tab.Or(space)
newline = a.Newline
wsOrNewline = ws.Or(newline)
whitespace = ws.Or(newline)
// Comment
@ -84,7 +84,7 @@ var (
mlBasicBody = c.ZeroOrMore(c.Any(mlBasicChar, newline, mlBasicBodyConcat))
mlBasicChar = mlBasicUnescaped.Or(escaped)
mlBasicUnescaped = c.Any(printableASCII.Except(a.Backslash), nonASCII)
mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(wsOrNewline))
mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(whitespace))
// Literal String
@ -185,7 +185,7 @@ var (
arrayvalues = c.Seq(arrayValue, c.ZeroOrMore(c.Seq(arraySep, arrayValue)), arraySep.Optional())
arraySep = ws.Then(a.Comma)
arrayValue = wsCommentNewline.Then(val)
wsCommentNewline = c.ZeroOrMore(wschar.Or(comment.Optional().Then(newline)))
wsCommentNewline = c.ZeroOrMore(whitespaceChar.Or(comment.Optional().Then(newline)))
// Table

BIN
parse2/parse2 Executable file

Binary file not shown.

5
parse2/profile.sh Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash
go build
ppfile=`cat /tmp/y | ./parse2 2>&1 | grep "cpu profiling enabled" | cut -d, -f2`
go tool pprof -http 0.0.0.0:8888 ./parse2 $ppfile

View File

@ -20,9 +20,9 @@ expression =/ ws table ws [ comment ]
;; Whitespace
ws = *wschar
wschar = %x20 ; Space
wschar =/ %x09 ; Horizontal tab
ws = *whitespaceChar
whitespaceChar = %x20 ; Space
whitespaceChar =/ %x09 ; Horizontal tab
;; Newline
@ -197,7 +197,7 @@ array-values =/ ws-comment-newline val ws [ array-sep ]
array-sep = %x2C ; , Comma
ws-comment-newline = *( wschar / [ comment ] newline )
ws-comment-newline = *( whitespaceChar / [ comment ] newline )
;; Table

View File

@ -70,8 +70,6 @@ Two"""
# The following strings are byte-for-byte equivalent:
key1 = "The quick brown fox jumps over the lazy dog."
key1.1 = """The quick brown fox jumps over the lazy dog."""
key2 = """
The quick brown \
fox jumps over \
@ -267,6 +265,48 @@ het.is.een.hex1 = 0x0
het.is.een.hex2 = 0x1
het.is.een.hex3 = 0xffffffff
go = [1,2,3,
4,5,
6,]
no1 = []
no2 = [ ]
no3 = [
]
go1 = [1,2,3,]
go2 = [
1,
2,
3,]
go3 = [ 1, #one
2, #two
3 #three
]
go4 =[
1 ,
2,
3,
# hi
# hi
# hi
]
k="kaka"
[date.types]
the.d1=2019-01-01
the.d2=2019-01-01 12:12:12
the.d3=2019-01-01T12:12:12
the.d4=2019-01-01T12:12:12Z
the.d5=2019-01-01 12:12:12Z
the.d6=2019-01-01 12:12:12+03:45