diff --git a/parse/document.go b/parse/document.go index 22c577d..53cdf9c 100644 --- a/parse/document.go +++ b/parse/document.go @@ -9,15 +9,15 @@ var ( detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote) // Both [tables] and [[arrays of tables]] start with a square open bracket. - detectTableOrArrayOfTables = a.SquareOpen + detectTable = a.SquareOpen ) func (t *parser) startDocument(p *parse.API) { for { switch { - case p.Accept(whitespaceOrComment): - // NOOP, skip these - case p.Peek(detectTableOrArrayOfTables): + case p.Accept(whitespaceInclNewlines.Or(comment)): + // NOOP + case p.Peek(detectTable): p.Handle(t.startTable) case p.Peek(detectKey): p.Handle(t.startKeyValuePair) diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index a2912de..09f1491 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -13,7 +13,8 @@ var ( // Blank is ignored around key names and values. The key, equals // sign, and value must be on the same line (though some values can be // broken over multiple lines). - keyAssignment = c.Seq(optionalBlanks, a.Equal, optionalBlanks) + + keyAssignment = c.Seq(whitespace, a.Equal, whitespace) // A key may be either bare, quoted or dotted. Bare keys may only // contain ASCII letters, ASCII digits, underscores, and dashes @@ -25,14 +26,16 @@ var ( // Best practice is to use bare keys except when absolutely necessary. // A bare key must be non-empty, but an empty quoted key is allowed // (though discouraged). + bareKeyRune = c.Any(a.ASCIILower, a.ASCIIUpper, a.Digit, a.Underscore, a.Minus) bareKey = c.OneOrMore(bareKeyRune) // Dotted keys are a sequence of bare or quoted keys joined with a dot. - // This allows for grouping similar properties together. Blanks - // around dot-separated parts are ignored, however, best practice is to - // not use any extraneous blanks. - keySeparatorDot = c.Seq(optionalBlanks, a.Dot, optionalBlanks) + // This allows for grouping similar properties together. 
Whitespace + // around dot-separated parts is ignored, however, best practice is to + // not use any extraneous whitespace. + + keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace) ) func (t *parser) startKeyValuePair(p *parse.API) { @@ -58,6 +61,7 @@ func (t *parser) startKeyValuePair(p *parse.API) { // is to use bare keys except when absolutely necessary. // A bare key must be non-empty, but an empty quoted key is allowed (though // discouraged). + func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) { var keyPart string var ok bool @@ -83,6 +87,7 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) { // This allows for grouping similar properties together. // Whitespace around dot-separated parts is ignored, however, best // practice is to not use any extraneous whitespace. + func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) { if p.Accept(keySeparatorDot) { return t.parseKey(p, key) diff --git a/parse/parse.go b/parse/parse.go index fd04ff8..b1f2a81 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -10,29 +10,29 @@ import ( var ( c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T - // From the specs: "Whitespace means tab (0x09) or space (0x20)." - // In this package, we name this a blank, to be in line with the - // terminology as used in parsekit. - blank = a.Runes('\t', ' ') - blanks = c.OneOrMore(blank) - optionalBlanks = c.ZeroOrMore(blank) + // Whitespace means tab (0x09) or space (0x20). + // This matches the blanks as defined by parsekit. + + whitespace = a.Blanks.Optional() // Newline means LF (0x0A) or CRLF (0x0D0A). - // This matches the default newline as defined by parsekit. + // This matches the newline as defined by parsekit. + newline = a.Newline - // Whitespace is defined as blanks + newlines. - whitespace = c.OneOrMore(blank.Or(newline)) - optionalWhitespace = c.ZeroOrMore(blank.Or(newline)) + // Whitespace + newlines. 
+ // This matches the whitespace as defined by parsekit. + + whitespaceInclNewlines = a.Whitespace // A '#' hash symbol marks the rest of the line as a comment. // All characters up to the end of the line are included in the comment. - comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine))) - optionalComment = c.Optional(comment) - endOfLineOrComment = c.Seq(optionalBlanks, optionalComment, a.EndOfLine) - whitespaceOrComment = whitespace.Or(comment) - optionalWhitespaceOrComment = c.ZeroOrMore(whitespaceOrComment) + comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine))) + optionalComment = comment.Optional() + + endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) + whitespaceOrComment = c.ZeroOrMore(whitespaceInclNewlines.Or(comment)) ) type parser struct { diff --git a/parse/value_array.go b/parse/value_array.go index e30c906..7a192b7 100644 --- a/parse/value_array.go +++ b/parse/value_array.go @@ -31,9 +31,9 @@ import ( // 2, # this is ok // ] var ( - arrayOpen = a.SquareOpen.Then(optionalWhitespaceOrComment) - arraySeparator = c.Seq(optionalWhitespaceOrComment, a.Comma, optionalWhitespaceOrComment) - arrayClose = c.Seq(optionalWhitespaceOrComment, a.Comma.Optional(), optionalWhitespaceOrComment, a.SquareClose) + arrayOpen = a.SquareOpen.Then(whitespaceOrComment) + arraySeparator = c.Seq(whitespace, a.Comma, whitespaceOrComment) + arrayClose = c.Seq(whitespace, a.Comma.Optional(), whitespaceOrComment, a.SquareClose) ) func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) { diff --git a/parse/value_array_test.go b/parse/value_array_test.go index 09fc8d1..264cfe8 100644 --- a/parse/value_array_test.go +++ b/parse/value_array_test.go @@ -30,7 +30,7 @@ func TestArray(t *testing.T) { {"x=[#comment before value\n1]", `{"x": [1]}`, ``}, {"x=[1#comment after value\n]", `{"x": [1]}`, ``}, {"x=[1\n#comment on its own line after value\n]", `{"x": [1]}`, ``}, - {"x=[1#comment 1\n#comment 2\n#comment 3\n , \n2]", `{"x": [1, 2]}`, ``}, + 
{"x=[1#comment 1\n,\n2]", `{}`, `unexpected input (expected an array separator) at line 1, column 5`}, {"x=[1]", `{"x": [1]}`, ``}, {"x=[1,0x2, 0b11, 0o4]", `{"x": [1, 2, 3, 4]}`, ``}, {"x=[0.1,0.2,3e-1,0.04e+1, nan, inf]", `{"x": [0.1, 0.2, 0.3, 0.4, NaN, +Inf]}`, ``}, diff --git a/parse/value_datetime.go b/parse/value_datetime.go index da6d643..5335496 100644 --- a/parse/value_datetime.go +++ b/parse/value_datetime.go @@ -25,21 +25,24 @@ var ( // // lt1 = 07:32:00 // lt2 = 00:32:00.999999 + year = a.Digit.Times(4) month = a.Digit.Times(2) day = a.Digit.Times(2) yyyymmdd = c.Seq(year, a.Minus, month, a.Minus, day) dateTok = tok.Str("2006-01-02", yyyymmdd) - hour = a.Digit.Times(2) - minute = a.Digit.Times(2) - seconds = a.Digit.Times(2) - hhmmss = c.Seq(hour, a.Colon, minute, a.Colon, seconds) - timeTok = tok.Str("15:04:05", hhmmss) + + hour = a.Digit.Times(2) + minute = a.Digit.Times(2) + seconds = a.Digit.Times(2) + hhmmss = c.Seq(hour, a.Colon, minute, a.Colon, seconds) + timeTok = tok.Str("15:04:05", hhmmss) // The precision of fractional seconds is implementation-specific, but at // least millisecond precision is expected. If the value contains greater // precision than the implementation can support, the additional precision // must be truncated, not rounded. + micro = a.Dot.Then(c.MinMax(1, 9, a.Digit).Then(m.Drop(c.ZeroOrMore(a.Digit)))) microTok = c.Optional(tok.Str(".999999999", micro)) @@ -48,6 +51,7 @@ var ( // Note that RFC 3339 also allows the use of a lower case delimiter. // // odt4 = 1979-05-27 07:32:00Z + tdelimTok = c.Any( tok.Str("T", a.Rune('T')), tok.Str("t", a.Rune('t')), @@ -66,18 +70,19 @@ var ( // // Note that RFC 3339 also allows the use of a lower case 'z'. // Here we replace it with a capital 'Z' to make the Go date parser work. 
+ zulu = m.Replace(a.Runes('Z', 'z'), "Z") offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute) - tz = zulu.Or(offset) - tzTok = tok.Str("Z07:00", tz) + tzTok = tok.Str("Z07:00", zulu.Or(offset)) // The full date/time parse format, based on the above definitions. - // The first token denotes the type of date/time value. - // The rest of the tokens contain layout fragments for time.Parse(). - offsetDateTime = tok.Str(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok)) - localDateTime = tok.Str(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok)) - localDate = tok.Str(ast.TypeLocalDate, dateTok) - localTime = tok.Str(ast.TypeLocalTime, c.Seq(timeTok, microTok)) + // The token denotes the type of date/time value. + // The contained tokens contain layout fragments for time.Parse(). + + offsetDateTime = tok.Group(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok)) + localDateTime = tok.Group(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok)) + localDate = tok.Group(ast.TypeLocalDate, dateTok) + localTime = tok.Group(ast.TypeLocalTime, c.Seq(timeTok, microTok)) datetime = c.Any(offsetDateTime, localDateTime, localDate, localTime) ) @@ -86,32 +91,17 @@ func (t *parser) parseDateTime(p *parse.API) (*ast.Value, bool) { p.Expected("a date and/or time") return nil, false } - tokens := p.Result().Tokens() - valueType := getDateTimeValueType(&tokens) - input, value, err := getDateTimeValue(&tokens) - if err == nil { - return ast.NewValue(valueType, value), true - } - p.Error("invalid date/time value %s: %s", input, err) - return nil, false -} + token := p.Result().Token(0) -// The first token is a token that wraps the complete date/time input. -// Its type denotes the type of date/time value that it wraps. 
-func getDateTimeValueType(tokens *[]*tokenize.Token) ast.ValueType { - return (*tokens)[0].Type.(ast.ValueType) -} - -// The rest of the tokens contain fragments that can be used with -// time.Parse() to parse the provided date/time input. Here, these fragments -// are combined into a layout string, which is then used to parse -// the input string. -func getDateTimeValue(tokens *[]*tokenize.Token) (string, time.Time, error) { layout := "" - for _, l := range (*tokens)[1:] { + for _, l := range token.Value.([]*tokenize.Token) { layout += l.Type.(string) } - input := string((*tokens)[0].Runes) - value, err := time.Parse(layout, input) - return input, value, err + value, err := time.Parse(layout, string(token.Runes)) + if err != nil { + p.Error("invalid date/time value %s: %s", string(token.Runes), err) + return nil, false + } + + return ast.NewValue(token.Type.(ast.ValueType), value), true } diff --git a/parse/value_number_test.go b/parse/value_number_test.go index 92a6166..dfce891 100644 --- a/parse/value_number_test.go +++ b/parse/value_number_test.go @@ -15,6 +15,7 @@ func TestStartNumber(t *testing.T) { func TestInteger(t *testing.T) { for _, test := range []parseTest{ // Decimal + {`x=0`, `{"x": 0}`, ``}, {`x=+0`, `{"x": 0}`, ``}, {`x=-0`, `{"x": 0}`, ``}, @@ -29,17 +30,12 @@ func TestInteger(t *testing.T) { {`x=5_349_221`, `{"x": 5349221}`, ``}, {`x=1_2_3_4_5`, `{"x": 12345}`, ``}, {`x=9_223_372_036_854_775_807`, `{"x": 9223372036854775807}`, ``}, - {`x=9_223_372_036_854_775_808`, `{}`, - `Panic: Handler error: MakeInt64Token cannot handle input "9223372036854775808": ` + - `strconv.ParseInt: parsing "9223372036854775808": value out of range ` + - `(only use a type conversion token maker, when the input has been validated on beforehand)`}, - {`x=-9_223_372_036_854_775_808`, `{"x": -9223372036854775808}`, ``}, + {`x=9_223_372_036_854_775_808`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "9223372036854775808": value out of range)`}, // TODO make 
the use of the same kind of handling for panics and for errors between parsekit and TOML. - {`x=-9_223_372_036_854_775_809`, `{}`, - `Panic: Handler error: MakeInt64Token cannot handle input "-9223372036854775809": ` + - `strconv.ParseInt: parsing "-9223372036854775809": value out of range ` + - `(only use a type conversion token maker, when the input has been validated on beforehand)`}, + {`x=-9_223_372_036_854_775_809`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "-9223372036854775809": value out of range)`}, + // Hexadecimal + {`x=0x0`, `{"x": 0}`, ``}, {`x=0x1`, `{"x": 1}`, ``}, {`x=0x01`, `{"x": 1}`, ``}, @@ -50,7 +46,9 @@ func TestInteger(t *testing.T) { {`x=0xgood_beef`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=0x7FFFFFFFFFFFFFFF`, `{"x": 9223372036854775807}`, ``}, {`x=0x8000000000000000`, `{}`, `invalid integer value 0x8000000000000000: strconv.ParseInt: parsing "8000000000000000": value out of range at line 1, column 21`}, + //Octal + {`x=0o0`, `{"x": 0}`, ``}, {`x=0o1`, `{"x": 1}`, ``}, {`x=0o01`, `{"x": 1}`, ``}, @@ -60,7 +58,9 @@ func TestInteger(t *testing.T) { {`x=0o9`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=0o777777777777777777777`, `{"x": 9223372036854775807}`, ``}, {`x=0o1000000000000000000000`, `{}`, `invalid integer value 0o1000000000000000000000: strconv.ParseInt: parsing "1000000000000000000000": value out of range at line 1, column 27`}, + // Binary + {`x=0b0`, `{"x": 0}`, ``}, {`x=0b1`, `{"x": 1}`, ``}, {`x=0b01`, `{"x": 1}`, ``}, diff --git a/parse/value_string.go b/parse/value_string.go index c764e8f..7815d0b 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -13,12 +13,15 @@ import ( var ( // Multi-line basic strings are surrounded by three quotation marks on each // side and allow newlines. + doubleQuote3 = a.Str(`"""`) // Multi-line literal strings are surrounded by three single quotes on each side and allow newlines. 
+ singleQuote3 = a.Str(`'''`) // Control characters as defined by TOML (U+0000 to U+001F, U+007F) + controlCharacter = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F')) // For convenience, some popular characters have a compact escape sequence. @@ -32,6 +35,7 @@ var ( // \\ - backslash (U+005C) // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) + validEscapeChar = a.Runes('b', 't', 'n', 'f', 'r', '"', '\\') shortEscape = c.Seq(a.Backslash, validEscapeChar) shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4)) @@ -42,7 +46,8 @@ var ( // "line ending backslash". When the last non-whitespace character on a line is // a \, it will be trimmed along with all whitespace (including newlines) up to // the next non-whitespace character or closing delimiter. - lineEndingBackslash = c.Seq(a.Backslash, optionalBlanks, newline, optionalWhitespace) + + lineEndingBackslash = c.Seq(a.Backslash, whitespace, newline, whitespaceInclNewlines.Optional()) ) // There are four ways to express strings: basic, multi-line basic, literal and diff --git a/parse/value_table.go b/parse/value_table.go index 3147950..3d0ff57 100644 --- a/parse/value_table.go +++ b/parse/value_table.go @@ -7,17 +7,17 @@ import ( var ( // Opener and closer for [table]. - tableOpen = c.Seq(optionalBlanks, a.SquareOpen, optionalBlanks) - tableClose = c.Seq(optionalBlanks, a.SquareClose, optionalBlanks) + tableOpen = c.Seq(whitespace, a.SquareOpen, whitespace) + tableClose = c.Seq(whitespace, a.SquareClose, whitespace) // Opener and closer for [[array.of.tables]]. - tableArrayOpen = c.Seq(optionalBlanks, a.SquareOpen, a.SquareOpen, optionalBlanks) - tableArrayClose = c.Seq(optionalBlanks, a.SquareClose, a.SquareClose, optionalBlanks) + tableArrayOpen = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace) + tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace) // Opener, separator and closer for { inline: "tables" }. 
- inlineTableOpen = c.Seq(optionalBlanks, a.CurlyOpen, optionalBlanks) - inlineTableSeparator = c.Seq(optionalBlanks, a.Comma, optionalBlanks) - inlineTableClose = c.Seq(optionalBlanks, a.CurlyClose, optionalBlanks) + inlineTableOpen = c.Seq(whitespace, a.CurlyOpen, whitespace) + inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace) + inlineTableClose = c.Seq(whitespace, a.CurlyClose, whitespace) ) func (t *parser) startTable(p *parse.API) { diff --git a/parse2/grammar.1.bak b/parse2/grammar.1.bak index c2191d1..f5771b5 100644 --- a/parse2/grammar.1.bak +++ b/parse2/grammar.1.bak @@ -47,8 +47,8 @@ func BuildGrammar() tokenize.Handler { g["tab"] = a.Tab g["space"] = a.Space - g["wschar"] = g["tab"].Or(g["space"]) - g["ws"] = c.ZeroOrMore(g["wschar"]) + g["whitespaceChar"] = g["tab"].Or(g["space"]) + g["ws"] = c.ZeroOrMore(g["whitespaceChar"]) g["newline"] = a.Newline g["ws-or-newline"] = g["ws"].Or(g["newline"]) @@ -168,7 +168,7 @@ func BuildGrammar() tokenize.Handler { g["array-open"] = a.SquareOpen g["array-close"] = a.SquareClose - g["ws-comment-newline"] = c.ZeroOrMore(g["wschar"].Or(g["comment"].Optional().Then(g["newline"]))) + g["ws-comment-newline"] = c.ZeroOrMore(g["whitespaceChar"].Or(g["comment"].Optional().Then(g["newline"]))) g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val")) g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional()) g["array-sep"] = g["ws"].Then(a.Comma) diff --git a/parse2/grammar.go b/parse2/grammar.go index 27ca1d9..e8acb37 100644 --- a/parse2/grammar.go +++ b/parse2/grammar.go @@ -5,14 +5,18 @@ import ( "log" "math" "os" + "time" "git.makaay.nl/mauricem/go-parsekit/tokenize" + "github.com/pkg/profile" ) func main() { toml := BuildGrammar() fmt.Printf("Reading TOML document from STDIN ...\n") + t := profile.Start() result, err := toml.Match(os.Stdin) + t.Stop() fmt.Printf("Completed reading document.\n") if err != nil { log.Fatalf("Error in 
parsing TOML: %s\n", err) @@ -68,10 +72,10 @@ func BuildGrammar() tokenize.Handler { R("tab", a.Tab) R("space", a.Space) - R("wschar", G("tab").Or(G("space"))) - R("ws", c.ZeroOrMore(G("wschar"))) + R("whitespaceChar", G("tab").Or(G("space"))) + R("ws", c.ZeroOrMore(G("whitespaceChar"))) R("newline", a.Newline) - R("wschar-or-newline", G("wschar").Or(G("newline"))) + R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline"))) // Comment @@ -96,7 +100,7 @@ func BuildGrammar() tokenize.Handler { R("ml-basic-string-delim", G("quotation-mark").Times(3)) R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped"))) - R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("wschar-or-newline")))) + R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline")))) R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat")))) R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim")))) R("ml-basic-string", c.Seq( @@ -190,35 +194,59 @@ func BuildGrammar() tokenize.Handler { // Date and time (as defined in RFC 3339) - R("date-full-year", G("digit").Times(4)) + R("date-year", G("digit").Times(4)) R("date-month", G("digit").Times(2)) R("date-mday", G("digit").Times(2)) - R("time-delim", a.Runes('T', 't', ' ')) + R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday")))) + + R("time-delim", c.Any( + tok.Str("T", a.Rune('T')), + tok.Str("t", a.Rune('t')), + tok.Str(" ", a.Rune(' ')))) + R("time-hour", G("digit").Times(2)) R("time-minute", G("digit").Times(2)) R("time-second", G("digit").Times(2)) - R("time-sec-frac", G("decimal-point").Then(c.OneOrMore(G("digit")))) - R("time-zulu", a.Runes('Z', 'z')) + R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), 
G("time-minute"), G("colon"), G("time-second")))) + + R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit))))) + + R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z")) R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute"))) - R("time-offset", c.Any(G("time-zulu"), G("time-num-offset"))) - R("partial-time", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"), G("time-sec-frac").Optional())) - R("full-time", c.Seq(G("partial-time"), G("time-offset"))) - R("full-date", c.Seq(G("date-full-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))) + R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset")))) - R("offset-date-time", c.Seq(G("full-date"), G("time-delim"), G("full-time"))) - R("local-date-time", c.Seq(G("full-date"), G("time-delim"), G("partial-time"))) - R("local-date", G("full-date")) - R("local-time", G("partial-time")) + R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset"))) + R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time"))) + R("local-date", G("date")) + R("local-time", G("time")) - R("date-time", c.Any(G("offset-date-time"), G("local-date-time"), G("local-date"), G("local-time"))) + makeDateTimeValue := func(t *tokenize.API) interface{} { + layout := "" + input := "" + for _, t := range t.Result().Tokens() { + layout += t.Type.(string) + input += t.Value.(string) + } + t.Result().ClearTokens() + value, err := time.Parse(layout, input) + if err != nil { + panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err)) + } + return value + } + + R("date-time", c.Any( + tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue), + tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue), + tok.ByCallback("local-date", G("local-date"), makeDateTimeValue), 
+ tok.ByCallback("local-time", G("local-time"), makeDateTimeValue))) // Inline Table R("inline-table-open", a.CurlyOpen.Then(G("ws"))) R("inline-table-close", G("ws").Then(a.CurlyClose)) R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws"))) - R("inline-table-keyval", tok.Group("inline-table-keyval", c.Seq(G("key"), G("keyval-sep"), G("val")))) - R("inline-table-keyvals", c.Seq(G("inline-table-keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("inline-table-keyval"))))) + R("inline-table-keyvals", c.Seq(G("keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("keyval"))))) R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close")))) @@ -227,12 +255,14 @@ func BuildGrammar() tokenize.Handler { R("array-open", a.SquareOpen) R("array-close", a.SquareClose) R("array-sep", G("ws").Then(a.Comma)) - R("ws-comment-newline", c.ZeroOrMore(G("wschar").Or(G("comment").Optional().Then(G("newline"))))) - R("array-values", c.Any( - c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep"), G("array-values")), - c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep").Optional()))) + R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline"))))) + R("array-values", c.Seq( + G("ws-comment-newline"), + G("val"), + c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))), + G("array-sep").Optional())) - R("inline-array", tok.Group("inline-array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close")))) + R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close")))) // Standard Table @@ -270,14 +300,9 @@ func BuildGrammar() tokenize.Handler { // Overall Structure - R("expression", c.Any( - c.Seq(G("ws"), G("table"), G("ws"), G("comment").Optional()), - c.Seq(G("ws"), G("keyval"), G("ws"), G("comment").Optional()), - 
c.Seq(G("ws"), G("comment").Optional()), - )) + R("expression", c.Seq(G("ws"), c.Optional(G("table").Or(G("keyval"))), G("ws"), G("comment").Optional())) - //R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile)) - R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))))) + R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile)) return G("toml") } diff --git a/parse2/grammar.go.bak b/parse2/grammar.go.bak index c5758bf..64a8894 100644 --- a/parse2/grammar.go.bak +++ b/parse2/grammar.go.bak @@ -21,14 +21,14 @@ var ( // Whitespace, Newline - ws = c.ZeroOrMore(wschar) + ws = c.ZeroOrMore(whitespaceChar) tab = a.Tab space = a.Space - wschar = tab.Or(space) + whitespaceChar = tab.Or(space) newline = a.Newline - wsOrNewline = ws.Or(newline) + whitespace = ws.Or(newline) // Comment @@ -84,7 +84,7 @@ var ( mlBasicBody = c.ZeroOrMore(c.Any(mlBasicChar, newline, mlBasicBodyConcat)) mlBasicChar = mlBasicUnescaped.Or(escaped) mlBasicUnescaped = c.Any(printableASCII.Except(a.Backslash), nonASCII) - mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(wsOrNewline)) + mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(whitespace)) // Literal String @@ -185,7 +185,7 @@ var ( arrayvalues = c.Seq(arrayValue, c.ZeroOrMore(c.Seq(arraySep, arrayValue)), arraySep.Optional()) arraySep = ws.Then(a.Comma) arrayValue = wsCommentNewline.Then(val) - wsCommentNewline = c.ZeroOrMore(wschar.Or(comment.Optional().Then(newline))) + wsCommentNewline = c.ZeroOrMore(whitespaceChar.Or(comment.Optional().Then(newline))) // Table diff --git a/parse2/parse2 b/parse2/parse2 new file mode 100755 index 0000000..9fdefc1 Binary files /dev/null and b/parse2/parse2 differ diff --git a/parse2/profile.sh b/parse2/profile.sh new file mode 100755 index 0000000..ef8431f --- /dev/null +++ b/parse2/profile.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +go build +ppfile=`cat /tmp/y | ./parse2 2>&1 | grep 
"cpu profiling enabled" | cut -d, -f2` +go tool pprof -http 0.0.0.0:8888 ./parse2 $ppfile diff --git a/parse2/toml.abnf b/parse2/toml.abnf index ecfed60..a8e17c7 100644 --- a/parse2/toml.abnf +++ b/parse2/toml.abnf @@ -20,9 +20,9 @@ expression =/ ws table ws [ comment ] ;; Whitespace -ws = *wschar -wschar = %x20 ; Space -wschar =/ %x09 ; Horizontal tab +ws = *whitespaceChar +whitespaceChar = %x20 ; Space +whitespaceChar =/ %x09 ; Horizontal tab ;; Newline @@ -197,7 +197,7 @@ array-values =/ ws-comment-newline val ws [ array-sep ] array-sep = %x2C ; , Comma -ws-comment-newline = *( wschar / [ comment ] newline ) +ws-comment-newline = *( whitespaceChar / [ comment ] newline ) ;; Table diff --git a/parse2/x b/parse2/x index 6d667bf..08e6307 100644 --- a/parse2/x +++ b/parse2/x @@ -70,8 +70,6 @@ Two""" # The following strings are byte-for-byte equivalent: key1 = "The quick brown fox jumps over the lazy dog." -key1.1 = """The quick brown fox jumps over the lazy dog.""" - key2 = """ The quick brown \ fox jumps over \ @@ -267,6 +265,48 @@ het.is.een.hex1 = 0x0 het.is.een.hex2 = 0x1 het.is.een.hex3 = 0xffffffff -go = [1,2,3, -4,5, -6,] +no1 = [] +no2 = [ ] +no3 = [ + ] +go1 = [1,2,3,] +go2 = [ +1, +2, +3,] +go3 = [ 1, #one + 2, #two + 3 #three + ] + +go4 =[ + + + +1 , + + + + 2, + + + + + 3, + + + +# hi +# hi +# hi +] + +k="kaka" + +[date.types] +the.d1=2019-01-01 +the.d2=2019-01-01 12:12:12 +the.d3=2019-01-01T12:12:12 +the.d4=2019-01-01T12:12:12Z +the.d5=2019-01-01 12:12:12Z +the.d6=2019-01-01 12:12:12+03:45