diff --git a/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.json b/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.json new file mode 100644 index 0000000..7d254c9 --- /dev/null +++ b/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.json @@ -0,0 +1,14 @@ +{ + "date1" : { + "type" : "datetime", + "value" : "2019-10-10T11:12:13Z" + }, + "date2" : { + "type" : "datetime", + "value" : "2019-10-10T11:12:13Z" + }, + "date3" : { + "type" : "datetime", + "value" : "2019-10-10T11:12:13Z" + } +} diff --git a/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.toml b/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.toml new file mode 100644 index 0000000..8444bba --- /dev/null +++ b/parse/testfiles/valid/mmakaay/time-T-and-Z-case-insensitive.toml @@ -0,0 +1,3 @@ +date1 = 2019-10-10T11:12:13Z +date2 = 2019-10-10t11:12:13z +date3 = 2019-10-10 11:12:13z \ No newline at end of file diff --git a/parse/value_datetime.go b/parse/value_datetime.go index c39bed2..da6d643 100644 --- a/parse/value_datetime.go +++ b/parse/value_datetime.go @@ -45,9 +45,13 @@ var ( // For the sake of readability, you may replace the T delimiter between // date and time with a space (as permitted by RFC 3339 section 5.6). + // Note that RFC 3339 also allows the use of a lower case delimiter. // // odt4 = 1979-05-27 07:32:00Z - tdelimTok = tok.Str("T", a.Rune('T')).Or(tok.Str(" ", a.Rune(' '))) + tdelimTok = c.Any( + tok.Str("T", a.Rune('T')), + tok.Str("t", a.Rune('t')), + tok.Str(" ", a.Rune(' '))) // If you omit the offset from an RFC 3339 formatted date-time, it will // represent the given date-time without any relation to an offset or @@ -59,7 +63,10 @@ var ( // It cannot be converted to an instant in time without additional // information. Conversion to an instant, if required, is // implementation-specific. - zulu = a.Rune('Z') + // + // Note that RFC 3339 also allows the use of a lower case 'z'. 
+ // Here we replace it with a capital 'Z' to make the Go date parser work. + zulu = m.Replace(a.Runes('Z', 'z'), "Z") offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute) tz = zulu.Or(offset) tzTok = tok.Str("Z07:00", tz) diff --git a/parse/value_number.go b/parse/value_number.go index 425c7ec..c94711b 100644 --- a/parse/value_number.go +++ b/parse/value_number.go @@ -1,5 +1,7 @@ package parse +// TODO move some definitions into parsekit, as far as they match Go standard formatting of numbers. + import ( "math" "strconv" diff --git a/parse2/grammar.1.bak b/parse2/grammar.1.bak new file mode 100644 index 0000000..c2191d1 --- /dev/null +++ b/parse2/grammar.1.bak @@ -0,0 +1,232 @@ +package parse2 + +import ( + "fmt" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +type Grammar map[string]tokenize.Handler + +func (g Grammar) Rule(name string, definition tokenize.Handler) { + if _, ok := g[name]; ok { + panic(fmt.Sprintf("Grammar rule %q already exists", name)) + } + g[name] = definition +} + +func (g Grammar) Get(name string) tokenize.Handler { + if h, ok := g[name]; ok { + return g[name] + } + return func(t *tokenize.API) bool { + return g[name](t) + } +} + +func BuildGrammar() tokenize.Handler { + + c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T + + g := make(Grammar) + R := g.Rule + G := g.Get + + R("alpha", a.Letter) + R("digit", a.Digit) + g["minus"] = a.Minus + g["plus"] = a.Plus + g["underscore"] = a.Underscore + g["quotation-mark"] = a.DoubleQuote + g["apostrophe"] = a.SingleQuote + g["colon"] = a.Colon + g["escape"] = a.Backslash + g["hex-digit"] = a.HexDigit + + // Whitespace, Newline + + g["tab"] = a.Tab + g["space"] = a.Space + g["wschar"] = g["tab"].Or(g["space"]) + g["ws"] = c.ZeroOrMore(g["wschar"]) + g["newline"] = a.Newline + g["ws-or-newline"] = g["ws"].Or(g["newline"]) + + // Comment + + g["comment-start-symbol"] = a.Hash + g["printable-ascii"] = a.RuneRange(0x20, 0x7E) + g["non-ascii"] = a.RuneRange(0x80, 
0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)) + g["non-eol"] = c.Any(a.Rune(0x09), g["printable-ascii"], g["non-ascii"]) + g["comment"] = g["comment-start-symbol"].Then(c.ZeroOrMore(g["non-eol"])) + + // Basic String + + g["escape-seq-char"] = c.Any( + a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), + a.Rune('u').Then(g["hex-digit"].Times(4)), + a.Rune('U').Then(g["hex-digit"].Times(8))) + g["escaped"] = g["escape"].Then(g["escape-seq-char"]) + g["basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["quotation-mark"].Or(g["escape"])), g["non-ascii"]) + g["basic-char"] = g["escaped"].Or(g["basic-unescaped"]) + g["basic-string"] = c.Seq(g["quotation-mark"], c.ZeroOrMore(g["basic-char"]), g["quotation-mark"]) + + // Multiline Basic String + + g["ml-basic-string-delim"] = g["quotation-mark"].Times(3) + g["ml-basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["backslash"]), g["non-ascii"]) + g["ml-basic-char"] = g["ml-basic-unescaped"].Or(g["escaped"]) + g["ml-basic-body-concat"] = c.Seq(g["escape"], g["ws"], g["newline"], c.ZeroOrMore(g["ws-or-newline"])) + g["ml-basic-body"] = c.ZeroOrMore(c.Any(g["ml-basic-char"], g["newline"], g["ml-basic-body-concat"])) + g["ml-basic-strinct"] = c.Seq(g["ml-basic-string-delim"], g["ml-basic-body"], g["ml-basic-string-delim"]) + + // Literal String + + g["literal-char"] = c.Any(g["tab"], g["printable-ascii"].Except(g["apostrophe"]), g["non-ascii"]) + g["literal-string"] = c.Seq(g["apostrophe"], c.ZeroOrMore(g["literal-char"]), g["apostrophe"]) + + // Multiline Literal String + + g["ml-literal-string-delim"] = g["apostrophe"].Times(3) + g["ml-literal-char"] = c.Any(g["tab"], g["printable-ascii"], g["non-ascii"]) + g["ml-literal-body"] = c.ZeroOrMore(g["ml-literal-char"].Or(g["newline"])) + g["ml-literal-string"] = c.Seq(g["ml-literal-string-delim"], g["ml-literal-body"], g["ml-literal-string-delim"]) + + // String + + g["string"] = c.Any(g["ml-basic-string"], g["basic-string"], g["ml-literal-string"], g["literal-string"]) + + // 
Integer + + g["digit1-9"] = a.DigitNotZero + g["underscore-int-digit"] = c.Any(g["digit"], g["underscore"].Then(g["digit"])) + g["unsiged-dec-int"] = c.Any(g["digit"], g["digit1-9"].Then(c.OneOrMore(g["underscore-int-digit"]))) + g["dec-int"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["unsigned-dec-int"]) + + g["hex-prefix"] = a.Zero.Then(a.Rune('x')) + g["underscore-hex-digit"] = c.Any(g["hex-digit"], g["underscore"].Then(g["hex-digit"])) + g["hex-int"] = c.Seq(g["hex-prefix"], g["hex-digit"], c.ZeroOrMore(g["underscore-hex-digit"])) + + g["oct-prefix"] = a.Zero.Then(a.Rune('o')) + g["digit0-7"] = a.RuneRange('0', '7') + g["underscore-oct-digit"] = c.Any(g["digit0-7"], g["underscore"].Then(g["digit0-7"])) + g["oct-int"] = c.Seq(g["oct-prefix"], g["digit0-7"], c.ZeroOrMore(g["underscore-oct-digit"])) + + g["bin-prefix"] = a.Zero.Then(a.Rune('b')) + g["digit0-1"] = a.Runes('0', '1') + g["underscore-bin-digit"] = c.Any(g["digit0-1"], g["underscore"].Then(g["digit0-1"])) + g["bin-int"] = c.Seq(g["bin-prefix"], g["digit0-1"], c.ZeroOrMore(g["underscore-bin-digit"])) + + g["integer"] = c.Any(g["dec-int"], g["hex-int"], g["oct-int"], g["bin-int"]) + + // Float + + g["float-int-part"] = g["dec-int"] + g["exp"] = a.StrNoCase("e").Then(g["float-int-part"]) + g["decimal-point"] = a.Dot + g["zero-prefixable-int"] = c.Seq(g["digit"], c.ZeroOrMore(g["underscore-int-digit"])) + g["frac"] = c.Seq(g["decimal-point"], g["zero-prefixable-int"]) + g["standard-float"] = c.Seq(g["float-int-part"], g["exp"].Or(g["frac"].Then(c.Optional(g["exp"])))) + + g["inf"] = a.Str("inf") + g["nan"] = a.Str("nan") + g["special-float"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["inf"].Or(g["nan"])) + + g["float"] = g["standard-float"].Or(g["special-float"]) + + // Boolean + + g["true"] = a.Str("true") + g["false"] = a.Str("false") + + g["boolean"] = g["true"].Or(g["false"]) + + // Date and time (as defined in RFC 3339) + + g["date-full-year"] = g["digit"].Times(4) + g["date-month"] = 
g["digit"].Times(2) + g["date-mday"] = g["digit"].Times(2) + g["time-delim"] = a.Runes('T', 't', ' ') + g["time-hour"] = g["digit"].Times(2) + g["time-minute"] = g["digit"].Times(2) + g["time-second"] = g["digit"].Times(2) + g["time-sec-frac"] = g["decimal-point"].Then(c.OneOrMore(g["digit"])) + g["time-num-offset"] = c.Seq(g["plus"].Or(g["minus"]), g["time-hour"], g["colon"], g["time-minute"]) + g["time-offset"] = c.Any(a.Runes('Z', 'z'), g["time-num-offset"]) + g["partial-time"] = c.Seq(g["time-hour"], g["colon"], g["time-minute"], g["colon"], g["time-second"], g["time-sec-frac"].Optional()) + g["full-time"] = c.Seq(g["partial-time"], g["time-offset"]) + g["full-date"] = c.Seq(g["date-full-year"], g["minus"], g["date-month"], g["minus"], g["date-mday"]) + + g["offset-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["full-time"]) + g["local-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["parial-time"]) + g["local-date"] = g["full-date"] + g["local-time"] = g["parial-time"] + + g["date-time"] = c.Any(g["offset-date-time"], g["local-date-time"], g["local-date"], g["local-time"]) + + // Array + + g["array-open"] = a.SquareOpen + g["array-close"] = a.SquareClose + g["ws-comment-newline"] = c.ZeroOrMore(g["wschar"].Or(g["comment"].Optional().Then(g["newline"]))) + g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val")) + g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional()) + g["array-sep"] = g["ws"].Then(a.Comma) + + g["array"] = c.Seq(g["array-open"], g["array-values"].Optional(), g["ws-comment-newline"], g["array-close"]) + + // Table + + g["table"] = g["std-table"].Or(g["array-table"]) + + // Standard Table + + g["std-table"] = c.Seq(g["std-table-open"], g.Recursive("key"), g["std-table-close"]) + + g["std-table-open"] = a.SquareOpen.Then(g["ws"]) + g["std-table-close"] = g["ws"].Then(a.SquareClose) + + // Inline Table + + g["inline-table"] = 
c.Seq(g["inline-table-open"], g["inline-table-keyvals"], g["inline-table-close"]) + + g["inline-table-open"] = a.CurlyOpen.Then(g["ws"]) + g["inline-table-close"] = g["ws"].Then(a.CurlyClose) + g["inline-table-sep"] = c.Seq(g["ws"], a.Comma, g["ws"]) + g["inline-table-keyval"] = c.Seq(g.Recursive("key"), g.Recursive("keyval-sep"), g.Recursive("val")) + g["inline-table-keyvals"] = c.Seq(g["inline-table-keyval"], c.ZeroOrMore(c.Seq(g["inline-table-sep"], g["inline-table-keyval"]))) + + // Array Table + + g["array-table"] = c.Seq(g["array-table-open"], g.Recursive("key"), g["array-table-close"]) + + g["array-table-open"] = a.SquareOpen.Times(2).Then(g["ws"]) + g["array-table-close"] = g["ws"].Then(a.SquareClose.Times(2)) + + // Key-Value Pairs + + g["unquoted-key"] = c.OneOrMore(c.Any(g["alpha"], g["digit"], g["minus"], g["underscore"])) + g["quoted-key"] = g["basic-string"].Or(g["literal-string"]) + g["dot-sep"] = c.Seq(g["ws"], a.Dot, g["ws"]) + g["simple-key"] = g["quoted-key"].Or(g["unquoted-key"]) + g["dotted-key"] = c.Seq(g["simple-key"], c.OneOrMore(g["dot-sep"].Then(g["simple_key"]))) + + g["key-val-sep"] = c.Seq(g["ws"], a.Equal, g["ws"]) + + g["val"] = c.Any(g["string"], g["boolean"], g["array"], g["inlineTable"], g["dateTime"], g["float"], g["integer"]) + + g["key"] = g["simple-key"].Or(g["dotted-key"]) + + g["keyval"] = c.Seq(g["key"], g["keyval-sep"], g["val"]) + + // Overall Structure + + g["expression"] = c.Any( + c.Seq(g["ws"], g["comment"].Optional()), + c.Seq(g["ws"], g["keyval"], g["ws"], g["comment"].Optional()), + c.Seq(g["ws"], g["table"], g["ws"], g["comment"].Optional())) + + g["toml"] = c.Seq(g["expression"], c.ZeroOrMore(g["newline"].Then(g["expression"]))) + + return g["toml"] +} diff --git a/parse2/grammar.go b/parse2/grammar.go new file mode 100644 index 0000000..27ca1d9 --- /dev/null +++ b/parse2/grammar.go @@ -0,0 +1,283 @@ +package main + +import ( + "fmt" + "log" + "math" + "os" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + 
+func main() { + toml := BuildGrammar() + fmt.Printf("Reading TOML document from STDIN ...\n") + result, err := toml.Match(os.Stdin) + fmt.Printf("Completed reading document.\n") + if err != nil { + log.Fatalf("Error in parsing TOML: %s\n", err) + } else { + fmt.Printf("Result:\n") + for i, t := range result.Tokens() { + fmt.Printf("[%d] %v\n", i, t) + } + } +} + +type Grammar map[string]tokenize.Handler + +func (g Grammar) Rule(name string, definition tokenize.Handler) { + if _, ok := g[name]; ok { + panic(fmt.Sprintf("Grammar rule %q already exists", name)) + } + g[name] = definition +} + +func (g Grammar) Get(name string) tokenize.Handler { + if handler, ok := g[name]; ok { + return handler + } + return func(t *tokenize.API) bool { + if handler, ok := g[name]; ok { + return handler(t) + } + panic(fmt.Sprintf("Grammar rule %q does not exist", name)) + } +} + +func BuildGrammar() tokenize.Handler { + + c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T + + g := make(Grammar) + R := g.Rule + G := g.Get + + R("alpha", a.Letter) + R("digit", a.Digit) + R("minus", a.Minus) + R("plus", a.Plus) + R("underscore", a.Underscore) + R("quotation-mark", a.DoubleQuote) + R("apostrophe", a.SingleQuote) + R("colon", a.Colon) + R("escape", a.Backslash) + R("hex-digit", a.HexDigit) + + // Whitespace, Newline + + R("tab", a.Tab) + R("space", a.Space) + R("wschar", G("tab").Or(G("space"))) + R("ws", c.ZeroOrMore(G("wschar"))) + R("newline", a.Newline) + R("wschar-or-newline", G("wschar").Or(G("newline"))) + + // Comment + + R("comment-start-symbol", a.Hash) + R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF))) + R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii"))) + R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol")))) + + // Basic String + + R("escape-seq-char", c.Any( + a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), + a.Rune('u').Then(G("hex-digit").Times(4)), + a.Rune('U').Then(G("hex-digit").Times(8)))) + 
R("escaped", G("escape").Then(G("escape-seq-char"))) + R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) + R("basic-char", G("escaped").Or(G("basic-unescaped"))) + R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark")))) + + // Multiline Basic String + + R("ml-basic-string-delim", G("quotation-mark").Times(3)) + R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) + R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped"))) + R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("wschar-or-newline")))) + R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat")))) + R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim")))) + R("ml-basic-string", c.Seq( + m.Drop(G("ml-basic-string-delim")), + m.Drop(c.Optional(G("newline"))), + G("ml-basic-body"), + m.Drop(G("ml-basic-string-delim")))) + + // Literal String + R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii"))) + R("literal-string", c.Seq( + m.Drop(G("apostrophe")), + c.ZeroOrMore(G("literal-char")), + m.Drop(G("apostrophe")))) + + // Multiline Literal String + + R("ml-literal-string-delim", G("apostrophe").Times(3)) + R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii"))) + R("ml-literal-body-content", G("ml-literal-char").Or(G("newline"))) + R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim")))) + R("ml-literal-string", c.Seq( + m.Drop(G("ml-literal-string-delim")), + G("ml-literal-body"), + m.Drop(G("ml-literal-string-delim")))) + + // String + + R("string", c.Any( + tok.StrInterpreted("string", G("ml-basic-string")), + tok.StrInterpreted("string", G("basic-string")), + tok.Str("string", G("ml-literal-string")), + 
tok.Str("string", G("literal-string")))) + + // Integer + + R("digit1-9", a.DigitNotZero) + R("underscore-int-digit", c.Any(G("digit"), m.Drop(G("underscore")).Then(G("digit")))) + R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit"))) + R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int"))) + + R("hex-prefix", a.Zero.Then(a.Rune('x'))) + R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit")))) + R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit")))) + + R("oct-prefix", a.Zero.Then(a.Rune('o'))) + R("digit0-7", a.RuneRange('0', '7')) + R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7")))) + R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit")))) + + R("bin-prefix", a.Zero.Then(a.Rune('b'))) + R("digit0-1", a.Runes('0', '1')) + R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1")))) + R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit")))) + + R("integer", c.Any( + tok.Int64Base("integer", 16, G("hex-int")), + tok.Int64Base("integer", 8, G("oct-int")), + tok.Int64Base("integer", 2, G("bin-int")), + tok.Int64("integer", G("dec-int")))) + + // Float + + R("float-int-part", G("dec-int")) + R("exp", a.StrNoCase("e").Then(G("float-int-part"))) + R("decimal-point", a.Dot) + R("zero-prefixable-int", c.Seq(G("digit"), c.ZeroOrMore(G("underscore-int-digit")))) // keep every fraction digit; underscore-int-digit already drops the underscores itself + R("frac", c.Seq(G("decimal-point"), G("zero-prefixable-int"))) + R("standard-float", c.Seq(G("float-int-part"), G("exp").Or(G("frac").Then(c.Optional(G("exp")))))) + + R("inf-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("inf"))) + + R("nan-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("nan"))) + + R("float", c.Any( + tok.Float64("float", G("standard-float")), +
tok.ByCallback("float", G("inf-float"), func(t *tokenize.API) interface{} { + if t.Result().Rune(0) == '-' { + return math.Inf(-1) + } + return math.Inf(+1) + }), + tok.ByValue("float", G("nan-float"), math.NaN()))) + + // Boolean + + R("true", a.Str("true")) + R("false", a.Str("false")) + + R("boolean", tok.Boolean("boolean", G("true").Or(G("false")))) + + // Date and time (as defined in RFC 3339) + + R("date-full-year", G("digit").Times(4)) + R("date-month", G("digit").Times(2)) + R("date-mday", G("digit").Times(2)) + R("time-delim", a.Runes('T', 't', ' ')) + R("time-hour", G("digit").Times(2)) + R("time-minute", G("digit").Times(2)) + R("time-second", G("digit").Times(2)) + R("time-sec-frac", G("decimal-point").Then(c.OneOrMore(G("digit")))) + R("time-zulu", a.Runes('Z', 'z')) + R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute"))) + R("time-offset", c.Any(G("time-zulu"), G("time-num-offset"))) + R("partial-time", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"), G("time-sec-frac").Optional())) + R("full-time", c.Seq(G("partial-time"), G("time-offset"))) + R("full-date", c.Seq(G("date-full-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))) + + R("offset-date-time", c.Seq(G("full-date"), G("time-delim"), G("full-time"))) + R("local-date-time", c.Seq(G("full-date"), G("time-delim"), G("partial-time"))) + R("local-date", G("full-date")) + R("local-time", G("partial-time")) + + R("date-time", c.Any(G("offset-date-time"), G("local-date-time"), G("local-date"), G("local-time"))) + + // Inline Table + + R("inline-table-open", a.CurlyOpen.Then(G("ws"))) + R("inline-table-close", G("ws").Then(a.CurlyClose)) + R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws"))) + R("inline-table-keyval", tok.Group("inline-table-keyval", c.Seq(G("key"), G("keyval-sep"), G("val")))) + R("inline-table-keyvals", c.Seq(G("inline-table-keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), 
G("inline-table-keyval"))))) + + R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals").Optional(), G("inline-table-close")))) // key/value pairs are optional, so an empty inline table "{}" is accepted + + // Array + + R("array-open", a.SquareOpen) + R("array-close", a.SquareClose) + R("array-sep", G("ws").Then(a.Comma)) + R("ws-comment-newline", c.ZeroOrMore(G("wschar").Or(G("comment").Optional().Then(G("newline"))))) + R("array-values", c.Any( + c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep"), G("array-values")), + c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep").Optional()))) + + R("inline-array", tok.Group("inline-array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close")))) + + // Standard Table + + R("std-table-open", a.SquareOpen.Then(G("ws"))) + R("std-table-close", G("ws").Then(a.SquareClose)) + + R("std-table", c.Seq(G("std-table-open"), tok.Group("table", G("key")), G("std-table-close"))) + + // Array Table + + R("array-table", c.Seq(G("array-table-open"), tok.Group("array-of-tables", G("key")), G("array-table-close"))) + + R("array-table-open", a.SquareOpen.Times(2).Then(G("ws"))) + R("array-table-close", G("ws").Then(a.SquareClose.Times(2))) + + // Table + + R("table", G("array-table").Or(G("std-table"))) + + // Key-Value Pairs + + R("unquoted-key", c.OneOrMore(c.Any(G("alpha"), G("digit"), G("minus"), G("underscore")))) + R("quoted-key", G("basic-string").Or(G("literal-string"))) + R("key-sep", c.Seq(G("ws"), a.Dot, G("ws"))) + R("simple-key", tok.Str("key-part", G("quoted-key").Or(G("unquoted-key")))) + R("dotted-key", c.Seq(G("simple-key"), c.OneOrMore(G("key-sep").Then(G("simple-key"))))) + + R("keyval-sep", c.Seq(G("ws"), a.Equal, G("ws"))) + + R("key", tok.Group("key", G("dotted-key").Or(G("simple-key")))) + + R("val", tok.Group("val", c.Any(G("string"), G("boolean"), G("inline-array"), G("inline-table"), G("date-time"), G("float"), G("integer")))) + + R("keyval", tok.Group("keyval", c.Seq(G("key"),
G("keyval-sep"), G("val")))) + + // Overall Structure + + R("expression", c.Any( + c.Seq(G("ws"), G("table"), G("ws"), G("comment").Optional()), + c.Seq(G("ws"), G("keyval"), G("ws"), G("comment").Optional()), + c.Seq(G("ws"), G("comment").Optional()), + )) + + //R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile)) + R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))))) + + return G("toml") +} diff --git a/parse2/grammar.go.bak b/parse2/grammar.go.bak new file mode 100644 index 0000000..c5758bf --- /dev/null +++ b/parse2/grammar.go.bak @@ -0,0 +1,221 @@ +package parse2 + +import "git.makaay.nl/mauricem/go-parsekit/tokenize" + +var ( + c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T + + // Overall Structure + + toml = c.Seq(expression, c.ZeroOrMore(newline.Then(expression))) + + expression = c.Any( + c.Seq(ws, comment.Optional()), + c.Seq(ws, keyval, ws, comment.Optional()), + c.Seq(ws, table, ws, comment.Optional())) + + // ABNF definitions + + alpha = a.Letter + digit = a.Digit + + // Whitespace, Newline + + ws = c.ZeroOrMore(wschar) + tab = a.Tab + space = a.Space + wschar = tab.Or(space) + + newline = a.Newline + + wsOrNewline = ws.Or(newline) + + // Comment + + comment = commentStartSymbol.Then(c.ZeroOrMore(nonEOL)) + + commentStartSymbol = a.Hash + nonEOL = c.Any(a.Rune(0x09), printableASCII, nonASCII) + printableASCII = a.RuneRange(0x20, 0x7E) + nonASCII = a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)) + + // Key-Value Pairs + + keyval = c.Seq(key, keyvalSep, val) + + key = simpleKey.Or(dottedKey) + simpleKey = quotedKey.Or(unquotedKey) + + unquotedKey = c.OneOrMore(c.Any(alpha, digit, minus, underscore)) + quotedKey = basicString.Or(literalString) + dottedKey = c.Seq(simpleKey, c.OneOrMore(dotSep.Then(simpleKey))) + + dotSep = c.Seq(ws, a.Dot, ws) + keyvalSep = c.Seq(ws, a.Equal, ws) + + val = c.Any(string, boolean, array, inlineTable, dateTime, float, 
integer) + + // String + + string = c.Any(mlBasicString, basicString, mlLiteralString, literalString) + + // Basic String + + basicString = c.Seq(quotationMark, c.ZeroOrMore(basicChar), quotationMark) + + quotationMark = a.DoubleQuote + + basicChar = escaped.Or(basicUnescaped) + basicUnescaped = c.Any(printableASCII.Except(quotationMark.Or(escape)), nonASCII) + escaped = escape.Then(escapeSeqChar) + + escape = a.Backslash + escapeSeqChar = c.Any( + a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), + a.Rune('u').Then(a.HexDigit.Times(4)), + a.Rune('U').Then(a.HexDigit.Times(8))) + + // Multiline Basic String + + mlBasicString = c.Seq(mlBasicStringDelim, mlBasicBody, mlBasicStringDelim) + + mlBasicStringDelim = quotationMark.Times(3) + + mlBasicBody = c.ZeroOrMore(c.Any(mlBasicChar, newline, mlBasicBodyConcat)) + mlBasicChar = mlBasicUnescaped.Or(escaped) + mlBasicUnescaped = c.Any(printableASCII.Except(a.Backslash), nonASCII) + mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(wsOrNewline)) + + // Literal String + + literalString = c.Seq(apostrophe, c.ZeroOrMore(literalChar), apostrophe) + + apostrophe = a.SingleQuote + + literalChar = c.Any(a.Tab, printableASCII.Except(apostrophe), nonASCII) + + // Multiline Literal String + + mlLiteralString = c.Seq(mlLiteralStringDelim, mlLiteralBody, mlLiteralStringDelim) + + mlLiteralStringDelim = apostrophe.Times(3) + + mlLiteralBody = c.ZeroOrMore(mlLiteralChar.Or(newline)) + mlLiteralChar = c.Any(a.Tab, printableASCII, nonASCII) + + // Integer + + integer = c.Any(decInt, hexInt, octInt, binInt) + + minus = a.Minus + plus = a.Plus + underscore = a.Underscore + + decInt = c.Optional(plus.Or(minus)).Then(unsignedDecInt) + unsignedDecInt = c.Any(digit, digit1to9.Then(c.OneOrMore(intDigitOrUnderscoreIntDigit))) + digit1to9 = a.DigitNotZero + intDigitOrUnderscoreIntDigit = c.Any(digit, underscore.Then(digit)) + + hexInt = c.Seq(hexPrefix, hexDigit, c.ZeroOrMore(hexDigitOrUnderscoreHexDigit)) + hexPrefix = 
a.Zero.Then(a.Rune('x')) + hexDigit = a.HexDigit + hexDigitOrUnderscoreHexDigit = c.Any(hexDigit, underscore.Then(hexDigit)) + + octInt = c.Seq(octPrefix, digit0to7, c.ZeroOrMore(octDigitOrUnderscoreOctDigit)) + octPrefix = a.Zero.Then(a.Rune('o')) + digit0to7 = a.RuneRange('0', '7') + octDigitOrUnderscoreOctDigit = c.Any(digit0to7, underscore.Then(digit0to7)) + + binInt = c.Seq(binPrefix, digit0to1, c.ZeroOrMore(binDigitOrUnderscoreBinDigit)) + binPrefix = a.Zero.Then(a.Rune('b')) + digit0to1 = a.Runes('0', '1') + binDigitOrUnderscoreBinDigit = c.Any(digit0to1, underscore.Then(digit0to1)) + + // Float + + float = standardFloat.Or(specialFloat) + + standardFloat = c.Seq(floatIntPart, exp.Or(frac.Then(c.Optional(exp)))) + floatIntPart = decInt + exp = a.StrNoCase("e").Then(floatIntPart) + frac = c.Seq(decimalPoint, zeroPrefixableInt) + decimalPoint = a.Dot + zeroPrefixableInt = c.Seq(digit, c.ZeroOrMore(intDigitOrUnderscoreIntDigit)) + + specialFloat = c.Optional(plus.Or(minus)).Then(inf.Or(nan)) + inf = a.Str("inf") + nan = a.Str("nan") + + // Boolean + + boolean = boolTrue.Or(boolFalse) + + boolTrue = a.Str("true") + boolFalse = a.Str("false") + + // Date and time (as defined in RFC 3339) + + dateTime = c.Any(offsetDateTime, localDateTime, localDate, localTime) + + offsetDateTime = c.Seq(fullDate, timeDelim, fullTime) + localDateTime = c.Seq(fullDate, timeDelim, partialTime) + localDate = fullDate + localTime = partialTime + + dateFullYear = digit.Times(4) + dateMonth = digit.Times(2) + dateMday = digit.Times(2) + timeDelim = a.Runes('T', 't', ' ') + timeHour = digit.Times(2) + timeMinute = digit.Times(2) + timeSecond = digit.Times(2) + timeSecfrac = a.Dot.Then(c.OneOrMore(digit)) + timeNumOffset = c.Seq(plus.Or(minus), timeHour, a.Colon, timeMinute) + timeOffset = c.Any(a.Runes('Z', 'z'), timeNumOffset) + partialTime = c.Seq(timeHour, a.Colon, timeMinute, a.Colon, timeSecond, timeSecfrac.Optional()) + fullTime = c.Seq(partialTime, timeOffset) + fullDate = 
c.Seq(dateFullYear, minus, dateMonth, minus, dateMday) + + // Array + + array = c.Seq(arrayOpen, arrayvalues.Optional(), wsCommentNewline, arrayClose) + + arrayOpen = a.SquareOpen + arrayClose = a.SquareClose + arrayvalues = c.Seq(arrayValue, c.ZeroOrMore(c.Seq(arraySep, arrayValue)), arraySep.Optional()) + arraySep = ws.Then(a.Comma) + arrayValue = wsCommentNewline.Then(val) + wsCommentNewline = c.ZeroOrMore(wschar.Or(comment.Optional().Then(newline))) + + // Table + + table = stdTable.Or(arrayTable) + + // Standard Table + + stdTable = c.Seq(stdTableOpen, key, stdTableClose) + + stdTableOpen = a.SquareOpen.Then(ws) + stdTableClose = ws.Then(a.SquareClose) + + // Inline Table + + inlineTable = c.Seq(inlineTableOpen, inlineTableKeyvals, inlineTableClose) + + inlineTableOpen = a.CurlyOpen.Then(ws) + inlineTableClose = ws.Then(a.CurlyClose) + inlineTableKeyvals = c.Seq(inlineTableKeyval, c.ZeroOrMore(c.Seq(inlineTableSep, inlineTableKeyval))) + inlineTableKeyval = c.Seq(key, keyvalSep, val) + inlineTableSep = c.Seq(ws, a.Comma, ws) + + // Array Table + + arrayTable = c.Seq(arrayTableOpen, key, arrayTableClose) + + arrayTableOpen = a.SquareOpen.Times(2).Then(ws) + arrayTableClose = ws.Then(a.SquareClose.Times(2)) +) + +func init() { + +} diff --git a/parse2/toml.abnf b/parse2/toml.abnf new file mode 100644 index 0000000..ecfed60 --- /dev/null +++ b/parse2/toml.abnf @@ -0,0 +1,234 @@ +;; WARNING: This document is a work-in-progress and should not be considered +;; authoritative until further notice. + +;; This is an attempt to define TOML in ABNF according to the grammar defined +;; in RFC 5234 (http://www.ietf.org/rfc/rfc5234.txt). + +;; You can try out this grammar using http://instaparse.mojombo.com/ +;; To do so, in the lower right, click on Options and change `:input-format` to +;; ':abnf'. Then paste this entire ABNF document into the grammar entry box +;; (above the options). Then you can type or paste a sample TOML document into +;; the beige box on the left. 
Tada! + +;; Overall Structure + +toml = expression *( newline expression ) + +expression = ws [ comment ] +expression =/ ws keyval ws [ comment ] +expression =/ ws table ws [ comment ] + +;; Whitespace + +ws = *wschar +wschar = %x20 ; Space +wschar =/ %x09 ; Horizontal tab + +;; Newline + +newline = %x0A ; LF +newline =/ %x0D.0A ; CRLF + +;; Comment + +comment-start-symbol = %x23 ; # +non-ascii = %x80-D7FF / %xE000-10FFFF +non-eol = %x09 / %x20-7F / non-ascii + +comment = comment-start-symbol *non-eol + +;; Key-Value pairs + +keyval = key keyval-sep val + +key = simple-key / dotted-key +simple-key = quoted-key / unquoted-key + +unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ +quoted-key = basic-string / literal-string +dotted-key = simple-key 1*( dot-sep simple-key ) + +dot-sep = ws %x2E ws ; . Period +keyval-sep = ws %x3D ws ; = + +val = string / boolean / array / inline-table / date-time / float / integer + +;; String + +string = ml-basic-string / basic-string / ml-literal-string / literal-string + +;; Basic String + +basic-string = quotation-mark *basic-char quotation-mark + +quotation-mark = %x22 ; " + +basic-char = basic-unescaped / escaped +basic-unescaped = %x20-21 / %x23-5B / %x5D-7E / non-ascii +escaped = escape escape-seq-char + +escape = %x5C ; \ +escape-seq-char = %x22 ; " quotation mark U+0022 +escape-seq-char =/ %x5C ; \ reverse solidus U+005C +escape-seq-char =/ %x62 ; b backspace U+0008 +escape-seq-char =/ %x66 ; f form feed U+000C +escape-seq-char =/ %x6E ; n line feed U+000A +escape-seq-char =/ %x72 ; r carriage return U+000D +escape-seq-char =/ %x74 ; t tab U+0009 +escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX +escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX + +;; Multiline Basic String + +ml-basic-string = ml-basic-string-delim ml-basic-body ml-basic-string-delim + +ml-basic-string-delim = 3quotation-mark + +ml-basic-body = *( ml-basic-char / newline / ( escape ws newline ) ) +ml-basic-char = ml-basic-unescaped 
/ escaped +ml-basic-unescaped = %x20-5B / %x5D-7E / non-ascii + +;; Literal String + +literal-string = apostrophe *literal-char apostrophe + +apostrophe = %x27 ; ' apostrophe + +literal-char = %x09 / %x20-26 / %x28-7E / non-ascii + +;; Multiline Literal String + +ml-literal-string = ml-literal-string-delim ml-literal-body ml-literal-string-delim + +ml-literal-string-delim = 3apostrophe + +ml-literal-body = *( ml-literal-char / newline ) +ml-literal-char = %x09 / %x20-7E / non-ascii + +;; Integer + +integer = dec-int / hex-int / oct-int / bin-int + +minus = %x2D ; - +plus = %x2B ; + +underscore = %x5F ; _ +digit1-9 = %x31-39 ; 1-9 +digit0-7 = %x30-37 ; 0-7 +digit0-1 = %x30-31 ; 0-1 + +hex-prefix = %x30.78 ; 0x +oct-prefix = %x30.6f ; 0o +bin-prefix = %x30.62 ; 0b + +dec-int = [ minus / plus ] unsigned-dec-int +unsigned-dec-int = DIGIT / digit1-9 1*( DIGIT / underscore DIGIT ) + +hex-int = hex-prefix HEXDIG *( HEXDIG / underscore HEXDIG ) +oct-int = oct-prefix digit0-7 *( digit0-7 / underscore digit0-7 ) +bin-int = bin-prefix digit0-1 *( digit0-1 / underscore digit0-1 ) + +;; Float + +float = float-int-part ( exp / frac [ exp ] ) +float =/ special-float + +float-int-part = dec-int +frac = decimal-point zero-prefixable-int +decimal-point = %x2E ; . +zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) + +exp = "e" float-int-part + +special-float = [ minus / plus ] ( inf / nan ) +inf = %x69.6e.66 ; inf +nan = %x6e.61.6e ; nan + +;; Boolean + +boolean = true / false + +true = %x74.72.75.65 ; true +false = %x66.61.6C.73.65 ; false + +;; Date and Time (as defined in RFC 3339) + +date-time = offset-date-time / local-date-time / local-date / local-time + +date-fullyear = 4DIGIT +date-month = 2DIGIT ; 01-12 +date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +time-delim = "T" / %x20 ; T, t, or space +time-hour = 2DIGIT ; 00-23 +time-minute = 2DIGIT ; 00-59 +time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules +time-secfrac = "." 
1*DIGIT +time-numoffset = ( "+" / "-" ) time-hour ":" time-minute +time-offset = "Z" / time-numoffset + +partial-time = time-hour ":" time-minute ":" time-second [ time-secfrac ] +full-date = date-fullyear "-" date-month "-" date-mday +full-time = partial-time time-offset + +;; Offset Date-Time + +offset-date-time = full-date time-delim full-time + +;; Local Date-Time + +local-date-time = full-date time-delim partial-time + +;; Local Date + +local-date = full-date + +;; Local Time + +local-time = partial-time + +;; Array + +array = array-open [ array-values ] ws-comment-newline array-close + +array-open = %x5B ; [ +array-close = %x5D ; ] + +array-values = ws-comment-newline val ws array-sep array-values +array-values =/ ws-comment-newline val ws [ array-sep ] + +array-sep = %x2C ; , Comma + +ws-comment-newline = *( wschar / [ comment ] newline ) + +;; Table + +table = std-table / array-table + +;; Standard Table + +std-table = std-table-open key std-table-close + +std-table-open = %x5B ws ; [ Left square bracket +std-table-close = ws %x5D ; ] Right square bracket + +;; Inline Table + +inline-table = inline-table-open [ inline-table-keyvals ] inline-table-close + +inline-table-open = %x7B ws ; { +inline-table-close = ws %x7D ; } +inline-table-sep = ws %x2C ws ; , Comma + +inline-table-keyvals = key keyval-sep val [ inline-table-sep inline-table-keyvals ] + +;; Array Table + +array-table = array-table-open key array-table-close + +array-table-open = %x5B.5B ws ; [[ Double left square bracket +array-table-close = ws %x5D.5D ; ]] Double right square bracket + +;; Built-in ABNF terms, reproduced here for clarity + +ALPHA = %x41-5A / %x61-7A ; A-Z / a-z +DIGIT = %x30-39 ; 0-9 +HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" \ No newline at end of file diff --git a/parse2/x b/parse2/x new file mode 100644 index 0000000..6d667bf --- /dev/null +++ b/parse2/x @@ -0,0 +1,272 @@ +# From: https://github.com/toml-lang/toml/blob/master/examples/example-v0.3.0.toml 
+################################################################################ +## Comment + +# Speak your mind with the hash symbol. They go from the symbol to the end of +# the line. + + +################################################################################ +## Table + +# Tables (also known as hash tables or dictionaries) are collections of +# key/value pairs. They appear in square brackets on a line by themselves. + +[table] + +key = "value" # Yeah, you can do this. + +# Nested tables are denoted by table names with dots in them. Name your tables +# whatever crap you please, just don't use #, ., [ or ]. + +[table.subtable] + +key = "another value" + +# You don't need to specify all the super-tables if you don't want to. TOML +# knows how to do it for you. + +# [x] you +# [x.y] don't +# [x.y.z] need these +[x.y.z.w] # for this to work + + +################################################################################ +## Inline Table + +# Inline tables provide a more compact syntax for expressing tables. They are +# especially useful for grouped data that can otherwise quickly become verbose. +# Inline tables are enclosed in curly braces `{` and `}`. No newlines are +# allowed between the curly braces unless they are valid within a value. + +[table.inline] + +name = { first = "Tom", last = "Preston-Werner" } +point = { x = 1, y = 2 } + + +################################################################################ +## String + +# There are four ways to express strings: basic, multi-line basic, literal, and +# multi-line literal. All strings must contain only valid UTF-8 characters. + +[string.basic] + +basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." 
+ +[string.multiline] + +# The following strings are byte-for-byte equivalent: +key1 = "One\nTwo" +key2 = """One\nTwo""" +key3 = """ +One +Two""" + +[string.multiline.continued] + +# The following strings are byte-for-byte equivalent: +key1 = "The quick brown fox jumps over the lazy dog." + +key1.1 = """The quick brown fox jumps over the lazy dog.""" + +key2 = """ +The quick brown \ + fox jumps over \ + the lazy dog.""" + +key3 = """\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """ + +[string.literal] + +# What you see is what you get. +winpath = 'C:\Users\nodejs\templates' +winpath2 = '\\ServerX\admin$\system32\' +quoted = 'Tom "Dubs" Preston-Werner' +regex = '<\i\c*\s*>' + + +[string.literal.multiline] + +regex2 = '''I [dw]on't need \d{2} apples''' +lines = ''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +''' + + +################################################################################ +## Integer + +# Integers are whole numbers. Positive numbers may be prefixed with a plus sign. +# Negative numbers are prefixed with a minus sign. + +[integer] + +key1 = +99 +key2 = 42 +key3 = 0 +key4 = -17 + +[integer.underscores] + +# For large numbers, you may use underscores to enhance readability. Each +# underscore must be surrounded by at least one digit. +key1 = 1_000 +key2 = 5_349_221 +key3 = 1_2_3_4_5 # valid but inadvisable + + +################################################################################ +## Float + +# A float consists of an integer part (which may be prefixed with a plus or +# minus sign) followed by a fractional part and/or an exponent part. 
+ +[float.fractional] + +key1 = +1.0 +key2 = 3.1415 +key3 = -0.01 + +[float.exponent] + +key1 = 5e+22 +key2 = 1e6 +key3 = -2E-2 + +[float.both] + +key = 6.626e-34 + +[float.underscores] + +key1 = 9_224_617.445_991_228_313 +key2 = 1e1_00 # modified by mmakaay, because 1e1000 yields an out of range error + +################################################################################ +## Boolean + +# Booleans are just the tokens you're used to. Always lowercase. + +[boolean] + +True = true +False = false + + +################################################################################ +## Datetime + +# Datetimes are RFC 3339 dates. + +[datetime] + +key1 = 1979-05-27T07:32:00Z +key2 = 1979-05-27T00:32:00-07:00 +key3 = 1979-05-27T00:32:00.999999-07:00 + + +################################################################################ +## Array + +# Arrays are square brackets with other primitives inside. Whitespace is +# ignored. Elements are separated by commas. Data types may not be mixed. + +[array] + +key1 = [ 1, 2, 3 ] +key2 = [ "red", "yellow", "green" ] +key3 = [ [ 1, 2 ], [3, 4, 5] ] +key4 = [ [ 1, 2 ], ["a", "b", "c"] ] # this is ok + +# Arrays can also be multiline. So in addition to ignoring whitespace, arrays +# also ignore newlines between the brackets. Terminating commas are ok before +# the closing bracket. + +key5 = [ + 1, 2, 3 +] +key6 = [ + 1, + 2, # this is ok +] + + +################################################################################ +## Array of Tables + +# These can be expressed by using a table name in double brackets. Each table +# with the same double bracketed name will be an element in the array. The +# tables are inserted in the order encountered. + +[[products]] + +name = "Hammer" +sku = 738594937 + +[[products]] + +[[products]] + +name = "Nail" +sku = 284758393 +color = "gray" + + +# You can create nested arrays of tables as well. 
+ +[[fruit]] + name = "apple" + + [fruit.physical] + color = "red" + shape = "round" + + [[fruit.variety]] + name = "red delicious" + + [[fruit.variety]] + name = "granny smith" + +[[fruit]] + name = "banana" + + [[fruit.variety]] + name = "plantain" + +[float.special] + +nan1 = nan +nan2 = +nan +nan3 = -nan + +inf1 = inf +inf2 = +inf +inf3 = -inf + +[int.special] + +bin1 = 0b0 +bin2 = 0b1 +bin3 = 0b1010101 + +oct1 = 0o0 +oct2 = 0o1 +oct3 = 0o755 + +het.is.een.hex1 = 0x0 +het.is.een.hex2 = 0x1 +het.is.een.hex3 = 0xffffffff + +go = [1,2,3, +4,5, +6,] diff --git a/parse2/y b/parse2/y new file mode 100644 index 0000000..169063e --- /dev/null +++ b/parse2/y @@ -0,0 +1,245 @@ +# From: https://github.com/toml-lang/toml/blob/master/examples/example-v0.3.0.toml +################################################################################ +## Comment + +# Speak your mind with the hash symbol. They go from the symbol to the end of +# the line. + + +################################################################################ +## Table + +# Tables (also known as hash tables or dictionaries) are collections of +# key/value pairs. They appear in square brackets on a line by themselves. + +[table] + +key = "value" # Yeah, you can do this. + +# Nested tables are denoted by table names with dots in them. Name your tables +# whatever crap you please, just don't use #, ., [ or ]. + +[table.subtable] + +key = "another value" + +# You don't need to specify all the super-tables if you don't want to. TOML +# knows how to do it for you. + +# [x] you +# [x.y] don't +# [x.y.z] need these +[x.y.z.w] # for this to work + + +################################################################################ +## Inline Table + +# Inline tables provide a more compact syntax for expressing tables. They are +# especially useful for grouped data that can otherwise quickly become verbose. +# Inline tables are enclosed in curly braces `{` and `}`. 
No newlines are +# allowed between the curly braces unless they are valid within a value. + +[table.inline] + +name = { first = "Tom", last = "Preston-Werner" } +point = { x = 1, y = 2 } + + +################################################################################ +## String + +# There are four ways to express strings: basic, multi-line basic, literal, and +# multi-line literal. All strings must contain only valid UTF-8 characters. + +[string.basic] + +basic = "I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." + +[string.multiline] + +# The following strings are byte-for-byte equivalent: +key1 = "One\nTwo" +key2 = """One\nTwo""" +key3 = """ +One +Two""" + +[string.multiline.continued] + +# The following strings are byte-for-byte equivalent: +key1 = "The quick brown fox jumps over the lazy dog." + +key1.1 = """The quick brown fox jumps over the lazy dog.""" + +key2 = """ +The quick brown \ + fox jumps over \ + the lazy dog.""" + +key3 = """\ + The quick brown \ + fox jumps over \ + the lazy dog.\ + """ + +[string.literal] + +# What you see is what you get. +winpath = 'C:\Users\nodejs\templates' +winpath2 = '\\ServerX\admin$\system32\' +quoted = 'Tom "Dubs" Preston-Werner' +regex = '<\i\c*\s*>' + + +[string.literal.multiline] + +regex2 = '''I [dw]on't need \d{2} apples''' +lines = ''' +The first newline is +trimmed in raw strings. + All other whitespace + is preserved. +''' + + +################################################################################ +## Integer + +# Integers are whole numbers. Positive numbers may be prefixed with a plus sign. +# Negative numbers are prefixed with a minus sign. + +[integer] + +key1 = +99 +key2 = 42 +key3 = 0 +key4 = -17 + +[integer.underscores] + +# For large numbers, you may use underscores to enhance readability. Each +# underscore must be surrounded by at least one digit. 
+key1 = 1_000 +key2 = 5_349_221 +key3 = 1_2_3_4_5 # valid but inadvisable + + +################################################################################ +## Float + +# A float consists of an integer part (which may be prefixed with a plus or +# minus sign) followed by a fractional part and/or an exponent part. + +[float.fractional] + +key1 = +1.0 +key2 = 3.1415 +key3 = -0.01 + +[float.exponent] + +key1 = 5e+22 +key2 = 1e6 +key3 = -2E-2 + +[float.both] + +key = 6.626e-34 + +[float.underscores] + +key1 = 9_224_617.445_991_228_313 +key2 = 1e1_00 # modified by mmakaay, because 1e1000 yields an out of range error + + +################################################################################ +## Boolean + +# Booleans are just the tokens you're used to. Always lowercase. + +[boolean] + +True = true +False = false + + +################################################################################ +## Datetime + +# Datetimes are RFC 3339 dates. + +[datetime] + +key1 = 1979-05-27T07:32:00Z +key2 = 1979-05-27T00:32:00-07:00 +key3 = 1979-05-27T00:32:00.999999-07:00 + + +################################################################################ +## Array + +# Arrays are square brackets with other primitives inside. Whitespace is +# ignored. Elements are separated by commas. Data types may not be mixed. + +[array] + +key1 = [ 1, 2, 3 ] +key2 = [ "red", "yellow", "green" ] +key3 = [ [ 1, 2 ], [3, 4, 5] ] +key4 = [ [ 1, 2 ], ["a", "b", "c"] ] # this is ok + +# Arrays can also be multiline. So in addition to ignoring whitespace, arrays +# also ignore newlines between the brackets. Terminating commas are ok before +# the closing bracket. + +key5 = [ + 1, 2, 3 +] +key6 = [ + 1, + 2, # this is ok +] + + +################################################################################ +## Array of Tables + +# These can be expressed by using a table name in double brackets. Each table +# with the same double bracketed name will be an element in the array. 
The +# tables are inserted in the order encountered. + +[[products]] + +name = "Hammer" +sku = 738594937 + +[[products]] + +[[products]] + +name = "Nail" +sku = 284758393 +color = "gray" + + +# You can create nested arrays of tables as well. + +[[fruit]] + name = "apple" + + [fruit.physical] + color = "red" + shape = "round" + + [[fruit.variety]] + name = "red delicious" + + [[fruit.variety]] + name = "granny smith" + +[[fruit]] + name = "banana" + + [[fruit.variety]] + name = "plantain"