// Package parse2 builds a tokenizer grammar for TOML documents out of
// go-parsekit tokenize handlers.
package parse2

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// Grammar is a registry of named grammar rules. Every rule is a
// tokenize.Handler that is stored under its rule name.
type Grammar map[string]tokenize.Handler

// Rule registers definition under the provided rule name.
// It panics when a rule with that name has already been registered.
func (g Grammar) Rule(name string, definition tokenize.Handler) {
	if _, ok := g[name]; ok {
		panic(fmt.Sprintf("Grammar rule %q already exists", name))
	}
	g[name] = definition
}

// Get returns the handler for the rule with the provided name.
//
// When the rule is already registered, its handler is returned as-is.
// Otherwise a wrapper handler is returned that looks up the rule at the
// moment that it is invoked. This makes it possible to reference rules that
// are defined later on (forward references) and rules that are recursive.
// The wrapper panics when the rule still does not exist when it is invoked.
func (g Grammar) Get(name string) tokenize.Handler {
	if h, ok := g[name]; ok {
		return h
	}
	return func(t *tokenize.API) bool {
		h := g[name]
		if h == nil {
			panic(fmt.Sprintf("Grammar rule %q does not exist", name))
		}
		return h(t)
	}
}

// BuildGrammar constructs the grammar rules for a TOML document and returns
// the handler for the top level "toml" rule.
//
// Rules that are referenced before they are defined (or that are recursive,
// like "val") are referenced through G(name), which resolves the rule lazily.
// Rules that are already defined are read directly from the Grammar map.
//
// NOTE(review): the rule names and their order look like a direct translation
// of the TOML ABNF. If the tokenize combinators use ordered choice and greedy
// repetition, then some rules might need reordering or tightening (e.g.
// "key" trying "simple-key" before "dotted-key", "integer" trying "dec-int"
// before the prefixed forms, "unsigned-dec-int" trying a single digit first,
// and the multiline string bodies accepting their own closing delimiter).
// This has not been changed here; confirm against the tokenize package.
func BuildGrammar() tokenize.Handler {
	c, a := tokenize.C, tokenize.A

	g := make(Grammar)
	R := g.Rule
	G := g.Get

	R("alpha", a.Letter)
	R("digit", a.Digit)
	g["minus"] = a.Minus
	g["plus"] = a.Plus
	g["underscore"] = a.Underscore
	g["quotation-mark"] = a.DoubleQuote
	g["apostrophe"] = a.SingleQuote
	g["colon"] = a.Colon
	g["escape"] = a.Backslash
	g["hex-digit"] = a.HexDigit

	// Whitespace, Newline
	g["tab"] = a.Tab
	g["space"] = a.Space
	g["wschar"] = g["tab"].Or(g["space"])
	g["ws"] = c.ZeroOrMore(g["wschar"])
	g["newline"] = a.Newline
	// Built on "wschar" and not on "ws": "ws" is a zero-or-more rule, which
	// would make the "newline" alternative unreachable. This rule is wrapped
	// in ZeroOrMore by "ml-basic-body-concat".
	g["ws-or-newline"] = g["wschar"].Or(g["newline"])

	// Comment
	g["comment-start-symbol"] = a.Hash
	g["printable-ascii"] = a.RuneRange(0x20, 0x7E)
	g["non-ascii"] = a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF))
	g["non-eol"] = c.Any(a.Rune(0x09), g["printable-ascii"], g["non-ascii"])
	g["comment"] = g["comment-start-symbol"].Then(c.ZeroOrMore(g["non-eol"]))

	// Basic String
	g["escape-seq-char"] = c.Any(
		a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
		a.Rune('u').Then(g["hex-digit"].Times(4)),
		a.Rune('U').Then(g["hex-digit"].Times(8)))
	g["escaped"] = g["escape"].Then(g["escape-seq-char"])
	g["basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["quotation-mark"].Or(g["escape"])), g["non-ascii"])
	g["basic-char"] = g["escaped"].Or(g["basic-unescaped"])
	g["basic-string"] = c.Seq(g["quotation-mark"], c.ZeroOrMore(g["basic-char"]), g["quotation-mark"])

	// Multiline Basic String
	g["ml-basic-string-delim"] = g["quotation-mark"].Times(3)
	g["ml-basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["escape"]), g["non-ascii"])
	g["ml-basic-char"] = g["ml-basic-unescaped"].Or(g["escaped"])
	g["ml-basic-body-concat"] = c.Seq(g["escape"], g["ws"], g["newline"], c.ZeroOrMore(g["ws-or-newline"]))
	g["ml-basic-body"] = c.ZeroOrMore(c.Any(g["ml-basic-char"], g["newline"], g["ml-basic-body-concat"]))
	g["ml-basic-string"] = c.Seq(g["ml-basic-string-delim"], g["ml-basic-body"], g["ml-basic-string-delim"])

	// Literal String
	g["literal-char"] = c.Any(g["tab"], g["printable-ascii"].Except(g["apostrophe"]), g["non-ascii"])
	g["literal-string"] = c.Seq(g["apostrophe"], c.ZeroOrMore(g["literal-char"]), g["apostrophe"])

	// Multiline Literal String
	g["ml-literal-string-delim"] = g["apostrophe"].Times(3)
	g["ml-literal-char"] = c.Any(g["tab"], g["printable-ascii"], g["non-ascii"])
	g["ml-literal-body"] = c.ZeroOrMore(g["ml-literal-char"].Or(g["newline"]))
	g["ml-literal-string"] = c.Seq(g["ml-literal-string-delim"], g["ml-literal-body"], g["ml-literal-string-delim"])

	// String
	g["string"] = c.Any(g["ml-basic-string"], g["basic-string"], g["ml-literal-string"], g["literal-string"])

	// Integer
	g["digit1-9"] = a.DigitNotZero
	g["underscore-int-digit"] = c.Any(g["digit"], g["underscore"].Then(g["digit"]))
	g["unsigned-dec-int"] = c.Any(g["digit"], g["digit1-9"].Then(c.OneOrMore(g["underscore-int-digit"])))
	g["dec-int"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["unsigned-dec-int"])
	g["hex-prefix"] = a.Zero.Then(a.Rune('x'))
	g["underscore-hex-digit"] = c.Any(g["hex-digit"], g["underscore"].Then(g["hex-digit"]))
	g["hex-int"] = c.Seq(g["hex-prefix"], g["hex-digit"], c.ZeroOrMore(g["underscore-hex-digit"]))
	g["oct-prefix"] = a.Zero.Then(a.Rune('o'))
	g["digit0-7"] = a.RuneRange('0', '7')
	g["underscore-oct-digit"] = c.Any(g["digit0-7"], g["underscore"].Then(g["digit0-7"]))
	g["oct-int"] = c.Seq(g["oct-prefix"], g["digit0-7"], c.ZeroOrMore(g["underscore-oct-digit"]))
	g["bin-prefix"] = a.Zero.Then(a.Rune('b'))
	g["digit0-1"] = a.Runes('0', '1')
	g["underscore-bin-digit"] = c.Any(g["digit0-1"], g["underscore"].Then(g["digit0-1"]))
	g["bin-int"] = c.Seq(g["bin-prefix"], g["digit0-1"], c.ZeroOrMore(g["underscore-bin-digit"]))
	g["integer"] = c.Any(g["dec-int"], g["hex-int"], g["oct-int"], g["bin-int"])

	// Float
	g["float-int-part"] = g["dec-int"]
	g["exp"] = a.StrNoCase("e").Then(g["float-int-part"])
	g["decimal-point"] = a.Dot
	g["zero-prefixable-int"] = c.Seq(g["digit"], c.ZeroOrMore(g["underscore-int-digit"]))
	g["frac"] = c.Seq(g["decimal-point"], g["zero-prefixable-int"])
	g["standard-float"] = c.Seq(g["float-int-part"], g["exp"].Or(g["frac"].Then(c.Optional(g["exp"]))))
	g["inf"] = a.Str("inf")
	g["nan"] = a.Str("nan")
	g["special-float"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["inf"].Or(g["nan"]))
	g["float"] = g["standard-float"].Or(g["special-float"])

	// Boolean
	g["true"] = a.Str("true")
	g["false"] = a.Str("false")
	g["boolean"] = g["true"].Or(g["false"])

	// Date and time (as defined in RFC 3339)
	g["date-full-year"] = g["digit"].Times(4)
	g["date-month"] = g["digit"].Times(2)
	g["date-mday"] = g["digit"].Times(2)
	g["time-delim"] = a.Runes('T', 't', ' ')
	g["time-hour"] = g["digit"].Times(2)
	g["time-minute"] = g["digit"].Times(2)
	g["time-second"] = g["digit"].Times(2)
	g["time-sec-frac"] = g["decimal-point"].Then(c.OneOrMore(g["digit"]))
	g["time-num-offset"] = c.Seq(g["plus"].Or(g["minus"]), g["time-hour"], g["colon"], g["time-minute"])
	g["time-offset"] = c.Any(a.Runes('Z', 'z'), g["time-num-offset"])
	g["partial-time"] = c.Seq(g["time-hour"], g["colon"], g["time-minute"], g["colon"], g["time-second"], g["time-sec-frac"].Optional())
	g["full-time"] = c.Seq(g["partial-time"], g["time-offset"])
	g["full-date"] = c.Seq(g["date-full-year"], g["minus"], g["date-month"], g["minus"], g["date-mday"])
	g["offset-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["full-time"])
	g["local-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["partial-time"])
	g["local-date"] = g["full-date"]
	g["local-time"] = g["partial-time"]
	g["date-time"] = c.Any(g["offset-date-time"], g["local-date-time"], g["local-date"], g["local-time"])

	// Array
	g["array-open"] = a.SquareOpen
	g["array-close"] = a.SquareClose
	g["ws-comment-newline"] = c.ZeroOrMore(g["wschar"].Or(g["comment"].Optional().Then(g["newline"])))
	// "val" is recursive (a value can be an array of values), so it is
	// resolved lazily.
	g["array-value"] = g["ws-comment-newline"].Then(G("val"))
	// "array-sep" is defined right below, hence the lazy lookups.
	g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(G("array-sep"), g["array-value"])), G("array-sep").Optional())
	g["array-sep"] = g["ws"].Then(a.Comma)
	g["array"] = c.Seq(g["array-open"], g["array-values"].Optional(), g["ws-comment-newline"], g["array-close"])

	// Table
	g["table"] = G("std-table").Or(G("array-table"))

	// Standard Table
	g["std-table"] = c.Seq(G("std-table-open"), G("key"), G("std-table-close"))
	g["std-table-open"] = a.SquareOpen.Then(g["ws"])
	g["std-table-close"] = g["ws"].Then(a.SquareClose)

	// Inline Table
	g["inline-table"] = c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close"))
	g["inline-table-open"] = a.CurlyOpen.Then(g["ws"])
	g["inline-table-close"] = g["ws"].Then(a.CurlyClose)
	g["inline-table-sep"] = c.Seq(g["ws"], a.Comma, g["ws"])
	g["inline-table-keyval"] = c.Seq(G("key"), G("keyval-sep"), G("val"))
	g["inline-table-keyvals"] = c.Seq(g["inline-table-keyval"], c.ZeroOrMore(c.Seq(g["inline-table-sep"], g["inline-table-keyval"])))

	// Array Table
	g["array-table"] = c.Seq(G("array-table-open"), G("key"), G("array-table-close"))
	g["array-table-open"] = a.SquareOpen.Times(2).Then(g["ws"])
	g["array-table-close"] = g["ws"].Then(a.SquareClose.Times(2))

	// Key-Value Pairs
	g["unquoted-key"] = c.OneOrMore(c.Any(g["alpha"], g["digit"], g["minus"], g["underscore"]))
	g["quoted-key"] = g["basic-string"].Or(g["literal-string"])
	g["dot-sep"] = c.Seq(g["ws"], a.Dot, g["ws"])
	g["simple-key"] = g["quoted-key"].Or(g["unquoted-key"])
	g["dotted-key"] = c.Seq(g["simple-key"], c.OneOrMore(g["dot-sep"].Then(g["simple-key"])))
	g["keyval-sep"] = c.Seq(g["ws"], a.Equal, g["ws"])
	g["val"] = c.Any(g["string"], g["boolean"], g["array"], g["inline-table"], g["date-time"], g["float"], g["integer"])
	g["key"] = g["simple-key"].Or(g["dotted-key"])
	g["keyval"] = c.Seq(g["key"], g["keyval-sep"], g["val"])

	// Overall Structure
	g["expression"] = c.Any(
		c.Seq(g["ws"], g["comment"].Optional()),
		c.Seq(g["ws"], g["keyval"], g["ws"], g["comment"].Optional()),
		c.Seq(g["ws"], g["table"], g["ws"], g["comment"].Optional()))
	g["toml"] = c.Seq(g["expression"], c.ZeroOrMore(g["newline"].Then(g["expression"])))

	return g["toml"]
}