233 lines
8.6 KiB
Groff
233 lines
8.6 KiB
Groff
package parse2
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
|
)
|
|
|
|
// Grammar is a registry of named grammar rules: it maps a rule name to the
// tokenize.Handler that implements that rule.
type Grammar map[string]tokenize.Handler
|
|
|
|
func (g Grammar) Rule(name string, definition tokenize.Handler) {
|
|
if _, ok := g[name]; ok {
|
|
panic(fmt.Sprintf("Grammar rule %q already exists", name))
|
|
}
|
|
g[name] = definition
|
|
}
|
|
|
|
func (g Grammar) Get(name string) tokenize.Handler {
|
|
if h, ok := g[name]; ok {
|
|
return g[name]
|
|
}
|
|
return func(t *tokenize.API) bool {
|
|
return g[name](t)
|
|
}
|
|
}
|
|
|
|
func BuildGrammar() tokenize.Handler {
|
|
|
|
c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T
|
|
|
|
g := make(Grammar)
|
|
R := g.Rule
|
|
G := g.Get
|
|
|
|
R("alpha", a.Letter)
|
|
R("digit", a.Digit)
|
|
g["minus"] = a.Minus
|
|
g["plus"] = a.Plus
|
|
g["underscore"] = a.Underscore
|
|
g["quotation-mark"] = a.DoubleQuote
|
|
g["apostrophe"] = a.SingleQuote
|
|
g["colon"] = a.Colon
|
|
g["escape"] = a.Backslash
|
|
g["hex-digit"] = a.HexDigit
|
|
|
|
// Whitespace, Newline
|
|
|
|
g["tab"] = a.Tab
|
|
g["space"] = a.Space
|
|
g["whitespaceChar"] = g["tab"].Or(g["space"])
|
|
g["ws"] = c.ZeroOrMore(g["whitespaceChar"])
|
|
g["newline"] = a.Newline
|
|
g["ws-or-newline"] = g["ws"].Or(g["newline"])
|
|
|
|
// Comment
|
|
|
|
g["comment-start-symbol"] = a.Hash
|
|
g["printable-ascii"] = a.RuneRange(0x20, 0x7E)
|
|
g["non-ascii"] = a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF))
|
|
g["non-eol"] = c.Any(a.Rune(0x09), g["printable-ascii"], g["non-ascii"])
|
|
g["comment"] = g["comment-start-symbol"].Then(c.ZeroOrMore(g["non-eol"]))
|
|
|
|
// Basic String
|
|
|
|
g["escape-seq-char"] = c.Any(
|
|
a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
|
|
a.Rune('u').Then(g["hex-digit"].Times(4)),
|
|
a.Rune('U').Then(g["hex-digit"].Times(8)))
|
|
g["escaped"] = g["escape"].Then(g["escape-seq-char"])
|
|
g["basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["quotation-mark"].Or(g["escape"])), g["non-ascii"])
|
|
g["basic-char"] = g["escaped"].Or(g["basic-unescaped"])
|
|
g["basic-string"] = c.Seq(g["quotation-mark"], c.ZeroOrMore(g["basic-char"]), g["quotation-mark"])
|
|
|
|
// Multiline Basic String
|
|
|
|
g["ml-basic-string-delim"] = g["quotation-mark"].Times(3)
|
|
g["ml-basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["backslash"]), g["non-ascii"])
|
|
g["ml-basic-char"] = g["ml-basic-unescaped"].Or(g["escaped"])
|
|
g["ml-basic-body-concat"] = c.Seq(g["escape"], g["ws"], g["newline"], c.ZeroOrMore(g["ws-or-newline"]))
|
|
g["ml-basic-body"] = c.ZeroOrMore(c.Any(g["ml-basic-char"], g["newline"], g["ml-basic-body-concat"]))
|
|
g["ml-basic-strinct"] = c.Seq(g["ml-basic-string-delim"], g["ml-basic-body"], g["ml-basic-string-delim"])
|
|
|
|
// Literal String
|
|
|
|
g["literal-char"] = c.Any(g["tab"], g["printable-ascii"].Except(g["apostrophe"]), g["non-ascii"])
|
|
g["literal-string"] = c.Seq(g["apostrophe"], c.ZeroOrMore(g["literal-char"]), g["apostrophe"])
|
|
|
|
// Multiline Literal String
|
|
|
|
g["ml-literal-string-delim"] = g["apostrophe"].Times(3)
|
|
g["ml-literal-char"] = c.Any(g["tab"], g["printable-ascii"], g["non-ascii"])
|
|
g["ml-literal-body"] = c.ZeroOrMore(g["ml-literal-char"].Or(g["newline"]))
|
|
g["ml-literal-string"] = c.Seq(g["ml-literal-string-delim"], g["ml-literal-body"], g["ml-literal-string-delim"])
|
|
|
|
// String
|
|
|
|
g["string"] = c.Any(g["ml-basic-string"], g["basic-string"], g["ml-literal-string"], g["literal-string"])
|
|
|
|
// Integer
|
|
|
|
g["digit1-9"] = a.DigitNotZero
|
|
g["underscore-int-digit"] = c.Any(g["digit"], g["underscore"].Then(g["digit"]))
|
|
g["unsiged-dec-int"] = c.Any(g["digit"], g["digit1-9"].Then(c.OneOrMore(g["underscore-int-digit"])))
|
|
g["dec-int"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["unsigned-dec-int"])
|
|
|
|
g["hex-prefix"] = a.Zero.Then(a.Rune('x'))
|
|
g["underscore-hex-digit"] = c.Any(g["hex-digit"], g["underscore"].Then(g["hex-digit"]))
|
|
g["hex-int"] = c.Seq(g["hex-prefix"], g["hex-digit"], c.ZeroOrMore(g["underscore-hex-digit"]))
|
|
|
|
g["oct-prefix"] = a.Zero.Then(a.Rune('o'))
|
|
g["digit0-7"] = a.RuneRange('0', '7')
|
|
g["underscore-oct-digit"] = c.Any(g["digit0-7"], g["underscore"].Then(g["digit0-7"]))
|
|
g["oct-int"] = c.Seq(g["oct-prefix"], g["digit0-7"], c.ZeroOrMore(g["underscore-oct-digit"]))
|
|
|
|
g["bin-prefix"] = a.Zero.Then(a.Rune('b'))
|
|
g["digit0-1"] = a.Runes('0', '1')
|
|
g["underscore-bin-digit"] = c.Any(g["digit0-1"], g["underscore"].Then(g["digit0-1"]))
|
|
g["bin-int"] = c.Seq(g["bin-prefix"], g["digit0-1"], c.ZeroOrMore(g["underscore-bin-digit"]))
|
|
|
|
g["integer"] = c.Any(g["dec-int"], g["hex-int"], g["oct-int"], g["bin-int"])
|
|
|
|
// Float
|
|
|
|
g["float-int-part"] = g["dec-int"]
|
|
g["exp"] = a.StrNoCase("e").Then(g["float-int-part"])
|
|
g["decimal-point"] = a.Dot
|
|
g["zero-prefixable-int"] = c.Seq(g["digit"], c.ZeroOrMore(g["underscore-int-digit"]))
|
|
g["frac"] = c.Seq(g["decimal-point"], g["zero-prefixable-int"])
|
|
g["standard-float"] = c.Seq(g["float-int-part"], g["exp"].Or(g["frac"].Then(c.Optional(g["exp"]))))
|
|
|
|
g["inf"] = a.Str("inf")
|
|
g["nan"] = a.Str("nan")
|
|
g["special-float"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["inf"].Or(g["nan"]))
|
|
|
|
g["float"] = g["standard-float"].Or(g["special-float"])
|
|
|
|
// Boolean
|
|
|
|
g["true"] = a.Str("true")
|
|
g["false"] = a.Str("false")
|
|
|
|
g["boolean"] = g["true"].Or(g["false"])
|
|
|
|
// Date and time (as defined in RFC 3339)
|
|
|
|
g["date-full-year"] = g["digit"].Times(4)
|
|
g["date-month"] = g["digit"].Times(2)
|
|
g["date-mday"] = g["digit"].Times(2)
|
|
g["time-delim"] = a.Runes('T', 't', ' ')
|
|
g["time-hour"] = g["digit"].Times(2)
|
|
g["time-minute"] = g["digit"].Times(2)
|
|
g["time-second"] = g["digit"].Times(2)
|
|
g["time-sec-frac"] = g["decimal-point"].Then(c.OneOrMore(g["digit"]))
|
|
g["time-num-offset"] = c.Seq(g["plus"].Or(g["minus"]), g["time-hour"], g["colon"], g["time-minute"])
|
|
g["time-offset"] = c.Any(a.Runes('Z', 'z'), g["time-num-offset"])
|
|
g["partial-time"] = c.Seq(g["time-hour"], g["colon"], g["time-minute"], g["colon"], g["time-second"], g["time-sec-frac"].Optional())
|
|
g["full-time"] = c.Seq(g["partial-time"], g["time-offset"])
|
|
g["full-date"] = c.Seq(g["date-full-year"], g["minus"], g["date-month"], g["minus"], g["date-mday"])
|
|
|
|
g["offset-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["full-time"])
|
|
g["local-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["parial-time"])
|
|
g["local-date"] = g["full-date"]
|
|
g["local-time"] = g["parial-time"]
|
|
|
|
g["date-time"] = c.Any(g["offset-date-time"], g["local-date-time"], g["local-date"], g["local-time"])
|
|
|
|
// Array
|
|
|
|
g["array-open"] = a.SquareOpen
|
|
g["array-close"] = a.SquareClose
|
|
g["ws-comment-newline"] = c.ZeroOrMore(g["whitespaceChar"].Or(g["comment"].Optional().Then(g["newline"])))
|
|
g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val"))
|
|
g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional())
|
|
g["array-sep"] = g["ws"].Then(a.Comma)
|
|
|
|
g["array"] = c.Seq(g["array-open"], g["array-values"].Optional(), g["ws-comment-newline"], g["array-close"])
|
|
|
|
// Table
|
|
|
|
g["table"] = g["std-table"].Or(g["array-table"])
|
|
|
|
// Standard Table
|
|
|
|
g["std-table"] = c.Seq(g["std-table-open"], g.Recursive("key"), g["std-table-close"])
|
|
|
|
g["std-table-open"] = a.SquareOpen.Then(g["ws"])
|
|
g["std-table-close"] = g["ws"].Then(a.SquareClose)
|
|
|
|
// Inline Table
|
|
|
|
g["inline-table"] = c.Seq(g["inline-table-open"], g["inline-table-keyvals"], g["inline-table-close"])
|
|
|
|
g["inline-table-open"] = a.CurlyOpen.Then(g["ws"])
|
|
g["inline-table-close"] = g["ws"].Then(a.CurlyClose)
|
|
g["inline-table-sep"] = c.Seq(g["ws"], a.Comma, g["ws"])
|
|
g["inline-table-keyval"] = c.Seq(g.Recursive("key"), g.Recursive("keyval-sep"), g.Recursive("val"))
|
|
g["inline-table-keyvals"] = c.Seq(g["inline-table-keyval"], c.ZeroOrMore(c.Seq(g["inline-table-sep"], g["inline-table-keyval"])))
|
|
|
|
// Array Table
|
|
|
|
g["array-table"] = c.Seq(g["array-table-open"], g.Recursive("key"), g["array-table-close"])
|
|
|
|
g["array-table-open"] = a.SquareOpen.Times(2).Then(g["ws"])
|
|
g["array-table-close"] = g["ws"].Then(a.SquareClose.Times(2))
|
|
|
|
// Key-Value Pairs
|
|
|
|
g["unquoted-key"] = c.OneOrMore(c.Any(g["alpha"], g["digit"], g["minus"], g["underscore"]))
|
|
g["quoted-key"] = g["basic-string"].Or(g["literal-string"])
|
|
g["dot-sep"] = c.Seq(g["ws"], a.Dot, g["ws"])
|
|
g["simple-key"] = g["quoted-key"].Or(g["unquoted-key"])
|
|
g["dotted-key"] = c.Seq(g["simple-key"], c.OneOrMore(g["dot-sep"].Then(g["simple_key"])))
|
|
|
|
g["key-val-sep"] = c.Seq(g["ws"], a.Equal, g["ws"])
|
|
|
|
g["val"] = c.Any(g["string"], g["boolean"], g["array"], g["inlineTable"], g["dateTime"], g["float"], g["integer"])
|
|
|
|
g["key"] = g["simple-key"].Or(g["dotted-key"])
|
|
|
|
g["keyval"] = c.Seq(g["key"], g["keyval-sep"], g["val"])
|
|
|
|
// Overall Structure
|
|
|
|
g["expression"] = c.Any(
|
|
c.Seq(g["ws"], g["comment"].Optional()),
|
|
c.Seq(g["ws"], g["keyval"], g["ws"], g["comment"].Optional()),
|
|
c.Seq(g["ws"], g["table"], g["ws"], g["comment"].Optional()))
|
|
|
|
g["toml"] = c.Seq(g["expression"], c.ZeroOrMore(g["newline"].Then(g["expression"])))
|
|
|
|
return g["toml"]
|
|
}
|