package main import ( "fmt" "log" "math" "os" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func main() { toml := BuildGrammar() fmt.Printf("Reading TOML document from STDIN ...\n") result, err := toml.Match(os.Stdin) fmt.Printf("Completed reading document.\n") if err != nil { log.Fatalf("Error in parsing TOML: %s\n", err) } else { fmt.Printf("Result:\n") for i, t := range result.Tokens() { fmt.Printf("[%d] %v\n", i, t) } } } type Grammar map[string]tokenize.Handler func (g Grammar) Rule(name string, definition tokenize.Handler) { if _, ok := g[name]; ok { panic(fmt.Sprintf("Grammar rule %q already exists", name)) } g[name] = definition } func (g Grammar) Get(name string) tokenize.Handler { if handler, ok := g[name]; ok { return handler } return func(t *tokenize.API) bool { if handler, ok := g[name]; ok { return handler(t) } panic(fmt.Sprintf("Grammar rule %q does not exist", name)) } } func BuildGrammar() tokenize.Handler { c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T g := make(Grammar) R := g.Rule G := g.Get R("alpha", a.Letter) R("digit", a.Digit) R("minus", a.Minus) R("plus", a.Plus) R("underscore", a.Underscore) R("quotation-mark", a.DoubleQuote) R("apostrophe", a.SingleQuote) R("colon", a.Colon) R("escape", a.Backslash) R("hex-digit", a.HexDigit) // Whitespace, Newline R("tab", a.Tab) R("space", a.Space) R("wschar", G("tab").Or(G("space"))) R("ws", c.ZeroOrMore(G("wschar"))) R("newline", a.Newline) R("wschar-or-newline", G("wschar").Or(G("newline"))) // Comment R("comment-start-symbol", a.Hash) R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF))) R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii"))) R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol")))) // Basic String R("escape-seq-char", c.Any( a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), a.Rune('u').Then(G("hex-digit").Times(4)), a.Rune('U').Then(G("hex-digit").Times(8)))) R("escaped", G("escape").Then(G("escape-seq-char"))) R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) R("basic-char", G("escaped").Or(G("basic-unescaped"))) R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark")))) // Multiline Basic String R("ml-basic-string-delim", G("quotation-mark").Times(3)) R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii"))) R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped"))) R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("wschar-or-newline")))) R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat")))) R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim")))) R("ml-basic-string", c.Seq( m.Drop(G("ml-basic-string-delim")), m.Drop(c.Optional(G("newline"))), G("ml-basic-body"), m.Drop(G("ml-basic-string-delim")))) // Literal String R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii"))) R("literal-string", c.Seq( m.Drop(G("apostrophe")), c.ZeroOrMore(G("literal-char")), m.Drop(G("apostrophe")))) // Multiline Literal String R("ml-literal-string-delim", G("apostrophe").Times(3)) R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii"))) R("ml-literal-body-content", G("ml-literal-char").Or(G("newline"))) R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim")))) R("ml-literal-string", c.Seq( m.Drop(G("ml-literal-string-delim")), G("ml-literal-body"), m.Drop(G("ml-literal-string-delim")))) // String R("string", c.Any( tok.StrInterpreted("string", G("ml-basic-string")), tok.StrInterpreted("string", G("basic-string")), tok.Str("string", G("ml-literal-string")), tok.Str("string", G("literal-string")))) // Integer R("digit1-9", a.DigitNotZero) R("underscore-int-digit", c.Any(G("digit"), m.Drop(G("underscore")).Then(G("digit")))) R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit"))) R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int"))) R("hex-prefix", a.Zero.Then(a.Rune('x'))) R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit")))) R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit")))) R("oct-prefix", a.Zero.Then(a.Rune('o'))) R("digit0-7", a.RuneRange('0', '7')) R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7")))) R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit")))) R("bin-prefix", a.Zero.Then(a.Rune('b'))) R("digit0-1", a.Runes('0', '1')) R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1")))) R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit")))) R("integer", c.Any( tok.Int64Base("integer", 16, G("hex-int")), tok.Int64Base("integer", 8, G("oct-int")), tok.Int64Base("integer", 2, G("bin-int")), tok.Int64("integer", G("dec-int")))) // Float R("float-int-part", G("dec-int")) R("exp", a.StrNoCase("e").Then(G("float-int-part"))) R("decimal-point", a.Dot) R("zero-prefixable-int", c.Seq(G("digit"), m.Drop(c.ZeroOrMore(G("underscore-int-digit"))))) R("frac", c.Seq(G("decimal-point"), G("zero-prefixable-int"))) R("standard-float", c.Seq(G("float-int-part"), G("exp").Or(G("frac").Then(c.Optional(G("exp")))))) R("inf-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("inf"))) R("nan-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("nan"))) R("float", c.Any( tok.Float64("float", G("standard-float")), tok.ByCallback("float", G("inf-float"), func(t *tokenize.API) interface{} { if t.Result().Rune(0) == '-' { return math.Inf(-1) } return math.Inf(+1) }), tok.ByValue("float", G("nan-float"), math.NaN()))) // Boolean R("true", a.Str("true")) R("false", a.Str("false")) R("boolean", tok.Boolean("boolean", G("true").Or(G("false")))) // Date and time (as defined in RFC 3339) R("date-full-year", G("digit").Times(4)) R("date-month", G("digit").Times(2)) R("date-mday", G("digit").Times(2)) R("time-delim", a.Runes('T', 't', ' ')) R("time-hour", G("digit").Times(2)) R("time-minute", G("digit").Times(2)) R("time-second", G("digit").Times(2)) R("time-sec-frac", G("decimal-point").Then(c.OneOrMore(G("digit")))) R("time-zulu", a.Runes('Z', 'z')) R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute"))) R("time-offset", c.Any(G("time-zulu"), G("time-num-offset"))) R("partial-time", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"), G("time-sec-frac").Optional())) R("full-time", c.Seq(G("partial-time"), G("time-offset"))) R("full-date", c.Seq(G("date-full-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))) R("offset-date-time", c.Seq(G("full-date"), G("time-delim"), G("full-time"))) R("local-date-time", c.Seq(G("full-date"), G("time-delim"), G("partial-time"))) R("local-date", G("full-date")) R("local-time", G("partial-time")) R("date-time", c.Any(G("offset-date-time"), G("local-date-time"), G("local-date"), G("local-time"))) // Inline Table R("inline-table-open", a.CurlyOpen.Then(G("ws"))) R("inline-table-close", G("ws").Then(a.CurlyClose)) R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws"))) R("inline-table-keyval", tok.Group("inline-table-keyval", c.Seq(G("key"), G("keyval-sep"), G("val")))) R("inline-table-keyvals", c.Seq(G("inline-table-keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("inline-table-keyval"))))) R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close")))) // Array R("array-open", a.SquareOpen) R("array-close", a.SquareClose) R("array-sep", G("ws").Then(a.Comma)) R("ws-comment-newline", c.ZeroOrMore(G("wschar").Or(G("comment").Optional().Then(G("newline"))))) R("array-values", c.Any( c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep"), G("array-values")), c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep").Optional()))) R("inline-array", tok.Group("inline-array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close")))) // Standard Table R("std-table-open", a.SquareOpen.Then(G("ws"))) R("std-table-close", G("ws").Then(a.SquareClose)) R("std-table", c.Seq(G("std-table-open"), tok.Group("table", G("key")), G("std-table-close"))) // Array Table R("array-table", c.Seq(G("array-table-open"), tok.Group("array-of-tables", G("key")), G("array-table-close"))) R("array-table-open", a.SquareOpen.Times(2).Then(G("ws"))) R("array-table-close", G("ws").Then(a.SquareClose.Times(2))) // Table R("table", G("array-table").Or(G("std-table"))) // Key-Value Pairs R("unquoted-key", c.OneOrMore(c.Any(G("alpha"), G("digit"), G("minus"), G("underscore")))) R("quoted-key", G("basic-string").Or(G("literal-string"))) R("key-sep", c.Seq(G("ws"), a.Dot, G("ws"))) R("simple-key", tok.Str("key-part", G("quoted-key").Or(G("unquoted-key")))) R("dotted-key", c.Seq(G("simple-key"), c.OneOrMore(G("key-sep").Then(G("simple-key"))))) R("keyval-sep", c.Seq(G("ws"), a.Equal, G("ws"))) R("key", tok.Group("key", G("dotted-key").Or(G("simple-key")))) R("val", tok.Group("val", c.Any(G("string"), G("boolean"), G("inline-array"), G("inline-table"), G("date-time"), G("float"), G("integer")))) R("keyval", tok.Group("keyval", c.Seq(G("key"), G("keyval-sep"), G("val")))) // Overall Structure R("expression", c.Any( c.Seq(G("ws"), G("table"), G("ws"), G("comment").Optional()), c.Seq(G("ws"), G("keyval"), G("ws"), G("comment").Optional()), c.Seq(G("ws"), G("comment").Optional()), )) //R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile)) R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))))) return G("toml") }