package main

import (
	"fmt"
	"log"
	"math"
	"os"
	"time"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
	"github.com/pkg/profile"
)

// main builds the TOML grammar, matches it against the document read from
// STDIN (with profiling active during the match) and prints every token that
// the match produced. A match error terminates the program via log.Fatalf.
func main() {
	toml := BuildGrammar()

	fmt.Printf("Reading TOML document from STDIN ...\n")
	prof := profile.Start() //profile.CPUProfile)
	result, err := toml.Match(os.Stdin)
	prof.Stop()
	fmt.Printf("Completed reading document.\n")

	if err != nil {
		log.Fatalf("Error in parsing TOML: %s\n", err)
	}

	fmt.Printf("Result:\n")
	for i, token := range result.Tokens() {
		fmt.Printf("[%d] %v\n", i, token)
	}
}

// Grammar is a registry of named grammar rules, each rule being a
// tokenize.Handler.
type Grammar map[string]tokenize.Handler

// Rule registers definition under name. It panics when a rule with that name
// has already been registered.
func (g Grammar) Rule(name string, definition tokenize.Handler) {
	if _, ok := g[name]; ok {
		panic(fmt.Sprintf("Grammar rule %q already exists", name))
	}
	g[name] = definition
}

// Get returns the handler for the rule called name.
//
// When the rule is already registered, its handler is returned directly.
// Otherwise a wrapper handler is returned that looks the rule up at the moment
// it is invoked, which allows rules to reference rules that are registered
// later on (forward and recursive references). The wrapper panics when the
// rule still does not exist at invocation time.
func (g Grammar) Get(name string) tokenize.Handler {
	if handler, ok := g[name]; ok {
		return handler
	}
	return func(t *tokenize.API) bool {
		if handler, ok := g[name]; ok {
			return handler(t)
		}
		panic(fmt.Sprintf("Grammar rule %q does not exist", name))
	}
}

// BuildGrammar registers all rules of the TOML grammar in a fresh Grammar and
// returns the handler for the top-level "toml" rule. The rule names largely
// follow the names used in the TOML ABNF specification.
func BuildGrammar() tokenize.Handler {
	c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T

	g := make(Grammar)
	R := g.Rule
	G := g.Get

	R("alpha", a.Letter)
	R("digit", a.Digit)
	R("minus", a.Minus)
	R("plus", a.Plus)
	R("underscore", a.Underscore)
	R("quotation-mark", a.DoubleQuote)
	R("apostrophe", a.SingleQuote)
	R("colon", a.Colon)
	R("escape", a.Backslash)
	R("hex-digit", a.HexDigit)

	// Whitespace, Newline
	R("tab", a.Tab)
	R("space", a.Space)
	R("whitespaceChar", G("tab").Or(G("space")))
	R("ws", c.ZeroOrMore(G("whitespaceChar")))
	R("newline", a.Newline)
	R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline")))

	// Comment
	R("comment-start-symbol", a.Hash)
	R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)))
	R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii")))
	R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol"))))

	// Basic String
	R("escape-seq-char", c.Any(
		a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
		a.Rune('u').Then(G("hex-digit").Times(4)),
		a.Rune('U').Then(G("hex-digit").Times(8))))
	R("escaped", G("escape").Then(G("escape-seq-char")))
	R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
	R("basic-char", G("escaped").Or(G("basic-unescaped")))
	R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark"))))

	// Multiline Basic String
	R("ml-basic-string-delim", G("quotation-mark").Times(3))
	R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
	R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
	R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
	R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
	R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim"))))
	R("ml-basic-string", c.Seq(
		m.Drop(G("ml-basic-string-delim")),
		m.Drop(c.Optional(G("newline"))),
		G("ml-basic-body"),
		m.Drop(G("ml-basic-string-delim"))))

	// Literal String
	R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii")))
	R("literal-string", c.Seq(
		m.Drop(G("apostrophe")),
		c.ZeroOrMore(G("literal-char")),
		m.Drop(G("apostrophe"))))

	// Multiline Literal String
	R("ml-literal-string-delim", G("apostrophe").Times(3))
	R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii")))
	R("ml-literal-body-content", G("ml-literal-char").Or(G("newline")))
	R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim"))))
	R("ml-literal-string", c.Seq(
		m.Drop(G("ml-literal-string-delim")),
		G("ml-literal-body"),
		m.Drop(G("ml-literal-string-delim"))))

	// String
	// The multiline variants are tried first, because their delimiters start
	// with the delimiter of the single line variants.
	R("string", c.Any(
		tok.StrInterpreted("string", G("ml-basic-string")),
		tok.StrInterpreted("string", G("basic-string")),
		tok.Str("string", G("ml-literal-string")),
		tok.Str("string", G("literal-string"))))

	// Integer
	R("digit1-9", a.DigitNotZero)
	R("underscore-int-digit", c.Any(G("digit"), m.Drop(G("underscore")).Then(G("digit"))))
	R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit")))
	R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int")))

	R("hex-prefix", a.Zero.Then(a.Rune('x')))
	R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit"))))
	R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit"))))

	R("oct-prefix", a.Zero.Then(a.Rune('o')))
	R("digit0-7", a.RuneRange('0', '7'))
	R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7"))))
	R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit"))))

	R("bin-prefix", a.Zero.Then(a.Rune('b')))
	R("digit0-1", a.Runes('0', '1'))
	R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1"))))
	R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit"))))

	// The prefixed notations are tried before the decimal notation, because
	// their leading "0" is a valid decimal integer by itself.
	R("integer", c.Any(
		tok.Int64Base("integer", 16, G("hex-int")),
		tok.Int64Base("integer", 8, G("oct-int")),
		tok.Int64Base("integer", 2, G("bin-int")),
		tok.Int64("integer", G("dec-int"))))

	// Float
	R("float-int-part", G("dec-int"))
	R("exp", a.StrNoCase("e").Then(G("float-int-part")))
	R("decimal-point", a.Dot)
	// All digits must be kept in the output here: dropping the digits after
	// the first one would truncate the fractional part of a float. Only the
	// underscores are dropped (by the "underscore-int-digit" rule).
	R("zero-prefixable-int", c.Seq(G("digit"), c.ZeroOrMore(G("underscore-int-digit"))))
	R("frac", c.Seq(G("decimal-point"), G("zero-prefixable-int")))
	R("standard-float", c.Seq(G("float-int-part"), G("exp").Or(G("frac").Then(c.Optional(G("exp"))))))
	R("inf-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("inf")))
	R("nan-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("nan")))
	R("float", c.Any(
		tok.Float64("float", G("standard-float")),
		tok.ByCallback("float", G("inf-float"), func(t *tokenize.API) interface{} {
			// Only an explicit leading minus sign yields negative infinity.
			if t.Result().Rune(0) == '-' {
				return math.Inf(-1)
			}
			return math.Inf(+1)
		}),
		tok.ByValue("float", G("nan-float"), math.NaN())))

	// Boolean
	R("true", a.Str("true"))
	R("false", a.Str("false"))
	R("boolean", tok.Boolean("boolean", G("true").Or(G("false"))))

	// Date and time (as defined in RFC 3339)
	//
	// The date/time rules below produce string tokens for which the token
	// type is a fragment of a time.Parse layout and the token value is the
	// matching input. makeDateTimeValue joins these fragments to build the
	// layout and input for the full date/time value.
	R("date-year", G("digit").Times(4))
	R("date-month", G("digit").Times(2))
	R("date-mday", G("digit").Times(2))
	R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))

	R("time-delim", c.Any(
		tok.Str("T", a.Rune('T')),
		tok.Str("t", a.Rune('t')),
		tok.Str(" ", a.Rune(' '))))

	R("time-hour", G("digit").Times(2))
	R("time-minute", G("digit").Times(2))
	R("time-second", G("digit").Times(2))
	R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"))))
	// At most 9 fractional digits are kept in the output (matching the
	// ".999999999" layout); any further digits are matched, but dropped.
	R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))

	R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
	R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
	R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))

	R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset")))
	// Fractional seconds are optional for local times too, just like they
	// are for offset date-times.
	R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional()))
	R("local-date", G("date"))
	R("local-time", c.Seq(G("time"), G("time-sec-frac").Optional()))

	// makeDateTimeValue combines the layout fragments (token types) and the
	// input fragments (token values) of the tokens in the current result into
	// one layout and one input string, clears those tokens and returns the
	// time.Time that results from parsing the input using the layout.
	makeDateTimeValue := func(t *tokenize.API) interface{} {
		layout := ""
		input := ""
		for _, part := range t.Result().Tokens() {
			layout += part.Type.(string)
			input += part.Value.(string)
		}
		t.Result().ClearTokens()
		value, err := time.Parse(layout, input)
		if err != nil {
			panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err))
		}
		return value
	}

	// The most specific alternative is tried first, because the alternatives
	// that follow match a prefix of it.
	R("date-time", c.Any(
		tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue),
		tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue),
		tok.ByCallback("local-date", G("local-date"), makeDateTimeValue),
		tok.ByCallback("local-time", G("local-time"), makeDateTimeValue)))

	// Inline Table
	R("inline-table-open", a.CurlyOpen.Then(G("ws")))
	R("inline-table-close", G("ws").Then(a.CurlyClose))
	R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws")))
	R("inline-table-keyvals", c.Seq(G("keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("keyval")))))
	// The key/value pairs are optional, so the empty inline table "{}" is
	// accepted as well.
	R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals").Optional(), G("inline-table-close"))))

	// Array
	R("array-open", a.SquareOpen)
	R("array-close", a.SquareClose)
	R("array-sep", G("ws").Then(a.Comma))
	R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline")))))
	R("array-values", c.Seq(
		G("ws-comment-newline"),
		G("val"),
		c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))),
		G("array-sep").Optional()))
	R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))

	// Standard Table
	R("std-table-open", a.SquareOpen.Then(G("ws")))
	R("std-table-close", G("ws").Then(a.SquareClose))
	R("std-table", c.Seq(G("std-table-open"), tok.Group("table", G("key")), G("std-table-close")))

	// Array Table
	// The open and close rules are registered after the rule that uses them;
	// Grammar.Get resolves such forward references when the rule is invoked.
	R("array-table", c.Seq(G("array-table-open"), tok.Group("array-of-tables", G("key")), G("array-table-close")))
	R("array-table-open", a.SquareOpen.Times(2).Then(G("ws")))
	R("array-table-close", G("ws").Then(a.SquareClose.Times(2)))

	// Table
	// The array table is tried first, because its "[[" delimiter starts with
	// the "[" delimiter of the standard table.
	R("table", G("array-table").Or(G("std-table")))

	// Key-Value Pairs
	R("unquoted-key", c.OneOrMore(c.Any(G("alpha"), G("digit"), G("minus"), G("underscore"))))
	R("quoted-key", G("basic-string").Or(G("literal-string")))
	R("key-sep", c.Seq(G("ws"), a.Dot, G("ws")))
	R("simple-key", tok.Str("key-part", G("quoted-key").Or(G("unquoted-key"))))
	R("dotted-key", c.Separated(G("simple-key"), G("key-sep")))
	R("key", c.FlushInput(tok.Group("key", G("dotted-key").Or(G("simple-key")))))
	R("keyval-sep", c.FlushInput(c.Seq(G("ws"), a.Equal, G("ws"))))
	R("val", c.FlushInput(tok.Group("val", c.Any(G("string"), G("boolean"), G("inline-array"), G("inline-table"), G("date-time"), G("float"), G("integer")))))
	R("keyval", tok.Group("keyval", c.Seq(G("key"), G("keyval-sep"), G("val"))))

	// Overall Structure
	R("expression", c.Seq(
		c.FlushInput(G("ws")),
		c.FlushInput(c.Optional(G("table").Or(G("keyval")))),
		c.FlushInput(G("ws")),
		c.FlushInput(G("comment").Optional())))
	R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))

	return G("toml")
}