313 lines
11 KiB
Go
313 lines
11 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"math"
|
|
"os"
|
|
"time"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
|
"github.com/pkg/profile"
|
|
)
|
|
|
|
func main() {
|
|
toml := BuildGrammar()
|
|
fmt.Printf("Reading TOML document from STDIN ...\n")
|
|
t := profile.Start() //profile.CPUProfile)
|
|
result, err := toml.Match(os.Stdin)
|
|
t.Stop()
|
|
fmt.Printf("Completed reading document.\n")
|
|
if err != nil {
|
|
log.Fatalf("Error in parsing TOML: %s\n", err)
|
|
} else {
|
|
fmt.Printf("Result:\n")
|
|
for i, t := range result.Tokens() {
|
|
fmt.Printf("[%d] %v\n", i, t)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Grammar is a registry of tokenize.Handler rules, keyed by rule name.
type Grammar map[string]tokenize.Handler
|
|
|
|
func (g Grammar) Rule(name string, definition tokenize.Handler) {
|
|
if _, ok := g[name]; ok {
|
|
panic(fmt.Sprintf("Grammar rule %q already exists", name))
|
|
}
|
|
g[name] = definition
|
|
}
|
|
|
|
func (g Grammar) Get(name string) tokenize.Handler {
|
|
if handler, ok := g[name]; ok {
|
|
return handler
|
|
}
|
|
return func(t *tokenize.API) bool {
|
|
if handler, ok := g[name]; ok {
|
|
return handler(t)
|
|
}
|
|
panic(fmt.Sprintf("Grammar rule %q does not exist", name))
|
|
}
|
|
}
|
|
|
|
func BuildGrammar() tokenize.Handler {
|
|
|
|
c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T
|
|
|
|
g := make(Grammar)
|
|
R := g.Rule
|
|
G := g.Get
|
|
|
|
R("alpha", a.Letter)
|
|
R("digit", a.Digit)
|
|
R("minus", a.Minus)
|
|
R("plus", a.Plus)
|
|
R("underscore", a.Underscore)
|
|
R("quotation-mark", a.DoubleQuote)
|
|
R("apostrophe", a.SingleQuote)
|
|
R("colon", a.Colon)
|
|
R("escape", a.Backslash)
|
|
R("hex-digit", a.HexDigit)
|
|
|
|
// Whitespace, Newline
|
|
|
|
R("tab", a.Tab)
|
|
R("space", a.Space)
|
|
R("whitespaceChar", G("tab").Or(G("space")))
|
|
R("ws", c.ZeroOrMore(G("whitespaceChar")))
|
|
R("newline", a.Newline)
|
|
R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline")))
|
|
|
|
// Comment
|
|
|
|
R("comment-start-symbol", a.Hash)
|
|
R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)))
|
|
R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii")))
|
|
R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol"))))
|
|
|
|
// Basic String
|
|
|
|
R("escape-seq-char", c.Any(
|
|
a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
|
|
a.Rune('u').Then(G("hex-digit").Times(4)),
|
|
a.Rune('U').Then(G("hex-digit").Times(8))))
|
|
R("escaped", G("escape").Then(G("escape-seq-char")))
|
|
R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
|
|
R("basic-char", G("escaped").Or(G("basic-unescaped")))
|
|
R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark"))))
|
|
|
|
// Multiline Basic String
|
|
|
|
R("ml-basic-string-delim", G("quotation-mark").Times(3))
|
|
R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
|
|
R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
|
|
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
|
|
R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
|
|
R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim"))))
|
|
R("ml-basic-string", c.Seq(
|
|
m.Drop(G("ml-basic-string-delim")),
|
|
m.Drop(c.Optional(G("newline"))),
|
|
G("ml-basic-body"),
|
|
m.Drop(G("ml-basic-string-delim"))))
|
|
|
|
// Literal String
|
|
R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii")))
|
|
R("literal-string", c.Seq(
|
|
m.Drop(G("apostrophe")),
|
|
c.ZeroOrMore(G("literal-char")),
|
|
m.Drop(G("apostrophe"))))
|
|
|
|
// Multiline Literal String
|
|
|
|
R("ml-literal-string-delim", G("apostrophe").Times(3))
|
|
R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii")))
|
|
R("ml-literal-body-content", G("ml-literal-char").Or(G("newline")))
|
|
R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim"))))
|
|
R("ml-literal-string", c.Seq(
|
|
m.Drop(G("ml-literal-string-delim")),
|
|
G("ml-literal-body"),
|
|
m.Drop(G("ml-literal-string-delim"))))
|
|
|
|
// String
|
|
|
|
R("string", c.Any(
|
|
tok.StrInterpreted("string", G("ml-basic-string")),
|
|
tok.StrInterpreted("string", G("basic-string")),
|
|
tok.Str("string", G("ml-literal-string")),
|
|
tok.Str("string", G("literal-string"))))
|
|
|
|
// Integer
|
|
|
|
R("digit1-9", a.DigitNotZero)
|
|
R("underscore-int-digit", c.Any(G("digit"), m.Drop(G("underscore")).Then(G("digit"))))
|
|
R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit")))
|
|
R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int")))
|
|
|
|
R("hex-prefix", a.Zero.Then(a.Rune('x')))
|
|
R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit"))))
|
|
R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit"))))
|
|
|
|
R("oct-prefix", a.Zero.Then(a.Rune('o')))
|
|
R("digit0-7", a.RuneRange('0', '7'))
|
|
R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7"))))
|
|
R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit"))))
|
|
|
|
R("bin-prefix", a.Zero.Then(a.Rune('b')))
|
|
R("digit0-1", a.Runes('0', '1'))
|
|
R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1"))))
|
|
R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit"))))
|
|
|
|
R("integer", c.Any(
|
|
tok.Int64Base("integer", 16, G("hex-int")),
|
|
tok.Int64Base("integer", 8, G("oct-int")),
|
|
tok.Int64Base("integer", 2, G("bin-int")),
|
|
tok.Int64("integer", G("dec-int"))))
|
|
|
|
// Float
|
|
|
|
R("float-int-part", G("dec-int"))
|
|
R("exp", a.StrNoCase("e").Then(G("float-int-part")))
|
|
R("decimal-point", a.Dot)
|
|
R("zero-prefixable-int", c.Seq(G("digit"), m.Drop(c.ZeroOrMore(G("underscore-int-digit")))))
|
|
R("frac", c.Seq(G("decimal-point"), G("zero-prefixable-int")))
|
|
R("standard-float", c.Seq(G("float-int-part"), G("exp").Or(G("frac").Then(c.Optional(G("exp"))))))
|
|
|
|
R("inf-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("inf")))
|
|
|
|
R("nan-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("nan")))
|
|
|
|
R("float", c.Any(
|
|
tok.Float64("float", G("standard-float")),
|
|
tok.ByCallback("float", G("inf-float"), func(t *tokenize.API) interface{} {
|
|
if t.Result().Rune(0) == '-' {
|
|
return math.Inf(-1)
|
|
}
|
|
return math.Inf(+1)
|
|
}),
|
|
tok.ByValue("float", G("nan-float"), math.NaN())))
|
|
|
|
// Boolean
|
|
|
|
R("true", a.Str("true"))
|
|
R("false", a.Str("false"))
|
|
|
|
R("boolean", tok.Boolean("boolean", G("true").Or(G("false"))))
|
|
|
|
// Date and time (as defined in RFC 3339)
|
|
|
|
R("date-year", G("digit").Times(4))
|
|
R("date-month", G("digit").Times(2))
|
|
R("date-mday", G("digit").Times(2))
|
|
R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))
|
|
|
|
R("time-delim", c.Any(
|
|
tok.Str("T", a.Rune('T')),
|
|
tok.Str("t", a.Rune('t')),
|
|
tok.Str(" ", a.Rune(' '))))
|
|
|
|
R("time-hour", G("digit").Times(2))
|
|
R("time-minute", G("digit").Times(2))
|
|
R("time-second", G("digit").Times(2))
|
|
R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"))))
|
|
|
|
R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))
|
|
|
|
R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
|
|
R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
|
|
R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))
|
|
|
|
R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset")))
|
|
R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time")))
|
|
R("local-date", G("date"))
|
|
R("local-time", G("time"))
|
|
|
|
makeDateTimeValue := func(t *tokenize.API) interface{} {
|
|
layout := ""
|
|
input := ""
|
|
for _, t := range t.Result().Tokens() {
|
|
layout += t.Type.(string)
|
|
input += t.Value.(string)
|
|
}
|
|
t.Result().ClearTokens()
|
|
value, err := time.Parse(layout, input)
|
|
if err != nil {
|
|
panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err))
|
|
}
|
|
return value
|
|
}
|
|
|
|
R("date-time", c.Any(
|
|
tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue),
|
|
tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue),
|
|
tok.ByCallback("local-date", G("local-date"), makeDateTimeValue),
|
|
tok.ByCallback("local-time", G("local-time"), makeDateTimeValue)))
|
|
|
|
// Inline Table
|
|
|
|
R("inline-table-open", a.CurlyOpen.Then(G("ws")))
|
|
R("inline-table-close", G("ws").Then(a.CurlyClose))
|
|
R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws")))
|
|
R("inline-table-keyvals", c.Seq(G("keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("keyval")))))
|
|
|
|
R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close"))))
|
|
|
|
// Array
|
|
|
|
R("array-open", a.SquareOpen)
|
|
R("array-close", a.SquareClose)
|
|
R("array-sep", G("ws").Then(a.Comma))
|
|
R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline")))))
|
|
R("array-values", c.Seq(
|
|
G("ws-comment-newline"),
|
|
G("val"),
|
|
c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))),
|
|
G("array-sep").Optional()))
|
|
|
|
R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))
|
|
|
|
// Standard Table
|
|
|
|
R("std-table-open", a.SquareOpen.Then(G("ws")))
|
|
R("std-table-close", G("ws").Then(a.SquareClose))
|
|
|
|
R("std-table", c.Seq(G("std-table-open"), tok.Group("table", G("key")), G("std-table-close")))
|
|
|
|
// Array Table
|
|
|
|
R("array-table", c.Seq(G("array-table-open"), tok.Group("array-of-tables", G("key")), G("array-table-close")))
|
|
|
|
R("array-table-open", a.SquareOpen.Times(2).Then(G("ws")))
|
|
R("array-table-close", G("ws").Then(a.SquareClose.Times(2)))
|
|
|
|
// Table
|
|
|
|
R("table", G("array-table").Or(G("std-table")))
|
|
|
|
// Key-Value Pairs
|
|
|
|
R("unquoted-key", c.OneOrMore(c.Any(G("alpha"), G("digit"), G("minus"), G("underscore"))))
|
|
R("quoted-key", G("basic-string").Or(G("literal-string")))
|
|
R("key-sep", c.Seq(G("ws"), a.Dot, G("ws")))
|
|
R("simple-key", tok.Str("key-part", G("quoted-key").Or(G("unquoted-key"))))
|
|
R("dotted-key", c.Separated(G("simple-key"), G("key-sep")))
|
|
|
|
R("key", c.FlushInput(tok.Group("key", G("dotted-key").Or(G("simple-key")))))
|
|
|
|
R("keyval-sep", c.FlushInput(c.Seq(G("ws"), a.Equal, G("ws"))))
|
|
|
|
R("val", c.FlushInput(tok.Group("val", c.Any(G("string"), G("boolean"), G("inline-array"), G("inline-table"), G("date-time"), G("float"), G("integer")))))
|
|
|
|
R("keyval", tok.Group("keyval", c.Seq(G("key"), G("keyval-sep"), G("val"))))
|
|
|
|
// Overall Structure
|
|
|
|
R("expression", c.Seq(
|
|
c.FlushInput(G("ws")),
|
|
c.FlushInput(c.Optional(G("table").Or(G("keyval")))),
|
|
c.FlushInput(G("ws")),
|
|
c.FlushInput(G("comment").Optional())))
|
|
|
|
R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))
|
|
|
|
return G("toml")
|
|
}
|