go-toml/parse2/grammar.go

358 lines
11 KiB
Go

package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"math"
"os"
"path"
"time"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
"github.com/pkg/profile"
)
// doProfile points at the value of the -p command line flag (set up in init):
// the number of match loops to run under the profiler. Zero, the default,
// disables profiling.
var doProfile *int
func init() {
doProfile = flag.Int("p", 0, "Perform pprof profiling (value is number of run loops)")
flag.Usage = usage
flag.Parse()
}
// usage logs a one-line synopsis for the program, prints the defaults of all
// registered flags and terminates the process with exit status 1.
func usage() {
	program := path.Base(os.Args[0])
	log.Printf("Usage: %s < <path to TOML-file>\n", program)
	flag.PrintDefaults()
	os.Exit(1)
}
// main feeds the TOML document on stdin through the grammar produced by
// BuildGrammar.
//
// In normal mode the document is matched once and every resulting token is
// printed on its own line, prefixed with its index. When the -p flag is
// positive, the input is read into memory once and matched that many times
// while the profiler is running; only progress and the total duration are
// printed. Any read or match error terminates the program via log.Fatalf.
func main() {
	toml := BuildGrammar()

	if *doProfile > 0 {
		fmt.Println("Profiling ...")
		inputBytes, err := ioutil.ReadAll(os.Stdin)
		if err != nil {
			log.Fatalf("Error in reading TOML input: %s\n", err)
		}
		inputStr := string(inputBytes)

		p := profile.Start()
		start := time.Now()
		for i := 0; i < *doProfile; i++ {
			result, err := toml.Match(inputStr)
			if err != nil {
				// The result must not be inspected after a failed match.
				// Stop the profiler explicitly, because log.Fatalf exits
				// the process right away.
				p.Stop()
				log.Fatalf("Error in parsing TOML: %s\n", err)
			}
			fmt.Printf("cycle %d / %d, tokens=%d\r", i+1, *doProfile, len(result.Tokens))
		}
		duration := time.Since(start)
		p.Stop()

		fmt.Printf("\n")
		fmt.Println("Duration:", duration)
		return
	}

	result, err := toml.Match(os.Stdin)
	if err != nil {
		log.Fatalf("Error in parsing TOML: %s\n", err)
	}
	for i, t := range result.Tokens {
		fmt.Printf("[%d] %v\n", i, t)
	}
}
// Grammar is a registry of grammar rules: it maps a rule name to the
// tokenize.Handler that implements the rule.
type Grammar map[string]tokenize.Handler
// Rule stores definition in the grammar under the given name.
// It panics when a rule with that name has been registered before.
func (g Grammar) Rule(name string, definition tokenize.Handler) {
	_, exists := g[name]
	if exists {
		panic(fmt.Sprintf("Grammar rule %q already exists", name))
	}
	g[name] = definition
}
// Get returns the handler for the rule with the given name.
//
// A rule that is already registered is returned directly. Otherwise a
// placeholder handler is returned that looks the rule up each time it is
// invoked, so rules can refer to rules that are registered later. The
// placeholder panics when the rule still does not exist at that moment.
func (g Grammar) Get(name string) tokenize.Handler {
	registered, known := g[name]
	if known {
		return registered
	}

	return func(t *tokenize.API) bool {
		resolved, found := g[name]
		if !found {
			panic(fmt.Sprintf("Grammar rule %q does not exist", name))
		}
		return resolved(t)
	}
}
// BuildGrammar registers the rules of the TOML grammar in a fresh Grammar
// and returns the handler for the top-level "toml" rule.
//
// Rules are registered with R (Grammar.Rule) and referenced with G
// (Grammar.Get). A rule may be referenced before it is registered; G then
// hands out a placeholder that resolves the rule when it is first used.
// Registering the same rule name twice panics.
func BuildGrammar() tokenize.Handler {
	c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T

	g := make(Grammar)
	R := g.Rule
	G := g.Get

	R("alpha", a.Letter)
	R("digit", a.Digit)
	R("minus", a.Minus)
	R("plus", a.Plus)
	R("underscore", a.Underscore)
	R("quotation-mark", a.DoubleQuote)
	R("apostrophe", a.SingleQuote)
	R("colon", a.Colon)
	R("escape", a.Backslash)
	R("hex-digit", a.HexDigit)

	// Whitespace, Newline
	R("tab", a.Tab)
	R("space", a.Space)
	R("whitespaceChar", G("tab").Or(G("space")))
	R("ws", c.ZeroOrMore(G("whitespaceChar")))
	R("newline", a.Newline)
	R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline")))

	// Comment
	R("comment-start-symbol", a.Hash)
	R("non-ascii", a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)))
	R("non-eol", c.Any(a.Rune(0x09), a.RuneRange(0x20, 0x7E), G("non-ascii")))
	R("comment", G("comment-start-symbol").Then(c.ZeroOrMore(G("non-eol"))))

	// Basic String
	R("escape-seq-char", c.Any(
		a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'),
		a.Rune('u').Then(G("hex-digit").Times(4)),
		a.Rune('U').Then(G("hex-digit").Times(8))))
	R("escaped", G("escape").Then(G("escape-seq-char")))
	R("basic-unescaped", c.Any(a.RuneRange(0x20, 0x21), a.RuneRange(0x23, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
	R("basic-char", G("escaped").Or(G("basic-unescaped")))
	R("basic-string", c.Seq(m.Drop(G("quotation-mark")), c.ZeroOrMore(G("basic-char")), m.Drop(G("quotation-mark"))))

	// Multiline Basic String
	R("ml-basic-string-delim", G("quotation-mark").Times(3))
	R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
	R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
	R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
	R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
	R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim"))))
	// Only the single newline directly after the opening delimiter is
	// trimmed, same as for the multiline literal string below. Any further
	// newlines are part of the string value.
	R("ml-basic-string", c.Seq(
		m.Drop(G("ml-basic-string-delim").Then(c.Optional(G("newline")))),
		G("ml-basic-body"),
		m.Drop(G("ml-basic-string-delim"))))

	// Literal String
	R("literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x26), a.RuneRange(0x28, 0x7E), G("non-ascii")))
	R("literal-string", c.Seq(
		m.Drop(G("apostrophe")),
		c.ZeroOrMore(G("literal-char")),
		m.Drop(G("apostrophe"))))

	// Multiline Literal String
	R("ml-literal-string-delim", G("apostrophe").Times(3))
	R("ml-literal-char", c.Any(G("tab"), a.RuneRange(0x20, 0x7E), G("non-ascii")))
	R("ml-literal-body-content", G("ml-literal-char").Or(G("newline")))
	R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim"))))
	R("ml-literal-string", c.Seq(
		m.Drop(G("ml-literal-string-delim").Then(c.Optional(G("newline")))),
		G("ml-literal-body"),
		m.Drop(G("ml-literal-string-delim"))))

	// String
	R("string", c.Any(
		tok.StrInterpreted("string", G("ml-basic-string")),
		tok.StrInterpreted("string", G("basic-string")),
		tok.Str("string", G("ml-literal-string")),
		tok.Str("string", G("literal-string"))))

	// Integer
	R("digit1-9", a.DigitNotZero)
	R("underscore-int-digit", c.Any(G("digit"), m.Drop(G("underscore")).Then(G("digit"))))
	R("unsigned-dec-int", c.Any(G("digit1-9").Then(c.OneOrMore(G("underscore-int-digit"))), G("digit")))
	R("dec-int", c.Optional(G("plus").Or(G("minus"))).Then(G("unsigned-dec-int")))
	R("hex-prefix", a.Zero.Then(a.Rune('x')))
	R("underscore-hex-digit", c.Any(G("hex-digit"), m.Drop(G("underscore")).Then(G("hex-digit"))))
	R("hex-int", c.Seq(m.Drop(G("hex-prefix")), G("hex-digit"), c.ZeroOrMore(G("underscore-hex-digit"))))
	R("oct-prefix", a.Zero.Then(a.Rune('o')))
	R("digit0-7", a.RuneRange('0', '7'))
	R("underscore-oct-digit", c.Any(G("digit0-7"), m.Drop(G("underscore")).Then(G("digit0-7"))))
	R("oct-int", c.Seq(m.Drop(G("oct-prefix")), G("digit0-7"), c.ZeroOrMore(G("underscore-oct-digit"))))
	R("bin-prefix", a.Zero.Then(a.Rune('b')))
	R("digit0-1", a.Runes('0', '1'))
	R("underscore-bin-digit", c.Any(G("digit0-1"), m.Drop(G("underscore")).Then(G("digit0-1"))))
	R("bin-int", c.Seq(m.Drop(G("bin-prefix")), G("digit0-1"), c.ZeroOrMore(G("underscore-bin-digit"))))
	R("integer", c.Any(
		tok.Int64Base("integer", 16, G("hex-int")),
		tok.Int64Base("integer", 8, G("oct-int")),
		tok.Int64Base("integer", 2, G("bin-int")),
		tok.Int64("integer", G("dec-int"))))

	// Float
	R("float-int-part", G("dec-int"))
	R("exp", a.StrNoCase("e").Then(G("float-int-part")))
	R("decimal-point", a.Dot)
	// All digits must stay in the output, otherwise a fraction would be cut
	// off after its first digit. The underscores are already dropped by the
	// underscore-int-digit rule itself.
	R("zero-prefixable-int", c.Seq(G("digit"), c.ZeroOrMore(G("underscore-int-digit"))))
	R("frac", c.Seq(G("decimal-point"), G("zero-prefixable-int")))
	R("standard-float", c.Seq(G("float-int-part"), G("exp").Or(G("frac").Then(c.Optional(G("exp"))))))
	R("inf-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("inf")))
	R("nan-float", c.Optional(G("plus").Or(G("minus"))).Then(a.Str("nan")))
	R("float", c.Any(
		tok.Float64("float", G("standard-float")),
		tok.ByCallback("float", G("inf-float"), func(t *tokenize.API) interface{} {
			if t.Rune(0) == '-' {
				return math.Inf(-1)
			}
			return math.Inf(+1)
		}),
		tok.ByValue("float", G("nan-float"), math.NaN())))

	// Boolean
	R("true", a.Str("true"))
	R("false", a.Str("false"))
	R("boolean", tok.Boolean("boolean", G("true").Or(G("false"))))

	// Date and time (as defined in RFC 3339)
	//
	// The date/time rules below produce string tokens whose type is the
	// time.Parse layout fragment for the matched input. makeDateTimeValue
	// glues these fragments together to parse the full value.
	R("date-year", G("digit").Times(4))
	R("date-month", G("digit").Times(2))
	R("date-mday", G("digit").Times(2))
	R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))
	R("time-delim", c.Any(
		tok.Str("T", a.Rune('T')),
		tok.Str("t", a.Rune('t')),
		tok.Str(" ", a.Rune(' '))))
	R("time-hour", G("digit").Times(2))
	R("time-minute", G("digit").Times(2))
	R("time-second", G("digit").Times(2))
	R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"))))
	// At most 9 fractional digits are kept; any extra digits are matched
	// but dropped from the output.
	R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))
	R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
	R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
	R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))
	R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset")))
	// Local times accept the same optional fractional seconds as
	// offset date-times do.
	R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional()))
	R("local-date", G("date"))
	R("local-time", c.Seq(G("time"), G("time-sec-frac").Optional()))

	// makeDateTimeValue concatenates the types (layout fragments) and the
	// values (input fragments) of the tokens produced so far, clears those
	// tokens and parses the input using the assembled layout.
	makeDateTimeValue := func(t *tokenize.API) interface{} {
		layout := ""
		input := ""
		for _, part := range t.Tokens() {
			layout += part.Type.(string)
			input += part.Value.(string)
		}
		t.ClearTokens()
		value, err := time.Parse(layout, input)
		if err != nil {
			panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err))
		}
		return value
	}

	R("date-time", c.Any(
		tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue),
		tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue),
		tok.ByCallback("local-date", G("local-date"), makeDateTimeValue),
		tok.ByCallback("local-time", G("local-time"), makeDateTimeValue)))

	// Inline Table
	R("inline-table-open", a.CurlyOpen.Then(G("ws")))
	R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws")))
	R("inline-table-keyvals", c.Separated(G("inline-table-sep"), G("keyval")))
	R("inline-table-close", G("ws").Then(a.CurlyClose))
	R("inline-table", tok.Group("inline-table", c.Seq(
		G("inline-table-open"),
		G("inline-table-keyvals").Optional(),
		G("inline-table-close"))))

	// Inline Array
	R("array-open", a.SquareOpen)
	R("array-sep", G("ws").Then(a.Comma))
	R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline")))))
	R("array-values", c.Seq(
		G("ws-comment-newline"),
		G("val"),
		c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))),
		G("array-sep").Optional()))
	R("array-close", a.SquareClose)
	R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))

	// Standard Table
	R("std-table-open", a.SquareOpen.Then(G("ws")))
	R("std-table-close", G("ws").Then(a.SquareClose))
	R("std-table", c.Seq(G("std-table-open"), tok.Group("table", G("key")), G("std-table-close")))

	// Array Table
	R("array-table", c.Seq(G("array-table-open"), tok.Group("array-of-tables", G("key")), G("array-table-close")))
	R("array-table-open", a.SquareOpen.Times(2).Then(G("ws")))
	R("array-table-close", G("ws").Then(a.SquareClose.Times(2)))

	// Table
	R("table", G("array-table").Or(G("std-table")))

	// Key-Value Pairs
	R("unquoted-key", c.OneOrMore(c.Any(G("alpha"), G("digit"), G("minus"), G("underscore"))))
	R("quoted-key", G("basic-string").Or(G("literal-string")))
	R("key-sep", c.Seq(G("ws"), a.Dot, G("ws")))
	R("simple-key", tok.Str("key-part", G("quoted-key").Or(G("unquoted-key"))))
	R("dotted-key", c.Separated(G("key-sep"), G("simple-key")))
	R("key", c.FlushInput(tok.Group("key", G("dotted-key").Or(G("simple-key")))))
	R("keyval-sep", c.FlushInput(c.Seq(G("ws"), a.Equal, G("ws"))))
	R("val", tok.Group("val", c.Any(
		G("string"),
		G("date-time"),
		G("float"),
		G("integer"),
		G("boolean"),
		G("inline-array"),
		G("inline-table"),
	)))
	R("keyval", tok.Group("keyval", c.Seq(G("key"), G("keyval-sep"), G("val"))))

	// Overall Structure
	R("expression", c.Seq(
		c.FlushInput(m.Drop(G("ws"))),
		c.FlushInput(c.Optional(G("table").Or(G("keyval")))),
		c.FlushInput(m.Drop(G("ws"))),
		c.FlushInput(m.Drop(c.Optional(G("comment"))))))
	R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))

	return G("toml")
}