Cleaning up some logic around whitespace / newlines / comments.
This commit is contained in:
parent
019dd35d83
commit
af5d35ecee
|
@ -9,15 +9,15 @@ var (
|
|||
detectKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
|
||||
|
||||
// Both [tables] and [[arrays of tables]] start with a square open bracket.
|
||||
detectTableOrArrayOfTables = a.SquareOpen
|
||||
detectTable = a.SquareOpen
|
||||
)
|
||||
|
||||
func (t *parser) startDocument(p *parse.API) {
|
||||
for {
|
||||
switch {
|
||||
case p.Accept(whitespaceOrComment):
|
||||
// NOOP, skip these
|
||||
case p.Peek(detectTableOrArrayOfTables):
|
||||
case p.Accept(whitespaceInclNewlines.Or(comment)):
|
||||
// NOOP
|
||||
case p.Peek(detectTable):
|
||||
p.Handle(t.startTable)
|
||||
case p.Peek(detectKey):
|
||||
p.Handle(t.startKeyValuePair)
|
||||
|
|
|
@ -13,7 +13,8 @@ var (
|
|||
// Whitespace is ignored around key names and values. The key, equals
|
||||
// sign, and value must be on the same line (though some values can be
|
||||
// broken over multiple lines).
|
||||
keyAssignment = c.Seq(optionalBlanks, a.Equal, optionalBlanks)
|
||||
|
||||
keyAssignment = c.Seq(whitespace, a.Equal, whitespace)
|
||||
|
||||
// A key may be either bare, quoted or dotted. Bare keys may only
|
||||
// contain ASCII letters, ASCII digits, underscores, and dashes
|
||||
|
@ -25,14 +26,16 @@ var (
|
|||
// Best practice is to use bare keys except when absolutely necessary.
|
||||
// A bare key must be non-empty, but an empty quoted key is allowed
|
||||
// (though discouraged).
|
||||
|
||||
bareKeyRune = c.Any(a.ASCIILower, a.ASCIIUpper, a.Digit, a.Underscore, a.Minus)
|
||||
bareKey = c.OneOrMore(bareKeyRune)
|
||||
|
||||
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
||||
// This allows for grouping similar properties together. Blanks
|
||||
// around dot-separated parts are ignored, however, best practice is to
|
||||
// not use any extraneous blanks.
|
||||
keySeparatorDot = c.Seq(optionalBlanks, a.Dot, optionalBlanks)
|
||||
// This allows for grouping similar properties together. Whitespace
|
||||
// around dot-separated parts is ignored, however, best practice is to
|
||||
// not use any extraneous whitespace.
|
||||
|
||||
keySeparatorDot = c.Seq(whitespace, a.Dot, whitespace)
|
||||
)
|
||||
|
||||
func (t *parser) startKeyValuePair(p *parse.API) {
|
||||
|
@ -58,6 +61,7 @@ func (t *parser) startKeyValuePair(p *parse.API) {
|
|||
// is to use bare keys except when absolutely necessary.
|
||||
// A bare key must be non-empty, but an empty quoted key is allowed (though
|
||||
// discouraged).
|
||||
|
||||
func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
|
||||
var keyPart string
|
||||
var ok bool
|
||||
|
@ -83,6 +87,7 @@ func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) {
|
|||
// This allows for grouping similar properties together.
|
||||
// Whitespace around dot-separated parts is ignored, however, best
|
||||
// practice is to not use any extraneous whitespace.
|
||||
|
||||
func (t *parser) parseEndOfKeyOrDot(p *parse.API, key ast.Key) (ast.Key, bool) {
|
||||
if p.Accept(keySeparatorDot) {
|
||||
return t.parseKey(p, key)
|
||||
|
|
|
@ -10,29 +10,29 @@ import (
|
|||
var (
|
||||
c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
|
||||
|
||||
// From the specs: "Whitespace means tab (0x09) or space (0x20)."
|
||||
// In this package, we name this a blank, to be in line with the
|
||||
// terminology as used in parsekit.
|
||||
blank = a.Runes('\t', ' ')
|
||||
blanks = c.OneOrMore(blank)
|
||||
optionalBlanks = c.ZeroOrMore(blank)
|
||||
// Whitespace means tab (0x09) or space (0x20).
|
||||
// This matches the blanks as defined by parsekit.
|
||||
|
||||
whitespace = a.Blanks.Optional()
|
||||
|
||||
// Newline means LF (0x0A) or CRLF (0x0D0A).
|
||||
// This matches the default newline as defined by parsekit.
|
||||
// This matches the newline as defined by parsekit.
|
||||
|
||||
newline = a.Newline
|
||||
|
||||
// Whitespace is defined as blanks + newlines.
|
||||
whitespace = c.OneOrMore(blank.Or(newline))
|
||||
optionalWhitespace = c.ZeroOrMore(blank.Or(newline))
|
||||
// Whitespace + newlines.
|
||||
// This matches the whitespace as defined by parsekit.
|
||||
|
||||
whitespaceInclNewlines = a.Whitespace
|
||||
|
||||
// A '#' hash symbol marks the rest of the line as a comment.
|
||||
// All characters up to the end of the line are included in the comment.
|
||||
comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)))
|
||||
optionalComment = c.Optional(comment)
|
||||
|
||||
endOfLineOrComment = c.Seq(optionalBlanks, optionalComment, a.EndOfLine)
|
||||
whitespaceOrComment = whitespace.Or(comment)
|
||||
optionalWhitespaceOrComment = c.ZeroOrMore(whitespaceOrComment)
|
||||
comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)))
|
||||
optionalComment = comment.Optional()
|
||||
|
||||
endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine)
|
||||
whitespaceOrComment = c.ZeroOrMore(whitespaceInclNewlines.Or(comment))
|
||||
)
|
||||
|
||||
type parser struct {
|
||||
|
|
|
@ -31,9 +31,9 @@ import (
|
|||
// 2, # this is ok
|
||||
// ]
|
||||
var (
|
||||
arrayOpen = a.SquareOpen.Then(optionalWhitespaceOrComment)
|
||||
arraySeparator = c.Seq(optionalWhitespaceOrComment, a.Comma, optionalWhitespaceOrComment)
|
||||
arrayClose = c.Seq(optionalWhitespaceOrComment, a.Comma.Optional(), optionalWhitespaceOrComment, a.SquareClose)
|
||||
arrayOpen = a.SquareOpen.Then(whitespaceOrComment)
|
||||
arraySeparator = c.Seq(whitespace, a.Comma, whitespaceOrComment)
|
||||
arrayClose = c.Seq(whitespace, a.Comma.Optional(), whitespaceOrComment, a.SquareClose)
|
||||
)
|
||||
|
||||
func (t *parser) parseArray(p *parse.API) (*ast.Value, bool) {
|
||||
|
|
|
@ -30,7 +30,7 @@ func TestArray(t *testing.T) {
|
|||
{"x=[#comment before value\n1]", `{"x": [1]}`, ``},
|
||||
{"x=[1#comment after value\n]", `{"x": [1]}`, ``},
|
||||
{"x=[1\n#comment on its own line after value\n]", `{"x": [1]}`, ``},
|
||||
{"x=[1#comment 1\n#comment 2\n#comment 3\n , \n2]", `{"x": [1, 2]}`, ``},
|
||||
{"x=[1#comment 1\n,\n2]", `{}`, `unexpected input (expected an array separator) at line 1, column 5`},
|
||||
{"x=[1]", `{"x": [1]}`, ``},
|
||||
{"x=[1,0x2, 0b11, 0o4]", `{"x": [1, 2, 3, 4]}`, ``},
|
||||
{"x=[0.1,0.2,3e-1,0.04e+1, nan, inf]", `{"x": [0.1, 0.2, 0.3, 0.4, NaN, +Inf]}`, ``},
|
||||
|
|
|
@ -25,11 +25,13 @@ var (
|
|||
//
|
||||
// lt1 = 07:32:00
|
||||
// lt2 = 00:32:00.999999
|
||||
|
||||
year = a.Digit.Times(4)
|
||||
month = a.Digit.Times(2)
|
||||
day = a.Digit.Times(2)
|
||||
yyyymmdd = c.Seq(year, a.Minus, month, a.Minus, day)
|
||||
dateTok = tok.Str("2006-01-02", yyyymmdd)
|
||||
|
||||
hour = a.Digit.Times(2)
|
||||
minute = a.Digit.Times(2)
|
||||
seconds = a.Digit.Times(2)
|
||||
|
@ -40,6 +42,7 @@ var (
|
|||
// least millisecond precision is expected. If the value contains greater
|
||||
// precision than the implementation can support, the additional precision
|
||||
// must be truncated, not rounded.
|
||||
|
||||
micro = a.Dot.Then(c.MinMax(1, 9, a.Digit).Then(m.Drop(c.ZeroOrMore(a.Digit))))
|
||||
microTok = c.Optional(tok.Str(".999999999", micro))
|
||||
|
||||
|
@ -48,6 +51,7 @@ var (
|
|||
// Note that RFC 3339 also allows the use of a lower case delimiter.
|
||||
//
|
||||
// odt4 = 1979-05-27 07:32:00Z
|
||||
|
||||
tdelimTok = c.Any(
|
||||
tok.Str("T", a.Rune('T')),
|
||||
tok.Str("t", a.Rune('t')),
|
||||
|
@ -66,18 +70,19 @@ var (
|
|||
//
|
||||
// Note that RFC 3339 also allows the use of a lower case 'z'.
|
||||
// Here we replace it with a capital 'Z' to make the Go date parser work.
|
||||
|
||||
zulu = m.Replace(a.Runes('Z', 'z'), "Z")
|
||||
offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute)
|
||||
tz = zulu.Or(offset)
|
||||
tzTok = tok.Str("Z07:00", tz)
|
||||
tzTok = tok.Str("Z07:00", zulu.Or(offset))
|
||||
|
||||
// The full date/time parse format, based on the above definitions.
|
||||
// The first token denotes the type of date/time value.
|
||||
// The rest of the tokens contain layout fragments for time.Parse().
|
||||
offsetDateTime = tok.Str(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok))
|
||||
localDateTime = tok.Str(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok))
|
||||
localDate = tok.Str(ast.TypeLocalDate, dateTok)
|
||||
localTime = tok.Str(ast.TypeLocalTime, c.Seq(timeTok, microTok))
|
||||
// The token denotes the type of date/time value.
|
||||
// The contained tokens contain layout fragments for time.Parse().
|
||||
|
||||
offsetDateTime = tok.Group(ast.TypeOffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok))
|
||||
localDateTime = tok.Group(ast.TypeLocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok))
|
||||
localDate = tok.Group(ast.TypeLocalDate, dateTok)
|
||||
localTime = tok.Group(ast.TypeLocalTime, c.Seq(timeTok, microTok))
|
||||
datetime = c.Any(offsetDateTime, localDateTime, localDate, localTime)
|
||||
)
|
||||
|
||||
|
@ -86,32 +91,17 @@ func (t *parser) parseDateTime(p *parse.API) (*ast.Value, bool) {
|
|||
p.Expected("a date and/or time")
|
||||
return nil, false
|
||||
}
|
||||
tokens := p.Result().Tokens()
|
||||
valueType := getDateTimeValueType(&tokens)
|
||||
input, value, err := getDateTimeValue(&tokens)
|
||||
if err == nil {
|
||||
return ast.NewValue(valueType, value), true
|
||||
token := p.Result().Token(0)
|
||||
|
||||
layout := ""
|
||||
for _, l := range token.Value.([]*tokenize.Token) {
|
||||
layout += l.Type.(string)
|
||||
}
|
||||
p.Error("invalid date/time value %s: %s", input, err)
|
||||
value, err := time.Parse(layout, string(token.Runes))
|
||||
if err != nil {
|
||||
p.Error("invalid date/time value %s: %s", string(token.Runes), err)
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// The first token is a token that wraps the complete date/time input.
|
||||
// Its type denotes the type of date/time value that it wraps.
|
||||
func getDateTimeValueType(tokens *[]*tokenize.Token) ast.ValueType {
|
||||
return (*tokens)[0].Type.(ast.ValueType)
|
||||
}
|
||||
|
||||
// The rest of the tokens contain fragments that can be used with
|
||||
// time.Parse() to parse the provided date/time input. Here, these fragments
|
||||
// are combined into a layout string, which is then used to parse
|
||||
// the input string.
|
||||
func getDateTimeValue(tokens *[]*tokenize.Token) (string, time.Time, error) {
|
||||
layout := ""
|
||||
for _, l := range (*tokens)[1:] {
|
||||
layout += l.Type.(string)
|
||||
}
|
||||
input := string((*tokens)[0].Runes)
|
||||
value, err := time.Parse(layout, input)
|
||||
return input, value, err
|
||||
return ast.NewValue(token.Type.(ast.ValueType), value), true
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ func TestStartNumber(t *testing.T) {
|
|||
func TestInteger(t *testing.T) {
|
||||
for _, test := range []parseTest{
|
||||
// Decimal
|
||||
|
||||
{`x=0`, `{"x": 0}`, ``},
|
||||
{`x=+0`, `{"x": 0}`, ``},
|
||||
{`x=-0`, `{"x": 0}`, ``},
|
||||
|
@ -29,17 +30,12 @@ func TestInteger(t *testing.T) {
|
|||
{`x=5_349_221`, `{"x": 5349221}`, ``},
|
||||
{`x=1_2_3_4_5`, `{"x": 12345}`, ``},
|
||||
{`x=9_223_372_036_854_775_807`, `{"x": 9223372036854775807}`, ``},
|
||||
{`x=9_223_372_036_854_775_808`, `{}`,
|
||||
`Panic: Handler error: MakeInt64Token cannot handle input "9223372036854775808": ` +
|
||||
`strconv.ParseInt: parsing "9223372036854775808": value out of range ` +
|
||||
`(only use a type conversion token maker, when the input has been validated on beforehand)`},
|
||||
{`x=-9_223_372_036_854_775_808`, `{"x": -9223372036854775808}`, ``},
|
||||
{`x=9_223_372_036_854_775_808`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "9223372036854775808": value out of range)`},
|
||||
// TODO: use the same kind of handling for panics and for errors in both parsekit and TOML.
|
||||
{`x=-9_223_372_036_854_775_809`, `{}`,
|
||||
`Panic: Handler error: MakeInt64Token cannot handle input "-9223372036854775809": ` +
|
||||
`strconv.ParseInt: parsing "-9223372036854775809": value out of range ` +
|
||||
`(only use a type conversion token maker, when the input has been validated on beforehand)`},
|
||||
{`x=-9_223_372_036_854_775_809`, `{}`, `Panic: int64 token invalid (strconv.ParseInt: parsing "-9223372036854775809": value out of range)`},
|
||||
|
||||
// Hexadecimal
|
||||
|
||||
{`x=0x0`, `{"x": 0}`, ``},
|
||||
{`x=0x1`, `{"x": 1}`, ``},
|
||||
{`x=0x01`, `{"x": 1}`, ``},
|
||||
|
@ -50,7 +46,9 @@ func TestInteger(t *testing.T) {
|
|||
{`x=0xgood_beef`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`},
|
||||
{`x=0x7FFFFFFFFFFFFFFF`, `{"x": 9223372036854775807}`, ``},
|
||||
{`x=0x8000000000000000`, `{}`, `invalid integer value 0x8000000000000000: strconv.ParseInt: parsing "8000000000000000": value out of range at line 1, column 21`},
|
||||
|
||||
// Octal
|
||||
|
||||
{`x=0o0`, `{"x": 0}`, ``},
|
||||
{`x=0o1`, `{"x": 1}`, ``},
|
||||
{`x=0o01`, `{"x": 1}`, ``},
|
||||
|
@ -60,7 +58,9 @@ func TestInteger(t *testing.T) {
|
|||
{`x=0o9`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`},
|
||||
{`x=0o777777777777777777777`, `{"x": 9223372036854775807}`, ``},
|
||||
{`x=0o1000000000000000000000`, `{}`, `invalid integer value 0o1000000000000000000000: strconv.ParseInt: parsing "1000000000000000000000": value out of range at line 1, column 27`},
|
||||
|
||||
// Binary
|
||||
|
||||
{`x=0b0`, `{"x": 0}`, ``},
|
||||
{`x=0b1`, `{"x": 1}`, ``},
|
||||
{`x=0b01`, `{"x": 1}`, ``},
|
||||
|
|
|
@ -13,12 +13,15 @@ import (
|
|||
var (
|
||||
// Multi-line basic strings are surrounded by three quotation marks on each
|
||||
// side and allow newlines.
|
||||
|
||||
doubleQuote3 = a.Str(`"""`)
|
||||
|
||||
// Multi-line literal strings are surrounded by three single quotes on each side and allow newlines.
|
||||
|
||||
singleQuote3 = a.Str(`'''`)
|
||||
|
||||
// Control characters as defined by TOML (U+0000 to U+001F, U+007F)
|
||||
|
||||
controlCharacter = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
|
||||
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
|
@ -32,6 +35,7 @@ var (
|
|||
// \\ - backslash (U+005C)
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
|
||||
validEscapeChar = a.Runes('b', 't', 'n', 'f', 'r', '"', '\\')
|
||||
shortEscape = c.Seq(a.Backslash, validEscapeChar)
|
||||
shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
|
||||
|
@ -42,7 +46,8 @@ var (
|
|||
// "line ending backslash". When the last non-whitespace character on a line is
|
||||
// a \, it will be trimmed along with all whitespace (including newlines) up to
|
||||
// the next non-whitespace character or closing delimiter.
|
||||
lineEndingBackslash = c.Seq(a.Backslash, optionalBlanks, newline, optionalWhitespace)
|
||||
|
||||
lineEndingBackslash = c.Seq(a.Backslash, whitespace, newline, whitespaceInclNewlines.Optional())
|
||||
)
|
||||
|
||||
// There are four ways to express strings: basic, multi-line basic, literal and
|
||||
|
|
|
@ -7,17 +7,17 @@ import (
|
|||
|
||||
var (
|
||||
// Opener and closer for [table].
|
||||
tableOpen = c.Seq(optionalBlanks, a.SquareOpen, optionalBlanks)
|
||||
tableClose = c.Seq(optionalBlanks, a.SquareClose, optionalBlanks)
|
||||
tableOpen = c.Seq(whitespace, a.SquareOpen, whitespace)
|
||||
tableClose = c.Seq(whitespace, a.SquareClose, whitespace)
|
||||
|
||||
// Opener and closer for [[array.of.tables]].
|
||||
tableArrayOpen = c.Seq(optionalBlanks, a.SquareOpen, a.SquareOpen, optionalBlanks)
|
||||
tableArrayClose = c.Seq(optionalBlanks, a.SquareClose, a.SquareClose, optionalBlanks)
|
||||
tableArrayOpen = c.Seq(whitespace, a.SquareOpen, a.SquareOpen, whitespace)
|
||||
tableArrayClose = c.Seq(whitespace, a.SquareClose, a.SquareClose, whitespace)
|
||||
|
||||
// Opener, separator and closer for { inline: "tables" }.
|
||||
inlineTableOpen = c.Seq(optionalBlanks, a.CurlyOpen, optionalBlanks)
|
||||
inlineTableSeparator = c.Seq(optionalBlanks, a.Comma, optionalBlanks)
|
||||
inlineTableClose = c.Seq(optionalBlanks, a.CurlyClose, optionalBlanks)
|
||||
inlineTableOpen = c.Seq(whitespace, a.CurlyOpen, whitespace)
|
||||
inlineTableSeparator = c.Seq(whitespace, a.Comma, whitespace)
|
||||
inlineTableClose = c.Seq(whitespace, a.CurlyClose, whitespace)
|
||||
)
|
||||
|
||||
func (t *parser) startTable(p *parse.API) {
|
||||
|
|
|
@ -47,8 +47,8 @@ func BuildGrammar() tokenize.Handler {
|
|||
|
||||
g["tab"] = a.Tab
|
||||
g["space"] = a.Space
|
||||
g["wschar"] = g["tab"].Or(g["space"])
|
||||
g["ws"] = c.ZeroOrMore(g["wschar"])
|
||||
g["whitespaceChar"] = g["tab"].Or(g["space"])
|
||||
g["ws"] = c.ZeroOrMore(g["whitespaceChar"])
|
||||
g["newline"] = a.Newline
|
||||
g["ws-or-newline"] = g["ws"].Or(g["newline"])
|
||||
|
||||
|
@ -168,7 +168,7 @@ func BuildGrammar() tokenize.Handler {
|
|||
|
||||
g["array-open"] = a.SquareOpen
|
||||
g["array-close"] = a.SquareClose
|
||||
g["ws-comment-newline"] = c.ZeroOrMore(g["wschar"].Or(g["comment"].Optional().Then(g["newline"])))
|
||||
g["ws-comment-newline"] = c.ZeroOrMore(g["whitespaceChar"].Or(g["comment"].Optional().Then(g["newline"])))
|
||||
g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val"))
|
||||
g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional())
|
||||
g["array-sep"] = g["ws"].Then(a.Comma)
|
||||
|
|
|
@ -5,14 +5,18 @@ import (
|
|||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
"github.com/pkg/profile"
|
||||
)
|
||||
|
||||
func main() {
|
||||
toml := BuildGrammar()
|
||||
fmt.Printf("Reading TOML document from STDIN ...\n")
|
||||
t := profile.Start()
|
||||
result, err := toml.Match(os.Stdin)
|
||||
t.Stop()
|
||||
fmt.Printf("Completed reading document.\n")
|
||||
if err != nil {
|
||||
log.Fatalf("Error in parsing TOML: %s\n", err)
|
||||
|
@ -68,10 +72,10 @@ func BuildGrammar() tokenize.Handler {
|
|||
|
||||
R("tab", a.Tab)
|
||||
R("space", a.Space)
|
||||
R("wschar", G("tab").Or(G("space")))
|
||||
R("ws", c.ZeroOrMore(G("wschar")))
|
||||
R("whitespaceChar", G("tab").Or(G("space")))
|
||||
R("ws", c.ZeroOrMore(G("whitespaceChar")))
|
||||
R("newline", a.Newline)
|
||||
R("wschar-or-newline", G("wschar").Or(G("newline")))
|
||||
R("whitespaceChar-or-newline", G("whitespaceChar").Or(G("newline")))
|
||||
|
||||
// Comment
|
||||
|
||||
|
@ -96,7 +100,7 @@ func BuildGrammar() tokenize.Handler {
|
|||
R("ml-basic-string-delim", G("quotation-mark").Times(3))
|
||||
R("ml-basic-unescaped", c.Any(a.RuneRange(0x20, 0x5B), a.RuneRange(0x5D, 0x7E), G("non-ascii")))
|
||||
R("ml-basic-char", G("ml-basic-unescaped").Or(G("escaped")))
|
||||
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("wschar-or-newline"))))
|
||||
R("ml-basic-body-concat", c.Seq(G("escape"), G("ws"), G("newline"), c.ZeroOrMore(G("whitespaceChar-or-newline"))))
|
||||
R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat"))))
|
||||
R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim"))))
|
||||
R("ml-basic-string", c.Seq(
|
||||
|
@ -190,35 +194,59 @@ func BuildGrammar() tokenize.Handler {
|
|||
|
||||
// Date and time (as defined in RFC 3339)
|
||||
|
||||
R("date-full-year", G("digit").Times(4))
|
||||
R("date-year", G("digit").Times(4))
|
||||
R("date-month", G("digit").Times(2))
|
||||
R("date-mday", G("digit").Times(2))
|
||||
R("time-delim", a.Runes('T', 't', ' '))
|
||||
R("date", tok.Str("2006-01-02", c.Seq(G("date-year"), G("minus"), G("date-month"), G("minus"), G("date-mday"))))
|
||||
|
||||
R("time-delim", c.Any(
|
||||
tok.Str("T", a.Rune('T')),
|
||||
tok.Str("t", a.Rune('t')),
|
||||
tok.Str(" ", a.Rune(' '))))
|
||||
|
||||
R("time-hour", G("digit").Times(2))
|
||||
R("time-minute", G("digit").Times(2))
|
||||
R("time-second", G("digit").Times(2))
|
||||
R("time-sec-frac", G("decimal-point").Then(c.OneOrMore(G("digit"))))
|
||||
R("time-zulu", a.Runes('Z', 'z'))
|
||||
R("time", tok.Str("15:04:05", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"))))
|
||||
|
||||
R("time-sec-frac", tok.Str(".999999999", c.Seq(G("decimal-point"), c.MinMax(1, 9, a.Digit), m.Drop(c.ZeroOrMore(a.Digit)))))
|
||||
|
||||
R("time-zulu", m.Replace(a.Runes('Z', 'z'), "Z"))
|
||||
R("time-num-offset", c.Seq(G("plus").Or(G("minus")), G("time-hour"), G("colon"), G("time-minute")))
|
||||
R("time-offset", c.Any(G("time-zulu"), G("time-num-offset")))
|
||||
R("partial-time", c.Seq(G("time-hour"), G("colon"), G("time-minute"), G("colon"), G("time-second"), G("time-sec-frac").Optional()))
|
||||
R("full-time", c.Seq(G("partial-time"), G("time-offset")))
|
||||
R("full-date", c.Seq(G("date-full-year"), G("minus"), G("date-month"), G("minus"), G("date-mday")))
|
||||
R("time-offset", tok.Str("Z07:00", c.Any(G("time-zulu"), G("time-num-offset"))))
|
||||
|
||||
R("offset-date-time", c.Seq(G("full-date"), G("time-delim"), G("full-time")))
|
||||
R("local-date-time", c.Seq(G("full-date"), G("time-delim"), G("partial-time")))
|
||||
R("local-date", G("full-date"))
|
||||
R("local-time", G("partial-time"))
|
||||
R("offset-date-time", c.Seq(G("date"), G("time-delim"), G("time"), G("time-sec-frac").Optional(), G("time-offset")))
|
||||
R("local-date-time", c.Seq(G("date"), G("time-delim"), G("time")))
|
||||
R("local-date", G("date"))
|
||||
R("local-time", G("time"))
|
||||
|
||||
R("date-time", c.Any(G("offset-date-time"), G("local-date-time"), G("local-date"), G("local-time")))
|
||||
// makeDateTimeValue builds a time value from the tokens found in the
// tokenizer result. Each token's Type is used as a fragment of a
// time.Parse() layout and each token's Value as the matching fragment of
// input. The fragments are concatenated in token order, the tokens are
// cleared from the result, and the parsed time is returned.
// It panics when time.Parse() rejects the assembled input.
makeDateTimeValue := func(t *tokenize.API) interface{} {
|
||||
layout := ""
|
||||
input := ""
|
||||
// Note: the loop variable t shadows the *tokenize.API parameter t
// for the duration of the loop body.
for _, t := range t.Result().Tokens() {
|
||||
layout += t.Type.(string)
|
||||
input += t.Value.(string)
|
||||
}
|
||||
// Here t is the API parameter again; the fragment tokens are dropped
// from the result after they have been folded into layout and input.
t.Result().ClearTokens()
|
||||
value, err := time.Parse(layout, input)
|
||||
if err != nil {
|
||||
// Placeholder error handling: as the message itself says, a proper way
// to report date parse errors still has to be implemented.
panic(fmt.Sprintf("Ow, we must implement a way to report date parse errors: %s", err))
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
R("date-time", c.Any(
|
||||
tok.ByCallback("offset-date-time", G("offset-date-time"), makeDateTimeValue),
|
||||
tok.ByCallback("local-date-time", G("local-date-time"), makeDateTimeValue),
|
||||
tok.ByCallback("local-date", G("local-date"), makeDateTimeValue),
|
||||
tok.ByCallback("local-time", G("local-time"), makeDateTimeValue)))
|
||||
|
||||
// Inline Table
|
||||
|
||||
R("inline-table-open", a.CurlyOpen.Then(G("ws")))
|
||||
R("inline-table-close", G("ws").Then(a.CurlyClose))
|
||||
R("inline-table-sep", c.Seq(G("ws"), a.Comma, G("ws")))
|
||||
R("inline-table-keyval", tok.Group("inline-table-keyval", c.Seq(G("key"), G("keyval-sep"), G("val"))))
|
||||
R("inline-table-keyvals", c.Seq(G("inline-table-keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("inline-table-keyval")))))
|
||||
R("inline-table-keyvals", c.Seq(G("keyval"), c.ZeroOrMore(c.Seq(G("inline-table-sep"), G("keyval")))))
|
||||
|
||||
R("inline-table", tok.Group("inline-table", c.Seq(G("inline-table-open"), G("inline-table-keyvals"), G("inline-table-close"))))
|
||||
|
||||
|
@ -227,12 +255,14 @@ func BuildGrammar() tokenize.Handler {
|
|||
R("array-open", a.SquareOpen)
|
||||
R("array-close", a.SquareClose)
|
||||
R("array-sep", G("ws").Then(a.Comma))
|
||||
R("ws-comment-newline", c.ZeroOrMore(G("wschar").Or(G("comment").Optional().Then(G("newline")))))
|
||||
R("array-values", c.Any(
|
||||
c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep"), G("array-values")),
|
||||
c.Seq(G("ws-comment-newline"), G("val"), G("ws"), G("array-sep").Optional())))
|
||||
R("ws-comment-newline", c.ZeroOrMore(G("whitespaceChar").Or(G("comment").Optional().Then(G("newline")))))
|
||||
R("array-values", c.Seq(
|
||||
G("ws-comment-newline"),
|
||||
G("val"),
|
||||
c.ZeroOrMore(c.Seq(G("ws"), G("array-sep"), G("ws-comment-newline"), G("val"))),
|
||||
G("array-sep").Optional()))
|
||||
|
||||
R("inline-array", tok.Group("inline-array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))
|
||||
R("inline-array", tok.Group("array", c.Seq(G("array-open"), G("array-values").Optional(), G("ws-comment-newline"), G("array-close"))))
|
||||
|
||||
// Standard Table
|
||||
|
||||
|
@ -270,14 +300,9 @@ func BuildGrammar() tokenize.Handler {
|
|||
|
||||
// Overall Structure
|
||||
|
||||
R("expression", c.Any(
|
||||
c.Seq(G("ws"), G("table"), G("ws"), G("comment").Optional()),
|
||||
c.Seq(G("ws"), G("keyval"), G("ws"), G("comment").Optional()),
|
||||
c.Seq(G("ws"), G("comment").Optional()),
|
||||
))
|
||||
R("expression", c.Seq(G("ws"), c.Optional(G("table").Or(G("keyval"))), G("ws"), G("comment").Optional()))
|
||||
|
||||
//R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))
|
||||
R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression")))))
|
||||
R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile))
|
||||
|
||||
return G("toml")
|
||||
}
|
||||
|
|
|
@ -21,14 +21,14 @@ var (
|
|||
|
||||
// Whitespace, Newline
|
||||
|
||||
ws = c.ZeroOrMore(wschar)
|
||||
ws = c.ZeroOrMore(whitespaceChar)
|
||||
tab = a.Tab
|
||||
space = a.Space
|
||||
wschar = tab.Or(space)
|
||||
whitespaceChar = tab.Or(space)
|
||||
|
||||
newline = a.Newline
|
||||
|
||||
wsOrNewline = ws.Or(newline)
|
||||
whitespace = ws.Or(newline)
|
||||
|
||||
// Comment
|
||||
|
||||
|
@ -84,7 +84,7 @@ var (
|
|||
mlBasicBody = c.ZeroOrMore(c.Any(mlBasicChar, newline, mlBasicBodyConcat))
|
||||
mlBasicChar = mlBasicUnescaped.Or(escaped)
|
||||
mlBasicUnescaped = c.Any(printableASCII.Except(a.Backslash), nonASCII)
|
||||
mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(wsOrNewline))
|
||||
mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(whitespace))
|
||||
|
||||
// Literal String
|
||||
|
||||
|
@ -185,7 +185,7 @@ var (
|
|||
arrayvalues = c.Seq(arrayValue, c.ZeroOrMore(c.Seq(arraySep, arrayValue)), arraySep.Optional())
|
||||
arraySep = ws.Then(a.Comma)
|
||||
arrayValue = wsCommentNewline.Then(val)
|
||||
wsCommentNewline = c.ZeroOrMore(wschar.Or(comment.Optional().Then(newline)))
|
||||
wsCommentNewline = c.ZeroOrMore(whitespaceChar.Or(comment.Optional().Then(newline)))
|
||||
|
||||
// Table
|
||||
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,5 @@
|
|||
#!/bin/bash
|
||||
|
||||
go build
|
||||
ppfile=`cat /tmp/y | ./parse2 2>&1 | grep "cpu profiling enabled" | cut -d, -f2`
|
||||
go tool pprof -http 0.0.0.0:8888 ./parse2 $ppfile
|
|
@ -20,9 +20,9 @@ expression =/ ws table ws [ comment ]
|
|||
|
||||
;; Whitespace
|
||||
|
||||
ws = *wschar
|
||||
wschar = %x20 ; Space
|
||||
wschar =/ %x09 ; Horizontal tab
|
||||
ws = *whitespaceChar
|
||||
whitespaceChar = %x20 ; Space
|
||||
whitespaceChar =/ %x09 ; Horizontal tab
|
||||
|
||||
;; Newline
|
||||
|
||||
|
@ -197,7 +197,7 @@ array-values =/ ws-comment-newline val ws [ array-sep ]
|
|||
|
||||
array-sep = %x2C ; , Comma
|
||||
|
||||
ws-comment-newline = *( wschar / [ comment ] newline )
|
||||
ws-comment-newline = *( whitespaceChar / [ comment ] newline )
|
||||
|
||||
;; Table
|
||||
|
||||
|
|
50
parse2/x
50
parse2/x
|
@ -70,8 +70,6 @@ Two"""
|
|||
# The following strings are byte-for-byte equivalent:
|
||||
key1 = "The quick brown fox jumps over the lazy dog."
|
||||
|
||||
key1.1 = """The quick brown fox jumps over the lazy dog."""
|
||||
|
||||
key2 = """
|
||||
The quick brown \
|
||||
fox jumps over \
|
||||
|
@ -267,6 +265,48 @@ het.is.een.hex1 = 0x0
|
|||
het.is.een.hex2 = 0x1
|
||||
het.is.een.hex3 = 0xffffffff
|
||||
|
||||
go = [1,2,3,
|
||||
4,5,
|
||||
6,]
|
||||
no1 = []
|
||||
no2 = [ ]
|
||||
no3 = [
|
||||
]
|
||||
go1 = [1,2,3,]
|
||||
go2 = [
|
||||
1,
|
||||
2,
|
||||
3,]
|
||||
go3 = [ 1, #one
|
||||
2, #two
|
||||
3 #three
|
||||
]
|
||||
|
||||
go4 =[
|
||||
|
||||
|
||||
|
||||
1 ,
|
||||
|
||||
|
||||
|
||||
2,
|
||||
|
||||
|
||||
|
||||
|
||||
3,
|
||||
|
||||
|
||||
|
||||
# hi
|
||||
# hi
|
||||
# hi
|
||||
]
|
||||
|
||||
k="kaka"
|
||||
|
||||
[date.types]
|
||||
the.d1=2019-01-01
|
||||
the.d2=2019-01-01 12:12:12
|
||||
the.d3=2019-01-01T12:12:12
|
||||
the.d4=2019-01-01T12:12:12Z
|
||||
the.d5=2019-01-01 12:12:12Z
|
||||
the.d6=2019-01-01 12:12:12+03:45
|
||||
|
|
Loading…
Reference in New Issue