diff --git a/parse/parse.go b/parse/parse.go index 93f22c4..532de9c 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -57,11 +57,3 @@ func Run(input interface{}) (ast.Table, error) { err := parse.New(p.startDocument)(input) return p.doc.Root, err } - -// RunWithoutSanityChecks runs the TOML parser against the provided input data. -// The parsekit sanity checks are disabled during the parse run. -func RunWithoutSanityChecks(input interface{}) (ast.Table, error) { - p := newParser() - err := parse.NewWithoutSanityChecks(p.startDocument)(input) - return p.doc.Root, err -} diff --git a/parse/testfiles_test.go b/parse/testfiles_test.go index 3b6cec4..5f561c5 100644 --- a/parse/testfiles_test.go +++ b/parse/testfiles_test.go @@ -45,7 +45,7 @@ func Test_Valid(t *testing.T) { if err != nil { panic(fmt.Sprintf("Cannot open toml file for test (%v): %s", testCase, err)) } - tomlTable, err := parse.RunWithoutSanityChecks(input) + tomlTable, err := parse.Run(input) if err != nil { t.Errorf("[%s] parse failed unexpectedly: %s", name, err) fail++ diff --git a/parse2/grammar.1.bak b/parse2/grammar.1.bak deleted file mode 100644 index f5771b5..0000000 --- a/parse2/grammar.1.bak +++ /dev/null @@ -1,232 +0,0 @@ -package parse2 - -import ( - "fmt" - - "git.makaay.nl/mauricem/go-parsekit/tokenize" -) - -type Grammar map[string]tokenize.Handler - -func (g Grammar) Rule(name string, definition tokenize.Handler) { - if _, ok := g[name]; ok { - panic(fmt.Sprintf("Grammar rule %q already exists", name)) - } - g[name] = definition -} - -func (g Grammar) Get(name string) tokenize.Handler { - if h, ok := g[name]; ok { - return g[name] - } - return func(t *tokenize.API) bool { - return g[name](t) - } -} - -func BuildGrammar() tokenize.Handler { - - c, a, m, tok := tokenize.C, tokenize.A, tokenize.M, tokenize.T - - g := make(Grammar) - R := g.Rule - G := g.Get - - R("alpha", a.Letter) - R("digit", a.Digit) - g["minus"] = a.Minus - g["plus"] = a.Plus - g["underscore"] = a.Underscore - g["quotation-mark"] = a.DoubleQuote - g["apostrophe"] = a.SingleQuote - g["colon"] = a.Colon - g["escape"] = a.Backslash - g["hex-digit"] = a.HexDigit - - // Whitespace, Newline - - g["tab"] = a.Tab - g["space"] = a.Space - g["whitespaceChar"] = g["tab"].Or(g["space"]) - g["ws"] = c.ZeroOrMore(g["whitespaceChar"]) - g["newline"] = a.Newline - g["ws-or-newline"] = g["ws"].Or(g["newline"]) - - // Comment - - g["comment-start-symbol"] = a.Hash - g["printable-ascii"] = a.RuneRange(0x20, 0x7E) - g["non-ascii"] = a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)) - g["non-eol"] = c.Any(a.Rune(0x09), g["printable-ascii"], g["non-ascii"]) - g["comment"] = g["comment-start-symbol"].Then(c.ZeroOrMore(g["non-eol"])) - - // Basic String - - g["escape-seq-char"] = c.Any( - a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), - a.Rune('u').Then(g["hex-digit"].Times(4)), - a.Rune('U').Then(g["hex-digit"].Times(8))) - g["escaped"] = g["escape"].Then(g["escape-seq-char"]) - g["basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["quotation-mark"].Or(g["escape"])), g["non-ascii"]) - g["basic-char"] = g["escaped"].Or(g["basic-unescaped"]) - g["basic-string"] = c.Seq(g["quotation-mark"], c.ZeroOrMore(g["basic-char"]), g["quotation-mark"]) - - // Multiline Basic String - - g["ml-basic-string-delim"] = g["quotation-mark"].Times(3) - g["ml-basic-unescaped"] = c.Any(g["printable-ascii"].Except(g["backslash"]), g["non-ascii"]) - g["ml-basic-char"] = g["ml-basic-unescaped"].Or(g["escaped"]) - g["ml-basic-body-concat"] = c.Seq(g["escape"], g["ws"], g["newline"], c.ZeroOrMore(g["ws-or-newline"])) - g["ml-basic-body"] = c.ZeroOrMore(c.Any(g["ml-basic-char"], g["newline"], g["ml-basic-body-concat"])) - g["ml-basic-strinct"] = c.Seq(g["ml-basic-string-delim"], g["ml-basic-body"], g["ml-basic-string-delim"]) - - // Literal String - - g["literal-char"] = c.Any(g["tab"], g["printable-ascii"].Except(g["apostrophe"]), g["non-ascii"]) - g["literal-string"] = c.Seq(g["apostrophe"], c.ZeroOrMore(g["literal-char"]), g["apostrophe"]) - - // Multiline Literal String - - g["ml-literal-string-delim"] = g["apostrophe"].Times(3) - g["ml-literal-char"] = c.Any(g["tab"], g["printable-ascii"], g["non-ascii"]) - g["ml-literal-body"] = c.ZeroOrMore(g["ml-literal-char"].Or(g["newline"])) - g["ml-literal-string"] = c.Seq(g["ml-literal-string-delim"], g["ml-literal-body"], g["ml-literal-string-delim"]) - - // String - - g["string"] = c.Any(g["ml-basic-string"], g["basic-string"], g["ml-literal-string"], g["literal-string"]) - - // Integer - - g["digit1-9"] = a.DigitNotZero - g["underscore-int-digit"] = c.Any(g["digit"], g["underscore"].Then(g["digit"])) - g["unsiged-dec-int"] = c.Any(g["digit"], g["digit1-9"].Then(c.OneOrMore(g["underscore-int-digit"]))) - g["dec-int"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["unsigned-dec-int"]) - - g["hex-prefix"] = a.Zero.Then(a.Rune('x')) - g["underscore-hex-digit"] = c.Any(g["hex-digit"], g["underscore"].Then(g["hex-digit"])) - g["hex-int"] = c.Seq(g["hex-prefix"], g["hex-digit"], c.ZeroOrMore(g["underscore-hex-digit"])) - - g["oct-prefix"] = a.Zero.Then(a.Rune('o')) - g["digit0-7"] = a.RuneRange('0', '7') - g["underscore-oct-digit"] = c.Any(g["digit0-7"], g["underscore"].Then(g["digit0-7"])) - g["oct-int"] = c.Seq(g["oct-prefix"], g["digit0-7"], c.ZeroOrMore(g["underscore-oct-digit"])) - - g["bin-prefix"] = a.Zero.Then(a.Rune('b')) - g["digit0-1"] = a.Runes('0', '1') - g["underscore-bin-digit"] = c.Any(g["digit0-1"], g["underscore"].Then(g["digit0-1"])) - g["bin-int"] = c.Seq(g["bin-prefix"], g["digit0-1"], c.ZeroOrMore(g["underscore-bin-digit"])) - - g["integer"] = c.Any(g["dec-int"], g["hex-int"], g["oct-int"], g["bin-int"]) - - // Float - - g["float-int-part"] = g["dec-int"] - g["exp"] = a.StrNoCase("e").Then(g["float-int-part"]) - g["decimal-point"] = a.Dot - g["zero-prefixable-int"] = c.Seq(g["digit"], c.ZeroOrMore(g["underscore-int-digit"])) - g["frac"] = c.Seq(g["decimal-point"], g["zero-prefixable-int"]) - g["standard-float"] = c.Seq(g["float-int-part"], g["exp"].Or(g["frac"].Then(c.Optional(g["exp"])))) - - g["inf"] = a.Str("inf") - g["nan"] = a.Str("nan") - g["special-float"] = c.Optional(g["plus"].Or(g["minus"])).Then(g["inf"].Or(g["nan"])) - - g["float"] = g["standard-float"].Or(g["special-float"]) - - // Boolean - - g["true"] = a.Str("true") - g["false"] = a.Str("false") - - g["boolean"] = g["true"].Or(g["false"]) - - // Date and time (as defined in RFC 3339) - - g["date-full-year"] = g["digit"].Times(4) - g["date-month"] = g["digit"].Times(2) - g["date-mday"] = g["digit"].Times(2) - g["time-delim"] = a.Runes('T', 't', ' ') - g["time-hour"] = g["digit"].Times(2) - g["time-minute"] = g["digit"].Times(2) - g["time-second"] = g["digit"].Times(2) - g["time-sec-frac"] = g["decimal-point"].Then(c.OneOrMore(g["digit"])) - g["time-num-offset"] = c.Seq(g["plus"].Or(g["minus"]), g["time-hour"], g["colon"], g["time-minute"]) - g["time-offset"] = c.Any(a.Runes('Z', 'z'), g["time-num-offset"]) - g["partial-time"] = c.Seq(g["time-hour"], g["colon"], g["time-minute"], g["colon"], g["time-second"], g["time-sec-frac"].Optional()) - g["full-time"] = c.Seq(g["partial-time"], g["time-offset"]) - g["full-date"] = c.Seq(g["date-full-year"], g["minus"], g["date-month"], g["minus"], g["date-mday"]) - - g["offset-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["full-time"]) - g["local-date-time"] = c.Seq(g["full-date"], g["time-delim"], g["parial-time"]) - g["local-date"] = g["full-date"] - g["local-time"] = g["parial-time"] - - g["date-time"] = c.Any(g["offset-date-time"], g["local-date-time"], g["local-date"], g["local-time"]) - - // Array - - g["array-open"] = a.SquareOpen - g["array-close"] = a.SquareClose - g["ws-comment-newline"] = c.ZeroOrMore(g["whitespaceChar"].Or(g["comment"].Optional().Then(g["newline"]))) - g["array-value"] = g["ws-comment-newline"].Then(g.Recursive("val")) - g["array-values"] = c.Seq(g["array-value"], c.ZeroOrMore(c.Seq(g["array-sep"], g["array-value"])), g["array-sep"].Optional()) - g["array-sep"] = g["ws"].Then(a.Comma) - - g["array"] = c.Seq(g["array-open"], g["array-values"].Optional(), g["ws-comment-newline"], g["array-close"]) - - // Table - - g["table"] = g["std-table"].Or(g["array-table"]) - - // Standard Table - - g["std-table"] = c.Seq(g["std-table-open"], g.Recursive("key"), g["std-table-close"]) - - g["std-table-open"] = a.SquareOpen.Then(g["ws"]) - g["std-table-close"] = g["ws"].Then(a.SquareClose) - - // Inline Table - - g["inline-table"] = c.Seq(g["inline-table-open"], g["inline-table-keyvals"], g["inline-table-close"]) - - g["inline-table-open"] = a.CurlyOpen.Then(g["ws"]) - g["inline-table-close"] = g["ws"].Then(a.CurlyClose) - g["inline-table-sep"] = c.Seq(g["ws"], a.Comma, g["ws"]) - g["inline-table-keyval"] = c.Seq(g.Recursive("key"), g.Recursive("keyval-sep"), g.Recursive("val")) - g["inline-table-keyvals"] = c.Seq(g["inline-table-keyval"], c.ZeroOrMore(c.Seq(g["inline-table-sep"], g["inline-table-keyval"]))) - - // Array Table - - g["array-table"] = c.Seq(g["array-table-open"], g.Recursive("key"), g["array-table-close"]) - - g["array-table-open"] = a.SquareOpen.Times(2).Then(g["ws"]) - g["array-table-close"] = g["ws"].Then(a.SquareClose.Times(2)) - - // Key-Value Pairs - - g["unquoted-key"] = c.OneOrMore(c.Any(g["alpha"], g["digit"], g["minus"], g["underscore"])) - g["quoted-key"] = g["basic-string"].Or(g["literal-string"]) - g["dot-sep"] = c.Seq(g["ws"], a.Dot, g["ws"]) - g["simple-key"] = g["quoted-key"].Or(g["unquoted-key"]) - g["dotted-key"] = c.Seq(g["simple-key"], c.OneOrMore(g["dot-sep"].Then(g["simple_key"]))) - - g["key-val-sep"] = c.Seq(g["ws"], a.Equal, g["ws"]) - - g["val"] = c.Any(g["string"], g["boolean"], g["array"], g["inlineTable"], g["dateTime"], g["float"], g["integer"]) - - g["key"] = g["simple-key"].Or(g["dotted-key"]) - - g["keyval"] = c.Seq(g["key"], g["keyval-sep"], g["val"]) - - // Overall Structure - - g["expression"] = c.Any( - c.Seq(g["ws"], g["comment"].Optional()), - c.Seq(g["ws"], g["keyval"], g["ws"], g["comment"].Optional()), - c.Seq(g["ws"], g["table"], g["ws"], g["comment"].Optional())) - - g["toml"] = c.Seq(g["expression"], c.ZeroOrMore(g["newline"].Then(g["expression"]))) - - return g["toml"] -} diff --git a/parse2/grammar.go b/parse2/grammar.go index 79377b1..6e392ff 100644 --- a/parse2/grammar.go +++ b/parse2/grammar.go @@ -20,7 +20,7 @@ func main() { toml := BuildGrammar() - var result *tokenize.API + var result *tokenize.Result var err error if runProfiler { @@ -37,7 +37,7 @@ func main() { if err != nil { log.Fatalf("Error in parsing TOML: %s\n", err) } else { - for i, t := range result.Tokens() { + for i, t := range result.Tokens { fmt.Printf("[%d] %v\n", i, t) } } @@ -119,7 +119,7 @@ func BuildGrammar() tokenize.Handler { R("ml-basic-body-content", c.Any(G("ml-basic-char"), G("newline"), m.Drop(G("ml-basic-body-concat")))) R("ml-basic-body", c.ZeroOrMore(G("ml-basic-body-content").Except(G("ml-basic-string-delim")))) R("ml-basic-string", c.Seq( - m.Drop(G("ml-basic-string-delim")), + m.Drop(G("ml-basic-string-delim").Then(c.Optional(G("newline")))), m.Drop(c.Optional(G("newline"))), G("ml-basic-body"), m.Drop(G("ml-basic-string-delim")))) @@ -139,7 +139,7 @@ func BuildGrammar() tokenize.Handler { R("ml-literal-body-content", G("ml-literal-char").Or(G("newline"))) R("ml-literal-body", c.ZeroOrMore(G("ml-literal-body-content").Except(G("ml-literal-string-delim")))) R("ml-literal-string", c.Seq( - m.Drop(G("ml-literal-string-delim")), + m.Drop(G("ml-literal-string-delim").Then(c.Optional(G("newline")))), G("ml-literal-body"), m.Drop(G("ml-literal-string-delim")))) @@ -328,10 +328,10 @@ func BuildGrammar() tokenize.Handler { // Overall Structure R("expression", c.Seq( - c.FlushInput(G("ws")), + c.FlushInput(m.Drop(G("ws"))), c.FlushInput(c.Optional(G("table").Or(G("keyval")))), - c.FlushInput(G("ws")), - c.FlushInput(G("comment").Optional()))) + c.FlushInput(m.Drop(G("ws"))), + c.FlushInput(m.Drop(c.Optional(G("comment")))))) R("toml", c.Seq(G("expression"), c.ZeroOrMore(G("newline").Then(G("expression"))), a.EndOfFile)) diff --git a/parse2/grammar.go.bak b/parse2/grammar.go.bak deleted file mode 100644 index 64a8894..0000000 --- a/parse2/grammar.go.bak +++ /dev/null @@ -1,221 +0,0 @@ -package parse2 - -import "git.makaay.nl/mauricem/go-parsekit/tokenize" - -var ( - c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T - - // Overall Structure - - toml = c.Seq(expression, c.ZeroOrMore(newline.Then(expression))) - - expression = c.Any( - c.Seq(ws, comment.Optional()), - c.Seq(ws, keyval, ws, comment.Optional()), - c.Seq(ws, table, ws, comment.Optional())) - - // ABNF definitions - - alpha = a.Letter - digit = a.Digit - - // Whitespace, Newline - - ws = c.ZeroOrMore(whitespaceChar) - tab = a.Tab - space = a.Space - whitespaceChar = tab.Or(space) - - newline = a.Newline - - whitespace = ws.Or(newline) - - // Comment - - comment = commentStartSymbol.Then(c.ZeroOrMore(nonEOL)) - - commentStartSymbol = a.Hash - nonEOL = c.Any(a.Rune(0x09), printableASCII, nonASCII) - printableASCII = a.RuneRange(0x20, 0x7E) - nonASCII = a.RuneRange(0x80, 0xD7FF).Or(a.RuneRange(0xE000, 0x10FFFF)) - - // Key-Value Pairs - - keyval = c.Seq(key, keyvalSep, val) - - key = simpleKey.Or(dottedKey) - simpleKey = quotedKey.Or(unquotedKey) - - unquotedKey = c.OneOrMore(c.Any(alpha, digit, minus, underscore)) - quotedKey = basicString.Or(literalString) - dottedKey = c.Seq(simpleKey, c.OneOrMore(dotSep.Then(simpleKey))) - - dotSep = c.Seq(ws, a.Dot, ws) - keyvalSep = c.Seq(ws, a.Equal, ws) - - val = c.Any(string, boolean, array, inlineTable, dateTime, float, integer) - - // String - - string = c.Any(mlBasicString, basicString, mlLiteralString, literalString) - - // Basic String - - basicString = c.Seq(quotationMark, c.ZeroOrMore(basicChar), quotationMark) - - quotationMark = a.DoubleQuote - - basicChar = escaped.Or(basicUnescaped) - basicUnescaped = c.Any(printableASCII.Except(quotationMark.Or(escape)), nonASCII) - escaped = escape.Then(escapeSeqChar) - - escape = a.Backslash - escapeSeqChar = c.Any( - a.Runes('"', '\\', 'b', 'f', 'n', 'r', 't'), - a.Rune('u').Then(a.HexDigit.Times(4)), - a.Rune('U').Then(a.HexDigit.Times(8))) - - // Multiline Basic String - - mlBasicString = c.Seq(mlBasicStringDelim, mlBasicBody, mlBasicStringDelim) - - mlBasicStringDelim = quotationMark.Times(3) - - mlBasicBody = c.ZeroOrMore(c.Any(mlBasicChar, newline, mlBasicBodyConcat)) - mlBasicChar = mlBasicUnescaped.Or(escaped) - mlBasicUnescaped = c.Any(printableASCII.Except(a.Backslash), nonASCII) - mlBasicBodyConcat = c.Seq(escape, ws, newline, c.ZeroOrMore(whitespace)) - - // Literal String - - literalString = c.Seq(apostrophe, c.ZeroOrMore(literalChar), apostrophe) - - apostrophe = a.SingleQuote - - literalChar = c.Any(a.Tab, printableASCII.Except(apostrophe), nonASCII) - - // Multiline Literal String - - mlLiteralString = c.Seq(mlLiteralStringDelim, mlLiteralBody, mlLiteralStringDelim) - - mlLiteralStringDelim = apostrophe.Times(3) - - mlLiteralBody = c.ZeroOrMore(mlLiteralChar.Or(newline)) - mlLiteralChar = c.Any(a.Tab, printableASCII, nonASCII) - - // Integer - - integer = c.Any(decInt, hexInt, octInt, binInt) - - minus = a.Minus - plus = a.Plus - underscore = a.Underscore - - decInt = c.Optional(plus.Or(minus)).Then(unsignedDecInt) - unsignedDecInt = c.Any(digit, digit1to9.Then(c.OneOrMore(intDigitOrUnderscoreIntDigit))) - digit1to9 = a.DigitNotZero - intDigitOrUnderscoreIntDigit = c.Any(digit, underscore.Then(digit)) - - hexInt = c.Seq(hexPrefix, hexDigit, c.ZeroOrMore(hexDigitOrUnderscoreHexDigit)) - hexPrefix = a.Zero.Then(a.Rune('x')) - hexDigit = a.HexDigit - hexDigitOrUnderscoreHexDigit = c.Any(hexDigit, underscore.Then(hexDigit)) - - octInt = c.Seq(octPrefix, digit0to7, c.ZeroOrMore(octDigitOrUnderscoreOctDigit)) - octPrefix = a.Zero.Then(a.Rune('o')) - digit0to7 = a.RuneRange('0', '7') - octDigitOrUnderscoreOctDigit = c.Any(digit0to7, underscore.Then(digit0to7)) - - binInt = c.Seq(binPrefix, digit0to1, c.ZeroOrMore(binDigitOrUnderscoreBinDigit)) - binPrefix = a.Zero.Then(a.Rune('b')) - digit0to1 = a.Runes('0', '1') - binDigitOrUnderscoreBinDigit = c.Any(digit0to1, underscore.Then(digit0to1)) - - // Float - - float = standardFloat.Or(specialFloat) - - standardFloat = c.Seq(floatIntPart, exp.Or(frac.Then(c.Optional(exp)))) - floatIntPart = decInt - exp = a.StrNoCase("e").Then(floatIntPart) - frac = c.Seq(decimalPoint, zeroPrefixableInt) - decimalPoint = a.Dot - zeroPrefixableInt = c.Seq(digit, c.ZeroOrMore(intDigitOrUnderscoreIntDigit)) - - specialFloat = c.Optional(plus.Or(minus)).Then(inf.Or(nan)) - inf = a.Str("inf") - nan = a.Str("nan") - - // Boolean - - boolean = boolTrue.Or(boolFalse) - - boolTrue = a.Str("true") - boolFalse = a.Str("false") - - // Date and time (as defined in RFC 3339) - - dateTime = c.Any(offsetDateTime, localDateTime, localDate, localTime) - - offsetDateTime = c.Seq(fullDate, timeDelim, fullTime) - localDateTime = c.Seq(fullDate, timeDelim, partialTime) - localDate = fullDate - localTime = partialTime - - dateFullYear = digit.Times(4) - dateMonth = digit.Times(2) - dateMday = digit.Times(2) - timeDelim = a.Runes('T', 't', ' ') - timeHour = digit.Times(2) - timeMinute = digit.Times(2) - timeSecond = digit.Times(2) - timeSecfrac = a.Dot.Then(c.OneOrMore(digit)) - timeNumOffset = c.Seq(plus.Or(minus), timeHour, a.Colon, timeMinute) - timeOffset = c.Any(a.Runes('Z', 'z'), timeNumOffset) - partialTime = c.Seq(timeHour, a.Colon, timeMinute, a.Colon, timeSecond, timeSecfrac.Optional()) - fullTime = c.Seq(partialTime, timeOffset) - fullDate = c.Seq(dateFullYear, minus, dateMonth, minus, dateMday) - - // Array - - array = c.Seq(arrayOpen, arrayvalues.Optional(), wsCommentNewline, arrayClose) - - arrayOpen = a.SquareOpen - arrayClose = a.SquareClose - arrayvalues = c.Seq(arrayValue, c.ZeroOrMore(c.Seq(arraySep, arrayValue)), arraySep.Optional()) - arraySep = ws.Then(a.Comma) - arrayValue = wsCommentNewline.Then(val) - wsCommentNewline = c.ZeroOrMore(whitespaceChar.Or(comment.Optional().Then(newline))) - - // Table - - table = stdTable.Or(arrayTable) - - // Standard Table - - stdTable = c.Seq(stdTableOpen, key, stdTableClose) - - stdTableOpen = a.SquareOpen.Then(ws) - stdTableClose = ws.Then(a.SquareClose) - - // Inline Table - - inlineTable = c.Seq(inlineTableOpen, inlineTableKeyvals, inlineTableClose) - - inlineTableOpen = a.CurlyOpen.Then(ws) - inlineTableClose = ws.Then(a.CurlyClose) - inlineTableKeyvals = c.Seq(inlineTableKeyval, c.ZeroOrMore(c.Seq(inlineTableSep, inlineTableKeyval))) - inlineTableKeyval = c.Seq(key, keyvalSep, val) - inlineTableSep = c.Seq(ws, a.Comma, ws) - - // Array Table - - arrayTable = c.Seq(arrayTableOpen, key, arrayTableClose) - - arrayTableOpen = a.SquareOpen.Times(2).Then(ws) - arrayTableClose = ws.Then(a.SquareClose.Times(2)) -) - -func init() { - -} diff --git a/parse2/parse2 b/parse2/parse2 new file mode 100755 index 0000000..193eadd Binary files /dev/null and b/parse2/parse2 differ