Brought the TOML code up-to-speed with the latest version of parsekit.

This commit is contained in:
Maurice Makaay 2019-06-17 13:59:59 +00:00
parent 269bd9ed36
commit b49715652e
10 changed files with 333 additions and 336 deletions

View File

@ -1,20 +1,17 @@
package parser
import (
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/parse"
)
// A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment.
var comment = c.Seq(
m.Drop(c.OneOrMore(a.Hash)),
m.Trim(c.ZeroOrMore(c.Not(a.EndOfLine)), " \t"),
m.Drop(a.EndOfLine))
// comment matches a '#' followed by every character up to the end of the
// line. The end of line itself is matched, but dropped from the result.
var comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)), m.Drop(a.EndOfLine))
func startComment(p *parsekit.ParseAPI) {
p.Expects("comment")
if p.On(comment).Accept() {
p.EmitLiteral(ItemComment)
p.RouteReturn()
// startComment tries to accept a comment at the current input position.
// On a match, a cComment command carrying the matched text is emitted.
// Otherwise, the parse API is told that a comment was expected.
func (t *parser) startComment(p *parse.API) {
	if !p.Accept(comment) {
		p.Expected("comment")
		return
	}
	t.emitCommand(cComment, p.Result().String())
}

View File

@ -1,22 +1,24 @@
package parser_test
package parser
import (
"testing"
)
func TestComments(t *testing.T) {
runStatesTs(t, []statesT{
{"empty comment at end of file", "#", "#()", ""},
{"empty comment at end of windows line", "#\r\n", "#()", ""},
{"empty comment at end of unix line", "#\n", "#()", ""},
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
{"basic comment", "#chicken", "#(chicken)", ""},
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r\n", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
})
// TestComment2 runs the startComment handler against a table of inputs and
// checks the emitted commands and/or the reported error for each of them.
func TestComment2(t *testing.T) {
	cases := []parseTest{
		{input: ``, expected: []string{`Error: unexpected end of file (expected comment) at start of file`}},
		{input: `#`, expected: []string{`comment("#")`}},
		{input: `# `, expected: []string{`comment("# ")`}},
		{input: `# with data`, expected: []string{`comment("# with data")`}},
		{input: "# ending in EOL & EOF\r\n", expected: []string{`comment("# ending in EOL & EOF")`}},
		{input: `# \xxx/ \u can't escape/`, expected: []string{`comment("# \\xxx/ \\u can't escape/")`}},
		{
			input: "# \tlexe\r accepts embedded ca\r\riage \returns\r\n",
			expected: []string{
				`comment("# \tlexe\r accepts embedded ca\r\riage \returns")`,
			},
		},
		{
			input: "# with data and newline\ncode continues here",
			expected: []string{
				`comment("# with data and newline")`,
				`Error: unexpected input (expected end of file) at line 2, column 1`,
			},
		},
	}
	for _, tc := range cases {
		// Every case gets a fresh parser, so emitted commands do not leak
		// from one case into the next.
		h := new(parser)
		testParseHandler(t, h, h.startComment, tc)
	}
}

1
go.sum
View File

@ -1,3 +1,2 @@
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939 h1:cMBHhfSJR2BZgVN7NmP+c2agNlXDef4Iz6+XQp5AqdU=
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU=
github.com/mmakaay/toml v0.3.1 h1:2uKRPvA/smKM8YuYGxWnW4KximMkWOMfunJOXgM5Zos=

View File

@ -1,12 +1,10 @@
package parser_test
package parser
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
toml "git.makaay.nl/mauricem/go-toml"
"git.makaay.nl/mauricem/go-parsekit/parse"
)
type statesT struct {
@ -16,75 +14,35 @@ type statesT struct {
err string
}
func runStatesTs(t *testing.T, tests []statesT) {
for _, c := range tests {
runStatesT(t, c)
}
// parseTest describes a single test case for testParseHandler.
type parseTest struct {
// input is handed as-is to the parse function under test.
input interface{}
// expected holds, in order, the string form of every command that the
// parser must emit. When an error is expected, its message follows the
// commands as a final "Error: ..." entry.
expected []string
}
// ToArray returns Parser items as an array.
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func parseItemsToArray(p *parsekit.ParseRun) ([]parsekit.Item, *parsekit.Error) {
var items []parsekit.Item
for {
item, err, more := p.Next()
if !more {
return items, err
}
items = append(items, item)
func testParseHandler(t *testing.T, p *parser, handler parse.Handler, test parseTest) {
err := parse.New(handler)(test.input)
results := []string{}
for _, cmd := range p.commands {
results = append(results, cmd.String())
}
if err != nil {
results = append(results, fmt.Sprintf("Error: %s", err))
}
}
func runStatesT(t *testing.T, c statesT) {
p := toml.Parse(c.in)
l, err := parseItemsToArray(p)
if err == nil && c.err != "" {
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
}
if err != nil && c.err == "" {
t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
}
if err != nil && c.err != "" && err.Error() != c.err {
t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
}
switch expected := c.out.(type) {
case []string:
if len(expected) != len(l) {
t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
for i, e := range test.expected {
if i > len(results)-1 {
t.Errorf("No result at index %d, expected: %s", i, e)
continue
}
for i, e := range expected {
v := parserItemToString(l[i])
if v != e {
t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
}
r := results[i]
if e != r {
t.Errorf("Unexpected result at index %d:\nexpected: %s\nactual: %s\n", i, e, r)
}
case string:
a := make([]string, len(l))
for _, v := range l {
a = append(a, parserItemToString(v))
}
actual := strings.Join(a, "")
if actual != expected {
t.Errorf("[%s] Unexpected parser output:\nexpected: %q\nactual: %q\n", c.name, expected, actual)
}
if len(results) > len(test.expected) {
t.Errorf("Got more results than expected, surplus result(s):\n")
for i := len(test.expected); i < len(results); i++ {
t.Errorf("[%d] %s", i, results[i])
}
}
}
// parserItemToString returns a string representation of the parsekit.Item.
func parserItemToString(i parsekit.Item) string {
switch i.Type {
case toml.ItemComment:
return fmt.Sprintf("#(%s)", i.Value)
case toml.ItemKey:
return fmt.Sprintf("[%s]", i.Value)
case toml.ItemString:
return fmt.Sprintf("STR(%s)", i.Value)
case toml.ItemKeyDot:
return "."
case toml.ItemAssignment:
return "="
default:
panic(fmt.Sprintf("parsekit bug: no string formatting exists for parsekit.Item id %d", i.Type))
}
}

View File

@ -1,15 +1,20 @@
package parser
import "git.makaay.nl/mauricem/go-parsekit"
import (
"git.makaay.nl/mauricem/go-parsekit/parse"
)
// The primary building block of a TOML document is the key/value pair.
var (
dropWhitespace = m.Drop(a.Whitespace.Optional())
dropBlanks = m.Drop(a.Blanks.Optional())
// Keys are on the left of the equals sign and values are on the right.
// Whitespace is ignored around key names and values. The key, equals
// Blank is ignored around key names and values. The key, equals
// sign, and value must be on the same line (though some values can be
// broken over multiple lines).
keyAssignment = c.Seq(c.Opt(a.Whitespace), a.Equal, c.Opt(a.Whitespace))
keyAssignment = c.Seq(dropBlanks, a.Equal, dropBlanks)
// A key may be either bare, quoted or dotted. Bare keys may only
// contain ASCII letters, ASCII digits, underscores, and dashes
@ -26,62 +31,68 @@ var (
startOfKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together. Whitespace
// around dot-separated parts is ignored, however, best practice is to
// not use any extraneous whitespace.
keySeparatorDot = c.Seq(c.Opt(a.Whitespace), a.Dot, c.Opt(a.Whitespace))
// This allows for grouping similar properties together. Blanks
// around dot-separated parts are ignored, however, best practice is to
// not use any extraneous blanks.
keySeparatorDot = c.Seq(dropBlanks, a.Dot, dropBlanks)
)
func startKeyValuePair(p *parsekit.ParseAPI) {
switch {
case p.On(a.WhitespaceAndNewlines).Skip():
p.RouteRepeat()
case p.On(a.Hash).Stay():
p.RouteTo(startComment).ThenReturnHere()
case p.On(startOfKey).Stay():
p.RouteTo(startKey)
default:
p.ExpectEndOfFile()
// startKeyValuePair is the main loop for a sequence of key/value pairs and
// comments. Each round first accepts (and drops) optional whitespace and
// then looks at what follows:
//
//   - a hash: the comment handler is invoked;
//   - the start of a key: the key, assignment and value handlers are invoked;
//   - anything else: end of file is expected and the loop ends.
//
// The loop also ends as soon as the parse API reports that it was stopped
// or that it is in an error state.
func (t *parser) startKeyValuePair(p *parse.API) {
	for {
		p.Accept(dropWhitespace)

		if p.Peek(a.Hash) {
			p.Handle(t.startComment)
		} else if p.Peek(startOfKey) {
			p.Handle(t.startKey, t.startAssignment, t.startValue)
		} else {
			p.ExpectEndOfFile()
			return
		}

		if p.IsStoppedOrInError() {
			return
		}
	}
}
func startKey(p *parsekit.ParseAPI) {
p.Expects("a key name")
if p.On(bareKeyRune).Stay() {
p.RouteTo(startBareKey)
}
}
func startBareKey(p *parsekit.ParseAPI) {
p.Expects("a bare key name")
if p.On(bareKey).Accept() {
p.EmitLiteral(ItemKey)
p.RouteTo(endOfKeyOrDot)
}
}
func endOfKeyOrDot(p *parsekit.ParseAPI) {
if p.On(keySeparatorDot).Skip() {
p.Emit(ItemKeyDot, ".")
p.RouteTo(startKey)
func (t *parser) startKey(p *parse.API) {
if p.Peek(bareKeyRune) {
p.Handle(t.startBareKey)
} else {
p.RouteTo(startAssignment)
p.Expected("a key name")
}
}
func startAssignment(p *parsekit.ParseAPI) {
p.Expects("a value assignment")
if p.On(keyAssignment).Skip() {
p.Emit(ItemAssignment, "=")
p.RouteTo(startValue)
// startBareKey accepts a bare key, emits a cKey command holding the matched
// key name and then passes control to endOfKeyOrDot. When the input does
// not match a bare key, "a bare key name" is reported as expected.
func (t *parser) startBareKey(p *parse.API) {
	if !p.Accept(bareKey) {
		p.Expected("a bare key name")
		return
	}
	t.emitCommand(cKey, p.Result().String())
	p.Handle(t.endOfKeyOrDot)
}
// endOfKeyOrDot checks whether a key is followed by a dot separator.
// If so, a cNewKeyLvl command is emitted and the next key part is handled
// by startKey. If not, nothing is consumed and no error is raised.
func (t *parser) endOfKeyOrDot(p *parse.API) {
	if !p.Accept(keySeparatorDot) {
		return
	}
	t.emitCommand(cNewKeyLvl)
	p.Handle(t.startKey)
}
// startAssignment accepts the key assignment (an equals sign with optional
// surrounding blanks) and emits a cAssign command for it. When the input
// does not match, "a value assignment" is reported as expected.
func (t *parser) startAssignment(p *parse.API) {
	if !p.Accept(keyAssignment) {
		p.Expected("a value assignment")
		return
	}
	t.emitCommand(cAssign)
}
// Values must be of the following types: String, Integer, Float, Boolean,
// Datetime, Array, or Inline Table. Unspecified values are invalid.
func startValue(p *parsekit.ParseAPI) {
p.Expects("a value")
if p.On(c.Any(a.SingleQuote, a.DoubleQuote)).Stay() {
p.RouteTo(startString)
// startValue dispatches on the first character of a value. Only values that
// start with a single or double quote are recognized here; these are passed
// on to startString. For any other input, "a value" is reported as expected.
func (t *parser) startValue(p *parse.API) {
	quote := c.Any(a.SingleQuote, a.DoubleQuote)
	if !p.Peek(quote) {
		p.Expected("a value")
		return
	}
	p.Handle(t.startString)
}

View File

@ -1,46 +1,74 @@
package parser_test
package parser
import (
"testing"
)
import "testing"
func TestKeyWithoutAssignment(t *testing.T) {
err := "unexpected end of file (expected a value assignment)"
runStatesTs(t, []statesT{
{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"},
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
{"bare numbers", "0123456789", "[0123456789]", err},
{"bare underscore", "_", "[_]", err},
{"bare dash", "-", "[-]", err},
{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
})
// TestKey runs the startKey handler against a table of inputs.
func TestKey(t *testing.T) {
	cases := []parseTest{
		{input: "", expected: []string{`Error: unexpected end of file (expected a key name) at start of file`}},
		{input: "barekey", expected: []string{`key("barekey")`}},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startKey, tc)
	}
}
func TestDottedKey(t *testing.T) {
runStatesTs(t, []statesT{
{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`},
})
// TestBareKey runs the startBareKey handler against a table of inputs,
// covering plain bare keys as well as dotted keys built from bare keys.
func TestBareKey(t *testing.T) {
	cases := []parseTest{
		{input: "", expected: []string{`Error: unexpected end of file (expected a bare key name) at start of file`}},
		{input: "barekey", expected: []string{`key("barekey")`}},
		{input: "1234567", expected: []string{`key("1234567")`}},
		{input: "mix-12_34", expected: []string{`key("mix-12_34")`}},
		{input: "-hey_good_Lookin123-", expected: []string{`key("-hey_good_Lookin123-")`}},
		{input: "wrong!", expected: []string{`key("wrong")`, `Error: unexpected input (expected end of file) at line 1, column 6`}},
		{input: "key1.", expected: []string{`key("key1")`, `keydot()`, `Error: unexpected end of file (expected a key name) at line 1, column 6`}},
		{input: "key1.key2", expected: []string{`key("key1")`, `keydot()`, `key("key2")`}},
		{input: "key . with . spaces", expected: []string{`key("key")`, `keydot()`, `key("with")`, `keydot()`, `key("spaces")`}},
		{input: "key \t . \twithtabs\t . \tandspaces", expected: []string{`key("key")`, `keydot()`, `key("withtabs")`, `keydot()`, `key("andspaces")`}},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startBareKey, tc)
	}
}
func TestKeyWithAssignmentButNoValue(t *testing.T) {
err := "unexpected end of file (expected a value)"
runStatesTs(t, []statesT{
{"bare", "a=", "[a]=", err},
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
{"bare dotted", "a.b=", "[a].[b]=", err},
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
})
// TestAssignment runs the startAssignment handler against a table of
// inputs. Blanks around the equals sign are accepted, newlines are not.
func TestAssignment(t *testing.T) {
	cases := []parseTest{
		{input: "", expected: []string{`Error: unexpected end of file (expected a value assignment) at start of file`}},
		{input: "=", expected: []string{`assign()`}},
		{input: " \t = \t ", expected: []string{`assign()`}},
		{input: " \n = \n ", expected: []string{`Error: unexpected input (expected a value assignment) at start of file`}},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startAssignment, tc)
	}
}
func TestKeyWithValue(t *testing.T) {
runStatesTs(t, []statesT{
{"with string value",
" -key- = \"value\" # nice\r\n",
"[-key-]=STR(value)#(nice)", ""},
{"multiple string values",
"key = \"value1\"\nbare_key = \"value2\"\n# More coming up!\nbare-key = \"value3\"\n1234 = \"value4\"\n",
"[key]=STR(value1)[bare_key]=STR(value2)#(More coming up!)[bare-key]=STR(value3)[1234]=STR(value4)", ""},
})
// TestValue runs the startValue handler against a table of inputs.
func TestValue(t *testing.T) {
	cases := []parseTest{
		{input: ``, expected: []string{`Error: unexpected end of file (expected a value) at start of file`}},
		{input: `"basic string value"`, expected: []string{`string("basic string value")`}},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startValue, tc)
	}
}
// TestKeyValuePair runs the startKeyValuePair handler against a table of
// inputs: blank-only input, incomplete pairs, and multiple complete pairs
// that are optionally followed by comments.
func TestKeyValuePair(t *testing.T) {
	cases := []parseTest{
		{input: "", expected: []string{}},
		{input: " ", expected: []string{}},
		{input: " \t ", expected: []string{}},
		{input: " key ", expected: []string{`key("key")`, `Error: unexpected input (expected a value assignment) at line 1, column 5`}},
		{input: " key \t=", expected: []string{`key("key")`, `assign()`, `Error: unexpected end of file (expected a value) at line 1, column 8`}},
		{input: " key \t =\t \"The Value\" \r\n", expected: []string{`key("key")`, `assign()`, `string("The Value")`}},
		{
			input: "key1=\"value1\"key2=\"value2\"\r\nkey3=\"value3\"",
			expected: []string{
				`key("key1")`, `assign()`, `string("value1")`,
				`key("key2")`, `assign()`, `string("value2")`,
				`key("key3")`, `assign()`, `string("value3")`,
			},
		},
		{
			input: "with=\"comments\"# boring \nanother.cool =\"one\" \t # to the end\r\n",
			expected: []string{
				`key("with")`, `assign()`, `string("comments")`, `comment("# boring ")`,
				`key("another")`, `keydot()`, `key("cool")`, `assign()`, `string("one")`, `comment("# to the end")`,
			},
		},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startKeyValuePair, tc)
	}
}

61
toml.go
View File

@ -1,23 +1,52 @@
package parser
import "git.makaay.nl/mauricem/go-parsekit"
import (
"fmt"
"strings"
// Item types that are produced by this parser.
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Easy access to the parsekit.tokenize definitions.
var c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
type cmdType string
// Command types that are emitted by the parser.
const (
ItemComment parsekit.ItemType = iota // Comment string
ItemKey // Key of a key/value pair
ItemKeyDot // Dot for a dotted key
ItemAssignment // Value assignment coming up (=)
ItemString // A value of type string
cComment cmdType = "comment" // a # comment at the end of the line
cKey = "key" // set key name
cNewKeyLvl = "keydot" // new key stack level
cAssign = "assign" // assign a value
csetStrVal = "string" // set a string value
)
var (
c, a, m = parsekit.C, parsekit.A, parsekit.M
)
var parser = parsekit.NewParser(startKeyValuePair)
// Parse starts the parser for the provided input string.
func Parse(input string) *parsekit.ParseRun {
return parser.Parse(input)
// parser holds the state that is built up by the parse handlers.
type parser struct {
// commands collects the commands emitted through emitCommand, in the
// order in which they were emitted.
commands []cmd
// NOTE(review): keyStack is not referenced by any of the visible code;
// presumably it will track the nesting of dotted keys - confirm.
keyStack []string
}
// cmd is a single command emitted by the parser: a command type together
// with the arguments that belong to it.
type cmd struct {
command cmdType // the kind of command, e.g. cKey or cAssign
args []interface{} // the command arguments; may be empty
}
// String formats the command as "<command>(<arg>, <arg>, ...)", with every
// argument rendered through the %q verb. A command that has no arguments
// is formatted as "<command>()".
//
// The receiver is deliberately not named after its type: a receiver named
// cmd would shadow the cmd type inside the method body.
func (cm *cmd) String() string {
	quoted := make([]string, len(cm.args))
	for i, arg := range cm.args {
		quoted[i] = fmt.Sprintf("%q", arg)
	}
	return fmt.Sprintf("%s(%s)", cm.command, strings.Join(quoted, ", "))
}
// emitCommand appends a command of the given type, together with its
// arguments, to the list of commands collected by the parser.
//
// The receiver is named t, like for all other methods on *parser (p is used
// for the *parse.API argument in those methods). No local named c is used,
// since that would shadow the package-level c.
func (t *parser) emitCommand(command cmdType, args ...interface{}) {
	t.commands = append(t.commands, cmd{command: command, args: args})
}
// Parse starts the parser for the provided input.
// func Parse(input interface{}) []cmd {
// p := &parser{}
// parse.New(p.startKeyValuePair)(input)
// return p.commands
// }

View File

@ -1,40 +1,34 @@
package parser_test
import (
"testing"
// func TestEmptyInput(t *testing.T) {
// runStatesT(t, statesT{"empty string", "", "", ""})
// }
toml "git.makaay.nl/mauricem/go-toml"
)
// func TestFullIncludesLineAndRowPosition(t *testing.T) {
// p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
// _, err := parseItemsToArray(p)
// actual := err.Error()
// expected := "unexpected input (expected end of file) at line 6, column 3"
// if actual != expected {
// t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
// }
// }
func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""})
}
// func TestInvalidUTF8Data(t *testing.T) {
// runStatesTs(t, []statesT{
// {"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
// {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
// {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
// {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
// })
// }
func TestErrorFullIncludesLineAndRowPosition(t *testing.T) {
p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
_, err := parseItemsToArray(p)
actual := err.ErrorFull()
expected := "unexpected character '+' (expected end of file) after line 6, column 3"
if actual != expected {
t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
}
}
func TestInvalidUTF8Data(t *testing.T) {
runStatesTs(t, []statesT{
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
})
}
func TestWhiteSpaceAndNewlines(t *testing.T) {
runStatesTs(t, []statesT{
{"space", " ", "", ""},
{"tab", "\t", "", ""},
{"newline", "\n", "", ""},
{"all whitespace and newlines", " \t \t \r\n\n \n \t", "", ""},
{"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"},
})
}
// func TestWhiteSpaceAndNewlines(t *testing.T) {
// runStatesTs(t, []statesT{
// {"space", " ", "", ""},
// {"tab", "\t", "", ""},
// {"newline", "\n", "", ""},
// {"all blanks and newlines", " \t \t \r\n\n \n \t", "", ""},
// {"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"},
// })
// }

View File

@ -1,6 +1,10 @@
package parser
import "git.makaay.nl/mauricem/go-parsekit"
import (
"strings"
"git.makaay.nl/mauricem/go-parsekit/parse"
)
var (
// There are four ways to express strings: basic, multi-line basic,
@ -8,12 +12,12 @@ var (
// UTF-8 characters. * Multi-line basic strings are surrounded by three
// quotation marks on each side. * Basic strings are surrounded by
// quotation marks.
doubleQuote3 = c.Str(`"""`)
doubleQuote3 = a.Str(`"""`)
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
charThatMustBeEscaped = c.Any(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence.
//
@ -26,43 +30,21 @@ var (
// \\ - backslash (U+005C)
// \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX)
validEscapeChar = c.Any(c.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash)
validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash)
shortEscape = c.Seq(a.Backslash, validEscapeChar)
shortUTF8Escape = c.Seq(a.Backslash, c.Rune('u'), c.Rep(4, a.HexDigit))
longUTF8Escape = c.Seq(a.Backslash, c.Rune('U'), c.Rep(8, a.HexDigit))
shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8))
validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)
)
func startString(p *parsekit.ParseAPI) {
p.Expects("a string value")
func (t *parser) startString(p *parse.API) {
switch {
case p.On(doubleQuote3).Stay():
p.RouteTo(startMultiLineBasicString)
case p.On(a.DoubleQuote).Stay():
p.RouteTo(startBasicString)
}
}
func startBasicString(p *parsekit.ParseAPI) {
p.Expects("a basic string")
if p.On(a.DoubleQuote).Skip() {
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
}
func parseBasicString(p *parsekit.ParseAPI) {
p.Expects("string contents")
switch {
case p.On(charThatMustBeEscaped).Stay():
p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch)
case p.On(validEscape).Accept():
p.RouteRepeat()
case p.On(a.Backslash).Stay():
p.RouteReturn()
case p.On(a.DoubleQuote).Stay():
p.RouteReturn()
case p.On(a.AnyRune).Accept():
p.RouteRepeat()
case p.Peek(doubleQuote3):
p.Handle(t.startMultiLineBasicString)
case p.Peek(a.DoubleQuote):
p.Handle(t.startBasicString)
default:
p.Expected("a string value")
}
}
@ -71,20 +53,41 @@ func parseBasicString(p *parsekit.ParseAPI) {
// * No additional \escape sequences are allowed. What the spec say about this:
// "All other escape sequences [..] are reserved and, if used, TOML should
// produce an error.""
func basicStringSpecifics(p *parsekit.ParseAPI) {
p.Expects("string contents")
switch {
case p.On(a.DoubleQuote).Skip():
p.EmitInterpreted(ItemString)
p.RouteTo(startKeyValuePair)
case p.On(a.Backslash).Stay():
p.EmitError("invalid escape sequence")
// startBasicString parses a basic string: an opening double quote, the
// string contents and a closing double quote. On success, a csetStrVal
// command is emitted that holds the string contents, with escape sequences
// replaced by the characters that they represent.
//
// The order of the cases in the switch is significant, since every case
// looks at the same input position and the first one that matches wins.
func (t *parser) startBasicString(p *parse.API) {
if !p.Accept(a.DoubleQuote) {
p.Expected("a basic string")
return
}
// sb accumulates the string contents across loop iterations.
sb := &strings.Builder{}
for {
switch {
// Control characters are not allowed unescaped inside a basic string.
case p.Peek(charThatMustBeEscaped):
p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0))
return
// One or more valid escape sequences: store their interpreted value.
case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))):
sb.WriteString(p.Result().Value(0).(string))
// A backslash that was not accepted by the previous case does not start
// a valid escape sequence.
case p.Peek(a.Backslash):
p.Error("invalid escape sequence")
return
// The closing quote ends the string. It is not part of the value.
case p.Accept(m.Drop(a.DoubleQuote)):
t.emitCommand(csetStrVal, sb.String())
return
// Any other valid rune is copied to the string contents as-is.
case p.Accept(a.ValidRune):
sb.WriteString(p.Result().String())
case p.Peek(a.InvalidRune):
p.Error("invalid UTF8 rune")
return
// None of the above matched; the tests show this for input that ends
// before the closing quote.
default:
p.Expected("end of string")
return
}
}
}
func startMultiLineBasicString(p *parsekit.ParseAPI) {
p.Expects("a multi-line basic string")
if p.On(doubleQuote3).Skip() {
p.EmitError("not yet implemented")
// startMultiLineBasicString accepts the three double quotes that open a
// multi-line basic string. Parsing of the string itself does not exist yet:
// after the opening quotes, a "not yet implemented" error is raised. When
// the opening quotes are missing, "a multi-line basic string" is reported
// as expected.
func (t *parser) startMultiLineBasicString(p *parse.API) {
	if !p.Accept(doubleQuote3) {
		p.Expected("a multi-line basic string")
		return
	}
	p.Error("not yet implemented")
}

View File

@ -1,73 +1,49 @@
package parser_test
package parser
import (
"fmt"
"testing"
)
func TestUnterminatedBasicString(t *testing.T) {
runStatesT(t, statesT{
"missing closing quote", `a="value`, "[a]=",
"unexpected end of file (expected string contents)"})
}
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
runStatesTs(t, []statesT{
{"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`},
})
// No need to write all test cases for disallowed characters by hand.
for i := 0x00; i <= 0x1F; i++ {
name := fmt.Sprintf("control character %x", rune(i))
runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
// TestString runs the startString handler against a table of inputs.
func TestString(t *testing.T) {
	cases := []parseTest{
		{input: ``, expected: []string{`Error: unexpected end of file (expected a string value) at start of file`}},
		{input: `no start quote"`, expected: []string{`Error: unexpected input (expected a string value) at start of file`}},
		{input: `"simple string"`, expected: []string{`string("simple string")`}},
	}
	for _, tc := range cases {
		h := new(parser)
		testParseHandler(t, h, h.startString, tc)
	}
}
func TestEmptyBasicString(t *testing.T) {
runStatesTs(t, []statesT{
{"empty", `a=""`, "[a]=STR()", ""},
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
})
}
func TestBasicString(t *testing.T) {
runStatesTs(t, []statesT{
{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
})
for _, test := range []parseTest{
{``, []string{`Error: unexpected end of file (expected a basic string) at start of file`}},
{`no start quote"`, []string{`Error: unexpected input (expected a basic string) at start of file`}},
{`"no end quote`, []string{`Error: unexpected end of file (expected end of string) at line 1, column 14`}},
{`""`, []string{`string("")`}},
{`"simple string"`, []string{`string("simple string")`}},
{`"with\tsome\r\nvalid escapes\b"`, []string{`string("with\tsome\r\nvalid escapes\b")`}},
{`"with an \invalid escape"`, []string{`Error: invalid escape sequence at line 1, column 10`}},
{`"A cool UTF8 ƃuıɹʇs"`, []string{`string("A cool UTF8 ƃuıɹʇs")`}},
{`"A string with UTF8 escape \u2318"`, []string{`string("A string with UTF8 escape ⌘")`}},
{"\"Invalid character for UTF \xcd\"", []string{`Error: invalid UTF8 rune at line 1, column 28`}},
{"\"Character that mus\t be escaped\"", []string{`Error: invalid character in basic string: '\t' (must be escaped) at line 1, column 20`}},
{"\"Character that must be escaped \u0000\"", []string{`Error: invalid character in basic string: '\x00' (must be escaped) at line 1, column 33`}},
{"\"Character that must be escaped \x7f\"", []string{`Error: invalid character in basic string: '\u007f' (must be escaped) at line 1, column 33`}},
} {
p := &parser{}
testParseHandler(t, p, p.startBasicString, test)
}
}
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
err := "invalid escape sequence"
runStatesTs(t, []statesT{
{"invalid escape sequence", `a="\x"`, "[a]=", err},
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
})
}
func TestBasicStringEscapes(t *testing.T) {
runStatesTs(t, []statesT{
{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
})
// TestBasicStringWithUnescapedControlCharacters checks that each of the
// control characters U+0000 up to and including U+001F is rejected when it
// is used unescaped inside a basic string. The one remaining character that
// must be escaped (\x7f) is not part of this loop.
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
	for code := rune(0x00); code <= 0x1F; code++ {
		input := fmt.Sprintf(`"%c"`, code)
		expected := fmt.Sprintf(`Error: invalid character in basic string: %q (must be escaped) at line 1, column 2`, code)

		h := new(parser)
		testParseHandler(t, h, h.startString, parseTest{input: input, expected: []string{expected}})
	}
}