Brought the TOML code up-to-speed with the latest version of parsekit.

This commit is contained in:
Maurice Makaay 2019-06-17 13:59:59 +00:00
parent 269bd9ed36
commit b49715652e
10 changed files with 333 additions and 336 deletions

View File

@ -1,20 +1,17 @@
package parser package parser
import ( import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit/parse"
) )
// A '#' hash symbol marks the rest of the line as a comment. // A '#' hash symbol marks the rest of the line as a comment.
// All characters up to the end of the line are included in the comment. // All characters up to the end of the line are included in the comment.
var comment = c.Seq( var comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)), m.Drop(a.EndOfLine))
m.Drop(c.OneOrMore(a.Hash)),
m.Trim(c.ZeroOrMore(c.Not(a.EndOfLine)), " \t"),
m.Drop(a.EndOfLine))
func startComment(p *parsekit.ParseAPI) { func (t *parser) startComment(p *parse.API) {
p.Expects("comment") if p.Accept(comment) {
if p.On(comment).Accept() { t.emitCommand(cComment, p.Result().String())
p.EmitLiteral(ItemComment) } else {
p.RouteReturn() p.Expected("comment")
} }
} }

View File

@ -1,22 +1,24 @@
package parser_test package parser
import ( import (
"testing" "testing"
) )
func TestComments(t *testing.T) { func TestComment2(t *testing.T) {
runStatesTs(t, []statesT{ for _, test := range []parseTest{
{"empty comment at end of file", "#", "#()", ""}, {``, []string{`Error: unexpected end of file (expected comment) at start of file`}},
{"empty comment at end of windows line", "#\r\n", "#()", ""}, {`#`, []string{`comment("#")`}},
{"empty comment at end of unix line", "#\n", "#()", ""}, {`# `, []string{`comment("# ")`}},
{"empty comment with spaces", "# \t \r\n", `#()`, ""}, {`# with data`, []string{`comment("# with data")`}},
{"basic comment", "#chicken", "#(chicken)", ""}, {"# ending in EOL & EOF\r\n", []string{`comment("# ending in EOL & EOF")`}},
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, {`# \xxx/ \u can't escape/`, []string{`comment("# \\xxx/ \\u can't escape/")`}},
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, {"# \tlexe\r accepts embedded ca\r\riage \returns\r\n", []string{
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, `comment("# \tlexe\r accepts embedded ca\r\riage \returns")`}},
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, {"# with data and newline\ncode continues here", []string{
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, `comment("# with data and newline")`,
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, `Error: unexpected input (expected end of file) at line 2, column 1`}},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r\n", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, } {
}) p := &parser{}
testParseHandler(t, p, p.startComment, test)
}
} }

3
go.sum
View File

@ -1,3 +1,2 @@
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939 h1:cMBHhfSJR2BZgVN7NmP+c2agNlXDef4Iz6+XQp5AqdU= git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939 h1:cMBHhfSJR2BZgVN7NmP+c2agNlXDef4Iz6+XQp5AqdU=
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU= git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU=
github.com/mmakaay/toml v0.3.1 h1:2uKRPvA/smKM8YuYGxWnW4KximMkWOMfunJOXgM5Zos=

View File

@ -1,12 +1,10 @@
package parser_test package parser
import ( import (
"fmt" "fmt"
"strings"
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit/parse"
toml "git.makaay.nl/mauricem/go-toml"
) )
type statesT struct { type statesT struct {
@ -16,75 +14,35 @@ type statesT struct {
err string err string
} }
func runStatesTs(t *testing.T, tests []statesT) { type parseTest struct {
for _, c := range tests { input interface{}
runStatesT(t, c) expected []string
}
} }
// ToArray returns Parser items as an array. func testParseHandler(t *testing.T, p *parser, handler parse.Handler, test parseTest) {
// When an error occurs during scanning, a partial result will be err := parse.New(handler)(test.input)
// returned, accompanied by the error that occurred. results := []string{}
func parseItemsToArray(p *parsekit.ParseRun) ([]parsekit.Item, *parsekit.Error) { for _, cmd := range p.commands {
var items []parsekit.Item results = append(results, cmd.String())
for { }
item, err, more := p.Next() if err != nil {
if !more { results = append(results, fmt.Sprintf("Error: %s", err))
return items, err
}
items = append(items, item)
} }
}
func runStatesT(t *testing.T, c statesT) { for i, e := range test.expected {
p := toml.Parse(c.in) if i > len(results)-1 {
l, err := parseItemsToArray(p) t.Errorf("No result at index %d, expected: %s", i, e)
if err == nil && c.err != "" { continue
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
}
if err != nil && c.err == "" {
t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
}
if err != nil && c.err != "" && err.Error() != c.err {
t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
}
switch expected := c.out.(type) {
case []string:
if len(expected) != len(l) {
t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
} }
for i, e := range expected { r := results[i]
v := parserItemToString(l[i]) if e != r {
if v != e { t.Errorf("Unexpected result at index %d:\nexpected: %s\nactual: %s\n", i, e, r)
t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
}
} }
case string: }
a := make([]string, len(l)) if len(results) > len(test.expected) {
for _, v := range l { t.Errorf("Got more results than expected, surplus result(s):\n")
a = append(a, parserItemToString(v)) for i := len(test.expected); i < len(results); i++ {
} t.Errorf("[%d] %s", i, results[i])
actual := strings.Join(a, "")
if actual != expected {
t.Errorf("[%s] Unexpected parser output:\nexpected: %q\nactual: %q\n", c.name, expected, actual)
} }
} }
} }
// parserItemToString returns a string representation of the parsekit.Item.
func parserItemToString(i parsekit.Item) string {
switch i.Type {
case toml.ItemComment:
return fmt.Sprintf("#(%s)", i.Value)
case toml.ItemKey:
return fmt.Sprintf("[%s]", i.Value)
case toml.ItemString:
return fmt.Sprintf("STR(%s)", i.Value)
case toml.ItemKeyDot:
return "."
case toml.ItemAssignment:
return "="
default:
panic(fmt.Sprintf("parsekit bug: no string formatting exists for parsekit.Item id %d", i.Type))
}
}

View File

@ -1,15 +1,20 @@
package parser package parser
import "git.makaay.nl/mauricem/go-parsekit" import (
"git.makaay.nl/mauricem/go-parsekit/parse"
)
// The primary building block of a TOML document is the key/value pair. // The primary building block of a TOML document is the key/value pair.
var ( var (
dropWhitespace = m.Drop(a.Whitespace.Optional())
dropBlanks = m.Drop(a.Blanks.Optional())
// Keys are on the left of the equals sign and values are on the right. // Keys are on the left of the equals sign and values are on the right.
// Whitespace is ignored around key names and values. The key, equals // Blanks are ignored around key names and values. The key, equals
// sign, and value must be on the same line (though some values can be // sign, and value must be on the same line (though some values can be
// broken over multiple lines). // broken over multiple lines).
keyAssignment = c.Seq(c.Opt(a.Whitespace), a.Equal, c.Opt(a.Whitespace)) keyAssignment = c.Seq(dropBlanks, a.Equal, dropBlanks)
// A key may be either bare, quoted or dotted. Bare keys may only // A key may be either bare, quoted or dotted. Bare keys may only
// contain ASCII letters, ASCII digits, underscores, and dashes // contain ASCII letters, ASCII digits, underscores, and dashes
@ -26,62 +31,68 @@ var (
startOfKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote) startOfKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote)
// Dotted keys are a sequence of bare or quoted keys joined with a dot. // Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together. Whitespace // This allows for grouping similar properties together. Blanks
// around dot-separated parts is ignored, however, best practice is to // around dot-separated parts are ignored, however, best practice is to
// not use any extraneous whitespace. // not use any extraneous blanks.
keySeparatorDot = c.Seq(c.Opt(a.Whitespace), a.Dot, c.Opt(a.Whitespace)) keySeparatorDot = c.Seq(dropBlanks, a.Dot, dropBlanks)
) )
func startKeyValuePair(p *parsekit.ParseAPI) { func (t *parser) startKeyValuePair(p *parse.API) {
switch { for {
case p.On(a.WhitespaceAndNewlines).Skip(): p.Accept(dropWhitespace)
p.RouteRepeat() switch {
case p.On(a.Hash).Stay(): case p.Peek(a.Hash):
p.RouteTo(startComment).ThenReturnHere() p.Handle(t.startComment)
case p.On(startOfKey).Stay(): case p.Peek(startOfKey):
p.RouteTo(startKey) p.Handle(t.startKey, t.startAssignment, t.startValue)
default: default:
p.ExpectEndOfFile() p.ExpectEndOfFile()
return
}
if p.IsStoppedOrInError() {
return
}
} }
} }
func startKey(p *parsekit.ParseAPI) { func (t *parser) startKey(p *parse.API) {
p.Expects("a key name") if p.Peek(bareKeyRune) {
if p.On(bareKeyRune).Stay() { p.Handle(t.startBareKey)
p.RouteTo(startBareKey)
}
}
func startBareKey(p *parsekit.ParseAPI) {
p.Expects("a bare key name")
if p.On(bareKey).Accept() {
p.EmitLiteral(ItemKey)
p.RouteTo(endOfKeyOrDot)
}
}
func endOfKeyOrDot(p *parsekit.ParseAPI) {
if p.On(keySeparatorDot).Skip() {
p.Emit(ItemKeyDot, ".")
p.RouteTo(startKey)
} else { } else {
p.RouteTo(startAssignment) p.Expected("a key name")
} }
} }
func startAssignment(p *parsekit.ParseAPI) { func (t *parser) startBareKey(p *parse.API) {
p.Expects("a value assignment") if p.Accept(bareKey) {
if p.On(keyAssignment).Skip() { t.emitCommand(cKey, p.Result().String())
p.Emit(ItemAssignment, "=") p.Handle(t.endOfKeyOrDot)
p.RouteTo(startValue) } else {
p.Expected("a bare key name")
}
}
func (t *parser) endOfKeyOrDot(p *parse.API) {
if p.Accept(keySeparatorDot) {
t.emitCommand(cNewKeyLvl)
p.Handle(t.startKey)
}
}
func (t *parser) startAssignment(p *parse.API) {
if p.Accept(keyAssignment) {
t.emitCommand(cAssign)
} else {
p.Expected("a value assignment")
} }
} }
// Values must be of the following types: String, Integer, Float, Boolean, // Values must be of the following types: String, Integer, Float, Boolean,
// Datetime, Array, or Inline Table. Unspecified values are invalid. // Datetime, Array, or Inline Table. Unspecified values are invalid.
func startValue(p *parsekit.ParseAPI) { func (t *parser) startValue(p *parse.API) {
p.Expects("a value") if p.Peek(c.Any(a.SingleQuote, a.DoubleQuote)) {
if p.On(c.Any(a.SingleQuote, a.DoubleQuote)).Stay() { p.Handle(t.startString)
p.RouteTo(startString) } else {
p.Expected("a value")
} }
} }

View File

@ -1,46 +1,74 @@
package parser_test package parser
import ( import "testing"
"testing"
)
func TestKeyWithoutAssignment(t *testing.T) { func TestKey(t *testing.T) {
err := "unexpected end of file (expected a value assignment)" for _, test := range []parseTest{
runStatesTs(t, []statesT{ {"", []string{`Error: unexpected end of file (expected a key name) at start of file`}},
{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"}, {"barekey", []string{`key("barekey")`}},
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err}, } {
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err}, p := &parser{}
{"bare numbers", "0123456789", "[0123456789]", err}, testParseHandler(t, p, p.startKey, test)
{"bare underscore", "_", "[_]", err}, }
{"bare dash", "-", "[-]", err},
{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
})
} }
func TestDottedKey(t *testing.T) { func TestBareKey(t *testing.T) {
runStatesTs(t, []statesT{ for _, test := range []parseTest{
{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"}, {"", []string{`Error: unexpected end of file (expected a bare key name) at start of file`}},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`}, {"barekey", []string{`key("barekey")`}},
}) {"1234567", []string{`key("1234567")`}},
{"mix-12_34", []string{`key("mix-12_34")`}},
{"-hey_good_Lookin123-", []string{`key("-hey_good_Lookin123-")`}},
{"wrong!", []string{`key("wrong")`, `Error: unexpected input (expected end of file) at line 1, column 6`}},
{"key1.", []string{`key("key1")`, `keydot()`, `Error: unexpected end of file (expected a key name) at line 1, column 6`}},
{"key1.key2", []string{`key("key1")`, `keydot()`, `key("key2")`}},
{"key . with . spaces", []string{`key("key")`, `keydot()`, `key("with")`, `keydot()`, `key("spaces")`}},
{"key \t . \twithtabs\t . \tandspaces", []string{`key("key")`, `keydot()`, `key("withtabs")`, `keydot()`, `key("andspaces")`}},
} {
p := &parser{}
testParseHandler(t, p, p.startBareKey, test)
}
} }
func TestKeyWithAssignmentButNoValue(t *testing.T) { func TestAssignment(t *testing.T) {
err := "unexpected end of file (expected a value)" for _, test := range []parseTest{
runStatesTs(t, []statesT{ {"", []string{`Error: unexpected end of file (expected a value assignment) at start of file`}},
{"bare", "a=", "[a]=", err}, {"=", []string{`assign()`}},
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"}, {" \t = \t ", []string{`assign()`}},
{"bare dotted", "a.b=", "[a].[b]=", err}, {" \n = \n ", []string{`Error: unexpected input (expected a value assignment) at start of file`}},
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, } {
}) p := &parser{}
testParseHandler(t, p, p.startAssignment, test)
}
} }
func TestKeyWithValue(t *testing.T) { func TestValue(t *testing.T) {
runStatesTs(t, []statesT{ for _, test := range []parseTest{
{"with string value", {``, []string{`Error: unexpected end of file (expected a value) at start of file`}},
" -key- = \"value\" # nice\r\n", {`"basic string value"`, []string{`string("basic string value")`}},
"[-key-]=STR(value)#(nice)", ""}, } {
{"multiple string values", p := &parser{}
"key = \"value1\"\nbare_key = \"value2\"\n# More coming up!\nbare-key = \"value3\"\n1234 = \"value4\"\n", testParseHandler(t, p, p.startValue, test)
"[key]=STR(value1)[bare_key]=STR(value2)#(More coming up!)[bare-key]=STR(value3)[1234]=STR(value4)", ""}, }
}) }
func TestKeyValuePair(t *testing.T) {
for _, test := range []parseTest{
{"", []string{}},
{" ", []string{}},
{" \t ", []string{}},
{" key ", []string{`key("key")`, `Error: unexpected input (expected a value assignment) at line 1, column 5`}},
{" key \t=", []string{`key("key")`, `assign()`, `Error: unexpected end of file (expected a value) at line 1, column 8`}},
{" key \t =\t \"The Value\" \r\n", []string{`key("key")`, `assign()`, `string("The Value")`}},
{"key1=\"value1\"key2=\"value2\"\r\nkey3=\"value3\"", []string{
`key("key1")`, `assign()`, `string("value1")`,
`key("key2")`, `assign()`, `string("value2")`,
`key("key3")`, `assign()`, `string("value3")`}},
{"with=\"comments\"# boring \nanother.cool =\"one\" \t # to the end\r\n", []string{
`key("with")`, `assign()`, `string("comments")`, `comment("# boring ")`,
`key("another")`, `keydot()`, `key("cool")`, `assign()`, `string("one")`, `comment("# to the end")`}},
} {
p := &parser{}
testParseHandler(t, p, p.startKeyValuePair, test)
}
} }

61
toml.go
View File

@ -1,23 +1,52 @@
package parser package parser
import "git.makaay.nl/mauricem/go-parsekit" import (
"fmt"
"strings"
// Item types that are produced by this parser. "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Easy access to the parsekit.tokenize definitions.
var c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T
type cmdType string
// Command types that are emitted by the parser.
const ( const (
ItemComment parsekit.ItemType = iota // Comment string cComment cmdType = "comment" // a # comment at the end of the line
ItemKey // Key of a key/value pair cKey = "key" // set key name
ItemKeyDot // Dot for a dotted key cNewKeyLvl = "keydot" // new key stack level
ItemAssignment // Value assignment coming up (=) cAssign = "assign" // assign a value
ItemString // A value of type string csetStrVal = "string" // set a string value
) )
var ( type parser struct {
c, a, m = parsekit.C, parsekit.A, parsekit.M commands []cmd
) keyStack []string
var parser = parsekit.NewParser(startKeyValuePair)
// Parse starts the parser for the provided input string.
func Parse(input string) *parsekit.ParseRun {
return parser.Parse(input)
} }
type cmd struct {
command cmdType
args []interface{}
}
func (cmd *cmd) String() string {
args := make([]string, len(cmd.args))
for i, arg := range cmd.args {
args[i] = fmt.Sprintf("%q", arg)
}
return fmt.Sprintf("%s(%s)", cmd.command, strings.Join(args, ", "))
}
func (p *parser) emitCommand(command cmdType, args ...interface{}) {
c := cmd{command: command, args: args}
p.commands = append(p.commands, c)
}
// Parse starts the parser for the provided input.
// func Parse(input interface{}) []cmd {
// p := &parser{}
// parse.New(p.startKeyValuePair)(input)
// return p.commands
// }

View File

@ -1,40 +1,34 @@
package parser_test package parser_test
import ( // func TestEmptyInput(t *testing.T) {
"testing" // runStatesT(t, statesT{"empty string", "", "", ""})
// }
toml "git.makaay.nl/mauricem/go-toml" // func TestFullIncludesLineAndRowPosition(t *testing.T) {
) // p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
// _, err := parseItemsToArray(p)
// actual := err.Error()
// expected := "unexpected input (expected end of file) at line 6, column 3"
// if actual != expected {
// t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
// }
// }
func TestEmptyInput(t *testing.T) { // func TestInvalidUTF8Data(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""}) // runStatesTs(t, []statesT{
} // {"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
// {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
// {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
// {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
// })
// }
func TestErrorFullIncludesLineAndRowPosition(t *testing.T) { // func TestWhiteSpaceAndNewlines(t *testing.T) {
p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +") // runStatesTs(t, []statesT{
_, err := parseItemsToArray(p) // {"space", " ", "", ""},
actual := err.ErrorFull() // {"tab", "\t", "", ""},
expected := "unexpected character '+' (expected end of file) after line 6, column 3" // {"newline", "\n", "", ""},
if actual != expected { // {"all blanks and newlines", " \t \t \r\n\n \n \t", "", ""},
t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual) // {"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"},
} // })
} // }
func TestInvalidUTF8Data(t *testing.T) {
runStatesTs(t, []statesT{
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
})
}
func TestWhiteSpaceAndNewlines(t *testing.T) {
runStatesTs(t, []statesT{
{"space", " ", "", ""},
{"tab", "\t", "", ""},
{"newline", "\n", "", ""},
{"all whitespace and newlines", " \t \t \r\n\n \n \t", "", ""},
{"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"},
})
}

View File

@ -1,6 +1,10 @@
package parser package parser
import "git.makaay.nl/mauricem/go-parsekit" import (
"strings"
"git.makaay.nl/mauricem/go-parsekit/parse"
)
var ( var (
// There are four ways to express strings: basic, multi-line basic, // There are four ways to express strings: basic, multi-line basic,
@ -8,12 +12,12 @@ var (
// UTF-8 characters. * Multi-line basic strings are surrounded by three // UTF-8 characters. * Multi-line basic strings are surrounded by three
// quotation marks on each side. * Basic strings are surrounded by // quotation marks on each side. * Basic strings are surrounded by
// quotation marks. // quotation marks.
doubleQuote3 = c.Str(`"""`) doubleQuote3 = a.Str(`"""`)
// Any Unicode character may be used except those that must be escaped: // Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to // quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F). // U+001F, U+007F).
charThatMustBeEscaped = c.Any(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F')) charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence. // For convenience, some popular characters have a compact escape sequence.
// //
@ -26,43 +30,21 @@ var (
// \\ - backslash (U+005C) // \\ - backslash (U+005C)
// \uXXXX - unicode (U+XXXX) // \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX)
validEscapeChar = c.Any(c.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash) validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash)
shortEscape = c.Seq(a.Backslash, validEscapeChar) shortEscape = c.Seq(a.Backslash, validEscapeChar)
shortUTF8Escape = c.Seq(a.Backslash, c.Rune('u'), c.Rep(4, a.HexDigit)) shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
longUTF8Escape = c.Seq(a.Backslash, c.Rune('U'), c.Rep(8, a.HexDigit)) longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8))
validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)
) )
func startString(p *parsekit.ParseAPI) { func (t *parser) startString(p *parse.API) {
p.Expects("a string value")
switch { switch {
case p.On(doubleQuote3).Stay(): case p.Peek(doubleQuote3):
p.RouteTo(startMultiLineBasicString) p.Handle(t.startMultiLineBasicString)
case p.On(a.DoubleQuote).Stay(): case p.Peek(a.DoubleQuote):
p.RouteTo(startBasicString) p.Handle(t.startBasicString)
} default:
} p.Expected("a string value")
func startBasicString(p *parsekit.ParseAPI) {
p.Expects("a basic string")
if p.On(a.DoubleQuote).Skip() {
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
}
func parseBasicString(p *parsekit.ParseAPI) {
p.Expects("string contents")
switch {
case p.On(charThatMustBeEscaped).Stay():
p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch)
case p.On(validEscape).Accept():
p.RouteRepeat()
case p.On(a.Backslash).Stay():
p.RouteReturn()
case p.On(a.DoubleQuote).Stay():
p.RouteReturn()
case p.On(a.AnyRune).Accept():
p.RouteRepeat()
} }
} }
@ -71,20 +53,41 @@ func parseBasicString(p *parsekit.ParseAPI) {
// * No additional \escape sequences are allowed. What the spec says about this: // * No additional \escape sequences are allowed. What the spec says about this:
// "All other escape sequences [..] are reserved and, if used, TOML should // "All other escape sequences [..] are reserved and, if used, TOML should
// produce an error." // produce an error."
func basicStringSpecifics(p *parsekit.ParseAPI) { func (t *parser) startBasicString(p *parse.API) {
p.Expects("string contents") if !p.Accept(a.DoubleQuote) {
switch { p.Expected("a basic string")
case p.On(a.DoubleQuote).Skip(): return
p.EmitInterpreted(ItemString) }
p.RouteTo(startKeyValuePair) sb := &strings.Builder{}
case p.On(a.Backslash).Stay(): for {
p.EmitError("invalid escape sequence") switch {
case p.Peek(charThatMustBeEscaped):
p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0))
return
case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))):
sb.WriteString(p.Result().Value(0).(string))
case p.Peek(a.Backslash):
p.Error("invalid escape sequence")
return
case p.Accept(m.Drop(a.DoubleQuote)):
t.emitCommand(csetStrVal, sb.String())
return
case p.Accept(a.ValidRune):
sb.WriteString(p.Result().String())
case p.Peek(a.InvalidRune):
p.Error("invalid UTF8 rune")
return
default:
p.Expected("end of string")
return
}
} }
} }
func startMultiLineBasicString(p *parsekit.ParseAPI) { func (t *parser) startMultiLineBasicString(p *parse.API) {
p.Expects("a multi-line basic string") if p.Accept(doubleQuote3) {
if p.On(doubleQuote3).Skip() { p.Error("not yet implemented")
p.EmitError("not yet implemented") } else {
p.Expected("a multi-line basic string")
} }
} }

View File

@ -1,73 +1,49 @@
package parser_test package parser
import ( import (
"fmt" "fmt"
"testing" "testing"
) )
func TestUnterminatedBasicString(t *testing.T) { func TestString(t *testing.T) {
runStatesT(t, statesT{ for _, test := range []parseTest{
"missing closing quote", `a="value`, "[a]=", {``, []string{`Error: unexpected end of file (expected a string value) at start of file`}},
"unexpected end of file (expected string contents)"}) {`no start quote"`, []string{`Error: unexpected input (expected a string value) at start of file`}},
} {`"simple string"`, []string{`string("simple string")`}},
} {
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { p := &parser{}
runStatesTs(t, []statesT{ testParseHandler(t, p, p.startString, test)
{"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`},
})
// No need to write all test cases for disallowed characters by hand.
for i := 0x00; i <= 0x1F; i++ {
name := fmt.Sprintf("control character %x", rune(i))
runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
} }
} }
func TestEmptyBasicString(t *testing.T) {
runStatesTs(t, []statesT{
{"empty", `a=""`, "[a]=STR()", ""},
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
})
}
func TestBasicString(t *testing.T) { func TestBasicString(t *testing.T) {
runStatesTs(t, []statesT{ for _, test := range []parseTest{
{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""}, {``, []string{`Error: unexpected end of file (expected a basic string) at start of file`}},
{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""}, {`no start quote"`, []string{`Error: unexpected input (expected a basic string) at start of file`}},
}) {`"no end quote`, []string{`Error: unexpected end of file (expected end of string) at line 1, column 14`}},
{`""`, []string{`string("")`}},
{`"simple string"`, []string{`string("simple string")`}},
{`"with\tsome\r\nvalid escapes\b"`, []string{`string("with\tsome\r\nvalid escapes\b")`}},
{`"with an \invalid escape"`, []string{`Error: invalid escape sequence at line 1, column 10`}},
{`"A cool UTF8 ƃuıɹʇs"`, []string{`string("A cool UTF8 ƃuıɹʇs")`}},
{`"A string with UTF8 escape \u2318"`, []string{`string("A string with UTF8 escape ⌘")`}},
{"\"Invalid character for UTF \xcd\"", []string{`Error: invalid UTF8 rune at line 1, column 28`}},
{"\"Character that mus\t be escaped\"", []string{`Error: invalid character in basic string: '\t' (must be escaped) at line 1, column 20`}},
{"\"Character that must be escaped \u0000\"", []string{`Error: invalid character in basic string: '\x00' (must be escaped) at line 1, column 33`}},
{"\"Character that must be escaped \x7f\"", []string{`Error: invalid character in basic string: '\u007f' (must be escaped) at line 1, column 33`}},
} {
p := &parser{}
testParseHandler(t, p, p.startBasicString, test)
}
} }
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
err := "invalid escape sequence" // A quick check for almost all characters that must be escaped.
runStatesTs(t, []statesT{ // The missing one (\x7f) is covered in the previous test.
{"invalid escape sequence", `a="\x"`, "[a]=", err}, for i := 0x00; i <= 0x1F; i++ {
{"too short \\u UTF8", `a="\u123"`, "[a]=", err}, p := &parser{}
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err}, input := fmt.Sprintf(`"%c"`, rune(i))
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err}, expected := fmt.Sprintf(`Error: invalid character in basic string: %q (must be escaped) at line 1, column 2`, rune(i))
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err}, testParseHandler(t, p, p.startString, parseTest{input, []string{expected}})
}) }
}
func TestBasicStringEscapes(t *testing.T) {
runStatesTs(t, []statesT{
{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
})
} }