Implemented all string types and key types

This commit is contained in:
Maurice Makaay 2019-06-17 23:25:39 +00:00
parent b49715652e
commit e1ef9df7ca
4 changed files with 273 additions and 51 deletions

View File

@ -56,19 +56,24 @@ func (t *parser) startKeyValuePair(p *parse.API) {
}
func (t *parser) startKey(p *parse.API) {
if p.Peek(bareKeyRune) {
p.Handle(t.startBareKey)
} else {
p.Expected("a key name")
}
}
func (t *parser) startBareKey(p *parse.API) {
if p.Accept(bareKey) {
t.emitCommand(cKey, p.Result().String())
endFunc := func(str string) {
t.emitCommand(cKey, str)
p.Handle(t.endOfKeyOrDot)
} else {
p.Expected("a bare key name")
}
switch {
case p.Accept(bareKey):
endFunc(p.Result().String())
case p.Peek(a.SingleQuote):
if str, ok := t.parseLiteralString("key", p); ok {
endFunc(str)
}
case p.Peek(a.DoubleQuote):
if str, ok := t.parseBasicString("key", p); ok {
endFunc(str)
}
default:
p.Expected("a key name")
}
}

View File

@ -5,16 +5,7 @@ import "testing"
func TestKey(t *testing.T) {
for _, test := range []parseTest{
{"", []string{`Error: unexpected end of file (expected a key name) at start of file`}},
{"barekey", []string{`key("barekey")`}},
} {
p := &parser{}
testParseHandler(t, p, p.startKey, test)
}
}
func TestBareKey(t *testing.T) {
for _, test := range []parseTest{
{"", []string{`Error: unexpected end of file (expected a bare key name) at start of file`}},
// Bare key tests
{"barekey", []string{`key("barekey")`}},
{"1234567", []string{`key("1234567")`}},
{"mix-12_34", []string{`key("mix-12_34")`}},
@ -24,9 +15,21 @@ func TestBareKey(t *testing.T) {
{"key1.key2", []string{`key("key1")`, `keydot()`, `key("key2")`}},
{"key . with . spaces", []string{`key("key")`, `keydot()`, `key("with")`, `keydot()`, `key("spaces")`}},
{"key \t . \twithtabs\t . \tandspaces", []string{`key("key")`, `keydot()`, `key("withtabs")`, `keydot()`, `key("andspaces")`}},
// Single quoted key tests
{"''", []string{`key("")`}},
{"'single quoted'", []string{`key("single quoted")`}},
{`'escape\s are literal'`, []string{`key("escape\\s are literal")`}},
{`'"using inner quotes"'`, []string{`key("\"using inner quotes\"")`}},
// Double quoted key tests
{`""`, []string{`key("")`}},
{`"double quoted"`, []string{`key("double quoted")`}},
{`"escapes are in\terpreted"`, []string{`key("escapes are in\terpreted")`}},
{`"using 'inner' \"quotes\""`, []string{`key("using 'inner' \"quotes\"")`}},
// Mixed key types
{`this.'i\s'."madness\t".''`, []string{`key("this")`, `keydot()`, `key("i\\s")`, `keydot()`, `key("madness\t")`, `keydot()`, `key("")`}},
} {
p := &parser{}
testParseHandler(t, p, p.startBareKey, test)
testParseHandler(t, p, p.startKey, test)
}
}

View File

@ -7,17 +7,15 @@ import (
)
var (
// There are four ways to express strings: basic, multi-line basic,
// literal, and multi-line literal. All strings must contain only valid
// UTF-8 characters. * Multi-line basic strings are surrounded by three
// quotation marks on each side. * Basic strings are surrounded by
// quotation marks.
// Multi-line basic strings are surrounded by three quotation marks on each
// side and allow newlines.
doubleQuote3 = a.Str(`"""`)
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
// Multi-line literal strings are surrounded by three single quotes on each side and allow newlines.
singleQuote3 = a.Str(`'''`)
// Control characters as defined by TOML (U+0000 to U+001F, U+007F)
controlCharacter = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence.
//
@ -30,46 +28,170 @@ var (
// \\ - backslash (U+005C)
// \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX)
validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash)
validEscapeChar = a.Runes('b', 't', 'n', 'f', 'r', '"', '\\')
shortEscape = c.Seq(a.Backslash, validEscapeChar)
shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8))
validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)
// For writing long strings without introducing extraneous whitespace, use a
// "line ending backslash". When the last non-whitespace character on a line is
// a \, it will be trimmed along with all whitespace (including newlines) up to
// the next non-whitespace character or closing delimiter.
lineEndingBackslash = a.Backslash.
Then(c.ZeroOrMore(a.Blanks)).
Then(a.Newline).
Then(c.ZeroOrMore(a.Whitespace))
)
// startString selects the handler for the string notation found at the
// cursor. TOML knows four ways to express strings: basic, multi-line basic,
// literal and multi-line literal. All strings must contain only valid UTF-8
// characters.
//
// When the input does not start with any of the string delimiters,
// "a string value" is reported as the expected input.
func (t *parser) startString(p *parse.API) {
	// The three-character delimiters are probed before their one-character
	// counterparts, since a single quote is a prefix of a triple quote.
	if p.Peek(doubleQuote3) {
		p.Handle(t.startMultiLineBasicString)
		return
	}
	if p.Peek(a.DoubleQuote) {
		p.Handle(t.startBasicString)
		return
	}
	if p.Peek(singleQuote3) {
		p.Handle(t.startMultiLineLiteralString)
		return
	}
	if p.Peek(a.SingleQuote) {
		p.Handle(t.startLiteralString)
		return
	}
	p.Expected("a string value")
}
// startBasicString handles a basic string value. The TOML specification
// describes basic strings as follows:
//
// • Basic strings are surrounded by quotation marks.
//
// • Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
//
// • No additional \escape sequences are allowed. What the spec says about
// this: "All other escape sequences [..] are reserved and, if used, TOML
// should produce an error."
//
// The parsing itself is delegated to parseBasicString. When that succeeds,
// the resulting string is emitted as a csetStrVal command.
func (t *parser) startBasicString(p *parse.API) {
	value, parsed := t.parseBasicString("basic string", p)
	if !parsed {
		// parseBasicString has reported the problem already.
		return
	}
	t.emitCommand(csetStrVal, value)
}
// parseBasicString parses a basic (double quoted, single line) string at the
// cursor and returns its contents.
//
// The name argument is only used to identify the construct in the "invalid
// character" error message (callers in this file pass "key" or
// "basic string").
//
// The boolean result tells whether a complete string was parsed. When it is
// false, the problem has been reported through p.Expected or p.Error, and the
// returned string only holds what was collected up to that point.
func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
	if !p.Accept(a.DoubleQuote) {
		p.Expected(`opening quotation marks`)
		return "", false
	}
	sb := &strings.Builder{}
	for {
		switch {
		// Unescaped control characters are rejected before anything else.
		case p.Peek(controlCharacter):
			p.Error("invalid character in %s: %q (must be escaped)", name, p.Result().Rune(0))
			return sb.String(), false
		// A run of valid escape sequences is accepted as a whole and its
		// token value is appended to the result.
		case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))):
			sb.WriteString(p.Result().Value(0).(string))
		// Any backslash that is left at this point did not start a valid
		// escape sequence.
		case p.Peek(a.Backslash):
			p.Error("invalid escape sequence")
			return sb.String(), false
		// The closing quote is checked before the generic rune case, so it
		// ends the string instead of being added to it.
		case p.Accept(m.Drop(a.DoubleQuote)):
			return sb.String(), true
		case p.Accept(a.ValidRune):
			sb.WriteString(p.Result().String())
		case p.Peek(a.InvalidRune):
			p.Error("invalid UTF8 rune")
			return sb.String(), false
		default:
			p.Expected(`closing quotation marks`)
			return sb.String(), false
		}
	}
}
// startLiteralString handles a literal string value. The TOML specification
// describes literal strings as follows:
//
// • Literal strings are surrounded by single quotes.
//
// • Like basic strings, they must appear on a single line.
//
// • Control characters other than tab are not permitted in a literal string.
//
// The parsing itself is delegated to parseLiteralString. When that succeeds,
// the resulting string is emitted as a csetStrVal command.
func (t *parser) startLiteralString(p *parse.API) {
	value, parsed := t.parseLiteralString("literal string", p)
	if !parsed {
		// parseLiteralString has reported the problem already.
		return
	}
	t.emitCommand(csetStrVal, value)
}
// parseLiteralString parses a literal (single quoted, single line) string at
// the cursor and returns its contents. No escape handling takes place: the
// runes between the quotes are copied as they are.
//
// The name argument is only used to identify the construct in the "invalid
// character" error message.
//
// The boolean result tells whether a complete string was parsed. When it is
// false, the problem has been reported through p.Expected or p.Error, and the
// returned string only holds what was collected up to that point.
func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
	if !p.Accept(a.SingleQuote) {
		p.Expected("opening single quote")
		return "", false
	}
	var out strings.Builder
	for {
		// The closing quote ends the string and is not part of its value.
		if p.Accept(m.Drop(a.SingleQuote)) {
			return out.String(), true
		}
		// Tab is the one control character that is allowed, so it is
		// consumed before the control character check below.
		if p.Accept(a.Tab) {
			out.WriteString("\t")
			continue
		}
		if p.Peek(controlCharacter) {
			p.Error("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result().Rune(0))
			return out.String(), false
		}
		if p.Accept(a.ValidRune) {
			out.WriteString(p.Result().String())
			continue
		}
		if p.Peek(a.InvalidRune) {
			p.Error("invalid UTF8 rune")
			return out.String(), false
		}
		p.Expected("closing single quote")
		return out.String(), false
	}
}
// Specific handling of input for multi-line basic strings.
//
// • Multi-line basic strings are surrounded by three quotation marks on
// each side and allow newlines.
//
// • A newline immediately following the opening delimiter will be trimmed.
// All other whitespace and newline characters remain intact.
//
// • TOML parsers should feel free to normalize newline to whatever makes
// sense for their platform.
//
// • All of the escape sequences that are valid for basic strings are also valid
// for multi-line basic strings.
//
// • Any Unicode character may be used except those that must be escaped:
// backslash and the control characters (U+0000 to U+001F, U+007F). Quotation
// marks need not be escaped unless their presence would create a premature
// closing delimiter.
//
// • For writing long strings without introducing extraneous whitespace, use a
// "line ending backslash". When the last non-whitespace character on a line is
// a \, it will be trimmed along with all whitespace (including newlines) up to
// the next non-whitespace character or closing delimiter.
func (t *parser) startMultiLineBasicString(p *parse.API) {
if !p.Accept(doubleQuote3.Then(a.Newline.Optional())) {
p.Expected("opening three quotation marks")
return
}
sb := &strings.Builder{}
for {
switch {
case p.Peek(charThatMustBeEscaped):
p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0))
case p.Accept(a.Newline):
sb.WriteString("\n")
case p.Peek(controlCharacter):
p.Error("invalid character in multi-line basic string: %q (must be escaped)", p.Result().Rune(0))
return
case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))):
sb.WriteString(p.Result().Value(0).(string))
case p.Accept(lineEndingBackslash):
// NOOP, the line-ending backslash sequence is skipped.
case p.Peek(a.Backslash):
p.Error("invalid escape sequence")
return
case p.Accept(m.Drop(a.DoubleQuote)):
case p.Accept(m.Drop(doubleQuote3)):
t.emitCommand(csetStrVal, sb.String())
return
case p.Accept(a.ValidRune):
@ -78,16 +200,51 @@ func (t *parser) startBasicString(p *parse.API) {
p.Error("invalid UTF8 rune")
return
default:
p.Expected("end of string")
p.Expected("closing three quotation marks")
return
}
}
}
func (t *parser) startMultiLineBasicString(p *parse.API) {
if p.Accept(doubleQuote3) {
p.Error("not yet implemented")
} else {
p.Expected("a multi-line basic string")
// Specific handling of input for multi-line literal strings.
//
// • Multi-line literal strings are surrounded by three single quotes on
// each side and allow newlines.
//
// • A newline immediately following the opening delimiter will be trimmed.
//
// • All other content between the delimiters is interpreted as-is without modification.
//
// • TOML parsers should feel free to normalize newline to whatever makes
// sense for their platform.
//
// • Control characters other than tab and newline are not permitted in a multi-line literal string.
func (t *parser) startMultiLineLiteralString(p *parse.API) {
if !p.Accept(singleQuote3.Then(a.Newline.Optional())) {
p.Expected("opening three single quotes")
return
}
sb := &strings.Builder{}
for {
switch {
case p.Accept(m.Drop(singleQuote3)):
t.emitCommand(csetStrVal, sb.String())
return
case p.Accept(a.Tab):
sb.WriteString("\t")
case p.Accept(a.Newline):
sb.WriteString("\n")
case p.Peek(controlCharacter):
p.Error("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result().Rune(0))
return
case p.Accept(a.ValidRune):
sb.WriteString(p.Result().String())
case p.Peek(a.InvalidRune):
p.Error("invalid UTF8 rune")
return
default:
p.Expected("closing three single quotes")
return
}
}
}

View File

@ -9,7 +9,10 @@ func TestString(t *testing.T) {
for _, test := range []parseTest{
{``, []string{`Error: unexpected end of file (expected a string value) at start of file`}},
{`no start quote"`, []string{`Error: unexpected input (expected a string value) at start of file`}},
{`"simple string"`, []string{`string("simple string")`}},
{`"basic s\tring"`, []string{`string("basic s\tring")`}},
{"\"\"\"\n basic multi-line\n string value\n\"\"\"", []string{`string(" basic multi-line\n string value\n")`}},
{`'literal s\tring'`, []string{`string("literal s\\tring")`}},
{"'''\n literal multi-line\n string value\n'''", []string{`string(" literal multi-line\n string value\n")`}},
} {
p := &parser{}
testParseHandler(t, p, p.startString, test)
@ -18,9 +21,9 @@ func TestString(t *testing.T) {
func TestBasicString(t *testing.T) {
for _, test := range []parseTest{
{``, []string{`Error: unexpected end of file (expected a basic string) at start of file`}},
{`no start quote"`, []string{`Error: unexpected input (expected a basic string) at start of file`}},
{`"no end quote`, []string{`Error: unexpected end of file (expected end of string) at line 1, column 14`}},
{``, []string{`Error: unexpected end of file (expected opening quotation marks) at start of file`}},
{`no start quote"`, []string{`Error: unexpected input (expected opening quotation marks) at start of file`}},
{`"no end quote`, []string{`Error: unexpected end of file (expected closing quotation marks) at line 1, column 14`}},
{`""`, []string{`string("")`}},
{`"simple string"`, []string{`string("simple string")`}},
{`"with\tsome\r\nvalid escapes\b"`, []string{`string("with\tsome\r\nvalid escapes\b")`}},
@ -37,6 +40,60 @@ func TestBasicString(t *testing.T) {
}
}
// TestMultiLineBasicString runs a table of multi-line basic string inputs
// through testParseHandler, using startMultiLineBasicString as the handler
// under test. Every case gets a fresh parser.
func TestMultiLineBasicString(t *testing.T) {
	cases := []parseTest{
		{``, []string{`Error: unexpected end of file (expected opening three quotation marks) at start of file`}},
		{`"""missing close quote""`, []string{`Error: unexpected end of file (expected closing three quotation marks) at line 1, column 25`}},
		{`""""""`, []string{`string("")`}},
		{"\"\"\"\n\"\"\"", []string{`string("")`}},
		{"\"\"\"\r\n\r\n\"\"\"", []string{`string("\n")`}},
		{`"""\"\"\"\""""`, []string{`string("\"\"\"\"")`}},
		{"\"\"\"\nThe quick brown \\\n\n\n \t fox jumps over \\\n\t the lazy dog.\\\n \"\"\"", []string{`string("The quick brown fox jumps over the lazy dog.")`}},
		{"\"\"\"No control chars \f allowed\"\"\"", []string{`Error: invalid character in multi-line basic string: '\f' (must be escaped) at line 1, column 21`}},
		{"\"\"\"Escaping control chars\\nis valid\"\"\"", []string{`string("Escaping control chars\nis valid")`}},
		{"\"\"\"Invalid escaping \\is not allowed\"\"\"", []string{`Error: invalid escape sequence at line 1, column 21`}},
		{"\"\"\"Invalid rune \xcd\"\"\"", []string{`Error: invalid UTF8 rune at line 1, column 17`}},
	}
	for i := range cases {
		p := &parser{}
		testParseHandler(t, p, p.startMultiLineBasicString, cases[i])
	}
}
// TestLiteralString runs a table of literal string inputs through
// testParseHandler, using startLiteralString as the handler under test.
// Every case gets a fresh parser.
func TestLiteralString(t *testing.T) {
	cases := []parseTest{
		{``, []string{`Error: unexpected end of file (expected opening single quote) at start of file`}},
		{`'missing close quote`, []string{`Error: unexpected end of file (expected closing single quote) at line 1, column 21`}},
		{`''`, []string{`string("")`}},
		{`'simple'`, []string{`string("simple")`}},
		{`'C:\Users\nodejs\templates'`, []string{`string("C:\\Users\\nodejs\\templates")`}},
		{`'\\ServerX\admin$\system32\'`, []string{`string("\\\\ServerX\\admin$\\system32\\")`}},
		{`'Tom "Dubs" Preston-Werner'`, []string{`string("Tom \"Dubs\" Preston-Werner")`}},
		{`'<\i\c*\s*>'`, []string{`string("<\\i\\c*\\s*>")`}},
		{"'No cont\rol chars allowed'", []string{`Error: invalid character in literal string: '\r' (no control chars allowed, except for tab) at line 1, column 9`}},
		{"'Except\tfor\ttabs'", []string{`string("Except\tfor\ttabs")`}},
		{"'Invalid rune \xcd'", []string{`Error: invalid UTF8 rune at line 1, column 15`}},
	}
	for i := range cases {
		p := &parser{}
		testParseHandler(t, p, p.startLiteralString, cases[i])
	}
}
// TestMultiLineLiteralString runs a table of multi-line literal string inputs
// through testParseHandler, using startMultiLineLiteralString as the handler
// under test. Every case gets a fresh parser.
func TestMultiLineLiteralString(t *testing.T) {
	cases := []parseTest{
		{``, []string{`Error: unexpected end of file (expected opening three single quotes) at start of file`}},
		{`'''missing close quote''`, []string{`Error: unexpected end of file (expected closing three single quotes) at line 1, column 25`}},
		{`''''''`, []string{`string("")`}},
		{"'''\n'''", []string{`string("")`}},
		{`'''I [dw]on't need \d{2} apples'''`, []string{`string("I [dw]on't need \\d{2} apples")`}},
		{"'''\nThere can\nbe newlines\r\nand \ttabs!\r\n'''", []string{`string("There can\nbe newlines\nand \ttabs!\n")`}},
		{"'''No other \f control characters'''", []string{`Error: invalid character in literal string: '\f' (no control chars allowed, except for tab and newline) at line 1, column 13`}},
		{"'''No invalid runes allowed \xcd'''", []string{"Error: invalid UTF8 rune at line 1, column 29"}},
	}
	for i := range cases {
		p := &parser{}
		testParseHandler(t, p, p.startMultiLineLiteralString, cases[i])
	}
}
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
// A quick check for almost all characters that must be escaped.
// The missing one (\x7f) is covered in the previous test.