go-toml/parser/value_string.go

91 lines
3.0 KiB
Go

package parser
import "github.com/mmakaay/toml/parsekit"
var (
// Matchers used for parsing TOML string values. The combinator value c and
// the matchers doubleQuote, backslash and digit are not declared in this
// block; they come from elsewhere in the package.
//
// There are four ways to express strings: basic, multi-line basic,
// literal, and multi-line literal. All strings must contain only valid
// UTF-8 characters.
//
// * Basic strings are surrounded by quotation marks.
// * Multi-line basic strings are surrounded by three quotation marks
//   on each side.
//
// doubleQuote3 is built from the literal three-quote delimiter.
doubleQuote3 = c.String(`"""`)
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
//
// Note that this matcher covers only the control characters of that list.
// Quotation mark and backslash have their own cases in parseBasicString.
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence.
//
// \b - backspace (U+0008)
// \t - tab (U+0009)
// \n - linefeed (U+000A)
// \f - form feed (U+000C)
// \r - carriage return (U+000D)
// \" - quote (U+0022)
// \\ - backslash (U+005C)
// \uXXXX - unicode (U+XXXX)
// \UXXXXXXXX - unicode (U+XXXXXXXX)
//
// validEscapeChar lists the single characters that may follow a backslash
// in a compact escape: b, t, n, f, r, the double quote and the backslash.
validEscapeChar = c.AnyOf(c.Runes('b', 't', 'n', 'f', 'r'), doubleQuote, backslash)
// shortEscape is a backslash followed by one of the characters above.
shortEscape = c.Sequence(backslash, validEscapeChar)
// hex is a single hexadecimal digit: digit, a-f or A-F.
hex = c.AnyOf(digit, c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
// shortUtf8Escape is the \uXXXX form: backslash, 'u', then hex repeated
// 4 times.
shortUtf8Escape = c.Sequence(backslash, c.Rune('u'), c.Repeat(4, hex))
// longUtf8Escape is the \UXXXXXXXX form: backslash, 'U', then hex repeated
// 8 times.
longUtf8Escape = c.Sequence(backslash, c.Rune('U'), c.Repeat(8, hex))
// validEscape combines the three escape forms defined above.
validEscape = c.AnyOf(shortEscape, shortUtf8Escape, longUtf8Escape)
)
// startString is the entry point for a string value. It registers the
// expectation "a string value" and then hands over to the handler for the
// kind of string that is found.
//
// The triple-quote check is made first and the single-quote check is only
// attempted when it did not route. If neither check routes, this function
// takes no further action itself.
func startString(p *parsekit.P) {
	p.Expects("a string value")

	if p.On(doubleQuote3).RouteTo(startMultiLineBasicString) {
		return
	}
	p.On(doubleQuote).RouteTo(startBasicString)
}
// startBasicString handles the opening quotation mark of a basic string.
// When the double quote is found it is skipped, after which the parser is
// routed to parseBasicString, followed by basicStringSpecifics. When it is
// not found, no routing is set up here.
func startBasicString(p *parsekit.P) {
	p.Expects("a basic string")

	if !p.On(doubleQuote).Skip() {
		return
	}
	p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
// parseBasicString handles one step of the contents of a basic string.
// The checks below are order-sensitive and are tried one after the other;
// the first one that succeeds decides what happens:
//
//  1. A character that must be escaped produces an error.
//  2. A valid escape sequence is accepted and this handler is repeated.
//  3. A backslash (at this point not the start of a valid escape) or a
//     double quote returns control through RouteReturn.
//  4. Any other input is accepted and this handler is repeated.
func parseBasicString(p *parsekit.P) {
	p.Expects("string contents")

	if p.On(charThatMustBeEscaped).Stay() {
		p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch)
		return
	}
	if p.On(validEscape).Accept() {
		p.RouteRepeat()
		return
	}
	// Both of these hand control back; the quote is only checked when the
	// backslash check did not succeed.
	if p.On(backslash).RouteReturn() || p.On(doubleQuote).RouteReturn() {
		return
	}
	if p.On(any).Accept() {
		p.RouteRepeat()
	}
}
// basicStringSpecifics handles the input that is specific to basic strings:
//
//   - A double quote ends the string. The quote is skipped and the string is
//     emitted as an ItemString; on success parsing continues with
//     startKeyValuePair, on failure an error is emitted instead.
//   - A backslash at this point is an error: no additional \escape sequences
//     are allowed. What the spec says about this: "All other escape
//     sequences [..] are reserved and, if used, TOML should produce an
//     error."
func basicStringSpecifics(p *parsekit.P) {
	p.Expects("string contents")

	if p.On(doubleQuote).Skip() {
		err := p.EmitInterpreted(ItemString) // TODO testcase?
		if err != nil {
			p.EmitError("Invalid data in string: %s", err)
			return
		}
		p.RouteTo(startKeyValuePair)
		return
	}

	if p.On(backslash).Stay() {
		p.EmitError("Invalid escape sequence")
	}
}
// startMultiLineBasicString handles the opening delimiter of a multi-line
// basic string. Parsing of the contents is not implemented yet: once the
// three opening quotes have been skipped, an error saying so is emitted.
func startMultiLineBasicString(p *parsekit.P) {
	p.Expects("a multi-line basic string")

	if !p.On(doubleQuote3).Skip() {
		return
	}
	p.EmitError("Not yet implemented")
}