80 lines
2.5 KiB
Go
80 lines
2.5 KiB
Go
package parser
|
|
|
|
import "github.com/mmakaay/toml/parsekit"
|
|
|
|
// There are four ways to express strings: basic, multi-line basic, literal,
|
|
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
|
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
|
// * Basic strings are surrounded by quotation marks.
|
|
func stateStringValue(p *parsekit.P) {
|
|
switch {
|
|
case p.After(doubleQuote3).Ignore():
|
|
p.RouteTo(stateMultiLineBasicString)
|
|
case p.After(doubleQuote).Ignore():
|
|
p.RouteTo(startBasicString)
|
|
default:
|
|
p.UnexpectedInput("a string value")
|
|
}
|
|
}
|
|
|
|
// For convenience, some popular characters have a compact escape sequence.
|
|
//
|
|
// \b - backspace (U+0008)
|
|
// \t - tab (U+0009)
|
|
// \n - linefeed (U+000A)
|
|
// \f - form feed (U+000C)
|
|
// \r - carriage return (U+000D)
|
|
// \" - quote (U+0022)
|
|
// \\ - backslash (U+005C)
|
|
// \uXXXX - unicode (U+XXXX)
|
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
|
//
|
|
// Any Unicode character may be used except those that must be escaped:
|
|
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
|
func parseString(p *parsekit.P) {
|
|
switch {
|
|
case p.AtEndOfFile():
|
|
p.UnexpectedEndOfFile("basic string token")
|
|
case p.After(backslash, validEscapeChars).Store() ||
|
|
p.After(shortUtf8Match).Store() ||
|
|
p.After(longUtf8Match).Store():
|
|
p.RouteRepeat()
|
|
case p.After(mustBeEscaped).Backup():
|
|
r, _, _ := p.Match(mustBeEscaped)
|
|
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
|
case p.After(backslash).Backup() || p.After(doubleQuote).Backup():
|
|
p.RouteReturn()
|
|
default:
|
|
p.AcceptAny()
|
|
p.RouteRepeat()
|
|
}
|
|
}
|
|
|
|
func startBasicString(p *parsekit.P) {
|
|
p.RouteTo(parseString).ThenTo(basicStringSpecifics)
|
|
}
|
|
|
|
// Specific handling of input for basic strings.
|
|
// * A double quote ends the string
|
|
// * No additional \escape sequences are allowed. What the spec say about this:
|
|
// "All other escape sequences [..] are reserved and, if used, TOML should
|
|
// produce an error.""
|
|
func basicStringSpecifics(p *parsekit.P) {
|
|
switch {
|
|
case p.After(doubleQuote).Ignore():
|
|
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
|
|
p.EmitError("Invalid data in string: %s", err)
|
|
} else {
|
|
p.RouteTo(stateKeyValuePair)
|
|
}
|
|
case p.After(backslash).Backup():
|
|
p.EmitError("Invalid escape sequence")
|
|
default:
|
|
p.RouteTo(startBasicString)
|
|
}
|
|
}
|
|
|
|
func stateMultiLineBasicString(p *parsekit.P) {
|
|
p.EmitError("Not yet implemented")
|
|
}
|