94 lines
2.9 KiB
Go
94 lines
2.9 KiB
Go
package parser
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit/parse"
|
|
)
|
|
|
|
var (
|
|
// There are four ways to express strings: basic, multi-line basic,
|
|
// literal, and multi-line literal. All strings must contain only valid
|
|
// UTF-8 characters. * Multi-line basic strings are surrounded by three
|
|
// quotation marks on each side. * Basic strings are surrounded by
|
|
// quotation marks.
|
|
doubleQuote3 = a.Str(`"""`)
|
|
|
|
// Any Unicode character may be used except those that must be escaped:
|
|
// quotation mark, backslash, and the control characters (U+0000 to
|
|
// U+001F, U+007F).
|
|
charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F'))
|
|
|
|
// For convenience, some popular characters have a compact escape sequence.
|
|
//
|
|
// \b - backspace (U+0008)
|
|
// \t - tab (U+0009)
|
|
// \n - LF (U+000A)
|
|
// \f - form feed (U+000C)
|
|
// \r - carriage return (U+000D)
|
|
// \" - quote (U+0022)
|
|
// \\ - backslash (U+005C)
|
|
// \uXXXX - unicode (U+XXXX)
|
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
|
validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash)
|
|
shortEscape = c.Seq(a.Backslash, validEscapeChar)
|
|
shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4))
|
|
longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8))
|
|
validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape)
|
|
)
|
|
|
|
func (t *parser) startString(p *parse.API) {
|
|
switch {
|
|
case p.Peek(doubleQuote3):
|
|
p.Handle(t.startMultiLineBasicString)
|
|
case p.Peek(a.DoubleQuote):
|
|
p.Handle(t.startBasicString)
|
|
default:
|
|
p.Expected("a string value")
|
|
}
|
|
}
|
|
|
|
// Specific handling of input for basic strings.
|
|
// * A double quote ends the string
|
|
// * No additional \escape sequences are allowed. What the spec say about this:
|
|
// "All other escape sequences [..] are reserved and, if used, TOML should
|
|
// produce an error.""
|
|
func (t *parser) startBasicString(p *parse.API) {
|
|
if !p.Accept(a.DoubleQuote) {
|
|
p.Expected("a basic string")
|
|
return
|
|
}
|
|
sb := &strings.Builder{}
|
|
for {
|
|
switch {
|
|
case p.Peek(charThatMustBeEscaped):
|
|
p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0))
|
|
return
|
|
case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))):
|
|
sb.WriteString(p.Result().Value(0).(string))
|
|
case p.Peek(a.Backslash):
|
|
p.Error("invalid escape sequence")
|
|
return
|
|
case p.Accept(m.Drop(a.DoubleQuote)):
|
|
t.emitCommand(csetStrVal, sb.String())
|
|
return
|
|
case p.Accept(a.ValidRune):
|
|
sb.WriteString(p.Result().String())
|
|
case p.Peek(a.InvalidRune):
|
|
p.Error("invalid UTF8 rune")
|
|
return
|
|
default:
|
|
p.Expected("end of string")
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *parser) startMultiLineBasicString(p *parse.API) {
|
|
if p.Accept(doubleQuote3) {
|
|
p.Error("not yet implemented")
|
|
} else {
|
|
p.Expected("a multi-line basic string")
|
|
}
|
|
}
|