go-toml/lexer/strings.go

89 lines
3.1 KiB
Go

package lexer
import "github.com/mmakaay/toml/parser"
// stateStringValue dispatches on the opening delimiter of a string value.
//
// TOML knows four ways to express strings: basic, multi-line basic, literal,
// and multi-line literal. All strings must contain only valid UTF-8 characters.
// Only the two double-quoted (basic) forms are recognized by this function;
// any other input is reported through UnexpectedInputError.
func stateStringValue(l *parser.Parser) parser.StateFn {
	// Multi-line basic strings are surrounded by three quotation marks on
	// each side. This delimiter is tried first, so the opening of a
	// multi-line string is not taken for a single quotation mark.
	if l.SkipMatching(doubleQuote, doubleQuote, doubleQuote) {
		return stateMultiLineBasicString
	}

	// Basic strings are surrounded by single quotation marks.
	if l.SkipMatching(doubleQuote) {
		return stateSingleLineBasicString
	}

	return l.UnexpectedInputError("a string value")
}
// stateSingleLineBasicString selects the state that handles the input that
// follows a single opening quotation mark: stateBasicString in the normal
// case, or stateMultiLineBasicString when two more quotation marks are
// upcoming.
//
// NOTE(review): when this state is entered from stateStringValue, a run of
// three quotation marks appears to be handled by its multi-line case already,
// which would make the multi-line check below redundant on that path.
// Confirm against the parser's Upcoming/SkipMatching semantics before
// removing it.
func stateSingleLineBasicString(l *parser.Parser) parser.StateFn {
	if !l.Upcoming(doubleQuote, doubleQuote) {
		return stateBasicString
	}
	return stateMultiLineBasicString
}
// stateMultiLineBasicString is the placeholder state for multi-line basic
// strings (the ones surrounded by three quotation marks on each side).
//
// Multi-line basic strings are not supported yet: the state emits an error
// saying so and returns a nil state function.
func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
	// TODO: implement lexing of multi-line basic strings.
	l.EmitError("Not yet implemented")
	return nil
}
// invalidBasicStringCharacters lists the characters that may not appear
// unescaped in a basic string. Any Unicode character may be used in a basic
// string except those that must be escaped: quotation mark, backslash, and
// the control characters (U+0000 to U+001F, U+007F).
const invalidBasicStringCharacters string = "" +
	// Quotation mark (U+0022) and backslash (U+005C).
	"\"\\" +
	// Control characters U+0000 through U+001F.
	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" +
	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" +
	// Delete (U+007F).
	"\x7F"
// stateParseBasicString scans the body of a basic string up to and including
// its closing quotation mark. It is reached through stateBasicString, after
// the opening quotation mark has been skipped by stateStringValue.
//
// The checks inside the loop are order-sensitive: several of them consume
// input when they match, and the later checks rely on the earlier ones having
// failed. On the closing quotation mark the function returns l.PopState();
// on end of file or on invalid input it returns the result of the error
// helper, or nil.
func stateParseBasicString(l *parser.Parser) parser.StateFn {
	for {
		// The input must not end before the closing quotation mark.
		if l.AtEndOfFile() {
			return l.UnexpectedEndOfFile("basic string token")
		}

		// An unescaped quotation mark terminates the string.
		if l.SkipMatching(doubleQuote) {
			return l.PopState()
		}

		// Well-formed escape sequences are accepted as part of the string.
		//
		// For convenience, some popular characters have a compact escape
		// sequence:
		//   \b         - backspace       (U+0008)
		//   \t         - tab             (U+0009)
		//   \n         - linefeed        (U+000A)
		//   \f         - form feed       (U+000C)
		//   \r         - carriage return (U+000D)
		//   \"         - quote           (U+0022)
		//   \\         - backslash       (U+005C)
		//   \uXXXX     - unicode         (U+XXXX)
		//   \UXXXXXXXX - unicode         (U+XXXXXXXX)
		//
		// The alternatives are tried left to right and evaluation stops at
		// the first one that matches.
		if l.AcceptMatching(backslash, escapeChars) ||
			l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex) ||
			l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex) {
			continue
		}

		// A backslash that is still upcoming at this point did not start
		// any of the sequences above. All other escape sequences are
		// reserved and, if used, TOML should produce an error.
		if l.Upcoming(backslash) {
			return l.EmitError("Invalid escape sequence in basic string")
		}

		// Characters that must be escaped may not appear literally.
		if l.Upcoming(invalidBasicStringCharacters) {
			// Match is used only to get hold of the offending character for
			// the error message; its other two results are not needed here.
			offending, _, _ := l.Match(invalidBasicStringCharacters)
			l.EmitError("Invalid character in basic string: %q (must be escaped)", offending[0])
			return nil
		}

		// Anything else is an ordinary character of the string.
		if !l.AcceptAny() {
			return nil
		}
	}
}
// stateBasicString sets up the lexing of a single-line basic string.
//
// It pushes a follow-up state and then hands over to stateParseBasicString,
// which returns l.PopState() once the closing quotation mark has been seen.
// The follow-up state emits the string via EmitInterpreted as an ItemString
// and continues with stateKeyValuePair. If EmitInterpreted reports an error,
// an error is emitted and a nil state function is returned instead.
func stateBasicString(l *parser.Parser) parser.StateFn {
	emitString := func(p *parser.Parser) parser.StateFn {
		if err := p.EmitInterpreted(ItemString); err != nil {
			p.EmitError("Invalid data in string: %s", err)
			return nil
		}
		return stateKeyValuePair
	}

	l.PushState(emitString)
	return stateParseBasicString
}