210 lines
6.7 KiB
Go
210 lines
6.7 KiB
Go
package lexer
|
|
|
|
import "github.com/mmakaay/toml/parser"
|
|
|
|
// Character classes and single-character tokens used by the lexer states.
// Each constant is a set of characters; the concatenated constants are
// simply the union of the sets they are built from.
const (
	// Blank characters and line endings.
	whitespace     string = " \t"
	carriageReturn string = "\r"
	newline        string = "\n"

	// Single-character punctuation tokens.
	hash       string = "#"
	equal      string = "="
	dot        string = "."
	underscore string = "_"
	dash       string = "-"

	// Letters and digits.
	lower  string = "abcdefghijklmnopqrstuvwxyz"
	upper  string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	digits string = "0123456789"
	hex    string = digits + "abcdefABCDEF"

	// Quoting and escaping.
	singleQuote string = "'"
	doubleQuote string = `"`
	backslash   string = `\`
	quoteChars  string = singleQuote + doubleQuote

	// Characters that may appear in a bare key, and characters that
	// may start any key (bare or quoted).
	bareKeyChars string = lower + upper + digits + underscore + dash
	startOfKey   string = bareKeyChars + quoteChars

	// Characters that may follow a backslash in a basic string:
	// the compact escapes, and the markers for \uXXXX and \UXXXXXXXX.
	escapeChars     string = "btnfr\"\\"
	shortUtf8Escape string = "u"
	longUtf8Escape  string = "U"
)
|
|
|
|
// NewParser creates a new parser, using the provided input string
|
|
// as the data to parse.
|
|
func NewParser(input string) *parser.Parser {
|
|
return parser.New(input, stateKeyValuePair)
|
|
}
|
|
|
|
func stateKeyValuePair(l *parser.Parser) parser.StateFn {
|
|
l.SkipConsecutive(whitespace + carriageReturn + newline)
|
|
if l.SkipMatching(hash) {
|
|
return stateComment
|
|
}
|
|
if l.Upcoming(startOfKey) {
|
|
return stateKey
|
|
}
|
|
return stateEndOfFile
|
|
}
|
|
|
|
// A '#' hash symbol marks the rest of the line as a comment.
|
|
func stateComment(l *parser.Parser) parser.StateFn {
|
|
for {
|
|
switch {
|
|
case l.AtEndOfFile() || l.SkipMatching(newline):
|
|
l.EmitLiteralTrim(ItemComment)
|
|
return stateKeyValuePair
|
|
default:
|
|
if !l.AcceptAny() {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// A key may be either bare, quoted or dotted.
|
|
func stateKey(l *parser.Parser) parser.StateFn {
|
|
if l.AcceptMatching(bareKeyChars) {
|
|
return statebareKeyChars
|
|
}
|
|
return l.UnexpectedInputError("a valid key name")
|
|
}
|
|
|
|
// Bare keys may only contain ASCII letters, ASCII digits,
|
|
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
|
// keys are allowed to be composed of only ASCII digits,
|
|
// e.g. 1234, but are always interpreted as strings.
|
|
func statebareKeyChars(l *parser.Parser) parser.StateFn {
|
|
l.AcceptConsecutive(bareKeyChars)
|
|
l.EmitLiteral(ItemKey)
|
|
return stateEndOfKeyOrKeyDot
|
|
}
|
|
|
|
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
|
// This allows for grouping similar properties together:
|
|
func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
|
|
// Whitespace around dot-separated parts is ignored, however,
|
|
// best practice is to not use any extraneous whitespace.
|
|
l.SkipConsecutive(whitespace)
|
|
if l.SkipMatching(dot) {
|
|
l.Emit(ItemKeyDot, "")
|
|
l.SkipConsecutive(whitespace)
|
|
return stateKey
|
|
}
|
|
return stateKeyAssignment
|
|
}
|
|
|
|
// Keys are on the left of the equals sign and values are on the right.
|
|
// Whitespace is ignored around key names and values. The key, equals
|
|
// sign, and value must be on the same line (though some values can
|
|
// be broken over multiple lines).
|
|
func stateKeyAssignment(l *parser.Parser) parser.StateFn {
|
|
l.SkipConsecutive(whitespace)
|
|
if l.SkipMatching(equal) {
|
|
l.Emit(ItemAssignment, "")
|
|
l.SkipConsecutive(whitespace)
|
|
return stateValue
|
|
}
|
|
return l.UnexpectedInputError("a value assignment")
|
|
}
|
|
|
|
// Values must be of the following types: String, Integer, Float, Boolean,
|
|
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
|
func stateValue(l *parser.Parser) parser.StateFn {
|
|
l.SkipConsecutive(whitespace)
|
|
if l.Upcoming(quoteChars) {
|
|
return stateStringValue
|
|
}
|
|
return l.UnexpectedInputError("a value")
|
|
}
|
|
|
|
// There are four ways to express strings: basic, multi-line basic, literal,
|
|
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
|
func stateStringValue(l *parser.Parser) parser.StateFn {
|
|
switch {
|
|
case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote):
|
|
// Multi-line basic strings are surrounded by three quotation marks on each side.
|
|
return stateMultiLineBasicString
|
|
case l.SkipMatching(doubleQuote):
|
|
// Basic strings are surrounded by quotation marks.
|
|
return stateSingleLineBasicString
|
|
}
|
|
return l.UnexpectedInputError("a string value")
|
|
}
|
|
|
|
func stateSingleLineBasicString(l *parser.Parser) parser.StateFn {
|
|
if l.Upcoming(doubleQuote, doubleQuote) {
|
|
return stateMultiLineBasicString
|
|
}
|
|
return stateBasicString
|
|
}
|
|
|
|
func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
|
|
l.EmitError("Not yet implemented")
|
|
return nil
|
|
}
|
|
|
|
// invalidBasicStringCharacters lists the characters that may not appear
// unescaped in a basic string. Any Unicode character may be used except
// those that must be escaped: the quotation mark, the backslash, and the
// control characters (U+0000 to U+001F, U+007F).
const invalidBasicStringCharacters string = `"\` + // quotation mark, backslash
	// U+0000 to U+000F
	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
	// U+0010 to U+001F
	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
	// U+007F (delete)
	"\x7f"
|
|
|
|
func stateParseBasicString(l *parser.Parser) parser.StateFn {
|
|
for {
|
|
switch {
|
|
case l.AtEndOfFile():
|
|
return l.UnexpectedEndOfFile("basic string token")
|
|
case l.SkipMatching(doubleQuote):
|
|
return l.PopState()
|
|
case l.AcceptMatching(backslash, escapeChars):
|
|
// For convenience, some popular characters have a compact escape sequence.
|
|
// \b - backspace (U+0008)
|
|
// \t - tab (U+0009)
|
|
// \n - linefeed (U+000A)
|
|
// \f - form feed (U+000C)
|
|
// \r - carriage return (U+000D)
|
|
// \" - quote (U+0022)
|
|
// \\ - backslash (U+005C)
|
|
case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex):
|
|
// \uXXXX - unicode (U+XXXX)
|
|
case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
|
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
|
case l.Upcoming(backslash):
|
|
// All other escape sequences not listed above are reserved and,
|
|
// if used, TOML should produce an error.
|
|
return l.EmitError("Invalid escape sequence in basic string")
|
|
case l.Upcoming(invalidBasicStringCharacters):
|
|
// Any Unicode character may be used except those that must be escaped:
|
|
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
|
r, _, _ := l.Match(invalidBasicStringCharacters)
|
|
l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
|
return nil
|
|
default:
|
|
if !l.AcceptAny() {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func stateBasicString(l *parser.Parser) parser.StateFn {
|
|
l.PushState(func(l *parser.Parser) parser.StateFn {
|
|
err := l.EmitInterpreted(ItemString)
|
|
if err != nil {
|
|
l.EmitError("Invalid data in string: %s", err)
|
|
return nil
|
|
}
|
|
return stateKeyValuePair
|
|
})
|
|
return stateParseBasicString
|
|
}
|
|
|
|
func stateEndOfFile(l *parser.Parser) parser.StateFn {
|
|
if l.AtEndOfFile() {
|
|
l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser?
|
|
} else {
|
|
l.UnexpectedInputError("end of file")
|
|
}
|
|
return nil
|
|
}
|