package lexer import "github.com/mmakaay/toml/parser" const ( whitespace string = " \t" carriageReturn string = "\r" newline string = "\n" hash string = "#" equal string = "=" lower string = "abcdefghijklmnopqrstuvwxyz" upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" digits string = "0123456789" hex string = digits + "abcdefABCDEF" dot string = "." underscore string = "_" dash string = "-" singleQuote string = "'" doubleQuote string = "\"" backslash string = "\\" quoteChars string = singleQuote + doubleQuote bareKeyChars string = lower + upper + digits + underscore + dash startOfKey string = bareKeyChars + quoteChars escapeChars string = `btnfr"\` shortUtf8Escape string = "u" longUtf8Escape string = "U" ) // NewParser creates a new parser, using the provided input string // as the data to parse. func NewParser(input string) *parser.Parser { return parser.New(input, stateKeyValuePair) } func stateKeyValuePair(l *parser.Parser) parser.StateFn { l.SkipConsecutive(whitespace + carriageReturn + newline) if l.SkipMatching(hash) { return stateComment } if l.Upcoming(startOfKey) { return stateKey } return stateEndOfFile } // A '#' hash symbol marks the rest of the line as a comment. func stateComment(l *parser.Parser) parser.StateFn { for { switch { case l.AtEndOfFile() || l.SkipMatching(newline): l.EmitLiteralTrim(ItemComment) return stateKeyValuePair default: if !l.AcceptAny() { return nil } } } } // A key may be either bare, quoted or dotted. func stateKey(l *parser.Parser) parser.StateFn { if l.AcceptMatching(bareKeyChars) { return statebareKeyChars } return l.UnexpectedInputError("a valid key name") } // Bare keys may only contain ASCII letters, ASCII digits, // underscores, and dashes (A-Za-z0-9_-). Note that bare // keys are allowed to be composed of only ASCII digits, // e.g. 1234, but are always interpreted as strings. func statebareKeyChars(l *parser.Parser) parser.StateFn { l.AcceptConsecutive(bareKeyChars) l.EmitLiteral(ItemKey) return stateEndOfKeyOrKeyDot } // Dotted keys are a sequence of bare or quoted keys joined with a dot. // This allows for grouping similar properties together: func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn { // Whitespace around dot-separated parts is ignored, however, // best practice is to not use any extraneous whitespace. l.SkipConsecutive(whitespace) if l.SkipMatching(dot) { l.Emit(ItemKeyDot, "") l.SkipConsecutive(whitespace) return stateKey } return stateKeyAssignment } // Keys are on the left of the equals sign and values are on the right. // Whitespace is ignored around key names and values. The key, equals // sign, and value must be on the same line (though some values can // be broken over multiple lines). func stateKeyAssignment(l *parser.Parser) parser.StateFn { l.SkipConsecutive(whitespace) if l.SkipMatching(equal) { l.Emit(ItemAssignment, "") l.SkipConsecutive(whitespace) return stateValue } return l.UnexpectedInputError("a value assignment") } // Values must be of the following types: String, Integer, Float, Boolean, // Datetime, Array, or Inline Table. Unspecified values are invalid. func stateValue(l *parser.Parser) parser.StateFn { l.SkipConsecutive(whitespace) if l.Upcoming(quoteChars) { return stateStringValue } return l.UnexpectedInputError("a value") } // There are four ways to express strings: basic, multi-line basic, literal, // and multi-line literal. All strings must contain only valid UTF-8 characters. func stateStringValue(l *parser.Parser) parser.StateFn { switch { case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote): // Multi-line basic strings are surrounded by three quotation marks on each side. return stateMultiLineBasicString case l.SkipMatching(doubleQuote): // Basic strings are surrounded by quotation marks. return stateSingleLineBasicString } return l.UnexpectedInputError("a string value") } func stateSingleLineBasicString(l *parser.Parser) parser.StateFn { if l.Upcoming(doubleQuote, doubleQuote) { return stateMultiLineBasicString } return stateBasicString } func stateMultiLineBasicString(l *parser.Parser) parser.StateFn { l.EmitError("Not yet implemented") return nil } // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). const invalidBasicStringCharacters string = "\"\\" + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + "\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" + "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + "\u007F" func stateParseBasicString(l *parser.Parser) parser.StateFn { for { switch { case l.AtEndOfFile(): return l.UnexpectedEndOfFile("basic string token") case l.SkipMatching(doubleQuote): return l.PopState() case l.AcceptMatching(backslash, escapeChars): // For convenience, some popular characters have a compact escape sequence. // \b - backspace (U+0008) // \t - tab (U+0009) // \n - linefeed (U+000A) // \f - form feed (U+000C) // \r - carriage return (U+000D) // \" - quote (U+0022) // \\ - backslash (U+005C) case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex): // \uXXXX - unicode (U+XXXX) case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex): // \UXXXXXXXX - unicode (U+XXXXXXXX) case l.Upcoming(backslash): // All other escape sequences not listed above are reserved and, // if used, TOML should produce an error. return l.EmitError("Invalid escape sequence in basic string") case l.Upcoming(invalidBasicStringCharacters): // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). r, _, _ := l.Match(invalidBasicStringCharacters) l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0]) return nil default: if !l.AcceptAny() { return nil } } } } func stateBasicString(l *parser.Parser) parser.StateFn { l.PushState(func(l *parser.Parser) parser.StateFn { err := l.EmitInterpreted(ItemString) if err != nil { l.EmitError("Invalid data in string: %s", err) return nil } return stateKeyValuePair }) return stateParseBasicString } func stateEndOfFile(l *parser.Parser) parser.StateFn { if l.AtEndOfFile() { l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser? } else { l.UnexpectedInputError("end of file") } return nil }