package lexer import "github.com/mmakaay/toml/parser" // There are four ways to express strings: basic, multi-line basic, literal, // and multi-line literal. All strings must contain only valid UTF-8 characters. func stateStringValue(l *parser.Parser) parser.StateFn { switch { case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote): // Multi-line basic strings are surrounded by three quotation marks on each side. return stateMultiLineBasicString case l.SkipMatching(doubleQuote): // Basic strings are surrounded by quotation marks. return stateSingleLineBasicString } return l.UnexpectedInputError("a string value") } func stateSingleLineBasicString(l *parser.Parser) parser.StateFn { if l.Upcoming(doubleQuote, doubleQuote) { return stateMultiLineBasicString } return stateBasicString } func stateMultiLineBasicString(l *parser.Parser) parser.StateFn { l.EmitError("Not yet implemented") return nil } // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). const invalidBasicStringCharacters string = "\"\\" + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + "\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" + "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + "\u007F" func stateParseBasicString(l *parser.Parser) parser.StateFn { for { switch { case l.AtEndOfFile(): return l.UnexpectedEndOfFile("basic string token") case l.SkipMatching(doubleQuote): return l.PopState() case l.AcceptMatching(backslash, escapeChars): // For convenience, some popular characters have a compact escape sequence. // \b - backspace (U+0008) // \t - tab (U+0009) // \n - linefeed (U+000A) // \f - form feed (U+000C) // \r - carriage return (U+000D) // \" - quote (U+0022) // \\ - backslash (U+005C) case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex): // \uXXXX - unicode (U+XXXX) case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex): // \UXXXXXXXX - unicode (U+XXXXXXXX) case l.Upcoming(backslash): // All other escape sequences not listed above are reserved and, // if used, TOML should produce an error. return l.EmitError("Invalid escape sequence in basic string") case l.Upcoming(invalidBasicStringCharacters): // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). r, _, _ := l.Match(invalidBasicStringCharacters) l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0]) return nil default: if !l.AcceptAny() { return nil } } } } func stateBasicString(l *parser.Parser) parser.StateFn { l.PushState(func(l *parser.Parser) parser.StateFn { err := l.EmitInterpreted(ItemString) if err != nil { l.EmitError("Invalid data in string: %s", err) return nil } return stateKeyValuePair }) return stateParseBasicString }