package parser import ( "strings" "git.makaay.nl/mauricem/go-parsekit/parse" ) var ( // There are four ways to express strings: basic, multi-line basic, // literal, and multi-line literal. All strings must contain only valid // UTF-8 characters. * Multi-line basic strings are surrounded by three // quotation marks on each side. * Basic strings are surrounded by // quotation marks. doubleQuote3 = a.Str(`"""`) // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to // U+001F, U+007F). charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F')) // For convenience, some popular characters have a compact escape sequence. // // \b - backspace (U+0008) // \t - tab (U+0009) // \n - LF (U+000A) // \f - form feed (U+000C) // \r - carriage return (U+000D) // \" - quote (U+0022) // \\ - backslash (U+005C) // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash) shortEscape = c.Seq(a.Backslash, validEscapeChar) shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4)) longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8)) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) ) func (t *parser) startString(p *parse.API) { switch { case p.Peek(doubleQuote3): p.Handle(t.startMultiLineBasicString) case p.Peek(a.DoubleQuote): p.Handle(t.startBasicString) default: p.Expected("a string value") } } // Specific handling of input for basic strings. // * A double quote ends the string // * No additional \escape sequences are allowed. What the spec say about this: // "All other escape sequences [..] are reserved and, if used, TOML should // produce an error."" func (t *parser) startBasicString(p *parse.API) { if !p.Accept(a.DoubleQuote) { p.Expected("a basic string") return } sb := &strings.Builder{} for { switch { case p.Peek(charThatMustBeEscaped): p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0)) return case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))): sb.WriteString(p.Result().Value(0).(string)) case p.Peek(a.Backslash): p.Error("invalid escape sequence") return case p.Accept(m.Drop(a.DoubleQuote)): t.emitCommand(csetStrVal, sb.String()) return case p.Accept(a.ValidRune): sb.WriteString(p.Result().String()) case p.Peek(a.InvalidRune): p.Error("invalid UTF8 rune") return default: p.Expected("end of string") return } } } func (t *parser) startMultiLineBasicString(p *parse.API) { if p.Accept(doubleQuote3) { p.Error("not yet implemented") } else { p.Expected("a multi-line basic string") } }