package parser import "git.makaay.nl/mauricem/go-parsekit" var ( // There are four ways to express strings: basic, multi-line basic, // literal, and multi-line literal. All strings must contain only valid // UTF-8 characters. * Multi-line basic strings are surrounded by three // quotation marks on each side. * Basic strings are surrounded by // quotation marks. doubleQuote3 = c.Str(`"""`) // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to // U+001F, U+007F). charThatMustBeEscaped = c.Any(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F')) // For convenience, some popular characters have a compact escape sequence. // // \b - backspace (U+0008) // \t - tab (U+0009) // \n - LF (U+000A) // \f - form feed (U+000C) // \r - carriage return (U+000D) // \" - quote (U+0022) // \\ - backslash (U+005C) // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) validEscapeChar = c.Any(c.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash) shortEscape = c.Seq(a.Backslash, validEscapeChar) shortUTF8Escape = c.Seq(a.Backslash, c.Rune('u'), c.Rep(4, a.HexDigit)) longUTF8Escape = c.Seq(a.Backslash, c.Rune('U'), c.Rep(8, a.HexDigit)) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) ) func startString(p *parsekit.P) { p.Expects("a string value") switch { case p.On(doubleQuote3).Stay(): p.RouteTo(startMultiLineBasicString) case p.On(a.DoubleQuote).Stay(): p.RouteTo(startBasicString) } } func startBasicString(p *parsekit.P) { p.Expects("a basic string") if p.On(a.DoubleQuote).Skip() { p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics) } } func parseBasicString(p *parsekit.P) { p.Expects("string contents") switch { case p.On(charThatMustBeEscaped).Stay(): p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch) case p.On(validEscape).Accept(): p.RouteRepeat() case p.On(a.Backslash).Stay(): p.RouteReturn() case p.On(a.DoubleQuote).Stay(): p.RouteReturn() case p.On(a.AnyRune).Accept(): p.RouteRepeat() } } // Specific handling of input for basic strings. // * A double quote ends the string // * No additional \escape sequences are allowed. What the spec say about this: // "All other escape sequences [..] are reserved and, if used, TOML should // produce an error."" func basicStringSpecifics(p *parsekit.P) { p.Expects("string contents") switch { case p.On(a.DoubleQuote).Skip(): p.EmitInterpreted(ItemString) p.RouteTo(startKeyValuePair) case p.On(a.Backslash).Stay(): p.EmitError("invalid escape sequence") } } func startMultiLineBasicString(p *parsekit.P) { p.Expects("a multi-line basic string") if p.On(doubleQuote3).Skip() { p.EmitError("not yet implemented") } }