// go-toml/parse/value_string.go

package parse

import (
	"unicode/utf8"

	"git.makaay.nl/mauricem/go-parsekit/parse"
	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// parseString tries to read a TOML string value at the current input position
// using the stringHandler tokenizer.
//
// When a string is accepted, it returns the decoded string, the kind of
// string that was read (derived from the flags that the handler stored in
// t.strFlags) and true. When no string is accepted, an "expected" error is
// registered on p and the zero results ("", strTypeNone, false) are returned.
func (t *parser) parseString(p *parse.API) (string, stringType, bool) {
	if p.Accept(t.stringHandler) {
		kind := stringTypeFromFlags(t.strFlags)
		return p.Result.String(), kind, true
	}
	p.Expected("a string value")
	return "", strTypeNone, false
}

// stringType identifies which of the TOML string notations was used for a
// parsed string value.
type stringType byte

const (
	// strTypeNone is reported when no string could be parsed.
	strTypeNone stringType = iota
	// strTypeBasic is a single-line string in quotation marks ("...").
	strTypeBasic
	// strTypeLiteral is a single-line string in single quotes ('...').
	strTypeLiteral
	// strTypeMultiLineBasic is a string in triple quotation marks ("""...""").
	strTypeMultiLineBasic
	// strTypeMultiLineLiteral is a string in triple single quotes ('''...''').
	strTypeMultiLineLiteral
)

// stringTypeFromFlags maps the tokenizer's string flags onto a stringType.
//
// Only the strFlagBasic and strFlagMultiLine bits are inspected: a string
// without the basic bit is reported as a literal string, and the multi-line
// bit selects between the single-line and multi-line variant.
func stringTypeFromFlags(flags byte) stringType {
	isBasic := flags&strFlagBasic != 0
	isMultiLine := flags&strFlagMultiLine != 0

	switch {
	case isBasic && isMultiLine:
		return strTypeMultiLineBasic
	case isBasic:
		return strTypeBasic
	case isMultiLine:
		return strTypeMultiLineLiteral
	default:
		return strTypeLiteral
	}
}

// Bit flags that describe the string that stringHandler is tokenizing and
// which input is acceptable inside its body. They are combined with bitwise
// OR into a single byte.
const (
	strFlagLiteral      byte = 1  // opened with a single quote (')
	strFlagBasic        byte = 2  // opened with a quotation mark (")
	strFlagMultiLine    byte = 4  // opened with three delimiters in a row
	strFlagNewlinesOK   byte = 8  // unescaped \n and \r\n may appear in the body
	strFlagTabsOK       byte = 16 // unescaped tabs may appear in the body
	strFlagEscapesOK    byte = 32 // a backslash starts an escape sequence
	strFlagLineConcatOK byte = 64 // a line ending backslash (line concatenation) is allowed
)

// stringHandler is a tokenize handler that recognizes all four TOML string
// notations (basic, literal, multi-line basic and multi-line literal) in a
// single pass over the input bytes, using a small state machine.
//
// The kind of string is derived from the opening delimiter(s). The flags that
// describe it are stored in t.strFlags as soon as they are known, so the
// caller can find out what kind of string was read.
//
// The decoded string body (escape sequences resolved, \r\n normalized to \n,
// line concatenations removed) is written to tokenAPI.Output and the input
// cursor is moved past every consumed byte.
//
// It returns true when a complete string was read, false when the input does
// not start with a string, ends prematurely, or contains invalid content
// (unescaped control characters, invalid escapes, invalid UTF-8).
//
// NOTE(review): the CRLF, UTF8 and unicode escape states postpone moving the
// cursor until their byte sequence is complete. This looks like it assumes
// that such a sequence never straddles two PeekBuffered results — confirm
// against the go-parsekit buffering semantics.
func (t *parser) stringHandler(tokenAPI *tokenize.API) bool {
	var state stringTokenizerState
	in := tokenAPI.Input
	out := tokenAPI.Output

	// Bookkeeping for \uXXXX and \UXXXXXXXX escape sequences.
	unicodeReqLen := 0
	unicodeLen := 0
	unicodeHex := make([]byte, 8)
	unicodeRune := rune(0)

	// Bookkeeping for multi-byte UTF8 sequences in the string body. The
	// buffer doubles as scratch space for encoding unicode escapes.
	utf8ReqLen := 0
	utf8Len := 0
	utf8Bytes := make([]byte, 4)

	flags := byte(0)
	delim := byte(0)
	subState := 0

	for {
		// The error result is discarded: an empty buffer is what signals
		// the end of the input below.
		bs, _ := in.Byte.PeekBuffered(0)
		bslen := len(bs)

		// End of input reached.
		if bslen == 0 {
			// We might be at the second delimiter of a basic or literal string.
			if state == strStateStart && subState == 2 {
				return true
			}
			// Unexpected end of input.
			return false
		}

		for i := 0; i < bslen; i++ {
			b := bs[i]
			switch state {

			// Parse the string opener.
			// There are four ways to express strings: basic, multi-line basic, literal and
			// multi-line literal. Basic strings are surrounded by quotation marks ("...").
			// Literal strings are surrounded by single quotes ('...').
			// Multi-line basic strings are surrounded by three quotation marks on each
			// side and allow newlines ("""..."""). Multi-line literal strings are surrounded
			// by three single quotes on each side and allow newlines as well ('''...''').
			case strStateStart:
				switch subState {
				case 0:
					// Expect the opener quote here.
					switch b {
					case '\'':
						flags |= strFlagLiteral | strFlagTabsOK
					case '"':
						flags |= strFlagBasic | strFlagEscapesOK
					default:
						return false
					}
					t.strFlags = flags
					subState = 1
					delim = b
					in.Byte.MoveCursor(b)
					continue
				case 1:
					// Not a second quote, so this is the start of
					// single-line string content.
					if b != delim {
						i--
						state = strStateContent
						continue
					}
					in.Byte.MoveCursor(b)
					subState = 2
					continue
				case 2:
					// Not a third quote, so this is an empty string ('' or "").
					if b != delim {
						return true
					}
					// Third quote, so this is a multi-line string (''' or """).
					flags |= strFlagMultiLine | strFlagNewlinesOK
					if flags&strFlagBasic == strFlagBasic {
						flags |= strFlagLineConcatOK
					}
					t.strFlags = flags
					in.Byte.MoveCursor(b)
					subState = 3
					continue
				case 3:
					// We're in a multi-line string. From the TOML spec:
					// A newline immediately following the opening delimiter will be trimmed.
					// All other whitespace and newline characters remain intact.
					if b == '\n' {
						in.Byte.MoveCursor(b)
						state = strStateContent
						continue
					}
					if b == '\r' {
						in.Byte.MoveCursor(b)
						subState = 4
						continue
					}
					// Not a newline, so this byte is part of the content.
					i--
					state = strStateContent
					continue
				case 4:
					// We've seen a \r, so here we should see a \n for a newline
					// after a multi-line opener.
					if b == '\n' {
						in.Byte.MoveCursor(b)
						state = strStateContent
						continue
					}
					// Lonely \r found, which is never valid in a string body.
					// Reject right away instead of rewinding the index to the
					// \r: that byte might not be in the current buffer anymore,
					// in which case rewinding would index out of range.
					return false
				}

			// Parse string contents.
			case strStateContent:
				switch {
				case b == '\r' && flags&strFlagNewlinesOK == strFlagNewlinesOK:
					// The cursor is moved once the \n has been seen as well.
					state = strStateCRLF
					continue
				case b == '\n' && flags&strFlagNewlinesOK == strFlagNewlinesOK:
					out.AddByte(b)
					in.Byte.MoveCursor(b)
					continue
				case b == '\t' && flags&strFlagTabsOK == strFlagTabsOK:
					out.AddByte(b)
					in.Byte.MoveCursor(b)
					continue
				case b <= 0x1F || b == 0x7F:
					// Control characters must be escaped.
					return false
				case b == '\\':
					in.Byte.MoveCursor(b)
					// Handle escape codes, when they are allowed.
					if flags&strFlagEscapesOK == strFlagEscapesOK {
						state = strStateEscape
						continue
					}
					// Otherwise, add the backslash as plain output.
					out.AddByte(b)
					continue
				case b == delim:
					// Single-line string.
					if flags&strFlagMultiLine == 0 {
						in.Byte.MoveCursor(b)
						return true
					}
					// Multi-line string
					in.Byte.MoveCursor(b)
					state = strStateMultiLineEnd
					subState = 0
					continue
				}

				// At this point, we must have a UTF8 character on the input.
				// Here we check what length the character must have in bytes.
				// Then the rest of the work is offloaded to the strUTF8 state.
				switch b >> 4 {
				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
					out.AddByte(b)
					in.Byte.MoveCursor(b)
					continue
				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
					utf8ReqLen = 2
				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
					utf8ReqLen = 3
				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
					utf8ReqLen = 4
				default: // Invalid UTF8 rune (continuation byte without a lead byte)
					return false
				}
				utf8Bytes[0] = b
				utf8Len = 1
				state = strStateUTF8

			// Parse followup bytes of a UTF8 byte sequence.
			case strStateUTF8:
				// The input byte must be a continuation byte (10xxxxxx)
				if b>>6 != 2 {
					// Invalid UTF8 rune
					return false
				}
				utf8Bytes[utf8Len] = b
				utf8Len++
				if utf8Len == utf8ReqLen {
					seq := utf8Bytes[:utf8Len]
					// Let the standard library judge the complete sequence.
					// This rejects encoded surrogates, overlong encodings,
					// code points beyond U+10FFFF and invalid lead bytes.
					if !utf8.Valid(seq) {
						// Invalid unicode character
						return false
					}
					out.AddBytes(seq...)
					in.Byte.MoveCursorMulti(seq...)
					state = strStateContent
				}

			// Parse the \n in a \r\n sequence.
			case strStateCRLF:
				// \r\n is normalized to just \n here (as allowed by the TOML spec).
				if b == '\n' {
					in.Byte.MoveCursorMulti('\r', b)
					out.AddByte('\n')
					state = strStateContent
					continue
				}
				// Lonely \r, should have been escaped.
				return false

			// Parse escape byte sequences.
			// For convenience, some popular characters have a compact escape sequence.
			//
			// \b         - backspace       (U+0008)
			// \t         - tab             (U+0009)
			// \n         - LF              (U+000A)
			// \f         - form feed       (U+000C)
			// \r         - carriage return (U+000D)
			// \"         - quote           (U+0022)
			// \\         - backslash       (U+005C)
			// \uXXXX     - unicode         (U+XXXX)
			// \UXXXXXXXX - unicode         (U+XXXXXXXX)
			case strStateEscape:
				// Handle short escape sequences (\t, \n, \", etc).
				if escaped, ok := getEscapedChar(b); ok {
					out.AddByte(escaped)
					in.Byte.MoveCursor(b)
					state = strStateContent
					continue
				}
				switch b {
				case ' ', '\t', '\r', '\n':
					// Handle line concatenation escape sequence.
					if flags&strFlagLineConcatOK == 0 {
						// Invalid escape.
						return false
					}
					// Point the parser at an appropriate subState of
					// the strEscapeConcat state.
					switch b {
					case ' ', '\t':
						subState = 0
					case '\r':
						subState = 1
					case '\n':
						subState = 2
					}
					in.Byte.MoveCursor(b)
					state = strStateEscapeConcat
					continue
				case 'u', 'U':
					// Handle unicode escape sequence (\uXXXX, \UXXXXXXXX).
					in.Byte.MoveCursor(b)
					if b == 'u' {
						unicodeReqLen = 4
					} else {
						unicodeReqLen = 8
					}
					unicodeLen = 0
					unicodeRune = 0
					state = strStateEscapeUnicode
				default:
					// Invalid escape sequence used.
					return false
				}

			// For writing long strings without introducing extraneous whitespace, use a
			// "line ending backslash". When the last non-whitespace character on a line is
			// a \, it will be trimmed along with all whitespace (including newlines) up to
			// the next non-whitespace character or closing delimiter.
			case strStateEscapeConcat:
				switch subState {
				case 0:
					// Skip over whitespace until the end of the line is found.
					switch b {
					case ' ', '\t':
						in.Byte.MoveCursor(b)
						continue
					case '\r':
						in.Byte.MoveCursor(b)
						subState = 1
						continue
					case '\n':
						in.Byte.MoveCursor(b)
						subState = 2
						continue
					default:
						// Invalid escape sequence used. Expected whitespace or newline.
						return false
					}
				case 1:
					// We've seen a \r at the same line as the escape char,
					// skip over the following \n.
					if b == '\n' {
						in.Byte.MoveCursor(b)
						subState = 2
						continue
					}
					// Invalid escape sequence used. Expected newline.
					return false
				case 2:
					// We've seen a \n at the same line as the escape char,
					// skip over all whitespace and newlines from here on.
					if b == ' ' || b == '\t' || b == '\n' {
						in.Byte.MoveCursor(b)
						continue
					}
					if b == '\r' {
						in.Byte.MoveCursor(b)
						subState = 3
						continue
					}
				case 3:
					// We've seen a \r, skip over the following \n.
					if b == '\n' {
						in.Byte.MoveCursor(b)
						subState = 2
						continue
					}
					// Lonely \r. It is neither whitespace nor a newline and
					// it must not silently disappear from the string.
					return false
				}
				// End of concat escape. Let the strContent state reprocess the byte.
				i--
				state = strStateContent
				continue

			// Parse unicode escape sequence (\uXXXX, \UXXXXXXXX).
			case strStateEscapeUnicode:
				value, ok := getHexValueForChar(b)
				if !ok {
					// Invalid unicode escape sequence used.
					return false
				}
				unicodeRune = unicodeRune<<4 + rune(value)
				unicodeHex[unicodeLen] = b
				unicodeLen++
				if unicodeLen == unicodeReqLen {
					if !utf8.ValidRune(unicodeRune) {
						// Invalid unicode escape
						return false
					}
					in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...)
					w := utf8.EncodeRune(utf8Bytes, unicodeRune)
					out.AddBytes(utf8Bytes[:w]...)
					state = strStateContent
				}

			// Parse the end of the string.
			// One delimiter has already been seen by the strContent state.
			// Here we check if we have a full set of 3 delimiters to end
			// the string.
			case strStateMultiLineEnd: // TODO rename to strEndMultiLine
				if subState == 0 {
					// Second delimiter found.
					if b == delim {
						subState = 1
						in.Byte.MoveCursor(b)
						continue
					}
					// No delimiter found, so we're looking at a single
					// delimiter within the multi-line body. Add the delimiter
					// to the output and feed the current byte back to the
					// strContent state.
					out.AddByte(delim)
					i--
					state = strStateContent
					continue
				}
				if subState == 1 {
					// Third delimiter found. This ends the string.
					if b == delim {
						in.Byte.MoveCursor(b)
						return true
					}
					// No delimiter found, so we're looking at two delimiters
					// within the multi-line body. Add the delimiters to the
					// output and feed the current byte back to the strContent state.
					out.AddBytes(delim, delim)
					i--
					state = strStateContent
					continue
				}
			}
		}
	}
}

// Specific handling of input for basic strings.
//
// • Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
//
// • No additional \escape sequences are allowed. What the spec says about this:
// "All other escape sequences [..] are reserved and, if used, TOML should
// produce an error."
// func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
// 	if !p.Accept(basicStringHandler) {
// 		return "", false
// 	}
// 	return p.Result.String(), true
// }

// stringTokenizerState enumerates the main states of the state machine that
// stringHandler uses to tokenize a string.
type stringTokenizerState int

const (
	// strStateStart parses the opening delimiter(s) and, for multi-line
	// strings, the newline that directly follows them.
	strStateStart stringTokenizerState = iota
	// strStateContent parses the body of the string.
	strStateContent
	// strStateEscape parses the byte that follows a backslash.
	strStateEscape
	// strStateEscapeUnicode parses the hex digits of a \uXXXX or \UXXXXXXXX escape.
	strStateEscapeUnicode
	// strStateEscapeConcat skips the whitespace and newlines after a line
	// ending backslash.
	strStateEscapeConcat
	// strStateCRLF expects the \n that completes a \r\n sequence.
	strStateCRLF
	// strStateUTF8 parses the followup bytes of a multi-byte UTF8 sequence.
	strStateUTF8
	// strStateMultiLineEnd checks if a delimiter in a multi-line string is
	// part of the closing triple delimiter.
	strStateMultiLineEnd
)

// Bit masks that select the lowest N bits of a byte. These correspond to the
// payload bits of the various UTF8 byte forms: 6 bits for a continuation byte
// (10xxxxxx) and 5, 4 or 3 bits for the lead byte of a 2, 3 or 4 byte
// sequence (110xxxxx, 1110xxxx, 11110xxx).
const (
	lowest6bits = 0x3F // 0011 1111
	lowest5bits = 0x1F // 0001 1111
	lowest4bits = 0x0F // 0000 1111
	lowest3bits = 0x07 // 0000 0111
)

// getHexValueForChar returns the numeric value (0-15) of the hexadecimal
// digit b. Both lower case and upper case digits are supported.
//
// The boolean result is false when b is not a hexadecimal digit. Only the
// letters a-f and A-F are digits: accepting the rest of the alphabet would
// make invalid escapes like \u00zz parse as valid code points.
func getHexValueForChar(b byte) (byte, bool) {
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	default:
		return 0, false
	}
}

// getEscapedChar translates the byte that follows a backslash in a compact
// escape sequence (\b, \t, \n, \f, \r, \" and \\) into the byte that the
// escape sequence stands for.
//
// The boolean result is false (and the byte result 0) when b does not
// identify one of these escape sequences.
func getEscapedChar(b byte) (byte, bool) {
	var unescaped byte
	switch b {
	case '"', '\\':
		// These two escape sequences stand for the character itself.
		unescaped = b
	case 'b':
		unescaped = '\b'
	case 'f':
		unescaped = '\f'
	case 'n':
		unescaped = '\n'
	case 'r':
		unescaped = '\r'
	case 't':
		unescaped = '\t'
	default:
		return 0, false
	}
	return unescaped, true
}

// Specific handling of input for multi-line basic strings.
//
// • Multi-line basic strings are surrounded by three quotation marks on
// each side and allow newlines.
//
// • A newline immediately following the opening delimiter will be trimmed.
// All other whitespace and newline characters remain intact.
//
// • TOML parsers should feel free to normalize newline to whatever makes
// sense for their platform.
//
// • All of the escape sequences that are valid for basic strings are also valid
// for multi-line basic strings.
//
// • Any Unicode character may be used except those that must be escaped:
// backslash and the control characters (U+0000 to U+001F, U+007F). Quotation
// marks need not be escaped unless their presence would create a premature
// closing delimiter.
//
// • For writing long strings without introducing extraneous whitespace, use a
// "line ending backslash". When the last non-whitespace character on a line is
// a \, it will be trimmed along with all whitespace (including newlines) up to
// the next non-whitespace character or closing delimiter.
// func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) {
// 	if !p.Accept(multiLineBasicStringHandler) {
// 		return "", false
// 	}
// 	return p.Result.String(), true
// }

// func multiLineBasicStringHandler(tokenAPI *tokenize.API) bool {
// 	var state stringTokenizerState
// 	in := tokenAPI.Input
// 	out := tokenAPI.Output

// 	unicodeReqLen := 0
// 	unicodeLen := 0
// 	unicodeHex := make([]byte, 8)

// 	utf8ReqLen := 0
// 	utf8Len := 0
// 	utf8Rune := rune(0)
// 	utf8Bytes := make([]byte, 4)

// 	crlf := false

// 	for {
// 		bs, _ := in.Byte.PeekBuffered(0)
// 		bslen := len(bs)
// 		if bslen == 0 {
// 			return false
// 		}
// 		for i := 0; i < bslen; i++ {
// 			b := bs[i]
// 			switch state {
// 			case strStart, strStart2, strStart3:
// 				if b != '"' {
// 					// No triple opening quotes found.
// 					return false
// 				}
// 				in.Byte.MoveCursor(b)
// 				switch state {
// 				case strStart:
// 					state = strStart2
// 				case strStart2:
// 					state = strStart3
// 				case strStart3:
// 					state = strStart4
// 				}
// 			case strStart4:
// 				if !crlf && b == '\r' {
// 					crlf = true
// 					in.Byte.MoveCursor(b)
// 					continue
// 				}
// 				if b == '\n' {
// 					in.Byte.MoveCursor(b)
// 					state = strContent
// 					continue
// 				}
// 				if crlf {
// 					// Lonely \r without \n.
// 					return false
// 				}
// 				state = strContent
// 				fallthrough
// 			case strContent:
// 				switch {
// 				case b == '\r':
// 					state = strCRLF
// 					continue
// 				case b == '\n':
// 					out.AddByte(b)
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case (b >= 0x00 && b <= 0x1F) || b == 0x7F:
// 					// Unescaped control character
// 					// TODO error reporting instead of full reject
// 					return false
// 				case b == '\\':
// 					in.Byte.MoveCursor(b)
// 					state = strEscape
// 					continue
// 				case b == '"':
// 					in.Byte.MoveCursor(b)
// 					state = strEnd
// 					continue
// 				}
// 				switch b >> 4 {
// 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
// 					out.AddByte(b)
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
// 					utf8ReqLen = 2
// 					utf8Rune = rune((b & lowest5bits) << 6)
// 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
// 					utf8ReqLen = 3
// 					utf8Rune = rune((b & lowest4bits) << 6)
// 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
// 					utf8ReqLen = 4
// 					utf8Rune = rune((b & lowest3bits) << 6)
// 				default: // Invalid UTF8 rune
// 					return false
// 				}
// 				utf8Bytes[0] = b
// 				utf8Len = 1
// 				state = strUTF8
// 			case strUTF8:
// 				// This should be a continuation byte (10xxxxxx)
// 				if b>>6 != 2 {
// 					// Invalid UTF8 rune
// 					return false
// 				}
// 				utf8Bytes[utf8Len] = b
// 				utf8Len++
// 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
// 				if utf8Len == utf8ReqLen {
// 					if !utf8.ValidRune(utf8Rune) {
// 						// Invalid unicode character
// 						return false
// 					}
// 					bytes := utf8Bytes[:utf8Len]
// 					out.AddBytes(bytes...)
// 					in.Byte.MoveCursorMulti(bytes...)
// 					state = strContent
// 				}
// 			case strCRLF:
// 				if b == '\n' {
// 					in.Byte.MoveCursorMulti('\r', b)
// 					out.AddByte('\n')
// 					state = strContent
// 					continue
// 				}
// 				// Lonely \r, should have been escaped.
// 				return false
// 			case strEscape:
// 				state = strContent
// 				if escaped, ok := getEscapedChar(b); ok {
// 					out.AddByte(escaped)
// 					in.Byte.MoveCursor(b)
// 					continue
// 				}
// 				switch b {
// 				case ' ', '\t':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcat
// 					continue
// 				case '\r':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatCRLF
// 					continue
// 				case '\n':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatWs2
// 					continue
// 				case 'u', 'U':
// 					in.Byte.MoveCursor(b)
// 					unicodeReqLen = 4
// 					if b == 'u' {
// 						unicodeReqLen = 4
// 					} else {
// 						unicodeReqLen = 8
// 					}
// 					unicodeLen = 0
// 					utf8Rune = 0
// 					state = strEscapeUnicode
// 				default:
// 					// Invalid escape sequence used.
// 					return false
// 				}
// 			case strEscapeConcat:
// 				switch b {
// 				case ' ', '\t':
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case '\r':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatCRLF
// 					continue
// 				case '\n':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatWs2
// 					continue
// 				default:
// 					// Invalid line concatenation
// 					return false
// 				}
// 			case strEscapeConcatCRLF:
// 				switch b {
// 				case '\n':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatWs2
// 					continue
// 				default:
// 					// Invalid line concatenation
// 					return false
// 				}
// 			case strEscapeConcatWs2:
// 				switch b {
// 				case ' ', '\t':
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case '\r':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatCRLF
// 					continue
// 				case '\n':
// 					in.Byte.MoveCursor(b)
// 					state = strEscapeConcatWs2
// 					continue
// 				default:
// 					i--
// 					state = strContent
// 					continue
// 				}
// 			case strEscapeUnicode:
// 				value, ok := getHexValueForChar(b)
// 				if !ok {
// 					// Invalid unicode escape sequence used.
// 					return false
// 				}
// 				utf8Rune = utf8Rune<<4 + rune(value)
// 				unicodeHex[unicodeLen] = b
// 				unicodeLen++
// 				if unicodeLen == unicodeReqLen {
// 					if !utf8.ValidRune(utf8Rune) {
// 						// Invalid unicode escape
// 						return false
// 					}
// 					in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...)
// 					w := utf8.EncodeRune(utf8Bytes, utf8Rune)
// 					out.AddBytes(utf8Bytes[:w]...)
// 					state = strContent
// 				}
// 			case strEnd:
// 				if b == '"' {
// 					state = strEnd3
// 					in.Byte.MoveCursor(b)
// 				} else {
// 					state = strContent
// 					out.AddByte('"')
// 					i--
// 				}
// 			case strEnd3:
// 				if b == '"' {
// 					in.Byte.MoveCursor(b)
// 					return true
// 				}
// 				state = strContent
// 				out.AddBytes('"', '"')
// 				i--
// 			}
// 		}
// 	}
// }

// Specific handling of input for multi-line literal strings.
//
// • Multi-line literal strings are surrounded by three single quotes on
// each side and allow newlines.
//
// • A newline immediately following the opening delimiter will be trimmed.
//
// • All other content between the delimiters is interpreted as-is without modification.
//
// • TOML parsers should feel free to normalize newline to whatever makes
// sense for their platform.
//
// • Control characters other than tab and newline are not permitted in a multi-line literal string.
// func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) {
// 	if !p.Accept(multiLineLiteralStringHandler) {
// 		return "", false
// 	}
// 	return p.Result.String(), true
// }

// func multiLineLiteralStringHandler(tokenAPI *tokenize.API) bool {
// 	var state stringTokenizerState
// 	in := tokenAPI.Input
// 	out := tokenAPI.Output

// 	utf8ReqLen := 0
// 	utf8Len := 0
// 	utf8Rune := rune(0)
// 	utf8Bytes := make([]byte, 4)

// 	crlf := false

// 	for {
// 		bs, _ := in.Byte.PeekBuffered(0)
// 		bslen := len(bs)
// 		if bslen == 0 {
// 			return false
// 		}
// 		for i := 0; i < bslen; i++ {
// 			b := bs[i]
// 			switch state {
// 			case strStart, strStart2, strStart3:
// 				if b != '\'' {
// 					// No triple opening quotes found.
// 					return false
// 				}
// 				in.Byte.MoveCursor(b)
// 				switch state {
// 				case strStart:
// 					state = strStart2
// 				case strStart2:
// 					state = strStart3
// 				case strStart3:
// 					state = strStart4
// 				}
// 			case strStart4:
// 				if !crlf && b == '\r' {
// 					crlf = true
// 					in.Byte.MoveCursor(b)
// 					continue
// 				}
// 				if b == '\n' {
// 					in.Byte.MoveCursor(b)
// 					state = strContent
// 					continue
// 				}
// 				if crlf {
// 					// Lonely \r without \n.
// 					return false
// 				}
// 				state = strContent
// 				fallthrough
// 			case strContent:
// 				switch {
// 				case b == '\r':
// 					state = strCRLF
// 					continue
// 				case b == '\n' || b == '\t':
// 					out.AddByte(b)
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case (b >= 0x00 && b <= 0x1F) || b == 0x7F:
// 					// Unescaped control character
// 					// TODO error reporting instead of full reject
// 					return false
// 				case b == '\'':
// 					in.Byte.MoveCursor(b)
// 					state = strEnd
// 					continue
// 				}
// 				switch b >> 4 {
// 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
// 					out.AddByte(b)
// 					in.Byte.MoveCursor(b)
// 					continue
// 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
// 					utf8ReqLen = 2
// 					utf8Rune = rune((b & lowest5bits) << 6)
// 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
// 					utf8ReqLen = 3
// 					utf8Rune = rune((b & lowest4bits) << 6)
// 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
// 					utf8ReqLen = 4
// 					utf8Rune = rune((b & lowest3bits) << 6)
// 				default: // Invalid UTF8 rune
// 					return false
// 				}
// 				utf8Bytes[0] = b
// 				utf8Len = 1
// 				state = strUTF8
// 			case strUTF8:
// 				// This should be a continuation byte (10xxxxxx)
// 				if b>>6 != 2 {
// 					// Invalid UTF8 rune
// 					return false
// 				}
// 				utf8Bytes[utf8Len] = b
// 				utf8Len++
// 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
// 				if utf8Len == utf8ReqLen {
// 					if !utf8.ValidRune(utf8Rune) {
// 						// Invalid unicode character
// 						return false
// 					}
// 					bytes := utf8Bytes[:utf8Len]
// 					out.AddBytes(bytes...)
// 					in.Byte.MoveCursorMulti(bytes...)
// 					state = strContent
// 				}
// 			case strCRLF:
// 				if b == '\n' {
// 					in.Byte.MoveCursorMulti('\r', b)
// 					out.AddByte('\n')
// 					state = strContent
// 					continue
// 				}
// 				// Lonely \r, should have been escaped.
// 				return false
// 			case strEnd:
// 				if b == '\'' {
// 					state = strEnd3
// 					in.Byte.MoveCursor(b)
// 				} else {
// 					state = strContent
// 					out.AddByte('\'')
// 					i--
// 				}
// 			case strEnd3:
// 				if b == '\'' {
// 					in.Byte.MoveCursor(b)
// 					return true
// 				}
// 				state = strContent
// 				out.AddBytes('\'', '\'')
// 				i--
// 			}
// 		}
// 	}
// }