Wrote a first, crude version of dedicated tokenizer handlers for string parsing. What is still missing is good error reporting from the tokenizer handler code (this has been a TODO for a while, so it is a nice one to implement after this).

2019-07-31 07:51:37 +00:00 · 2019-07-31 07:51:37 +00:00 · ed846c7e53
parent 5ff6f20ab7
commit ed846c7e53
6 changed files with 742 additions and 132 deletions
--- a/cmd/burntsushi-tester/test.toml
+++ b/cmd/burntsushi-tester/test.toml
@ -0,0 +1,7 @@
 regex2 = '''I [dw]on't need \d{2} apples'''
 lines  = '''
 The first newline is
 trimmed in raw strings.
   All other whitespace
   is preserved.
 '''
--- a/parse/benchmark_test.go
+++ b/parse/benchmark_test.go
@ -0,0 +1,76 @@
 package parse_test
 import (
 	"testing"
 )
 // A maps the character that follows a backslash in a compact escape sequence
 // (b, t, n, f, r, " or \) to the byte that the sequence stands for. The second
 // result is false, and the first 0x00, when b is not one of those characters.
 //
 // This variant first splits the candidates around 'b' and only then switches
 // within the selected half; B performs the same mapping with one flat switch.
 func A(b byte) (byte, bool) {
 	if b <= 'b' {
 		// '"', '\\' and 'b' are the candidates that sort at or below 'b'.
 		switch b {
 		case 'b':
 			return '\b', true
 		case '\\':
 			return '\\', true
 		case '"':
 			return '"', true
 		}
 		return 0x00, false
 	}
 	// 'f', 'n', 'r' and 't' are the candidates that sort above 'b'.
 	switch b {
 	case 'f':
 		return '\f', true
 	case 'n':
 		return '\n', true
 	case 'r':
 		return '\r', true
 	case 't':
 		return '\t', true
 	}
 	return 0x00, false
 }
 // B maps the character that follows a backslash in a compact escape sequence
 // (b, t, n, f, r, " or \) to the byte that the sequence stands for, using a
 // single flat switch. The second result is false, and the first 0x00, when b
 // is not one of those characters.
 func B(b byte) (byte, bool) {
 	var unescaped byte
 	switch b {
 	case '\\':
 		unescaped = '\\'
 	case '"':
 		unescaped = '"'
 	case 'f':
 		unescaped = '\f'
 	case 'b':
 		unescaped = '\b'
 	case 't':
 		unescaped = '\t'
 	case 'n':
 		unescaped = '\n'
 	case 'r':
 		unescaped = '\r'
 	default:
 		// Not a compact escape character.
 		return 0x00, false
 	}
 	return unescaped, true
 }
 // benchmarkAInputs holds the escape characters that Benchmark_A feeds to A.
 // Reading them from a package-level variable keeps the compiler from treating
 // the arguments as constants and folding the calls away.
 var benchmarkAInputs = []byte{'b', 't', 'n', 'f', 'r', '"', '\\'}
 // SinkA receives the accumulated results of Benchmark_A, so that the calls to
 // A have an observable effect and cannot be eliminated as dead code.
 var SinkA byte
 // TODO cleanup unused benchmark.
 //
 // Benchmark_A measures A by translating each of the seven compact escape
 // characters once per iteration.
 func Benchmark_A(b *testing.B) {
 	var acc byte
 	for i := 0; i < b.N; i++ {
 		for _, c := range benchmarkAInputs {
 			v, _ := A(c)
 			acc += v
 		}
 	}
 	// Publish the result outside of the function; see SinkA.
 	SinkA = acc
 }
 // benchmarkBInputs holds the escape characters that Benchmark_B feeds to B.
 // Reading them from a package-level variable keeps the compiler from treating
 // the arguments as constants and folding the calls away.
 var benchmarkBInputs = []byte{'b', 't', 'n', 'f', 'r', '"', '\\'}
 // SinkB receives the accumulated results of Benchmark_B, so that the calls to
 // B have an observable effect and cannot be eliminated as dead code.
 var SinkB byte
 // TODO cleanup unused benchmark.
 //
 // Benchmark_B measures B by translating each of the seven compact escape
 // characters once per iteration.
 func Benchmark_B(b *testing.B) {
 	var acc byte
 	for i := 0; i < b.N; i++ {
 		for _, c := range benchmarkBInputs {
 			v, _ := B(c)
 			acc += v
 		}
 	}
 	// Publish the result outside of the function; see SinkB.
 	SinkB = acc
 }
--- a/parse/keyvaluepair.go
+++ b/parse/keyvaluepair.go
@ -52,6 +52,8 @@ func (t *parser) startKeyValuePair(p *parse.API) {
 			} else if !p.Skip(endOfLineOrComment) {
 				p.Expected("end of line")
 			}
 		} else {
 			p.Expected("a value")
 		}
 	}
 }
--- a/parse/value.go
+++ b/parse/value.go
@ -6,9 +6,9 @@ import (
 )
 var (
-	detectString         = a.SingleQuote.Or(a.DoubleQuote)
+	detectString         = a.Char('\'', '"')
-	detectBoolean        = a.Str("true").Or(a.Str("false"))
+	detectBoolean        = a.Str("true").Or(a.Str("false"))                   // TODO use 't' or 'f' and let the boolean handler format errors on mismatch
-	detectNumberSpecials = c.Any(a.Plus, a.Minus, a.Str("inf"), a.Str("nan"))
+	detectNumberSpecials = c.Any(a.Plus, a.Minus, a.Str("inf"), a.Str("nan")) // TODO likewise as for boolean
 	detectDateTime       = a.Digits.Then(a.Minus.Or(a.Colon))
 	detectNumber         = a.Digit
 	detectArray          = a.SquareOpen
--- a/parse/value_string.go
+++ b/parse/value_string.go
@ -1,12 +1,10 @@
 package parse
 import (
 	"fmt"
 	"strconv"
 	"strings"
 	"unicode/utf8"
 	"git.makaay.nl/mauricem/go-parsekit/parse"
 	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 	"git.makaay.nl/mauricem/go-toml/ast"
 )
@ -30,11 +28,6 @@ var (
 	// Opening and closing character for literal strings.
 	literalStringDelimiter = a.SingleQuote
 	// Control characters as defined by TOML (U+0000 to U+001F, U+007F)
 	isControlCharacter = func(b byte) bool { return (b >= 0x00 && b <= 0x1F) || b == 0x7F }
 	controlCharacter   = a.ByteByCallback(isControlCharacter)
 	// For convenience, some popular characters have a compact escape sequence.
 	//
 	// \b         - backspace       (U+0008)
@ -96,37 +89,203 @@ func (t *parser) parseString(p *parse.API) (*ast.Value, bool) {
 // "All other escape sequences [..] are reserved and, if used, TOML should
 // produce an error."
 func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 	// basicStringHandler matches the complete string by itself: it requires the
 	// opening double quote, handles the content and stops after the closing
 	// double quote. It returns false for every problem that it runs into.
 	// NOTE(review): as a result, every such failure appears to be reported
 	// through the single p.Expected call below, whatever the actual cause was.
 	// The name argument is not referenced by the added lines.
-	if !p.Skip(a.DoubleQuote) {
+	if !p.Accept(basicStringHandler) {
 		p.Expected(`opening quotation marks`)
 		return "", false
 	}
-	sb := &strings.Builder{}
 	// Presumably p.Result holds the bytes that the handler added to its output,
 	// i.e. the unescaped string content without the quotes — confirm.
+	return p.Result.String(), true
 }
 // stringTokenizerState identifies the state that a string tokenizer handler is
 // in while it walks over the input bytes. The handlers start in strStart (the
 // zero value). Not every handler uses every state.
 type stringTokenizerState int
 const (
 	// Expecting the (first) opening quote.
 	strStart stringTokenizerState = iota
 	// Multi-line strings: expecting the second opening quote.
 	strStart2
 	// Multi-line strings: expecting the third opening quote.
 	strStart3
 	// Multi-line strings: directly after the three opening quotes, where one
 	// leading newline (\n or \r\n) is skipped.
 	strStart4
 	// Inside the string, expecting regular content or a closing quote.
 	strChar
 	// A backslash was read, expecting the character that completes the escape.
 	strEscape
 	// After \u or \U, collecting the 4 or 8 hexadecimal digits.
 	strEscapeUnicode
 	// Multi-line basic strings: space or tab was read after a backslash;
 	// skipping further spaces and tabs until the line ending.
 	strEscapeConcatWs1
 	// Multi-line basic strings: \r was read while handling a line-ending
 	// backslash, \n must follow.
 	strEscapeConcatCRLF
 	// Multi-line basic strings: past the newline after a line-ending backslash;
 	// skipping spaces, tabs and newlines up to the next other byte.
 	strEscapeConcatWs2
 	// Multi-line strings: \r was read in the content, \n must follow.
 	strCRLF
 	// Collecting the continuation bytes of a multi-byte UTF8 sequence.
 	strUTF8
 	// Multi-line strings: one quote was read that might start the closing
 	// delimiter.
 	strEnd2
 	// Multi-line strings: two quotes were read that might start the closing
 	// delimiter.
 	strEnd3
 )
 // Bit masks that the string handlers use to select the payload bits of a UTF8
 // encoded byte.
 const (
 	// Payload of a continuation byte (10xxxxxx).
 	lowest6bits = 0x3F // 0011 1111
 	// Payload of the first byte of a 2 byte sequence (110xxxxx).
 	lowest5bits = 0x1F // 0001 1111
 	// Payload of the first byte of a 3 byte sequence (1110xxxx).
 	lowest4bits = 0x0F // 0000 1111
 	// Payload of the first byte of a 4 byte sequence (11110xxx).
 	lowest3bits = 0x07 // 0000 0111
 )
 func basicStringHandler(tokenAPI *tokenize.API) bool {
 	var state stringTokenizerState
 	in := tokenAPI.Input
 	out := tokenAPI.Output
 	unicodeReqLen := 0
 	unicodeLen := 0
 	unicodeHex := make([]byte, 8)
 	utf8ReqLen := 0
 	utf8Len := 0
 	utf8Rune := rune(0)
 	utf8Bytes := make([]byte, 4)
 	for {
-		switch {
+		bs, _ := in.Byte.PeekBuffered(0)
-		case p.Peek(controlCharacter):
+		bslen := len(bs)
-			p.SetError("invalid character in %s: %q (must be escaped)", name, p.Result.Byte(0))
+		if bslen == 0 {
-			return sb.String(), false
+			return false
-		case p.Accept(validEscape):
+		}
-			if !appendEscapedRune(p, sb) {
+		for i := 0; i < bslen; i++ {
-				return sb.String(), false
+			b := bs[i]
 			switch state {
 			case strStart:
 				if b != '"' {
 					// No opening quotes found.
 					return false
 				}
 				in.Byte.MoveCursor(b)
 				state = strChar
 			case strChar:
 				switch {
 				case (b >= 0x00 && b <= 0x1F) || b == 0x7F:
 					// Control characters as defined by the TOML specification.
 					// These must always be escaped.
 					// Unescaped control character
 					// TODO error reporting instead of full reject
 					return false
 				case b == '\\':
 					in.Byte.MoveCursor(b)
 					state = strEscape
 					continue
 				case b == '"':
 					in.Byte.MoveCursor(b)
 					return true
 				}
 				switch b >> 4 {
 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
 					utf8ReqLen = 2
 					utf8Rune = rune((b & lowest5bits) << 6)
 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 3
 					utf8Rune = rune((b & lowest4bits) << 6)
 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 4
 					utf8Rune = rune((b & lowest3bits) << 6)
 				default: // Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[0] = b
 				utf8Len = 1
 				state = strUTF8
 			case strUTF8:
 				// This should be a continuation byte (10xxxxxx)
 				if b>>6 != 2 {
 					// Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[utf8Len] = b
 				utf8Len++
 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
 				if utf8Len == utf8ReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode character
 						return false
 					}
 					bytes := utf8Bytes[:utf8Len]
 					out.AddBytes(bytes...)
 					in.Byte.MoveCursorMulti(bytes...)
 					state = strChar
 				}
 			case strEscape:
 				state = strChar
 				if escaped, ok := getEscapedChar(b); ok {
 					out.AddByte(escaped)
 					in.Byte.MoveCursor(b)
 					continue
 				}
 				switch b {
 				case 'u', 'U':
 					in.Byte.MoveCursor(b)
 					unicodeReqLen = 4
 					if b == 'u' {
 						unicodeReqLen = 4
 					} else {
 						unicodeReqLen = 8
 					}
 					unicodeLen = 0
 					utf8Rune = 0
 					state = strEscapeUnicode
 				default:
 					// Invalid escape sequence used.
 					return false
 				}
 			case strEscapeUnicode:
 				value, ok := getHexValueForChar(b)
 				if !ok {
 					// Invalid unicode escape sequence used.
 					return false
 				}
 				utf8Rune = utf8Rune<<4 + rune(value)
 				unicodeHex[unicodeLen] = b
 				unicodeLen++
 				if unicodeLen == unicodeReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode escape
 						return false
 					}
 					in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...)
 					w := utf8.EncodeRune(utf8Bytes, utf8Rune)
 					out.AddBytes(utf8Bytes[:w]...)
 					state = strChar
 				}
 			}
 		case p.Peek(a.Backslash):
 			p.SetError("invalid escape sequence")
 			return sb.String(), false
 		case p.Skip(basicStringDelimiter):
 			return sb.String(), true
 		case p.Peek(a.InvalidRune):
 			p.SetError("invalid UTF8 rune")
 			return sb.String(), false
 		case p.Accept(a.ValidRune):
 			sb.WriteString(p.Result.String())
 		default:
 			p.Expected(`closing quotation marks`)
 			return sb.String(), false
 		}
 	}
 }
 // getHexValueForChar returns the numeric value (0 - 15) of the hexadecimal
 // digit b. Lower case and upper case digits are both accepted. The second
 // result is false, and the first 0, when b is not one of 0-9, a-f or A-F.
 func getHexValueForChar(b byte) (byte, bool) {
 	switch {
 	case '0' <= b && b <= '9':
 		return b - '0', true
 	// The letter ranges must stop at 'f' / 'F': later letters are not
 	// hexadecimal digits, and accepting them would let an escape sequence
 	// like \uZZZZ be decoded as if it were valid.
 	case 'a' <= b && b <= 'f':
 		return b - 'a' + 10, true
 	case 'A' <= b && b <= 'F':
 		return b - 'A' + 10, true
 	default:
 		return 0, false
 	}
 }
 // getEscapedChar translates the character that follows a backslash in a
 // compact escape sequence (b, t, n, f, r, " or \) into the byte that the
 // sequence represents. The second result tells whether b was such a
 // character; when it was not, the first result is 0.
 func getEscapedChar(b byte) (byte, bool) {
 	var decoded byte
 	known := true
 	switch b {
 	case '\\':
 		decoded = '\\'
 	case '"':
 		decoded = '"'
 	case 'r':
 		decoded = '\r'
 	case 'f':
 		decoded = '\f'
 	case 'n':
 		decoded = '\n'
 	case 't':
 		decoded = '\t'
 	case 'b':
 		decoded = '\b'
 	default:
 		// Not a compact escape; decoded keeps its zero value.
 		known = false
 	}
 	return decoded, known
 }
 // Specific handling of input for literal strings.
 //
 // • Literal strings are surrounded by single quotes.
@ -135,28 +294,88 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) {
 //
 // • Control characters other than tab are not permitted in a literal string.
 func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
 	// literalStringHandler matches the complete string by itself: it requires
 	// the opening single quote, copies the content and stops after the closing
 	// single quote. It returns false for every problem that it runs into.
 	// NOTE(review): as a result, every such failure appears to be reported
 	// through the single p.Expected call below, whatever the actual cause was.
 	// The name argument is not referenced by the added lines.
-	if !p.Skip(a.SingleQuote) {
+	if !p.Accept(literalStringHandler) {
 		p.Expected("opening single quote")
 		return "", false
 	}
-	sb := &strings.Builder{}
 	// Presumably p.Result holds the bytes that the handler added to its output,
 	// i.e. the string content without the quotes — confirm.
+	return p.Result.String(), true
 }
 // literalStringHandler is a tokenizer handler that matches a complete
 // single-line literal string: an opening single quote, the string content and
 // a closing single quote. Content bytes are copied to the output as-is (no
 // escape processing takes place). The quotes are skipped in the input, but
 // they are not added to the output.
 //
 // It returns true right after the closing quote. It returns false when the
 // input does not start with a single quote, when PeekBuffered yields no bytes
 // before the closing quote was seen, on a control character other than tab,
 // and on input that fails the UTF8 checks.
 func literalStringHandler(tokenAPI *tokenize.API) bool {
 	var state stringTokenizerState
 	in := tokenAPI.Input
 	out := tokenAPI.Output
 	// Bookkeeping for a multi-byte UTF8 sequence that is being collected:
 	// the expected and the collected number of bytes, the value that is
 	// checked with utf8.ValidRune and the raw bytes of the sequence.
 	utf8ReqLen := 0
 	utf8Len := 0
 	utf8Rune := rune(0)
 	utf8Bytes := [4]byte{}
 	for {
-		switch {
+		bs, _ := tokenAPI.Input.Byte.PeekBuffered(0)
-		case p.Skip(literalStringDelimiter):
+		bslen := len(bs)
-			return sb.String(), true
+		if bslen == 0 {
-		case p.Skip(a.Tab):
+			// Unexpected end of file.
-			sb.WriteString("\t")
+			return false
-		case p.Peek(controlCharacter):
+		}
-			p.SetError("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result.Byte(0))
+		for i := 0; i < bslen; i++ {
-			return sb.String(), false
+			b := bs[i]
-		case p.Peek(a.InvalidRune):
+			switch state {
-			p.SetError("invalid UTF8 rune")
+			case strStart:
-			return sb.String(), false
+				if b != '\'' {
-		case p.Accept(a.ValidRune):
+					// No opening quote found.
-			sb.WriteString(p.Result.String())
+					return false
-		default:
+				}
-			p.Expected("closing single quote")
+				in.Byte.MoveCursor(b)
-			return sb.String(), false
+				state = strChar
 			case strChar:
 				switch {
 				// All control characters (which includes \r and \n) are rejected,
 				// with 0x09 (tab) as the one exception.
 				case (b >= 0x00 && b < 0x09) || (b > 0x09 && b <= 0x1F) || b == 0x7F:
 					// Unescaped control character
 					return false
 				case b == '\'':
 					// The closing quote ends the string.
 					in.Byte.MoveCursor(b)
 					return true
 				}
 				// The high nibble of the first byte determines the length of the
 				// UTF8 sequence. High nibbles 8 - 11 (a continuation byte in first
 				// position) end up in the default case.
 				// NOTE(review): b looks like a byte here, in which case the
 				// "(b & mask) << 6" expressions below are evaluated in 8 bits and
 				// lose their high bits before the conversion to rune. On top of
 				// that, strUTF8 shifts the value by 6 more bits for every
 				// continuation byte, the first one included. The value that reaches
 				// utf8.ValidRune therefore does not look like the actual code point
 				// — confirm. Case 15 also lets 0xF8 - 0xFF through as first byte.
 				switch b >> 4 {
 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
 					utf8ReqLen = 2
 					utf8Rune = rune((b & lowest5bits) << 6)
 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 3
 					utf8Rune = rune((b & lowest4bits) << 6)
 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 4
 					utf8Rune = rune((b & lowest3bits) << 6)
 				default: // Invalid UTF8 rune
 					return false
 				}
 				// Start of a multi-byte sequence: remember the first byte. Nothing
 				// is written or skipped until the sequence is complete.
 				utf8Bytes[0] = b
 				utf8Len = 1
 				state = strUTF8
 			case strUTF8:
 				// This should be a continuation byte (10xxxxxx)
 				if b>>6 != 2 {
 					// Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[utf8Len] = b
 				utf8Len++
 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
 				if utf8Len == utf8ReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode character
 						return false
 					}
 					// The sequence is complete: copy its raw bytes to the output
 					// and skip them in the input in one go.
 					bytes := utf8Bytes[:utf8Len]
 					out.AddBytes(bytes...)
 					in.Byte.MoveCursorMulti(bytes...)
 					state = strChar
 				}
 			}
 		}
 	}
 }
@ -185,70 +404,257 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) {
 // a \, it will be trimmed along with all whitespace (including newlines) up to
 // the next non-whitespace character or closing delimiter.
 func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) {
-	if !p.Skip(openingMultiLineBasicString) {
+	if !p.Accept(multiLineBasicStringHandler) {
 		p.Expected("opening three quotation marks")
 		return "", false
 	}
-	sb := &strings.Builder{}
+	return p.Result.String(), true
 	for {
 		switch {
 		case p.Skip(newline):
 			sb.WriteString("\n")
 		case p.Peek(controlCharacter):
 			p.SetError("invalid character in multi-line basic string: %q (must be escaped)", p.Result.Byte(0))
 			return sb.String(), false
 		case p.Accept(validEscape):
 			if !appendEscapedRune(p, sb) {
 				return sb.String(), false
 			}
 		case p.Skip(lineEndingBackslash):
 			// NOOP
 		case p.Peek(a.Backslash):
 			p.SetError("invalid escape sequence")
 			return sb.String(), false
 		case p.Skip(closingMultiLineBasicString):
 			return sb.String(), true
 		case p.Accept(a.ValidRune):
 			sb.WriteString(p.Result.String())
 		case p.Peek(a.InvalidRune):
 			p.SetError("invalid UTF8 rune")
 			return sb.String(), false
 		default:
 			p.Expected("closing three quotation marks")
 			return sb.String(), false
 		}
 	}
 }
-func appendEscapedRune(p *parse.API, sb *strings.Builder) bool {
+func multiLineBasicStringHandler(tokenAPI *tokenize.API) bool {
-	s := p.Result.String()
+	var state stringTokenizerState
-	switch s {
+	in := tokenAPI.Input
-	case `\b`:
+	out := tokenAPI.Output
-		sb.WriteRune('\b')
+
-	case `\t`:
+	unicodeReqLen := 0
-		sb.WriteRune('\t')
+	unicodeLen := 0
-	case `\n`:
+	unicodeHex := make([]byte, 8)
-		sb.WriteRune('\n')
+
-	case `\f`:
+	utf8ReqLen := 0
-		sb.WriteRune('\f')
+	utf8Len := 0
-	case `\r`:
+	utf8Rune := rune(0)
-		sb.WriteRune('\r')
+	utf8Bytes := make([]byte, 4)
-	case `\"`:
+
-		sb.WriteRune('"')
+	crlf := false
-	case `\\`:
+
-		sb.WriteRune('\\')
+	for {
-	default:
+		bs, _ := in.Byte.PeekBuffered(0)
-		// UTF8 escape code: \uXXXX or \UXXXXXXXXXXXX.
+		bslen := len(bs)
-		hex := s[2:]
+		if bslen == 0 {
 		val, _ := strconv.ParseUint(hex, 16, 32) // hex format already validated by parser
 		r := rune(val)
 		if !utf8.ValidRune(r) {
 			p.SetError(fmt.Sprintf("invalid UTF8 escape '%s'", s))
 			return false
 		}
-		sb.WriteRune(r)
+		for i := 0; i < bslen; i++ {
 			b := bs[i]
 			switch state {
 			case strStart, strStart2, strStart3:
 				if b != '"' {
 					// No triple opening quotes found.
 					return false
 				}
 				in.Byte.MoveCursor(b)
 				switch state {
 				case strStart:
 					state = strStart2
 				case strStart2:
 					state = strStart3
 				case strStart3:
 					state = strStart4
 				}
 			case strStart4:
 				if !crlf && b == '\r' {
 					crlf = true
 					in.Byte.MoveCursor(b)
 					continue
 				}
 				if b == '\n' {
 					in.Byte.MoveCursor(b)
 					state = strChar
 					continue
 				}
 				if crlf {
 					// Lonely \r without \n.
 					return false
 				}
 				state = strChar
 				fallthrough
 			case strChar:
 				switch {
 				case b == '\r':
 					state = strCRLF
 					continue
 				case b == '\n':
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case (b >= 0x00 && b <= 0x1F) || b == 0x7F:
 					// Unescaped control character
 					// TODO error reporting instead of full reject
 					return false
 				case b == '\\':
 					in.Byte.MoveCursor(b)
 					state = strEscape
 					continue
 				case b == '"':
 					in.Byte.MoveCursor(b)
 					state = strEnd2
 					continue
 				}
 				switch b >> 4 {
 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
 					utf8ReqLen = 2
 					utf8Rune = rune((b & lowest5bits) << 6)
 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 3
 					utf8Rune = rune((b & lowest4bits) << 6)
 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 4
 					utf8Rune = rune((b & lowest3bits) << 6)
 				default: // Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[0] = b
 				utf8Len = 1
 				state = strUTF8
 			case strUTF8:
 				// This should be a continuation byte (10xxxxxx)
 				if b>>6 != 2 {
 					// Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[utf8Len] = b
 				utf8Len++
 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
 				if utf8Len == utf8ReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode character
 						return false
 					}
 					bytes := utf8Bytes[:utf8Len]
 					out.AddBytes(bytes...)
 					in.Byte.MoveCursorMulti(bytes...)
 					state = strChar
 				}
 			case strCRLF:
 				if b == '\n' {
 					in.Byte.MoveCursorMulti('\r', b)
 					out.AddByte('\n')
 					state = strChar
 					continue
 				}
 				// Lonely \r, should have been escaped.
 				return false
 			case strEscape:
 				state = strChar
 				if escaped, ok := getEscapedChar(b); ok {
 					out.AddByte(escaped)
 					in.Byte.MoveCursor(b)
 					continue
 				}
 				switch b {
 				case ' ', '\t':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatWs1
 					continue
 				case '\r':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatCRLF
 					continue
 				case '\n':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatWs2
 					continue
 				case 'u', 'U':
 					in.Byte.MoveCursor(b)
 					unicodeReqLen = 4
 					if b == 'u' {
 						unicodeReqLen = 4
 					} else {
 						unicodeReqLen = 8
 					}
 					unicodeLen = 0
 					utf8Rune = 0
 					state = strEscapeUnicode
 				default:
 					// Invalid escape sequence used.
 					return false
 				}
 			case strEscapeConcatWs1:
 				switch b {
 				case ' ', '\t':
 					in.Byte.MoveCursor(b)
 					continue
 				case '\r':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatCRLF
 					continue
 				case '\n':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatWs2
 					continue
 				default:
 					// Invalid line concatenation
 					return false
 				}
 			case strEscapeConcatCRLF:
 				switch b {
 				case '\n':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatWs2
 					continue
 				default:
 					// Invalid line concatenation
 					return false
 				}
 			case strEscapeConcatWs2:
 				switch b {
 				case ' ', '\t':
 					in.Byte.MoveCursor(b)
 					continue
 				case '\r':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatCRLF
 					continue
 				case '\n':
 					in.Byte.MoveCursor(b)
 					state = strEscapeConcatWs2
 					continue
 				default:
 					i--
 					state = strChar
 					continue
 				}
 			case strEscapeUnicode:
 				value, ok := getHexValueForChar(b)
 				if !ok {
 					// Invalid unicode escape sequence used.
 					return false
 				}
 				utf8Rune = utf8Rune<<4 + rune(value)
 				unicodeHex[unicodeLen] = b
 				unicodeLen++
 				if unicodeLen == unicodeReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode escape
 						return false
 					}
 					in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...)
 					w := utf8.EncodeRune(utf8Bytes, utf8Rune)
 					out.AddBytes(utf8Bytes[:w]...)
 					state = strChar
 				}
 			case strEnd2:
 				if b == '"' {
 					state = strEnd3
 					in.Byte.MoveCursor(b)
 				} else {
 					state = strChar
 					out.AddByte('"')
 					i--
 				}
 			case strEnd3:
 				if b == '"' {
 					in.Byte.MoveCursor(b)
 					return true
 				}
 				state = strChar
 				out.AddBytes('"', '"')
 				i--
 			}
 		}
 	}
 	return true
 }
 // Specific handling of input for multi-line literal strings.
@ -265,30 +671,148 @@ func appendEscapedRune(p *parse.API, sb *strings.Builder) bool {
 //
 // • Control characters other than tab and newline are not permitted in a multi-line literal string.
 func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) {
 	// multiLineLiteralStringHandler matches the complete string by itself: it
 	// requires the three opening single quotes, copies the content and stops
 	// after the three closing single quotes. It returns false for every problem
 	// that it runs into.
 	// NOTE(review): as a result, every such failure appears to be reported
 	// through the single p.Expected call below, whatever the actual cause was.
-	if !p.Skip(openingMultiLineLiteralString) {
+	if !p.Accept(multiLineLiteralStringHandler) {
 		p.Expected("opening three single quotes")
 		return "", false
 	}
-	sb := &strings.Builder{}
 	// Presumably p.Result holds the bytes that the handler added to its output,
 	// i.e. the string content without the delimiters — confirm.
+	return p.Result.String(), true
 }
 // multiLineLiteralStringHandler is a tokenizer handler that matches a complete
 // multi-line literal string: three opening single quotes, the string content
 // and three closing single quotes. Content bytes are copied to the output
 // without escape processing, with these exceptions: one newline directly
 // after the opening quotes is dropped, and every \r\n in the content is
 // written to the output as a single \n. The delimiters are skipped in the
 // input, but they are not added to the output.
 //
 // It returns true right after the third closing quote. It returns false when
 // the input does not start with three single quotes, when PeekBuffered yields
 // no bytes before the closing quotes were seen, on a \r that is not followed
 // by \n, on a control character other than tab and newline, and on input that
 // fails the UTF8 checks.
 func multiLineLiteralStringHandler(tokenAPI *tokenize.API) bool {
 	var state stringTokenizerState
 	in := tokenAPI.Input
 	out := tokenAPI.Output
 	// Bookkeeping for a multi-byte UTF8 sequence that is being collected:
 	// the expected and the collected number of bytes, the value that is
 	// checked with utf8.ValidRune and the raw bytes of the sequence.
 	utf8ReqLen := 0
 	utf8Len := 0
 	utf8Rune := rune(0)
 	utf8Bytes := make([]byte, 4)
 	// Set when a \r was read directly after the opening quotes (see strStart4).
 	crlf := false
 	for {
-		switch {
+		bs, _ := in.Byte.PeekBuffered(0)
-		case p.Skip(closingMultiLineLiteralString):
+		bslen := len(bs)
-			return sb.String(), true
+		if bslen == 0 {
-		case p.Skip(a.Tab):
+			return false
-			sb.WriteString("\t")
+		}
-		case p.Skip(newline):
+		for i := 0; i < bslen; i++ {
-			sb.WriteString("\n")
+			b := bs[i]
-		case p.Peek(controlCharacter):
+			switch state {
-			p.SetError("invalid character in literal string: %q (no control chars allowed, except for tab and newline)", p.Result.Byte(0))
+			case strStart, strStart2, strStart3:
-			return sb.String(), false
+				if b != '\'' {
-		case p.Accept(a.ValidRune):
+					// No triple opening quotes found.
-			sb.WriteString(p.Result.String())
+					return false
-		case p.Peek(a.InvalidRune):
+				}
-			p.SetError("invalid UTF8 rune")
+				in.Byte.MoveCursor(b)
-			return sb.String(), false
+				switch state {
-		default:
+				case strStart:
-			p.Expected("closing three single quotes")
+					state = strStart2
-			return sb.String(), false
+				case strStart2:
 					state = strStart3
 				case strStart3:
 					state = strStart4
 				}
 			case strStart4:
 				// Directly after the opening quotes: a single \n or \r\n is
 				// skipped here without adding anything to the output.
 				if !crlf && b == '\r' {
 					crlf = true
 					in.Byte.MoveCursor(b)
 					continue
 				}
 				if b == '\n' {
 					in.Byte.MoveCursor(b)
 					state = strChar
 					continue
 				}
 				if crlf {
 					// Lonely \r without \n.
 					return false
 				}
 				// No leading newline: handle this same byte as regular content.
 				state = strChar
 				fallthrough
 			case strChar:
 				switch {
 				case b == '\r':
 					// The \r is not skipped yet; strCRLF skips it together with
 					// the \n that must follow.
 					state = strCRLF
 					continue
 				// Newline and tab are checked before the control character case
 				// below, which makes them the two control characters that are
 				// accepted as content.
 				case b == '\n' || b == '\t':
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case (b >= 0x00 && b <= 0x1F) || b == 0x7F:
 					// Unescaped control character
 					// TODO error reporting instead of full reject
 					return false
 				case b == '\'':
 					// Possibly the first of the three closing quotes. It is not
 					// written to the output until that is known.
 					in.Byte.MoveCursor(b)
 					state = strEnd2
 					continue
 				}
 				// The high nibble of the first byte determines the length of the
 				// UTF8 sequence. High nibbles 8 - 11 (a continuation byte in first
 				// position) end up in the default case.
 				// NOTE(review): b looks like a byte here, in which case the
 				// "(b & mask) << 6" expressions below are evaluated in 8 bits and
 				// lose their high bits before the conversion to rune. On top of
 				// that, strUTF8 shifts the value by 6 more bits for every
 				// continuation byte, the first one included. The value that reaches
 				// utf8.ValidRune therefore does not look like the actual code point
 				// — confirm. Case 15 also lets 0xF8 - 0xFF through as first byte.
 				switch b >> 4 {
 				case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII)
 					out.AddByte(b)
 					in.Byte.MoveCursor(b)
 					continue
 				case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx)
 					utf8ReqLen = 2
 					utf8Rune = rune((b & lowest5bits) << 6)
 				case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 3
 					utf8Rune = rune((b & lowest4bits) << 6)
 				case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
 					utf8ReqLen = 4
 					utf8Rune = rune((b & lowest3bits) << 6)
 				default: // Invalid UTF8 rune
 					return false
 				}
 				// Start of a multi-byte sequence: remember the first byte. Nothing
 				// is written or skipped until the sequence is complete.
 				utf8Bytes[0] = b
 				utf8Len = 1
 				state = strUTF8
 			case strUTF8:
 				// This should be a continuation byte (10xxxxxx)
 				if b>>6 != 2 {
 					// Invalid UTF8 rune
 					return false
 				}
 				utf8Bytes[utf8Len] = b
 				utf8Len++
 				utf8Rune = utf8Rune<<6 + rune(b&lowest6bits)
 				if utf8Len == utf8ReqLen {
 					if !utf8.ValidRune(utf8Rune) {
 						// Invalid unicode character
 						return false
 					}
 					// The sequence is complete: copy its raw bytes to the output
 					// and skip them in the input in one go.
 					bytes := utf8Bytes[:utf8Len]
 					out.AddBytes(bytes...)
 					in.Byte.MoveCursorMulti(bytes...)
 					state = strChar
 				}
 			case strCRLF:
 				if b == '\n' {
 					// Skip both bytes of the \r\n and write it as a single \n.
 					in.Byte.MoveCursorMulti('\r', b)
 					out.AddByte('\n')
 					state = strChar
 					continue
 				}
 				// Lonely \r, should have been escaped.
 				return false
 			case strEnd2:
 				if b == '\'' {
 					state = strEnd3
 					in.Byte.MoveCursor(b)
 				} else {
 					// The single quote was content after all: write it, and step
 					// i back so that the loop's i++ presents this same byte again,
 					// now in state strChar.
 					state = strChar
 					out.AddByte('\'')
 					i--
 				}
 			case strEnd3:
 				if b == '\'' {
 					// Third closing quote: the string is complete.
 					in.Byte.MoveCursor(b)
 					return true
 				}
 				// The two quotes were content after all: write them, and step i
 				// back so that this same byte is handled again in state strChar.
 				state = strChar
 				out.AddBytes('\'', '\'')
 				i--
 			}
 		}
 	}
 }
--- a/parse/value_string_test.go
+++ b/parse/value_string_test.go
@ -79,7 +79,8 @@ func TestMultiLineBasicString(t *testing.T) {
 		{"x=\"\"\"\n\"\"\"", `{"x": ""}`, ``},
 		{"x=\"\"\"\r\n\r\n\"\"\"", `{"x": "\n"}`, ``},
 		{`x="""\"\"\"\""""`, `{"x": "\"\"\"\""}`, ``},
-		{"x=\"\"\"\nThe quick brown \\\n\n\n  \t  fox jumps over \\\n\t the lazy dog.\\\n   \"\"\"", `{"x": "The quick brown fox jumps over the lazy dog."}`, ``},
+		{"x=\"\"\"\nThe quick brown \\\r\n\r\n\n  \t  fox jumps over \\\n\t the lazy dog.\\\n   \"\"\"", `{"x": "The quick brown fox jumps over the lazy dog."}`, ``},
 		{"x=\"\"\"\r\nThe quick brown \\\r\n\r\n\n  \t\r\n  \n\n  fox jumps over \\\n\t the lazy dog.\\\n   \"\"\"", `{"x": "The quick brown fox jumps over the lazy dog."}`, ``},
 		{"x=\"\"\"No control chars \f allowed\"\"\"", `{}`, `invalid character in multi-line basic string: '\f' (must be escaped) at line 1, column 23`},
 		{"x=\"\"\"Escaping control chars\\nis valid\"\"\"", `{"x": "Escaping control chars\nis valid"}`, ``},
 		{"x=\"\"\"Invalid escaping \\is not allowed\"\"\"", `{}`, `invalid escape sequence at line 1, column 23`},