diff --git a/cmd/burntsushi-tester/test.toml b/cmd/burntsushi-tester/test.toml deleted file mode 100644 index bc88494..0000000 --- a/cmd/burntsushi-tester/test.toml +++ /dev/null @@ -1,7 +0,0 @@ -regex2 = '''I [dw]on't need \d{2} apples''' -lines = ''' -The first newline is -trimmed in raw strings. - All other whitespace - is preserved. -''' diff --git a/parse/benchmark_test.go b/parse/benchmark_test.go index e974a98..f2aa52f 100644 --- a/parse/benchmark_test.go +++ b/parse/benchmark_test.go @@ -4,73 +4,29 @@ import ( "testing" ) -func A(b byte) (byte, bool) { - if b > 'b' { - switch b { - case 't': - return '\t', true - case 'n': - return '\n', true - case 'r': - return '\r', true - case 'f': - return '\f', true - } - } else { - switch b { - case '"': - return '"', true - case '\\': - return '\\', true - case 'b': - return '\b', true - } - } - return 0x00, false -} - -func B(b byte) (byte, bool) { - switch b { - case 'r': - return '\r', true - case 'n': - return '\n', true - case 't': - return '\t', true - case 'b': - return '\b', true - case 'f': - return '\f', true - case '"': - return '"', true - case '\\': - return '\\', true - } - return 0x00, false -} - -// TODO cleanup unused benchmark. -func Benchmark_A(b *testing.B) { - for i := 0; i < b.N; i++ { - A('b') - A('t') - A('n') - A('f') - A('r') - A('"') - A('\\') - } -} - -// TODO cleanup unused benchmark. func Benchmark_B(b *testing.B) { + f := func(i int) int { i = i + 1; return i } for i := 0; i < b.N; i++ { - B('b') - B('t') - B('n') - B('f') - B('r') - B('"') - B('\\') + f(i) + } +} +func Benchmark_C(b *testing.B) { + f := func(i int) int { i += 1; return i } + for i := 0; i < b.N; i++ { + f(i) + } +} + +func Benchmark_D(b *testing.B) { + f := func(i int) int { i++; return i } + for i := 0; i < b.N; i++ { + f(i) + } +} + +func Benchmark_A(b *testing.B) { + f := func(i int) int { i = 2; return i } + for i := 0; i < b.N; i++ { + f(i) } } diff --git a/parse/document.go b/parse/document.go index 08fefd0..93bc7db 100644 --- a/parse/document.go +++ b/parse/document.go @@ -27,11 +27,9 @@ var ( // A '#' hash symbol marks the rest of the line as a comment. // All characters up to the end of the line are included in the comment. - comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional()) - optionalComment = comment.Optional() - - endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) - + comment = c.Seq(a.Hash, a.UntilEndOfLine.Optional()) + optionalComment = comment.Optional() + endOfLineOrComment = c.Seq(whitespace, optionalComment, a.EndOfLine) whitespaceNewlinesOrComments = whitespaceInclNewlines.Or(comment) // Keys may be either bare or quoted. diff --git a/parse/document_test.go b/parse/document_test.go index 9c86cf4..650f2d6 100644 --- a/parse/document_test.go +++ b/parse/document_test.go @@ -1,6 +1,7 @@ package parse import ( + "strings" "testing" ) @@ -29,3 +30,18 @@ func TestInvalidDocument(t *testing.T) { testParse(t, p, p.startDocument, test) } } + +func TestBufferRelatedBug(t *testing.T) { + text := strings.Repeat("#", 2040) + "\n# a bug\n" + ast, err := Run(text) + + if len(text) != 2049 { + t.Fatalf("Test input is not 2049 bytes, but %d", len(text)) + } + if err != nil { + t.Fatalf("Unexpected error from parser: %s", err) + } + if ast.String() != "{}" { + t.Fatalf("Unexpected TOML document structure returned:\nexpected: {}\nactual: %s", ast) + } +} diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index f92a060..1700f88 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -53,7 +53,11 @@ func (t *parser) startKeyValuePair(p *parse.API) { p.Expected("end of line") } } else { - p.Expected("a value") + // Should have been handled by the value parsing code. + // This is a safety net. + if !p.IsStoppedOrInError() { + panic("Bug: value parsing did not return a successful value, neither an error") + } } } } @@ -70,14 +74,16 @@ func (t *parser) startKeyValuePair(p *parse.API) { func (t *parser) parseKey(p *parse.API, key ast.Key) (ast.Key, bool) { var keyPart string + var strType stringType var ok bool switch { case p.Accept(bareKey): keyPart, ok = p.Result.String(), true - case p.Peek(a.SingleQuote): - keyPart, ok = t.parseLiteralString("key", p) - case p.Peek(a.DoubleQuote): - keyPart, ok = t.parseBasicString("key", p) + case p.Peek(detectString): + keyPart, strType, ok = t.parseString(p) + if strType != strTypeBasic && strType != strTypeLiteral { + p.Expected("a key name") // TODO more specific error telling about the abuse of multi-line string? + } default: p.Expected("a key name") return nil, false diff --git a/parse/parse.go b/parse/parse.go index b35ce2a..ad8fd11 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -6,12 +6,13 @@ import ( ) type parser struct { - doc *ast.Document + doc *ast.Document + strFlags byte // A helper field used for string parsing. } func newParser() *parser { doc := ast.NewDocument() - return &parser{doc} + return &parser{doc: doc} } // Run the TOML parser against the provided input data. diff --git a/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.json b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.json @@ -0,0 +1 @@ +{} diff --git a/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.toml b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.toml new file mode 100644 index 0000000..32403c4 --- /dev/null +++ b/parse/testfiles/valid/mmakaay/toml-readbuffer-bug.toml @@ -0,0 +1,2 @@ +# This line is 2040 long, to make the read buffer end at the 'g' of the second line, leaving only the newline at the end of the file for the next read operation. There was a bug that resulted in the word 'a' of the second line being seen as a key, because the comment skipping did not work as it should with the buffer filling operation between '# a bug' and the final '\n'. ################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################# +# a bug diff --git a/parse/value.go b/parse/value.go index dac8251..ee8b7ac 100644 --- a/parse/value.go +++ b/parse/value.go @@ -20,7 +20,8 @@ var ( func (t *parser) parseValue(p *parse.API) (*ast.Value, bool) { switch { case p.Peek(detectString): - return t.parseString(p) + str, _, ok := t.parseString(p) + return ast.NewValue(ast.TypeString, str), ok case p.Peek(detectBoolean): return t.parseBoolean(p) case p.Peek(detectNumberSpecials): diff --git a/parse/value_string.go b/parse/value_string.go index 12c1c42..24418e5 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -5,123 +5,52 @@ import ( "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" - "git.makaay.nl/mauricem/go-toml/ast" ) -var ( - // Multi-line basic strings are surrounded by three quotation marks on each - // side and allow newlines. - - multiLineBasicStringDelimiter = a.Str(`"""`) - openingMultiLineBasicString = multiLineBasicStringDelimiter.Then(newline.Optional()) - closingMultiLineBasicString = m.Drop(multiLineBasicStringDelimiter) - - // Multi-line literal strings are surrounded by three single quotes on each side and allow newlines. - - multiLineLiteralStringDelimiter = a.Str(`'''`) - openingMultiLineLiteralString = multiLineLiteralStringDelimiter.Then(newline.Optional()) - closingMultiLineLiteralString = m.Drop(multiLineLiteralStringDelimiter) - - // Opening and closing character for basic strings. - basicStringDelimiter = a.DoubleQuote - - // Opening and losing character for literal strings. - literalStringDelimiter = a.SingleQuote - - // For convenience, some popular characters have a compact escape sequence. - // - // \b - backspace (U+0008) - // \t - tab (U+0009) - // \n - LF (U+000A) - // \f - form feed (U+000C) - // \r - carriage return (U+000D) - // \" - quote (U+0022) - // \\ - backslash (U+005C) - // \uXXXX - unicode (U+XXXX) - // \UXXXXXXXX - unicode (U+XXXXXXXX) - - validEscapeChar = a.Char('b', 't', 'n', 'f', 'r', '"', '\\') - shortEscape = c.Seq(a.Backslash, validEscapeChar) - shortUTF8Escape = c.Seq(a.Backslash, a.Char('u'), a.HexDigit.Times(4)) - longUTF8Escape = c.Seq(a.Backslash, a.Char('U'), a.HexDigit.Times(8)) - validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) - - // For writing long strings without introducing extraneous whitespace, use a - // "line ending backslash". When the last non-whitespace character on a line is - // a \, it will be trimmed along with all whitespace (including newlines) up to - // the next non-whitespace character or closing delimiter. - - lineEndingBackslash = c.Seq(a.Backslash, whitespace, newline, whitespaceInclNewlines.Optional()) -) - -// There are four ways to express strings: basic, multi-line basic, literal and -// multi-line literal. All strings must parse/value_array.gocontain only valid UTF-8 characters. -func (t *parser) parseString(p *parse.API) (*ast.Value, bool) { - var value string - var ok bool - switch { - case p.Peek(openingMultiLineBasicString): - value, ok = t.parseMultiLineBasicString(p) - case p.Peek(basicStringDelimiter): - value, ok = t.parseBasicString("string value", p) - case p.Peek(openingMultiLineLiteralString): - value, ok = t.parseMultiLineLiteralString(p) - case p.Peek(literalStringDelimiter): - value, ok = t.parseLiteralString("string value", p) - default: +func (t *parser) parseString(p *parse.API) (string, stringType, bool) { + if !p.Accept(t.stringHandler) { p.Expected("a string value") + return "", strTypeNone, false } - if ok { - return ast.NewValue(ast.TypeString, value), ok - } - return nil, false + strType := stringTypeFromFlags(t.strFlags) + str := p.Result.String() + return str, strType, true } -// Specific handling of input for basic strings. -// -// • Basic strings are surrounded by quotation marks. -// -// • Any Unicode character may be used except those that must be escaped: -// quotation mark, backslash, and the control characters (U+0000 to -// U+001F, U+007F). -// -// • No additional \escape sequences are allowed. What the spec say about this: -// "All other escape sequences [..] are reserved and, if used, TOML should -// produce an error."" -func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { - if !p.Accept(basicStringHandler) { - return "", false +type stringType byte + +const ( + strTypeNone stringType = iota + strTypeBasic + strTypeLiteral + strTypeMultiLineBasic + strTypeMultiLineLiteral +) + +func stringTypeFromFlags(flags byte) stringType { + if flags&strFlagBasic == strFlagBasic { + if flags&strFlagMultiLine == 0 { + return strTypeBasic + } + return strTypeMultiLineBasic } - return p.Result.String(), true + if flags&strFlagMultiLine == 0 { + return strTypeLiteral + } + return strTypeMultiLineLiteral } -type stringTokenizerState int - const ( - strStart stringTokenizerState = iota - strStart2 - strStart3 - strStart4 - strChar - strEscape - strEscapeUnicode - strEscapeConcatWs1 - strEscapeConcatCRLF - strEscapeConcatWs2 - strCRLF - strUTF8 - strEnd2 - strEnd3 + strFlagLiteral byte = 1 + strFlagBasic byte = 2 + strFlagMultiLine byte = 4 + strFlagNewlinesOK byte = 8 + strFlagTabsOK byte = 16 + strFlagEscapesOK byte = 32 + strFlagLineConcatOK byte = 64 ) -const ( - lowest6bits = 0x3F // 0011 1111 - lowest5bits = 0x1F // 0001 1111 - lowest4bits = 0x0F // 0000 1111 - lowest3bits = 0x07 // 0000 0111 -) - -func basicStringHandler(tokenAPI *tokenize.API) bool { +func (t *parser) stringHandler(tokenAPI *tokenize.API) bool { var state stringTokenizerState in := tokenAPI.Input out := tokenAPI.Output @@ -135,38 +64,155 @@ func basicStringHandler(tokenAPI *tokenize.API) bool { utf8Rune := rune(0) utf8Bytes := make([]byte, 4) + flags := byte(0) + delim := byte(0) + subState := 0 + for { bs, _ := in.Byte.PeekBuffered(0) bslen := len(bs) + + // End of input reached. if bslen == 0 { + // We might be at the second delimiter of a basic or literal string. + if state == strStateStart && subState == 2 { + return true + } + // Unexpected end of input. return false } + for i := 0; i < bslen; i++ { b := bs[i] switch state { - case strStart: - if b != '"' { - // No opening quotes found. - return false + + // Parse the string opener. + // There are four ways to express strings: basic, multi-line basic, literal and + // multi-line literal. Basic strings are surrounded by quotation marks ("..."). + // Literal strings are surrounded by single quotes ('...'). + // Multi-line basic strings are surrounded by three quotation marks on each + // side and allow newlines ("""..."""). Multi-line literal strings are surrounded + // by three single quotes on each side and allow newlines as well ('''...'''). + case strStateStart: + if subState == 0 { + if b != '"' && b != '\'' { + // Expected an opener quote here. + return false + } + if b == '\'' { + flags |= strFlagLiteral | strFlagTabsOK + } else { + flags |= strFlagBasic | strFlagEscapesOK + } + t.strFlags = flags + subState = 1 + delim = b + in.Byte.MoveCursor(b) + continue } - in.Byte.MoveCursor(b) - state = strChar - case strChar: + if subState == 1 { + // Not a second quote, so this is the start of + // single-line string content. + if b != delim { + i-- + state = strStateContent + continue + } + in.Byte.MoveCursor(b) + subState = 2 + continue + } + if subState == 2 { + // Not a third quote, so this is an empty string ('' or ""). + if b != delim { + return true + } + // Third quote, so this is a multi-line string (''' or """). + flags |= strFlagMultiLine | strFlagNewlinesOK + if flags&strFlagBasic == strFlagBasic { + flags |= strFlagLineConcatOK + } + t.strFlags = flags + in.Byte.MoveCursor(b) + subState = 3 + continue + } + if subState == 3 { + // We're in a multi-line string. From the TOML spec: + // A newline immediately following the opening delimiter will be trimmed. + // All other whitespace and newline characters remain intact. + if b == '\n' { + in.Byte.MoveCursor(b) + state = strStateContent + continue + } + if b == '\r' { + in.Byte.MoveCursor(b) + subState = 4 + continue + } + // Not a newline, so this byte is part of the content. + i-- + state = strStateContent + continue + } + if subState == 4 { + // We've seen a \r, so here we should see a \n for a newline + // after a multi-line opener. + if b == '\n' { + in.Byte.MoveCursor(b) + state = strStateContent + continue + } + // Lonely \r found. Pass it to the content handler. + i -= 2 + state = strStateContent + continue + } + + // Parse string contents. + case strStateContent: switch { + case b == '\r' && flags&strFlagNewlinesOK == strFlagNewlinesOK: + state = strStateCRLF + continue + case b == '\n' && flags&strFlagNewlinesOK == strFlagNewlinesOK: + out.AddByte(b) + in.Byte.MoveCursor(b) + continue + case b == '\t' && flags&strFlagTabsOK == strFlagTabsOK: + out.AddByte(b) + in.Byte.MoveCursor(b) + continue case (b >= 0x00 && b <= 0x1F) || b == 0x7F: - // Control characters as defined by the TOML specification. - // These must always be escaped. - // Unescaped control character - // TODO error reporting instead of full reject + // Control characters must be escaped. return false case b == '\\': in.Byte.MoveCursor(b) - state = strEscape + // Handle escape codes, when they are allowed. + if flags&strFlagEscapesOK == strFlagEscapesOK { + state = strStateEscape + continue + } + // Otherwise, add the backslash as plain output. + out.AddByte(b) continue - case b == '"': + case b == delim: + // Single-line string. + if flags&strFlagMultiLine == 0 { + in.Byte.MoveCursor(b) + return true + } + // Multi-line string in.Byte.MoveCursor(b) - return true + state = strStateMultiLineEnd + subState = 0 + continue } + + // At this point, we must have a UTF8 character on the input. + // Here we check what length the character must have in bytes. + // Then the rest of the work is offloaded to the strUTF8 state. switch b >> 4 { case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) out.AddByte(b) @@ -186,9 +232,11 @@ func basicStringHandler(tokenAPI *tokenize.API) bool { } utf8Bytes[0] = b utf8Len = 1 - state = strUTF8 - case strUTF8: - // This should be a continuation byte (10xxxxxx) + state = strStateUTF8 + + // Parse followup bytes of a UTF8 byte sequence. + case strStateUTF8: + // The input byte must be a continuation byte (10xxxxxx) if b>>6 != 2 { // Invalid UTF8 rune return false @@ -204,17 +252,63 @@ func basicStringHandler(tokenAPI *tokenize.API) bool { bytes := utf8Bytes[:utf8Len] out.AddBytes(bytes...) in.Byte.MoveCursorMulti(bytes...) - state = strChar + state = strStateContent } - case strEscape: - state = strChar + + // Parse the \n in a \r\n sequence. + case strStateCRLF: + // \r\n is normalized to just \n here (as allowed by the TOML spec). + if b == '\n' { + in.Byte.MoveCursorMulti('\r', b) + out.AddByte('\n') + state = strStateContent + continue + } + // Lonely \r, should have been escaped. + return false + + // Parse escape byte sequences. + // For convenience, some popular characters have a compact escape sequence. + // + // \b - backspace (U+0008) + // \t - tab (U+0009) + // \n - LF (U+000A) + // \f - form feed (U+000C) + // \r - carriage return (U+000D) + // \" - quote (U+0022) + // \\ - backslash (U+005C) + // \uXXXX - unicode (U+XXXX) + // \UXXXXXXXX - unicode (U+XXXXXXXX) + case strStateEscape: + // Handle short control character escape sequence (\t, \a, etc). if escaped, ok := getEscapedChar(b); ok { out.AddByte(escaped) in.Byte.MoveCursor(b) + state = strStateContent continue } switch b { + case ' ', '\t', '\r', '\n': + // Handle line concatenation escape sequence. + if flags&strFlagLineConcatOK == 0 { + // Invalid escape. + return false + } + // Point the parser at an appropriate subState of + // the strEscapeConcat state. + switch b { + case ' ', '\t': + subState = 0 + case '\r': + subState = 1 + case '\n': + subState = 2 + } + in.Byte.MoveCursor(b) + state = strStateEscapeConcat + continue case 'u', 'U': + // Handle unicode escape sequence (\uXXXX, \UXXXXXXXX). in.Byte.MoveCursor(b) unicodeReqLen = 4 if b == 'u' { @@ -224,12 +318,75 @@ func basicStringHandler(tokenAPI *tokenize.API) bool { } unicodeLen = 0 utf8Rune = 0 - state = strEscapeUnicode + state = strStateEscapeUnicode default: // Invalid escape sequence used. return false } - case strEscapeUnicode: + + // For writing long strings without introducing extraneous whitespace, use a + // "line ending backslash". When the last non-whitespace character on a line is + // a \, it will be trimmed along with all whitespace (including newlines) up to + // the next non-whitespace character or closing delimiter. + case strStateEscapeConcat: + // Skip over whitespace until the end of the line is found. + if subState == 0 { + switch b { + case ' ', '\t': + in.Byte.MoveCursor(b) + continue + case '\r': + in.Byte.MoveCursor(b) + subState = 1 + continue + case '\n': + in.Byte.MoveCursor(b) + subState = 2 + continue + default: + // Invalid escape sequence used. Expected whitespace or newline. + return false + } + } + // We've seen a \r at the same line as the escape char, + // skip over the following \n. + if subState == 1 { + if b == '\n' { + in.Byte.MoveCursor(b) + subState = 2 + continue + } + // Invalid escape sequence used. Expected newline. + return false + } + // We've seen a \n at the same line as the escape char, + // skip over all whitespace and newlines from here on. + if subState == 2 { + if b == ' ' || b == '\t' || b == '\n' { + in.Byte.MoveCursor(b) + continue + } + if b == '\r' { + in.Byte.MoveCursor(b) + subState = 3 + continue + } + } + // We've seen a \r, skip over the following \n. + if subState == 3 { + if b == '\n' { + in.Byte.MoveCursor(b) + subState = 2 + continue + } + } + // End of concat escape. Let the strContent state reprocess the byte. + i-- + state = strStateContent + continue + + // Parse unicode escape sequence (\uXXXX, \UXXXXXXXX). + case strStateEscapeUnicode: value, ok := getHexValueForChar(b) if !ok { // Invalid unicode escape sequence used. @@ -246,13 +403,85 @@ func basicStringHandler(tokenAPI *tokenize.API) bool { in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...) w := utf8.EncodeRune(utf8Bytes, utf8Rune) out.AddBytes(utf8Bytes[:w]...) - state = strChar + state = strStateContent + } + + // Parse the end of the string. + // One delimiter has already been seen by the strContent state. + // Here we check if we have a full set of 3 delimiters to end + // the string. + case strStateMultiLineEnd: // TODO rename to strEndMultiLine + if subState == 0 { + // Second delimiter found. + if b == delim { + subState = 1 + in.Byte.MoveCursor(b) + continue + } + // No delimiter found, so we're looking at a single + // delimiter within the multi-line body. Add the delimiter + // to the output and feed the current byte back to the + // strContent state. + out.AddByte(delim) + i-- + state = strStateContent + continue + } + if subState == 1 { + // Third delimiter found. This ends the string. + if b == delim { + in.Byte.MoveCursor(b) + return true + } + // No delimiter found, so we're looking at two delimiters + // within the multi-line body. Add the delimiters to the + // output and feed the current byte back to the strContent state. + out.AddBytes(delim, delim) + i-- + state = strStateContent + continue } } } } } +// Specific handling of input for basic strings. +// +// • Any Unicode character may be used except those that must be escaped: +// quotation mark, backslash, and the control characters (U+0000 to +// U+001F, U+007F). +// +// • No additional \escape sequences are allowed. What the spec say about this: +// "All other escape sequences [..] are reserved and, if used, TOML should +// produce an error."" +// func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { +// if !p.Accept(basicStringHandler) { +// return "", false +// } +// return p.Result.String(), true +// } + +type stringTokenizerState int + +const ( + strStateStart stringTokenizerState = iota + strStateContent + strStateEscape + strStateEscapeUnicode + strStateEscapeConcat + strStateCRLF + strStateUTF8 + strStateMultiLineEnd +) + +const ( + lowest6bits = 0x3F // 0011 1111 + lowest5bits = 0x1F // 0001 1111 + lowest4bits = 0x0F // 0000 1111 + lowest3bits = 0x07 // 0000 0111 +) + func getHexValueForChar(b byte) (byte, bool) { switch { case '0' <= b && b <= '9': @@ -286,100 +515,6 @@ func getEscapedChar(b byte) (byte, bool) { return 0, false } -// Specific handling of input for literal strings. -// -// • Literal strings are surrounded by single quotes. -// -// • Like basic strings, they must appear on a single line. -// -// • Control characters other than tab are not permitted in a literal string. -func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) { - if !p.Accept(literalStringHandler) { - return "", false - } - return p.Result.String(), true -} - -func literalStringHandler(tokenAPI *tokenize.API) bool { - var state stringTokenizerState - in := tokenAPI.Input - out := tokenAPI.Output - - utf8ReqLen := 0 - utf8Len := 0 - utf8Rune := rune(0) - utf8Bytes := [4]byte{} - - for { - bs, _ := tokenAPI.Input.Byte.PeekBuffered(0) - bslen := len(bs) - if bslen == 0 { - // Unexpected end of file. - return false - } - for i := 0; i < bslen; i++ { - b := bs[i] - switch state { - case strStart: - if b != '\'' { - // No opening quote found. - return false - } - in.Byte.MoveCursor(b) - state = strChar - case strChar: - switch { - case (b >= 0x00 && b < 0x09) || (b > 0x09 && b <= 0x1F) || b == 0x7F: - // Unescaped control character - return false - case b == '\'': - in.Byte.MoveCursor(b) - return true - } - switch b >> 4 { - case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) - out.AddByte(b) - in.Byte.MoveCursor(b) - continue - case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx) - utf8ReqLen = 2 - utf8Rune = rune((b & lowest5bits) << 6) - case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 3 - utf8Rune = rune((b & lowest4bits) << 6) - case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 4 - utf8Rune = rune((b & lowest3bits) << 6) - default: // Invalid UTF8 rune - return false - } - utf8Bytes[0] = b - utf8Len = 1 - state = strUTF8 - case strUTF8: - // This should be a continuation byte (10xxxxxx) - if b>>6 != 2 { - // Invalid UTF8 rune - return false - } - utf8Bytes[utf8Len] = b - utf8Len++ - utf8Rune = utf8Rune<<6 + rune(b&lowest6bits) - if utf8Len == utf8ReqLen { - if !utf8.ValidRune(utf8Rune) { - // Invalid unicode character - return false - } - bytes := utf8Bytes[:utf8Len] - out.AddBytes(bytes...) - in.Byte.MoveCursorMulti(bytes...) - state = strChar - } - } - } - } -} - // Specific handling of input for multi-line basic strings. // // • Multi-line basic strings are surrounded by three quotation marks on @@ -403,259 +538,259 @@ func literalStringHandler(tokenAPI *tokenize.API) bool { // "line ending backslash". When the last non-whitespace character on a line is // a \, it will be trimmed along with all whitespace (including newlines) up to // the next non-whitespace character or closing delimiter. -func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) { - if !p.Accept(multiLineBasicStringHandler) { - return "", false - } - return p.Result.String(), true -} +// func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) { +// if !p.Accept(multiLineBasicStringHandler) { +// return "", false +// } +// return p.Result.String(), true +// } -func multiLineBasicStringHandler(tokenAPI *tokenize.API) bool { - var state stringTokenizerState - in := tokenAPI.Input - out := tokenAPI.Output +// func multiLineBasicStringHandler(tokenAPI *tokenize.API) bool { +// var state stringTokenizerState +// in := tokenAPI.Input +// out := tokenAPI.Output - unicodeReqLen := 0 - unicodeLen := 0 - unicodeHex := make([]byte, 8) +// unicodeReqLen := 0 +// unicodeLen := 0 +// unicodeHex := make([]byte, 8) - utf8ReqLen := 0 - utf8Len := 0 - utf8Rune := rune(0) - utf8Bytes := make([]byte, 4) +// utf8ReqLen := 0 +// utf8Len := 0 +// utf8Rune := rune(0) +// utf8Bytes := make([]byte, 4) - crlf := false +// crlf := false - for { - bs, _ := in.Byte.PeekBuffered(0) - bslen := len(bs) - if bslen == 0 { - return false - } - for i := 0; i < bslen; i++ { - b := bs[i] - switch state { - case strStart, strStart2, strStart3: - if b != '"' { - // No triple opening quotes found. - return false - } - in.Byte.MoveCursor(b) - switch state { - case strStart: - state = strStart2 - case strStart2: - state = strStart3 - case strStart3: - state = strStart4 - } - case strStart4: - if !crlf && b == '\r' { - crlf = true - in.Byte.MoveCursor(b) - continue - } - if b == '\n' { - in.Byte.MoveCursor(b) - state = strChar - continue - } - if crlf { - // Lonely \r without \n. - return false - } - state = strChar - fallthrough - case strChar: - switch { - case b == '\r': - state = strCRLF - continue - case b == '\n': - out.AddByte(b) - in.Byte.MoveCursor(b) - continue - case (b >= 0x00 && b <= 0x1F) || b == 0x7F: - // Unescaped control character - // TODO error reporting instead of full reject - return false - case b == '\\': - in.Byte.MoveCursor(b) - state = strEscape - continue - case b == '"': - in.Byte.MoveCursor(b) - state = strEnd2 - continue - } - switch b >> 4 { - case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) - out.AddByte(b) - in.Byte.MoveCursor(b) - continue - case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx) - utf8ReqLen = 2 - utf8Rune = rune((b & lowest5bits) << 6) - case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 3 - utf8Rune = rune((b & lowest4bits) << 6) - case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 4 - utf8Rune = rune((b & lowest3bits) << 6) - default: // Invalid UTF8 rune - return false - } - utf8Bytes[0] = b - utf8Len = 1 - state = strUTF8 - case strUTF8: - // This should be a continuation byte (10xxxxxx) - if b>>6 != 2 { - // Invalid UTF8 rune - return false - } - utf8Bytes[utf8Len] = b - utf8Len++ - utf8Rune = utf8Rune<<6 + rune(b&lowest6bits) - if utf8Len == utf8ReqLen { - if !utf8.ValidRune(utf8Rune) { - // Invalid unicode character - return false - } - bytes := utf8Bytes[:utf8Len] - out.AddBytes(bytes...) - in.Byte.MoveCursorMulti(bytes...) - state = strChar - } - case strCRLF: - if b == '\n' { - in.Byte.MoveCursorMulti('\r', b) - out.AddByte('\n') - state = strChar - continue - } - // Lonely \r, should have been escaped. - return false - case strEscape: - state = strChar - if escaped, ok := getEscapedChar(b); ok { - out.AddByte(escaped) - in.Byte.MoveCursor(b) - continue - } - switch b { - case ' ', '\t': - in.Byte.MoveCursor(b) - state = strEscapeConcatWs1 - continue - case '\r': - in.Byte.MoveCursor(b) - state = strEscapeConcatCRLF - continue - case '\n': - in.Byte.MoveCursor(b) - state = strEscapeConcatWs2 - continue - case 'u', 'U': - in.Byte.MoveCursor(b) - unicodeReqLen = 4 - if b == 'u' { - unicodeReqLen = 4 - } else { - unicodeReqLen = 8 - } - unicodeLen = 0 - utf8Rune = 0 - state = strEscapeUnicode - default: - // Invalid escape sequence used. - return false - } - case strEscapeConcatWs1: - switch b { - case ' ', '\t': - in.Byte.MoveCursor(b) - continue - case '\r': - in.Byte.MoveCursor(b) - state = strEscapeConcatCRLF - continue - case '\n': - in.Byte.MoveCursor(b) - state = strEscapeConcatWs2 - continue - default: - // Invalid line concatenation - return false - } - case strEscapeConcatCRLF: - switch b { - case '\n': - in.Byte.MoveCursor(b) - state = strEscapeConcatWs2 - continue - default: - // Invalid line concatenation - return false - } - case strEscapeConcatWs2: - switch b { - case ' ', '\t': - in.Byte.MoveCursor(b) - continue - case '\r': - in.Byte.MoveCursor(b) - state = strEscapeConcatCRLF - continue - case '\n': - in.Byte.MoveCursor(b) - state = strEscapeConcatWs2 - continue - default: - i-- - state = strChar - continue - } - case strEscapeUnicode: - value, ok := getHexValueForChar(b) - if !ok { - // Invalid unicode escape sequence used. - return false - } - utf8Rune = utf8Rune<<4 + rune(value) - unicodeHex[unicodeLen] = b - unicodeLen++ - if unicodeLen == unicodeReqLen { - if !utf8.ValidRune(utf8Rune) { - // Invalid unicode escape - return false - } - in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...) - w := utf8.EncodeRune(utf8Bytes, utf8Rune) - out.AddBytes(utf8Bytes[:w]...) - state = strChar - } - case strEnd2: - if b == '"' { - state = strEnd3 - in.Byte.MoveCursor(b) - } else { - state = strChar - out.AddByte('"') - i-- - } - case strEnd3: - if b == '"' { - in.Byte.MoveCursor(b) - return true - } - state = strChar - out.AddBytes('"', '"') - i-- - } - } - } -} +// for { +// bs, _ := in.Byte.PeekBuffered(0) +// bslen := len(bs) +// if bslen == 0 { +// return false +// } +// for i := 0; i < bslen; i++ { +// b := bs[i] +// switch state { +// case strStart, strStart2, strStart3: +// if b != '"' { +// // No triple opening quotes found. +// return false +// } +// in.Byte.MoveCursor(b) +// switch state { +// case strStart: +// state = strStart2 +// case strStart2: +// state = strStart3 +// case strStart3: +// state = strStart4 +// } +// case strStart4: +// if !crlf && b == '\r' { +// crlf = true +// in.Byte.MoveCursor(b) +// continue +// } +// if b == '\n' { +// in.Byte.MoveCursor(b) +// state = strContent +// continue +// } +// if crlf { +// // Lonely \r without \n. +// return false +// } +// state = strContent +// fallthrough +// case strContent: +// switch { +// case b == '\r': +// state = strCRLF +// continue +// case b == '\n': +// out.AddByte(b) +// in.Byte.MoveCursor(b) +// continue +// case (b >= 0x00 && b <= 0x1F) || b == 0x7F: +// // Unescaped control character +// // TODO error reporting instead of full reject +// return false +// case b == '\\': +// in.Byte.MoveCursor(b) +// state = strEscape +// continue +// case b == '"': +// in.Byte.MoveCursor(b) +// state = strEnd +// continue +// } +// switch b >> 4 { +// case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) +// out.AddByte(b) +// in.Byte.MoveCursor(b) +// continue +// case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx) +// utf8ReqLen = 2 +// utf8Rune = rune((b & lowest5bits) << 6) +// case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx) +// utf8ReqLen = 3 +// utf8Rune = rune((b & lowest4bits) << 6) +// case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) +// utf8ReqLen = 4 +// utf8Rune = rune((b & lowest3bits) << 6) +// default: // Invalid UTF8 rune +// return false +// } +// utf8Bytes[0] = b +// utf8Len = 1 +// state = strUTF8 +// case strUTF8: +// // This should be a continuation byte (10xxxxxx) +// if b>>6 != 2 { +// // Invalid UTF8 rune +// return false +// } +// utf8Bytes[utf8Len] = b +// utf8Len++ +// utf8Rune = utf8Rune<<6 + rune(b&lowest6bits) +// if utf8Len == utf8ReqLen { +// if !utf8.ValidRune(utf8Rune) { +// // Invalid unicode character +// return false +// } +// bytes := utf8Bytes[:utf8Len] +// out.AddBytes(bytes...) +// in.Byte.MoveCursorMulti(bytes...) +// state = strContent +// } +// case strCRLF: +// if b == '\n' { +// in.Byte.MoveCursorMulti('\r', b) +// out.AddByte('\n') +// state = strContent +// continue +// } +// // Lonely \r, should have been escaped. +// return false +// case strEscape: +// state = strContent +// if escaped, ok := getEscapedChar(b); ok { +// out.AddByte(escaped) +// in.Byte.MoveCursor(b) +// continue +// } +// switch b { +// case ' ', '\t': +// in.Byte.MoveCursor(b) +// state = strEscapeConcat +// continue +// case '\r': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatCRLF +// continue +// case '\n': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatWs2 +// continue +// case 'u', 'U': +// in.Byte.MoveCursor(b) +// unicodeReqLen = 4 +// if b == 'u' { +// unicodeReqLen = 4 +// } else { +// unicodeReqLen = 8 +// } +// unicodeLen = 0 +// utf8Rune = 0 +// state = strEscapeUnicode +// default: +// // Invalid escape sequence used. +// return false +// } +// case strEscapeConcat: +// switch b { +// case ' ', '\t': +// in.Byte.MoveCursor(b) +// continue +// case '\r': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatCRLF +// continue +// case '\n': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatWs2 +// continue +// default: +// // Invalid line concatenation +// return false +// } +// case strEscapeConcatCRLF: +// switch b { +// case '\n': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatWs2 +// continue +// default: +// // Invalid line concatenation +// return false +// } +// case strEscapeConcatWs2: +// switch b { +// case ' ', '\t': +// in.Byte.MoveCursor(b) +// continue +// case '\r': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatCRLF +// continue +// case '\n': +// in.Byte.MoveCursor(b) +// state = strEscapeConcatWs2 +// continue +// default: +// i-- +// state = strContent +// continue +// } +// case strEscapeUnicode: +// value, ok := getHexValueForChar(b) +// if !ok { +// // Invalid unicode escape sequence used. +// return false +// } +// utf8Rune = utf8Rune<<4 + rune(value) +// unicodeHex[unicodeLen] = b +// unicodeLen++ +// if unicodeLen == unicodeReqLen { +// if !utf8.ValidRune(utf8Rune) { +// // Invalid unicode escape +// return false +// } +// in.Byte.MoveCursorMulti(unicodeHex[:unicodeLen]...) +// w := utf8.EncodeRune(utf8Bytes, utf8Rune) +// out.AddBytes(utf8Bytes[:w]...) +// state = strContent +// } +// case strEnd: +// if b == '"' { +// state = strEnd3 +// in.Byte.MoveCursor(b) +// } else { +// state = strContent +// out.AddByte('"') +// i-- +// } +// case strEnd3: +// if b == '"' { +// in.Byte.MoveCursor(b) +// return true +// } +// state = strContent +// out.AddBytes('"', '"') +// i-- +// } +// } +// } +// } // Specific handling of input for multi-line literal strings. // @@ -670,149 +805,149 @@ func multiLineBasicStringHandler(tokenAPI *tokenize.API) bool { // sense for their platform. // // • Control characters other than tab and newline are not permitted in a multi-line literal string. -func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) { - if !p.Accept(multiLineLiteralStringHandler) { - return "", false - } - return p.Result.String(), true -} +// func (t *parser) parseMultiLineLiteralString(p *parse.API) (string, bool) { +// if !p.Accept(multiLineLiteralStringHandler) { +// return "", false +// } +// return p.Result.String(), true +// } -func multiLineLiteralStringHandler(tokenAPI *tokenize.API) bool { - var state stringTokenizerState - in := tokenAPI.Input - out := tokenAPI.Output +// func multiLineLiteralStringHandler(tokenAPI *tokenize.API) bool { +// var state stringTokenizerState +// in := tokenAPI.Input +// out := tokenAPI.Output - utf8ReqLen := 0 - utf8Len := 0 - utf8Rune := rune(0) - utf8Bytes := make([]byte, 4) +// utf8ReqLen := 0 +// utf8Len := 0 +// utf8Rune := rune(0) +// utf8Bytes := make([]byte, 4) - crlf := false +// crlf := false - for { - bs, _ := in.Byte.PeekBuffered(0) - bslen := len(bs) - if bslen == 0 { - return false - } - for i := 0; i < bslen; i++ { - b := bs[i] - switch state { - case strStart, strStart2, strStart3: - if b != '\'' { - // No triple opening quotes found. - return false - } - in.Byte.MoveCursor(b) - switch state { - case strStart: - state = strStart2 - case strStart2: - state = strStart3 - case strStart3: - state = strStart4 - } - case strStart4: - if !crlf && b == '\r' { - crlf = true - in.Byte.MoveCursor(b) - continue - } - if b == '\n' { - in.Byte.MoveCursor(b) - state = strChar - continue - } - if crlf { - // Lonely \r without \n. - return false - } - state = strChar - fallthrough - case strChar: - switch { - case b == '\r': - state = strCRLF - continue - case b == '\n' || b == '\t': - out.AddByte(b) - in.Byte.MoveCursor(b) - continue - case (b >= 0x00 && b <= 0x1F) || b == 0x7F: - // Unescaped control character - // TODO error reporting instead of full reject - return false - case b == '\'': - in.Byte.MoveCursor(b) - state = strEnd2 - continue - } - switch b >> 4 { - case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) - out.AddByte(b) - in.Byte.MoveCursor(b) - continue - case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx) - utf8ReqLen = 2 - utf8Rune = rune((b & lowest5bits) << 6) - case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 3 - utf8Rune = rune((b & lowest4bits) << 6) - case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) - utf8ReqLen = 4 - utf8Rune = rune((b & lowest3bits) << 6) - default: // Invalid UTF8 rune - return false - } - utf8Bytes[0] = b - utf8Len = 1 - state = strUTF8 - case strUTF8: - // This should be a continuation byte (10xxxxxx) - if b>>6 != 2 { - // Invalid UTF8 rune - return false - } - utf8Bytes[utf8Len] = b - utf8Len++ - utf8Rune = utf8Rune<<6 + rune(b&lowest6bits) - if utf8Len == utf8ReqLen { - if !utf8.ValidRune(utf8Rune) { - // Invalid unicode character - return false - } - bytes := utf8Bytes[:utf8Len] - out.AddBytes(bytes...) - in.Byte.MoveCursorMulti(bytes...) - state = strChar - } - case strCRLF: - if b == '\n' { - in.Byte.MoveCursorMulti('\r', b) - out.AddByte('\n') - state = strChar - continue - } - // Lonely \r, should have been escaped. - return false - case strEnd2: - if b == '\'' { - state = strEnd3 - in.Byte.MoveCursor(b) - } else { - state = strChar - out.AddByte('\'') - i-- - } - case strEnd3: - if b == '\'' { - in.Byte.MoveCursor(b) - return true - } - state = strChar - out.AddBytes('\'', '\'') - i-- - } - } - } -} +// for { +// bs, _ := in.Byte.PeekBuffered(0) +// bslen := len(bs) +// if bslen == 0 { +// return false +// } +// for i := 0; i < bslen; i++ { +// b := bs[i] +// switch state { +// case strStart, strStart2, strStart3: +// if b != '\'' { +// // No triple opening quotes found. +// return false +// } +// in.Byte.MoveCursor(b) +// switch state { +// case strStart: +// state = strStart2 +// case strStart2: +// state = strStart3 +// case strStart3: +// state = strStart4 +// } +// case strStart4: +// if !crlf && b == '\r' { +// crlf = true +// in.Byte.MoveCursor(b) +// continue +// } +// if b == '\n' { +// in.Byte.MoveCursor(b) +// state = strContent +// continue +// } +// if crlf { +// // Lonely \r without \n. +// return false +// } +// state = strContent +// fallthrough +// case strContent: +// switch { +// case b == '\r': +// state = strCRLF +// continue +// case b == '\n' || b == '\t': +// out.AddByte(b) +// in.Byte.MoveCursor(b) +// continue +// case (b >= 0x00 && b <= 0x1F) || b == 0x7F: +// // Unescaped control character +// // TODO error reporting instead of full reject +// return false +// case b == '\'': +// in.Byte.MoveCursor(b) +// state = strEnd +// continue +// } +// switch b >> 4 { +// case 0, 1, 2, 3, 4, 5, 6, 7: // 1 byte UTF8 (0xxxxxxx, a.k.a. ASCII) +// out.AddByte(b) +// in.Byte.MoveCursor(b) +// continue +// case 12, 13: // 2 byte UTF8 (110xxxxx 10xxxxxx) +// utf8ReqLen = 2 +// utf8Rune = rune((b & lowest5bits) << 6) +// case 14: // 3 byte UTF8 (1110xxxx 10xxxxxx 10xxxxxx) +// utf8ReqLen = 3 +// utf8Rune = rune((b & lowest4bits) << 6) +// case 15: // 4 byte UTF8 (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) +// utf8ReqLen = 4 +// utf8Rune = rune((b & lowest3bits) << 6) +// default: // Invalid UTF8 rune +// return false +// } +// utf8Bytes[0] = b +// utf8Len = 1 +// state = strUTF8 +// case strUTF8: +// // This should be a continuation byte (10xxxxxx) +// if b>>6 != 2 { +// // Invalid UTF8 rune +// return false +// } +// utf8Bytes[utf8Len] = b +// utf8Len++ +// utf8Rune = utf8Rune<<6 + rune(b&lowest6bits) +// if utf8Len == utf8ReqLen { +// if !utf8.ValidRune(utf8Rune) { +// // Invalid unicode character +// return false +// } +// bytes := utf8Bytes[:utf8Len] +// out.AddBytes(bytes...) +// in.Byte.MoveCursorMulti(bytes...) +// state = strContent +// } +// case strCRLF: +// if b == '\n' { +// in.Byte.MoveCursorMulti('\r', b) +// out.AddByte('\n') +// state = strContent +// continue +// } +// // Lonely \r, should have been escaped. +// return false +// case strEnd: +// if b == '\'' { +// state = strEnd3 +// in.Byte.MoveCursor(b) +// } else { +// state = strContent +// out.AddByte('\'') +// i-- +// } +// case strEnd3: +// if b == '\'' { +// in.Byte.MoveCursor(b) +// return true +// } +// state = strContent +// out.AddBytes('\'', '\'') +// i-- +// } +// } +// } +// } diff --git a/parse/value_string_test.go b/parse/value_string_test.go index 2a336af..bdcdf43 100644 --- a/parse/value_string_test.go +++ b/parse/value_string_test.go @@ -13,30 +13,6 @@ func TestStartString(t *testing.T) { testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected a string value) at start of file"}) } -func TestStartBasicString(t *testing.T) { - parser := newParser() - wrapper := func(p *parse.API) { parser.parseBasicString("xyz", p) } - testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening quotation marks) at start of file"}) -} - -func TestStartLiteralString(t *testing.T) { - parser := newParser() - wrapper := func(p *parse.API) { parser.parseLiteralString("xyz", p) } - testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening single quote) at start of file"}) -} - -func TestStartMultiLineBasicString(t *testing.T) { - parser := newParser() - wrapper := func(p *parse.API) { parser.parseMultiLineBasicString(p) } - testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three quotation marks) at start of file"}) -} - -func TestStartMultiLineLiteralString(t *testing.T) { - parser := newParser() - wrapper := func(p *parse.API) { parser.parseMultiLineLiteralString(p) } - testParse(t, parser, wrapper, parseTest{"(not a string)", "{}", "unexpected input (expected opening three single quotes) at start of file"}) -} - func TestString(t *testing.T) { for _, test := range []parseTest{ {`x=no start quote"`, `{}`, `unexpected input (expected a value) at line 1, column 3`}, diff --git a/parse2/profile-qa.sh b/parse2/profile-qa.sh index 59e86dc..df6a607 100755 --- a/parse2/profile-qa.sh +++ b/parse2/profile-qa.sh @@ -4,10 +4,16 @@ go build $(cd ../cmd/burntsushi-tester/; go build) DURATION=`./parse2 -p 10 < long.toml 2>&1 | grep Duration | awk '{print $2}'` -echo "$DURATION parse2 10 iteration profiling of long.toml" +echo "$DURATION ./parse2 -p 10 < long.toml" + +DURATION=`./parse2 -p 100 < long.toml 2>&1 | grep Duration | awk '{print $2}'` +echo "$DURATION ./parse2 -p 100 < long.toml" DURATION=`./parse2 -p 1000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'` -echo "$DURATION parse2 1000 iteration profiling of normal.toml" +echo "$DURATION ./parse2 -p 1000 < normal.toml" + +DURATION=`./parse2 -p 10000 < normal.toml 2>&1 | grep Duration | awk '{print $2}'` +echo "$DURATION ./parse2 -p 10000 < normal.toml" echo ""