From 3f638c59cd51298788b014eb2f65600cc7a52311 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 17 May 2019 19:56:55 +0000 Subject: [PATCH] Some huge refactorings before I start moving forward again. Learned a lot about Go in the meanwhile, and more ideas keep popping up to improve what I've got so far even further. --- .gitignore | 1 + Makefile | 3 + lexer/{main.go => definitions.go} | 8 +- lexer/helpers_test.go | 75 ++++++++ lexer/lexer_test.go | 43 +++++ lexer/states_test.go | 218 ----------------------- lexer/strings.go | 88 --------- lexer/{comments.go => syn_comments.go} | 0 lexer/syn_comments_test.go | 20 +++ lexer/{end_of_file.go => syn_eof.go} | 0 lexer/{key_value_pairs.go => syn_key.go} | 12 +- lexer/syn_key_test.go | 36 ++++ lexer/syn_strings.go | 84 +++++++++ lexer/syn_strings_test.go | 73 ++++++++ lexer/syn_value.go | 13 ++ parser/parser.go | 138 ++++++-------- parser/statestack.go | 33 ++++ parser/stringbuf.go | 26 +-- parser/stringbuf_test.go | 22 ++- parser/types.go | 2 +- 20 files changed, 470 insertions(+), 425 deletions(-) create mode 100644 Makefile rename lexer/{main.go => definitions.go} (83%) create mode 100644 lexer/helpers_test.go create mode 100644 lexer/lexer_test.go delete mode 100644 lexer/states_test.go delete mode 100644 lexer/strings.go rename lexer/{comments.go => syn_comments.go} (100%) create mode 100644 lexer/syn_comments_test.go rename lexer/{end_of_file.go => syn_eof.go} (100%) rename lexer/{key_value_pairs.go => syn_key.go} (84%) create mode 100644 lexer/syn_key_test.go create mode 100644 lexer/syn_strings.go create mode 100644 lexer/syn_strings_test.go create mode 100644 lexer/syn_value.go create mode 100644 parser/statestack.go diff --git a/.gitignore b/.gitignore index 722d5e7..14eedb5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .vscode +*-workspace diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8d3a8f6 --- /dev/null +++ b/Makefile @@ -0,0 +1,3 @@ +test: + cd parser && go test + cd 
lexer && go test diff --git a/lexer/main.go b/lexer/definitions.go similarity index 83% rename from lexer/main.go rename to lexer/definitions.go index a218c65..ee32ae0 100644 --- a/lexer/main.go +++ b/lexer/definitions.go @@ -2,7 +2,7 @@ package lexer import "github.com/mmakaay/toml/parser" -// Definition of the item types that are emitted by this parser. +// Item types that are emitted by this parser. const ( ItemComment parser.ItemType = iota // An error occurred ItemKey // Key of a key/value pair @@ -35,6 +35,12 @@ const ( longUtf8Escape string = "U" ) +var ( + doubleQuote3 = []string{doubleQuote, doubleQuote, doubleQuote} + shortUtf8Match = []string{backslash, "u", hex, hex, hex, hex} + longUtf8Match = []string{backslash, "U", hex, hex, hex, hex, hex, hex, hex, hex} +) + // NewParser creates a new parser, using the provided input string // as the data to parse. func NewParser(input string) *parser.Parser { diff --git a/lexer/helpers_test.go b/lexer/helpers_test.go new file mode 100644 index 0000000..69d686e --- /dev/null +++ b/lexer/helpers_test.go @@ -0,0 +1,75 @@ +package lexer_test + +import ( + "fmt" + "strings" + "testing" + + "github.com/mmakaay/toml/lexer" + "github.com/mmakaay/toml/parser" +) + +type statesT struct { + name string + in string + out interface{} + err string +} + +func runStatesTs(t *testing.T, tests []statesT) { + for _, c := range tests { + runStatesT(t, c) + } +} + +func runStatesT(t *testing.T, c statesT) { + l, err := lexer.NewParser(c.in).ToArray() + if err == nil && c.err != "" { + t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err) + } + if err != nil && c.err == "" { + t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err) + } + if err != nil && c.err != "" && err.Error() != c.err { + t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err) + } + switch expected := c.out.(type) { + case []string: + if len(expected) != len(l) { + t.Errorf("[%s] Unexpected number 
of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l)) + } + for i, e := range expected { + v := ParserItemToString(l[i]) + if v != e { + t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v) + } + } + case string: + a := make([]string, len(l)) + for _, v := range l { + a = append(a, ParserItemToString(v)) + } + actual := strings.Join(a, "") + if actual != expected { + t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual) + } + } +} + +// ParserItemToString returns a string representation of the parser.Item. +func ParserItemToString(i parser.Item) string { + switch i.Type { + case lexer.ItemComment: + return fmt.Sprintf("#(%s)", i.Value) + case lexer.ItemKey: + return fmt.Sprintf("[%s]", i.Value) + case lexer.ItemString: + return fmt.Sprintf("STR(%s)", i.Value) + case lexer.ItemKeyDot: + return "." + case lexer.ItemAssignment: + return "=" + default: + panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type)) + } +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go new file mode 100644 index 0000000..97542de --- /dev/null +++ b/lexer/lexer_test.go @@ -0,0 +1,43 @@ +package lexer_test + +import ( + "testing" + + "github.com/mmakaay/toml/lexer" +) + +func TestErrorsIncludeLineAndRowPosition(t *testing.T) { + _, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray() + t.Logf("Got error: %s", err.Error()) + if err.Row != 4 { + t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4) + } + if err.Column != 6 { + t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6) + } +} + +func TestEmptyInput(t *testing.T) { + runStatesT(t, statesT{"empty string", "", "", ""}) +} + +func TestInvalidUtf8Data(t *testing.T) { + runStatesTs(t, []statesT{ + {"inside comment", "# \xbc", "", "invalid UTF8 character"}, + {"bare key 1", "\xbc", "", "invalid UTF8 character"}, + {"bare key 2", "key\xbc", 
"[key]", "invalid UTF8 character"}, + {"assignment", "key \xbc", "[key]", "invalid UTF8 character"}, + {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"}, + {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"}, + }) +} + +func TestWhiteSpaceAndNewlines(t *testing.T) { + runStatesTs(t, []statesT{ + {"space", " ", "", ""}, + {"tab", "\t", "", ""}, + {"newline", "\n", "", ""}, + {"carriage return", "\r", "", ""}, + {"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""}, + }) +} diff --git a/lexer/states_test.go b/lexer/states_test.go deleted file mode 100644 index 5a242ff..0000000 --- a/lexer/states_test.go +++ /dev/null @@ -1,218 +0,0 @@ -package lexer_test - -import ( - "fmt" - "strings" - "testing" - - "github.com/mmakaay/toml/lexer" - "github.com/mmakaay/toml/parser" -) - -func TestErrorsIncludeLineAndRowPosition(t *testing.T) { - _, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray() - t.Logf("Got error: %s", err.Error()) - if err.Row != 4 { - t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4) - } - if err.Column != 6 { - t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6) - } -} - -func TestEmptyInput(t *testing.T) { - runStatesT(t, statesT{"empty string", "", "", ""}) -} - -func TestInvalidUtf8Data(t *testing.T) { - runStatesTs(t, []statesT{ - {"inside comment", "# \xbc", "", "invalid UTF8 character"}, - {"bare key 1", "\xbc", "", "invalid UTF8 character"}, - {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"}, - {"assignment", "key \xbc", "[key]", "invalid UTF8 character"}, - {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"}, - {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"}, - }) -} -func TestWhiteSpaceAndNewlines(t *testing.T) { - runStatesTs(t, []statesT{ - {"space", " ", "", ""}, - {"tab", "\t", "", ""}, - {"newline", "\n", "", ""}, - {"carriage return", "\r", "", ""}, - {"all whitespace and newlines", " 
\t \t \r\r\n\n \n \t", "", ""}, - }) -} - -func TestComments(t *testing.T) { - runStatesTs(t, []statesT{ - {"empty comment", "#", "#()", ""}, - {"empty comment with spaces", "# \t \r\n", `#()`, ""}, - {"basic comment", "#chicken", "#(chicken)", ""}, - {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, - {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, - {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, - {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, - {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, - {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, - {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, - }) -} - -func TestKeyWithoutAssignment(t *testing.T) { - err := "unexpected end of file" - runStatesTs(t, []statesT{ - {"bare with whitespace", " a ", "[a]", err}, - {"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err}, - {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err}, - {"bare numbers", "0123456789", "[0123456789]", err}, - {"bare underscore", "_", "[_]", err}, - {"bare dash", "-", "[-]", err}, - {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err}, - {"bare dotted", "a._.c", "[a].[_].[c]", err}, - {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err}, - }) -} - -func TestKeyWithAssignmentButNoValue(t *testing.T) { - err := "unexpected end of file" - runStatesTs(t, []statesT{ - {"bare", "a=", "[a]=", err}, - {"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"}, - {"bare dotted", "a.b=", "[a].[b]=", err}, - {"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, - }) -} - -func TestUnterminatedBasicString(t *testing.T) { - runStatesT(t, statesT{ - "missing closing quote", `a="value`, 
"[a]=", - "Unexpected end of file (expected basic string token)"}) -} - -func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { - runStatesTs(t, []statesT{ - {"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`}, - {"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`}, - {"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`}, - }) - - // No need to write all test cases for disallowed characters by hand. - for i := 0x00; i <= 0x1F; i++ { - name := fmt.Sprintf("control character %x", rune(i)) - runStatesT( - t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", - fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))}) - } -} - -func TestEmptyBasicString(t *testing.T) { - runStatesTs(t, []statesT{ - {"empty", `a=""`, "[a]=STR()", ""}, - {"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""}, - {"with whitespaces", ` a = "" `, "[a]=STR()", ""}, - {"dotted", ` a.b = "" `, "[a].[b]=STR()", ""}, - {"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""}, - {"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""}, - }) -} - -func TestBasicString(t *testing.T) { - runStatesTs(t, []statesT{ - {"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""}, - {"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""}, - }) -} - -func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { - err := "Invalid escape sequence in basic string" - runStatesTs(t, []statesT{ - {"invalid escape sequence", `a="\x"`, "[a]=", err}, - {"too short \\u UTF8", `a="\u123"`, "[a]=", err}, - {"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err}, - {"too short \\U UTF8", `a="\U1234567"`, "[a]=", err}, - {"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err}, - }) -} - -func TestBasicStringEscapes(t *testing.T) { - runStatesTs(t, []statesT{ - {"bell escape", `_="\b"`, 
"[_]=STR(\b)", ""}, - {"tab escape", `_="\t"`, "[_]=STR(\t)", ""}, - {"newline escape", `_="\n"`, "[_]=STR(\n)", ""}, - {"form feed escape", `_="\f"`, "[_]=STR(\f)", ""}, - {"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""}, - {"double quote escape", `_="\""`, `[_]=STR(")`, ""}, - {"backslash escape", `_="\\"`, `[_]=STR(\)`, ""}, - {"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""}, - {"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""}, - {"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""}, - {"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""}, - }) -} - -type statesT struct { - name string - in string - out interface{} - err string -} - -func runStatesTs(t *testing.T, tests []statesT) { - for _, c := range tests { - runStatesT(t, c) - } -} - -func runStatesT(t *testing.T, c statesT) { - l, err := lexer.NewParser(c.in).ToArray() - if err == nil && c.err != "" { - t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err) - } - if err != nil && c.err == "" { - t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err) - } - if err != nil && c.err != "" && err.Error() != c.err { - t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err) - } - switch expected := c.out.(type) { - case []string: - if len(expected) != len(l) { - t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l)) - } - for i, e := range expected { - v := ParserItemToString(l[i]) - if v != e { - t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v) - } - } - case string: - a := make([]string, len(l)) - for _, v := range l { - a = append(a, ParserItemToString(v)) - } - actual := strings.Join(a, "") - if actual != expected { - t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual) - } - } -} - -// ParserItemToString returns a string representation of the -// parser.Item. 
This is used for unit testing purposes. -func ParserItemToString(i parser.Item) string { - switch i.Type { - case lexer.ItemComment: - return fmt.Sprintf("#(%s)", i.Value) - case lexer.ItemKey: - return fmt.Sprintf("[%s]", i.Value) - case lexer.ItemString: - return fmt.Sprintf("STR(%s)", i.Value) - case lexer.ItemKeyDot: - return "." - case lexer.ItemAssignment: - return "=" - default: - panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type)) - } -} diff --git a/lexer/strings.go b/lexer/strings.go deleted file mode 100644 index 960273f..0000000 --- a/lexer/strings.go +++ /dev/null @@ -1,88 +0,0 @@ -package lexer - -import "github.com/mmakaay/toml/parser" - -// There are four ways to express strings: basic, multi-line basic, literal, -// and multi-line literal. All strings must contain only valid UTF-8 characters. -func stateStringValue(l *parser.Parser) parser.StateFn { - switch { - case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote): - // Multi-line basic strings are surrounded by three quotation marks on each side. - return stateMultiLineBasicString - case l.SkipMatching(doubleQuote): - // Basic strings are surrounded by quotation marks. - return stateSingleLineBasicString - } - return l.UnexpectedInputError("a string value") -} - -func stateSingleLineBasicString(l *parser.Parser) parser.StateFn { - if l.Upcoming(doubleQuote, doubleQuote) { - return stateMultiLineBasicString - } - return stateBasicString -} - -func stateMultiLineBasicString(l *parser.Parser) parser.StateFn { - l.EmitError("Not yet implemented") - return nil -} - -// Any Unicode character may be used except those that must be escaped: -// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). 
-const invalidBasicStringCharacters string = "\"\\" + - "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + - "\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" + - "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + - "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + - "\u007F" - -func stateParseBasicString(l *parser.Parser) parser.StateFn { - for { - switch { - case l.AtEndOfFile(): - return l.UnexpectedEndOfFile("basic string token") - case l.SkipMatching(doubleQuote): - return l.PopState() - case l.AcceptMatching(backslash, escapeChars): - // For convenience, some popular characters have a compact escape sequence. - // \b - backspace (U+0008) - // \t - tab (U+0009) - // \n - linefeed (U+000A) - // \f - form feed (U+000C) - // \r - carriage return (U+000D) - // \" - quote (U+0022) - // \\ - backslash (U+005C) - case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex): - // \uXXXX - unicode (U+XXXX) - case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex): - // \UXXXXXXXX - unicode (U+XXXXXXXX) - case l.Upcoming(backslash): - // All other escape sequences not listed above are reserved and, - // if used, TOML should produce an error. - return l.EmitError("Invalid escape sequence in basic string") - case l.Upcoming(invalidBasicStringCharacters): - // Any Unicode character may be used except those that must be escaped: - // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). 
- r, _, _ := l.Match(invalidBasicStringCharacters) - l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0]) - return nil - default: - if !l.AcceptAny() { - return nil - } - } - } -} - -func stateBasicString(l *parser.Parser) parser.StateFn { - l.PushState(func(l *parser.Parser) parser.StateFn { - err := l.EmitInterpreted(ItemString) - if err != nil { - l.EmitError("Invalid data in string: %s", err) - return nil - } - return stateKeyValuePair - }) - return stateParseBasicString -} diff --git a/lexer/comments.go b/lexer/syn_comments.go similarity index 100% rename from lexer/comments.go rename to lexer/syn_comments.go diff --git a/lexer/syn_comments_test.go b/lexer/syn_comments_test.go new file mode 100644 index 0000000..0927350 --- /dev/null +++ b/lexer/syn_comments_test.go @@ -0,0 +1,20 @@ +package lexer_test + +import ( + "testing" +) + +func TestComments(t *testing.T) { + runStatesTs(t, []statesT{ + {"empty comment", "#", "#()", ""}, + {"empty comment with spaces", "# \t \r\n", `#()`, ""}, + {"basic comment", "#chicken", "#(chicken)", ""}, + {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, + {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, + {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, + {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, + {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, + {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, + {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, + }) +} diff --git a/lexer/end_of_file.go b/lexer/syn_eof.go similarity index 100% rename from lexer/end_of_file.go rename to lexer/syn_eof.go diff --git a/lexer/key_value_pairs.go b/lexer/syn_key.go similarity index 84% rename from lexer/key_value_pairs.go rename to lexer/syn_key.go index c64213f..31d85e7 
100644 --- a/lexer/key_value_pairs.go +++ b/lexer/syn_key.go @@ -10,7 +10,7 @@ func stateKeyValuePair(l *parser.Parser) parser.StateFn { case l.Upcoming(hash): return l.ToChildState(stateCommentStart) case l.Upcoming(startOfKey): - return l.ToChildState(stateKey) + return stateKey default: return stateEndOfFile } @@ -61,13 +61,3 @@ func stateKeyAssignment(l *parser.Parser) parser.StateFn { } return l.UnexpectedInputError("a value assignment") } - -// Values must be of the following types: String, Integer, Float, Boolean, -// Datetime, Array, or Inline Table. Unspecified values are invalid. -func stateValue(l *parser.Parser) parser.StateFn { - l.SkipConsecutive(whitespace) - if l.Upcoming(quoteChars) { - return stateStringValue - } - return l.UnexpectedInputError("a value") -} diff --git a/lexer/syn_key_test.go b/lexer/syn_key_test.go new file mode 100644 index 0000000..fe3b234 --- /dev/null +++ b/lexer/syn_key_test.go @@ -0,0 +1,36 @@ +package lexer_test + +import ( + "testing" +) + +func TestKeyWithoutAssignment(t *testing.T) { + err := "unexpected end of file" + runStatesTs(t, []statesT{ + {"bare with whitespace", " a ", "[a]", err}, + {"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err}, + {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err}, + {"bare numbers", "0123456789", "[0123456789]", err}, + {"bare underscore", "_", "[_]", err}, + {"bare dash", "-", "[-]", err}, + {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err}, + }) +} + +func TestDottedKey(t *testing.T) { + err := "unexpected end of file" + runStatesTs(t, []statesT{ + {"bare dotted", "a._.c", "[a].[_].[c]", err}, + {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err}, + }) +} + +func TestKeyWithAssignmentButNoValue(t *testing.T) { + err := "unexpected end of file" + runStatesTs(t, []statesT{ + {"bare", "a=", "[a]=", err}, + {"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"}, + 
{"bare dotted", "a.b=", "[a].[b]=", err}, + {"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, + }) +} diff --git a/lexer/syn_strings.go b/lexer/syn_strings.go new file mode 100644 index 0000000..18fd0fb --- /dev/null +++ b/lexer/syn_strings.go @@ -0,0 +1,84 @@ +package lexer + +import "github.com/mmakaay/toml/parser" + +// There are four ways to express strings: basic, multi-line basic, literal, +// and multi-line literal. All strings must contain only valid UTF-8 characters. +// * Multi-line basic strings are surrounded by three quotation marks on each side. +// * Basic strings are surrounded by quotation marks. +func stateStringValue(l *parser.Parser) parser.StateFn { + switch { + case l.SkipMatching(doubleQuote3...): + return stateMultiLineBasicString + case l.SkipMatching(doubleQuote): + return l.QueueStates(stateParseString, stateBasicStringSpecific) + } + return l.UnexpectedInputError("a string value") +} + +// Specific handling of input for basic strings. +// * A double quote ends the string +// * No additional \escape sequences are allowed. What the spec says about this: +// "All other escape sequences [..] are reserved and, if used, TOML should +// produce an error." + +func stateBasicStringSpecific(p *parser.Parser) parser.StateFn { + switch { + case p.SkipMatching(doubleQuote): + if err := p.EmitInterpreted(ItemString); err != nil { + return p.EmitError("Invalid data in string: %s", err) + } + return stateKeyValuePair + case p.Upcoming(backslash): + return p.EmitError("Invalid escape sequence") + default: + return p.QueueStates(stateParseString, stateBasicStringSpecific) + } +} + +func stateMultiLineBasicString(l *parser.Parser) parser.StateFn { + l.EmitError("Not yet implemented") + return nil +} + +// Any Unicode character may be used except those that must be escaped: +// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). 
+const invalidBasicStringCharacters string = "\"\\" + + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + + "\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" + + "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + + "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + + "\u007F" + +func stateParseString(l *parser.Parser) parser.StateFn { + switch { + case l.AtEndOfFile(): + return l.UnexpectedEndOfFile("basic string token") + case l.AcceptMatching(backslash, escapeChars): + // For convenience, some popular characters have a compact escape sequence. + // \b - backspace (U+0008) + // \t - tab (U+0009) + // \n - linefeed (U+000A) + // \f - form feed (U+000C) + // \r - carriage return (U+000D) + // \" - quote (U+0022) + // \\ - backslash (U+005C) + case l.AcceptMatching(shortUtf8Match...): + // \uXXXX - unicode (U+XXXX) + case l.AcceptMatching(longUtf8Match...): + // \UXXXXXXXX - unicode (U+XXXXXXXX) + case l.Upcoming(backslash) || l.Upcoming(doubleQuote): + // Returning to the parent state to have special cases handled, + // because there are differences between single and multi line strings. + return l.ToParentState() + case l.Upcoming(invalidBasicStringCharacters): + // Any Unicode character may be used except those that must be escaped: + // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). 
+ r, _, _ := l.Match(invalidBasicStringCharacters) + l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0]) + return nil + default: + l.AcceptAny() + } + return stateParseString +} diff --git a/lexer/syn_strings_test.go b/lexer/syn_strings_test.go new file mode 100644 index 0000000..bbb0cfe --- /dev/null +++ b/lexer/syn_strings_test.go @@ -0,0 +1,73 @@ +package lexer_test + +import ( + "fmt" + "testing" +) + +func TestUnterminatedBasicString(t *testing.T) { + runStatesT(t, statesT{ + "missing closing quote", `a="value`, "[a]=", + "Unexpected end of file (expected basic string token)"}) +} + +func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { + runStatesTs(t, []statesT{ + {"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`}, + {"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`}, + {"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`}, + }) + + // No need to write all test cases for disallowed characters by hand. 
+ for i := 0x00; i <= 0x1F; i++ { + name := fmt.Sprintf("control character %x", rune(i)) + runStatesT( + t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", + fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))}) + } +} + +func TestEmptyBasicString(t *testing.T) { + runStatesTs(t, []statesT{ + {"empty", `a=""`, "[a]=STR()", ""}, + {"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""}, + {"with whitespaces", ` a = "" `, "[a]=STR()", ""}, + {"dotted", ` a.b = "" `, "[a].[b]=STR()", ""}, + {"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""}, + {"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""}, + }) +} + +func TestBasicString(t *testing.T) { + runStatesTs(t, []statesT{ + {"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""}, + {"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""}, + }) +} + +func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { + err := "Invalid escape sequence" + runStatesTs(t, []statesT{ + {"invalid escape sequence", `a="\x"`, "[a]=", err}, + {"too short \\u UTF8", `a="\u123"`, "[a]=", err}, + {"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err}, + {"too short \\U UTF8", `a="\U1234567"`, "[a]=", err}, + {"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err}, + }) +} + +func TestBasicStringEscapes(t *testing.T) { + runStatesTs(t, []statesT{ + {"bell escape", `_="\b"`, "[_]=STR(\b)", ""}, + {"tab escape", `_="\t"`, "[_]=STR(\t)", ""}, + {"newline escape", `_="\n"`, "[_]=STR(\n)", ""}, + {"form feed escape", `_="\f"`, "[_]=STR(\f)", ""}, + {"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""}, + {"double quote escape", `_="\""`, `[_]=STR(")`, ""}, + {"backslash escape", `_="\\"`, `[_]=STR(\)`, ""}, + {"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""}, + {"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""}, + {"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""}, + {"UTF8 vertical tab", `_="\u000B"`, 
"[_]=STR(\v)", ""}, + }) +} diff --git a/lexer/syn_value.go b/lexer/syn_value.go new file mode 100644 index 0000000..15501a2 --- /dev/null +++ b/lexer/syn_value.go @@ -0,0 +1,13 @@ +package lexer + +import "github.com/mmakaay/toml/parser" + +// Values must be of the following types: String, Integer, Float, Boolean, +// Datetime, Array, or Inline Table. Unspecified values are invalid. +func stateValue(l *parser.Parser) parser.StateFn { + l.SkipConsecutive(whitespace) + if l.Upcoming(quoteChars) { + return stateStringValue + } + return l.UnexpectedInputError("a value") +} diff --git a/parser/parser.go b/parser/parser.go index f6af569..5fcd8ec 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -17,33 +17,9 @@ func New(input string, startState StateFn) *Parser { } } -func (p *Parser) ToChildState(state StateFn) StateFn { - p.PushState(p.state) - return state -} - -func (p *Parser) ToParentState() StateFn { - state := p.PopState() - return state -} - -// PushState adds the state function to the state stack. -// This is used for implementing nested parsing. -func (l *Parser) PushState(state StateFn) { - l.stack = append(l.stack, state) -} - -// PopState pops the last pushed state from the state stack. -func (l *Parser) PopState() StateFn { - last := len(l.stack) - 1 - head, tail := l.stack[:last], l.stack[last] - l.stack = head - return tail -} - // AtEndOfFile returns true when there is no more data available in the input. -func (l *Parser) AtEndOfFile() bool { - return l.pos >= l.len +func (p *Parser) AtEndOfFile() bool { + return p.pos >= p.len } func (p *Parser) AtEndOfLine() bool { @@ -67,29 +43,29 @@ func (p *Parser) AcceptEndOfLine() bool { // normalized newline, which is just a '\n'. This will normalize // '\r\n' into '\n'. if p.SkipEndOfLine() { - p.buffer.WriteRune('\n') + p.buffer.writeRune('\n') return true } return false } // Emit passes a Parser item to the client, including the provided string. 
-func (l *Parser) Emit(t ItemType, s string) { - l.items <- Item{t, s} - l.buffer.Reset() +func (p *Parser) Emit(t ItemType, s string) { + p.items <- Item{t, s} + p.buffer.reset() } // EmitLiteral passes a Parser item to the client, including the accumulated // string buffer data as a literal string. -func (l *Parser) EmitLiteral(t ItemType) { - l.Emit(t, l.buffer.AsLiteralString()) +func (p *Parser) EmitLiteral(t ItemType) { + p.Emit(t, p.buffer.asLiteralString()) } // EmitLiteralTrim passes a Parser item to the client, including the // accumulated string buffer data as a literal string with whitespace // trimmed from it. -func (l *Parser) EmitLiteralTrim(t ItemType) { - l.Emit(t, strings.TrimSpace(l.buffer.AsLiteralString())) +func (p *Parser) EmitLiteralTrim(t ItemType) { + p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString())) } // EmitInterpreted passes a Parser item to the client, including the @@ -97,19 +73,19 @@ func (l *Parser) EmitLiteralTrim(t ItemType) { // (handling escape codes like \n, \t, \uXXXX, etc.) // This method might return an error, in case there is data in the // string buffer that is not valid for string interpretation. -func (l *Parser) EmitInterpreted(t ItemType) error { - s, err := l.buffer.AsInterpretedString() +func (p *Parser) EmitInterpreted(t ItemType) error { + s, err := p.buffer.asInterpretedString() if err != nil { return err } - l.Emit(t, s) + p.Emit(t, s) return nil } // EmitError emits a Parser error item to the client. -func (l *Parser) EmitError(format string, args ...interface{}) StateFn { +func (p *Parser) EmitError(format string, args ...interface{}) StateFn { message := fmt.Sprintf(format, args...) 
- l.Emit(ItemError, message) + p.Emit(ItemError, message) return nil } @@ -117,8 +93,8 @@ func (l *Parser) EmitError(format string, args ...interface{}) StateFn { // It returns a slice of runes that were found, their total byte width // and a boolean indicating whether or not all provided patterns were // satisfied by the input data. -func (l *Parser) Match(patterns ...string) ([]rune, int, bool) { - peeked, width, ok := l.peekMulti(len(patterns)) +func (p *Parser) Match(patterns ...string) ([]rune, int, bool) { + peeked, width, ok := p.peekMulti(len(patterns)) if ok { for i, r := range patterns { if strings.IndexRune(r, peeked[i]) < 0 { @@ -132,17 +108,17 @@ func (l *Parser) Match(patterns ...string) ([]rune, int, bool) { // Upcoming checks if the upcoming runes satisfy all provided patterns. // Returns true if all provided patterns are satisfied. -func (l *Parser) Upcoming(patterns ...string) bool { - _, _, ok := l.Match(patterns...) +func (p *Parser) Upcoming(patterns ...string) bool { + _, _, ok := p.Match(patterns...) return ok } // AcceptAny adds the next rune from the input to the string buffer. // If no rune could be read (end of file or invalid UTF8 data), // then false is returned. -func (l *Parser) AcceptAny() bool { - if r, ok := l.next(); ok { - l.buffer.WriteRune(r) +func (p *Parser) AcceptAny() bool { + if r, ok := p.next(); ok { + p.buffer.writeRune(r) return true } return false @@ -151,16 +127,16 @@ func (l *Parser) AcceptAny() bool { // AcceptMatching adds the next runes to the string buffer, but only // if the upcoming runes satisfy the provided patterns. // When runes were added then true is returned, false otherwise. -func (l *Parser) AcceptMatching(patterns ...string) bool { - return l.progress(func(r rune) { l.buffer.WriteRune(r) }, patterns...) +func (p *Parser) AcceptMatching(patterns ...string) bool { + return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...) 
} // AcceptConsecutive adds consecutive runes from the input to the string // buffer, as long as they exist in the pattern. // If any runes were added then true is returned, false otherwise. -func (l *Parser) AcceptConsecutive(pattern string) bool { +func (p *Parser) AcceptConsecutive(pattern string) bool { accepted := false - for l.AcceptMatching(pattern) { + for p.AcceptMatching(pattern) { accepted = true } return accepted @@ -168,11 +144,11 @@ func (l *Parser) AcceptConsecutive(pattern string) bool { // SkipMatching skips runes, but only when all provided patterns are satisfied. // Returns true when one or more runes were skipped. -func (l *Parser) SkipMatching(patterns ...string) bool { - if runes, w, ok := l.Match(patterns...); ok { - l.pos += w +func (p *Parser) SkipMatching(patterns ...string) bool { + if runes, w, ok := p.Match(patterns...); ok { + p.pos += w for _, r := range runes { - l.advanceCursor(r) + p.advanceCursor(r) } return true } @@ -181,9 +157,9 @@ func (l *Parser) SkipMatching(patterns ...string) bool { // SkipConsecutive skips consecutive runes from the provided pattern. // Returns true when one or more runes were skipped. -func (l *Parser) SkipConsecutive(pattern string) bool { +func (p *Parser) SkipConsecutive(pattern string) bool { didSkip := false - for l.SkipMatching(pattern) { + for p.SkipMatching(pattern) { didSkip = true } return didSkip @@ -197,10 +173,10 @@ func (l *Parser) SkipConsecutive(pattern string) bool { // error item that tells the client that an unexpected rune was // encountered in the input. // The parameter 'expected' is used to provide some context to the error. -func (l *Parser) UnexpectedInputError(expected string) StateFn { +func (p *Parser) UnexpectedInputError(expected string) StateFn { // next() takes care of error messages for ok == false. 
- if r, ok := l.next(); ok { - return l.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) + if r, ok := p.next(); ok { + return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) } return nil } @@ -209,8 +185,8 @@ func (l *Parser) UnexpectedInputError(expected string) StateFn { // error item that tells the client that more data was expected from // the input. // The parameter 'expected' is used to provide some context to the error. -func (l *Parser) UnexpectedEndOfFile(expected string) StateFn { - return l.EmitError("Unexpected end of file (expected %s)", expected) +func (p *Parser) UnexpectedEndOfFile(expected string) StateFn { + return p.EmitError("Unexpected end of file (expected %s)", expected) } // ============================================================================ @@ -220,8 +196,8 @@ func (l *Parser) UnexpectedEndOfFile(expected string) StateFn { // peek returns but does not advance to the next rune(s) in the input. // Returns the rune, its width and a boolean. The boolean will be false in case // no upcoming rune can be peeked (end of data or invalid UTF8 character). -func (l *Parser) peek() (rune, int, bool) { - peeked, width := utf8.DecodeRuneInString(l.input[l.pos:]) +func (p *Parser) peek() (rune, int, bool) { + peeked, width := utf8.DecodeRuneInString(p.input[p.pos:]) return peeked, width, peeked != utf8.RuneError } @@ -229,11 +205,11 @@ func (l *Parser) peek() (rune, int, bool) { // Returns a slice of runes, their total width in bytes and a boolean. // The boolean will be false in case less runes can be peeked than // the requested amount (end of data or invalid UTF8 character). 
-func (l *Parser) peekMulti(amount int) ([]rune, int, bool) { +func (p *Parser) peekMulti(amount int) ([]rune, int, bool) { width := 0 var peeked []rune for i := 0; i < amount; i++ { - r, w := utf8.DecodeRuneInString(l.input[l.pos+width:]) + r, w := utf8.DecodeRuneInString(p.input[p.pos+width:]) switch { case r == utf8.RuneError: return peeked, width, false @@ -252,12 +228,12 @@ func (l *Parser) peekMulti(amount int) ([]rune, int, bool) { // moved forward, false otherwise. // A callback function can be provided to specify what to do with // the runes that are encountered in the input. -func (l *Parser) progress(callback func(rune), patterns ...string) bool { - if runes, w, ok := l.Match(patterns...); ok { - l.pos += w +func (p *Parser) progress(callback func(rune), patterns ...string) bool { + if runes, w, ok := p.Match(patterns...); ok { + p.pos += w for _, r := range runes { callback(r) - l.advanceCursor(r) + p.advanceCursor(r) } return true } @@ -269,17 +245,17 @@ func (l *Parser) progress(callback func(rune), patterns ...string) bool { // When the end of input is reached, or an invalid UTF8 character is // read, then false is returned. Both are considered error cases, // and for that reason these automatically emit an error to the client. -func (l *Parser) next() (rune, bool) { - r, w, ok := l.peek() +func (p *Parser) next() (rune, bool) { + r, w, ok := p.peek() if ok { - l.pos += w - l.advanceCursor(r) + p.pos += w + p.advanceCursor(r) return r, true } if r == utf8.RuneError && w == 0 { - l.EmitError("unexpected end of file") + p.EmitError("unexpected end of file") } else { - l.EmitError("invalid UTF8 character") + p.EmitError("invalid UTF8 character") } return r, false } @@ -287,12 +263,12 @@ func (l *Parser) next() (rune, bool) { // advanceCursor advances the rune cursor one position in the // input data. While doing so, it keeps tracks of newlines, // so we can report on row + column positions on error. 
-func (l *Parser) advanceCursor(r rune) { - if l.newline { - l.cursorColumn = 0 - l.cursorRow++ +func (p *Parser) advanceCursor(r rune) { + if p.newline { + p.cursorColumn = 0 + p.cursorRow++ } else { - l.cursorColumn++ + p.cursorColumn++ } - l.newline = r == '\n' + p.newline = r == '\n' } diff --git a/parser/statestack.go b/parser/statestack.go new file mode 100644 index 0000000..1252fc6 --- /dev/null +++ b/parser/statestack.go @@ -0,0 +1,33 @@ +package parser + +func (p *Parser) QueueStates(states ...StateFn) StateFn { + first, followup := states[0], states[1:] + for reverse := range followup { + p.PushState(followup[len(followup)-reverse-1]) + } + return first +} + +func (p *Parser) ToChildState(state StateFn) StateFn { + p.PushState(p.state) + return state +} + +func (p *Parser) ToParentState() StateFn { + state := p.PopState() + return state +} + +// PushState adds the state function to the state stack. +// This is used for implementing nested parsing. +func (p *Parser) PushState(state StateFn) { + p.stack = append(p.stack, state) +} + +// PopState pops the last pushed state from the state stack. +func (p *Parser) PopState() StateFn { + last := len(p.stack) - 1 + head, tail := p.stack[:last], p.stack[last] + p.stack = head + return tail +} diff --git a/parser/stringbuf.go b/parser/stringbuf.go index 1b9b570..46df171 100644 --- a/parser/stringbuf.go +++ b/parser/stringbuf.go @@ -6,40 +6,40 @@ import ( "strings" ) -// StringBuffer is a string buffer implementation, which is used by the parser +// stringBuffer is a string buffer implementation, which is used by the parser // to efficiently accumulate runes from the input and eventually turn these // into a string, either literal or interpreted. -type StringBuffer struct { +type stringBuffer struct { buffer bytes.Buffer } -// Reset resets the string buffer, in order to build a new string. -func (b *StringBuffer) Reset() *StringBuffer { +// reset resets the string buffer, in order to build a new string. 
+func (b *stringBuffer) reset() *stringBuffer { b.buffer.Reset() return b } -// WriteString adds the runes of the input string to the string buffer. -func (b *StringBuffer) WriteString(s string) *StringBuffer { +// writeString adds the runes of the input string to the string buffer. +func (b *stringBuffer) writeString(s string) *stringBuffer { for _, r := range s { - b.WriteRune(r) + b.writeRune(r) } return b } -// WriteRune adds a single rune to the string buffer. -func (b *StringBuffer) WriteRune(r rune) *StringBuffer { +// writeRune adds a single rune to the string buffer. +func (b *stringBuffer) writeRune(r rune) *stringBuffer { b.buffer.WriteRune(r) return b } -// AsLiteralString returns the string buffer as a literal string. +// asLiteralString returns the string buffer as a literal string. // Literal means that no escape sequences are processed. -func (b *StringBuffer) AsLiteralString() string { +func (b *stringBuffer) asLiteralString() string { return b.buffer.String() } -// AsInterpretedString returns the string in its interpreted form. +// asInterpretedString returns the string in its interpreted form. // Interpreted means that escape sequences are handled in the way that Go would // have, had it been inside double quotes. It translates for example escape // sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string @@ -47,7 +47,7 @@ func (b *StringBuffer) AsLiteralString() string { // Since the input might contain invalid escape sequences, this method // also returns an error. When an error is returned, the returned string will // contain the string as far as it could be interpreted. 
-func (b *StringBuffer) AsInterpretedString() (string, error) { +func (b *stringBuffer) asInterpretedString() (string, error) { var sb strings.Builder tail := b.buffer.String() for len(tail) > 0 { diff --git a/parser/stringbuf_test.go b/parser/stringbuf_test.go index fda5e98..a71a6e8 100644 --- a/parser/stringbuf_test.go +++ b/parser/stringbuf_test.go @@ -1,15 +1,13 @@ -package parser_test +package parser import ( "testing" - - "github.com/mmakaay/toml/parser" ) func TestGeneratingStringDoesNotResetBuffer(t *testing.T) { - var b parser.StringBuffer - s1, _ := b.WriteString(`hi\nthere`).AsInterpretedString() - s2 := b.AsLiteralString() + var b stringBuffer + s1, _ := b.writeString(`hi\nthere`).asInterpretedString() + s2 := b.asLiteralString() if s1 != "hi\nthere" { t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1) } @@ -19,15 +17,15 @@ func TestGeneratingStringDoesNotResetBuffer(t *testing.T) { } func TestResetResetsBuffer(t *testing.T) { - var b parser.StringBuffer - s := b.WriteRune('X').Reset().AsLiteralString() + var b stringBuffer + s := b.writeRune('X').reset().asLiteralString() if s != "" { t.Fatalf("Did not get expected empty string, but %q", s) } } func TestAsLiteralString(t *testing.T) { - b := parser.StringBuffer{} + b := stringBuffer{} for _, c := range []stringbufT{ {"empty string", ``, ``, OK}, {"simple string", `Simple string!`, `Simple string!`, OK}, @@ -39,7 +37,7 @@ func TestAsLiteralString(t *testing.T) { {"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK}, {"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK}, } { - s := b.Reset().WriteString(c.in).AsLiteralString() + s := b.reset().writeString(c.in).asLiteralString() if s != c.out { t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s) } @@ -47,7 +45,7 @@ func TestAsLiteralString(t *testing.T) { } func TestAsInterpretedString(t *testing.T) { - b := parser.StringBuffer{} + b := stringBuffer{} for _, c := range []stringbufT{ {"empty 
string", "", "", OK}, {"one character", "Simple string!", "Simple string!", OK}, @@ -64,7 +62,7 @@ func TestAsInterpretedString(t *testing.T) { `I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`, "I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK}, } { - s, err := b.Reset().WriteString(c.in).AsInterpretedString() + s, err := b.reset().writeString(c.in).asInterpretedString() if c.isSuccessCase && err != nil { t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err) } diff --git a/parser/types.go b/parser/types.go index 4818f95..951a785 100644 --- a/parser/types.go +++ b/parser/types.go @@ -10,7 +10,7 @@ type Parser struct { newline bool // keep track of when we have scanned a newline cursorRow int // current row number in the input cursorColumn int // current column position in the input - buffer StringBuffer // an efficient buffer, used to build string values + buffer stringBuffer // an efficient buffer, used to build string values items chan Item // channel of resulting Parser items item Item // the current item as reached by Next() and retrieved by Get() err *Error // an error when lexing failed, retrieved by Error()