From b49715652eb3c6327d7264bba433e604c4318ab6 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 17 Jun 2019 13:59:59 +0000 Subject: [PATCH] Brought the TOML code up-to-speed with the latest version of parsekit. --- comment.go | 17 +++---- comment_test.go | 34 +++++++------- go.sum | 3 +- helpers_test.go | 92 +++++++++++--------------------------- keyvaluepair.go | 103 ++++++++++++++++++++++++------------------- keyvaluepair_test.go | 102 ++++++++++++++++++++++++++---------------- toml.go | 61 ++++++++++++++++++------- toml_test.go | 64 ++++++++++++--------------- value_string.go | 97 ++++++++++++++++++++-------------------- value_string_test.go | 96 +++++++++++++++------------------------- 10 files changed, 333 insertions(+), 336 deletions(-) diff --git a/comment.go b/comment.go index c573d48..a7f5835 100644 --- a/comment.go +++ b/comment.go @@ -1,20 +1,17 @@ package parser import ( - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/parse" ) // A '#' hash symbol marks the rest of the line as a comment. // All characters up to the end of the line are included in the comment. -var comment = c.Seq( - m.Drop(c.OneOrMore(a.Hash)), - m.Trim(c.ZeroOrMore(c.Not(a.EndOfLine)), " \t"), - m.Drop(a.EndOfLine)) +var comment = c.Seq(a.Hash, c.ZeroOrMore(c.Not(a.EndOfLine)), m.Drop(a.EndOfLine)) -func startComment(p *parsekit.ParseAPI) { - p.Expects("comment") - if p.On(comment).Accept() { - p.EmitLiteral(ItemComment) - p.RouteReturn() +func (t *parser) startComment(p *parse.API) { + if p.Accept(comment) { + t.emitCommand(cComment, p.Result().String()) + } else { + p.Expected("comment") } } diff --git a/comment_test.go b/comment_test.go index 944c3df..4b7cd3d 100644 --- a/comment_test.go +++ b/comment_test.go @@ -1,22 +1,24 @@ -package parser_test +package parser import ( "testing" ) -func TestComments(t *testing.T) { - runStatesTs(t, []statesT{ - {"empty comment at end of file", "#", "#()", ""}, - {"empty comment at end of windows line", "#\r\n", "#()", ""}, - {"empty comment at end of unix line", "#\n", "#()", ""}, - {"empty comment with spaces", "# \t \r\n", `#()`, ""}, - {"basic comment", "#chicken", "#(chicken)", ""}, - {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, - {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, - {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, - {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, - {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, - {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, - {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r\n", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, - }) +func TestComment2(t *testing.T) { + for _, test := range []parseTest{ + {``, []string{`Error: unexpected end of file (expected comment) at start of file`}}, + {`#`, []string{`comment("#")`}}, + {`# `, []string{`comment("# ")`}}, + {`# with data`, []string{`comment("# with data")`}}, + {"# ending in EOL & EOF\r\n", []string{`comment("# ending in EOL & EOF")`}}, + {`# \xxx/ \u can't escape/`, []string{`comment("# \\xxx/ \\u can't escape/")`}}, + {"# \tlexe\r accepts embedded ca\r\riage \returns\r\n", []string{ + `comment("# \tlexe\r accepts embedded ca\r\riage \returns")`}}, + {"# with data and newline\ncode continues here", []string{ + `comment("# with data and newline")`, + `Error: unexpected input (expected end of file) at line 2, column 1`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startComment, test) + } } diff --git a/go.sum b/go.sum index 6491093..391dbee 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,2 @@ git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939 h1:cMBHhfSJR2BZgVN7NmP+c2agNlXDef4Iz6+XQp5AqdU= -git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU= -github.com/mmakaay/toml v0.3.1 h1:2uKRPvA/smKM8YuYGxWnW4KximMkWOMfunJOXgM5Zos= +git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU= \ No newline at end of file diff --git a/helpers_test.go b/helpers_test.go index 8702099..6d53cbb 100644 --- a/helpers_test.go +++ b/helpers_test.go @@ -1,12 +1,10 @@ -package parser_test +package parser import ( "fmt" - "strings" "testing" - "git.makaay.nl/mauricem/go-parsekit" - toml "git.makaay.nl/mauricem/go-toml" + "git.makaay.nl/mauricem/go-parsekit/parse" ) type statesT struct { @@ -16,75 +14,35 @@ type statesT struct { err string } -func runStatesTs(t *testing.T, tests []statesT) { - for _, c := range tests { - runStatesT(t, c) - } +type parseTest struct { + input interface{} + expected []string } -// ToArray returns Parser items as an array. -// When an error occurs during scanning, a partial result will be -// returned, accompanied by the error that occurred. -func parseItemsToArray(p *parsekit.ParseRun) ([]parsekit.Item, *parsekit.Error) { - var items []parsekit.Item - for { - item, err, more := p.Next() - if !more { - return items, err - } - items = append(items, item) +func testParseHandler(t *testing.T, p *parser, handler parse.Handler, test parseTest) { + err := parse.New(handler)(test.input) + results := []string{} + for _, cmd := range p.commands { + results = append(results, cmd.String()) + } + if err != nil { + results = append(results, fmt.Sprintf("Error: %s", err)) } -} -func runStatesT(t *testing.T, c statesT) { - p := toml.Parse(c.in) - l, err := parseItemsToArray(p) - if err == nil && c.err != "" { - t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err) - } - if err != nil && c.err == "" { - t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err) - } - if err != nil && c.err != "" && err.Error() != c.err { - t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err) - } - switch expected := c.out.(type) { - case []string: - if len(expected) != len(l) { - t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l)) + for i, e := range test.expected { + if i > len(results)-1 { + t.Errorf("No result at index %d, expected: %s", i, e) + continue } - for i, e := range expected { - v := parserItemToString(l[i]) - if v != e { - t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v) - } + r := results[i] + if e != r { + t.Errorf("Unexpected result at index %d:\nexpected: %s\nactual: %s\n", i, e, r) } - case string: - a := make([]string, len(l)) - for _, v := range l { - a = append(a, parserItemToString(v)) - } - actual := strings.Join(a, "") - if actual != expected { - t.Errorf("[%s] Unexpected parser output:\nexpected: %q\nactual: %q\n", c.name, expected, actual) + } + if len(results) > len(test.expected) { + t.Errorf("Got more results than expected, surplus result(s):\n") + for i := len(test.expected); i < len(results); i++ { + t.Errorf("[%d] %s", i, results[i]) } } } - -// parserItemToString returns a string representation of the parsekit.Item. -func parserItemToString(i parsekit.Item) string { - switch i.Type { - case toml.ItemComment: - return fmt.Sprintf("#(%s)", i.Value) - case toml.ItemKey: - return fmt.Sprintf("[%s]", i.Value) - case toml.ItemString: - return fmt.Sprintf("STR(%s)", i.Value) - case toml.ItemKeyDot: - return "." - case toml.ItemAssignment: - return "=" - default: - panic(fmt.Sprintf("parsekit bug: no string formatting exists for parsekit.Item id %d", i.Type)) - } -} diff --git a/keyvaluepair.go b/keyvaluepair.go index 260ef7d..7e22be0 100644 --- a/keyvaluepair.go +++ b/keyvaluepair.go @@ -1,15 +1,20 @@ package parser -import "git.makaay.nl/mauricem/go-parsekit" +import ( + "git.makaay.nl/mauricem/go-parsekit/parse" +) // The primary building block of a TOML document is the key/value pair. var ( + dropWhitespace = m.Drop(a.Whitespace.Optional()) + dropBlanks = m.Drop(a.Blanks.Optional()) + // Keys are on the left of the equals sign and values are on the right. - // Whitespace is ignored around key names and values. The key, equals + // Blank is ignored around key names and values. The key, equals // sign, and value must be on the same line (though some values can be // broken over multiple lines). - keyAssignment = c.Seq(c.Opt(a.Whitespace), a.Equal, c.Opt(a.Whitespace)) + keyAssignment = c.Seq(dropBlanks, a.Equal, dropBlanks) // A key may be either bare, quoted or dotted. Bare keys may only // contain ASCII letters, ASCII digits, underscores, and dashes @@ -26,62 +31,68 @@ var ( startOfKey = c.Any(bareKeyRune, a.SingleQuote, a.DoubleQuote) // Dotted keys are a sequence of bare or quoted keys joined with a dot. - // This allows for grouping similar properties together. Whitespace - // around dot-separated parts is ignored, however, best practice is to - // not use any extraneous whitespace. - keySeparatorDot = c.Seq(c.Opt(a.Whitespace), a.Dot, c.Opt(a.Whitespace)) + // This allows for grouping similar properties together. Blanks + // around dot-separated parts are ignored, however, best practice is to + // not use any extraneous blanks. + keySeparatorDot = c.Seq(dropBlanks, a.Dot, dropBlanks) ) -func startKeyValuePair(p *parsekit.ParseAPI) { - switch { - case p.On(a.WhitespaceAndNewlines).Skip(): - p.RouteRepeat() - case p.On(a.Hash).Stay(): - p.RouteTo(startComment).ThenReturnHere() - case p.On(startOfKey).Stay(): - p.RouteTo(startKey) - default: - p.ExpectEndOfFile() +func (t *parser) startKeyValuePair(p *parse.API) { + for { + p.Accept(dropWhitespace) + switch { + case p.Peek(a.Hash): + p.Handle(t.startComment) + case p.Peek(startOfKey): + p.Handle(t.startKey, t.startAssignment, t.startValue) + default: + p.ExpectEndOfFile() + return + } + if p.IsStoppedOrInError() { + return + } } } -func startKey(p *parsekit.ParseAPI) { - p.Expects("a key name") - if p.On(bareKeyRune).Stay() { - p.RouteTo(startBareKey) - } -} - -func startBareKey(p *parsekit.ParseAPI) { - p.Expects("a bare key name") - if p.On(bareKey).Accept() { - p.EmitLiteral(ItemKey) - p.RouteTo(endOfKeyOrDot) - } -} - -func endOfKeyOrDot(p *parsekit.ParseAPI) { - if p.On(keySeparatorDot).Skip() { - p.Emit(ItemKeyDot, ".") - p.RouteTo(startKey) +func (t *parser) startKey(p *parse.API) { + if p.Peek(bareKeyRune) { + p.Handle(t.startBareKey) } else { - p.RouteTo(startAssignment) + p.Expected("a key name") } } -func startAssignment(p *parsekit.ParseAPI) { - p.Expects("a value assignment") - if p.On(keyAssignment).Skip() { - p.Emit(ItemAssignment, "=") - p.RouteTo(startValue) +func (t *parser) startBareKey(p *parse.API) { + if p.Accept(bareKey) { + t.emitCommand(cKey, p.Result().String()) + p.Handle(t.endOfKeyOrDot) + } else { + p.Expected("a bare key name") + } +} + +func (t *parser) endOfKeyOrDot(p *parse.API) { + if p.Accept(keySeparatorDot) { + t.emitCommand(cNewKeyLvl) + p.Handle(t.startKey) + } +} + +func (t *parser) startAssignment(p *parse.API) { + if p.Accept(keyAssignment) { + t.emitCommand(cAssign) + } else { + p.Expected("a value assignment") } } // Values must be of the following types: String, Integer, Float, Boolean, // Datetime, Array, or Inline Table. Unspecified values are invalid. -func startValue(p *parsekit.ParseAPI) { - p.Expects("a value") - if p.On(c.Any(a.SingleQuote, a.DoubleQuote)).Stay() { - p.RouteTo(startString) +func (t *parser) startValue(p *parse.API) { + if p.Peek(c.Any(a.SingleQuote, a.DoubleQuote)) { + p.Handle(t.startString) + } else { + p.Expected("a value") } } diff --git a/keyvaluepair_test.go b/keyvaluepair_test.go index cf79235..ea32761 100644 --- a/keyvaluepair_test.go +++ b/keyvaluepair_test.go @@ -1,46 +1,74 @@ -package parser_test +package parser -import ( - "testing" -) +import "testing" -func TestKeyWithoutAssignment(t *testing.T) { - err := "unexpected end of file (expected a value assignment)" - runStatesTs(t, []statesT{ - {"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"}, - {"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err}, - {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err}, - {"bare numbers", "0123456789", "[0123456789]", err}, - {"bare underscore", "_", "[_]", err}, - {"bare dash", "-", "[-]", err}, - {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err}, - }) +func TestKey(t *testing.T) { + for _, test := range []parseTest{ + {"", []string{`Error: unexpected end of file (expected a key name) at start of file`}}, + {"barekey", []string{`key("barekey")`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startKey, test) + } } -func TestDottedKey(t *testing.T) { - runStatesTs(t, []statesT{ - {"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"}, - {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`}, - }) +func TestBareKey(t *testing.T) { + for _, test := range []parseTest{ + {"", []string{`Error: unexpected end of file (expected a bare key name) at start of file`}}, + {"barekey", []string{`key("barekey")`}}, + {"1234567", []string{`key("1234567")`}}, + {"mix-12_34", []string{`key("mix-12_34")`}}, + {"-hey_good_Lookin123-", []string{`key("-hey_good_Lookin123-")`}}, + {"wrong!", []string{`key("wrong")`, `Error: unexpected input (expected end of file) at line 1, column 6`}}, + {"key1.", []string{`key("key1")`, `keydot()`, `Error: unexpected end of file (expected a key name) at line 1, column 6`}}, + {"key1.key2", []string{`key("key1")`, `keydot()`, `key("key2")`}}, + {"key . with . spaces", []string{`key("key")`, `keydot()`, `key("with")`, `keydot()`, `key("spaces")`}}, + {"key \t . \twithtabs\t . \tandspaces", []string{`key("key")`, `keydot()`, `key("withtabs")`, `keydot()`, `key("andspaces")`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startBareKey, test) + } } -func TestKeyWithAssignmentButNoValue(t *testing.T) { - err := "unexpected end of file (expected a value)" - runStatesTs(t, []statesT{ - {"bare", "a=", "[a]=", err}, - {"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"}, - {"bare dotted", "a.b=", "[a].[b]=", err}, - {"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, - }) +func TestAssignment(t *testing.T) { + for _, test := range []parseTest{ + {"", []string{`Error: unexpected end of file (expected a value assignment) at start of file`}}, + {"=", []string{`assign()`}}, + {" \t = \t ", []string{`assign()`}}, + {" \n = \n ", []string{`Error: unexpected input (expected a value assignment) at start of file`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startAssignment, test) + } } -func TestKeyWithValue(t *testing.T) { - runStatesTs(t, []statesT{ - {"with string value", - " -key- = \"value\" # nice\r\n", - "[-key-]=STR(value)#(nice)", ""}, - {"multiple string values", - "key = \"value1\"\nbare_key = \"value2\"\n# More coming up!\nbare-key = \"value3\"\n1234 = \"value4\"\n", - "[key]=STR(value1)[bare_key]=STR(value2)#(More coming up!)[bare-key]=STR(value3)[1234]=STR(value4)", ""}, - }) +func TestValue(t *testing.T) { + for _, test := range []parseTest{ + {``, []string{`Error: unexpected end of file (expected a value) at start of file`}}, + {`"basic string value"`, []string{`string("basic string value")`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startValue, test) + } +} + +func TestKeyValuePair(t *testing.T) { + for _, test := range []parseTest{ + {"", []string{}}, + {" ", []string{}}, + {" \t ", []string{}}, + {" key ", []string{`key("key")`, `Error: unexpected input (expected a value assignment) at line 1, column 5`}}, + {" key \t=", []string{`key("key")`, `assign()`, `Error: unexpected end of file (expected a value) at line 1, column 8`}}, + {" key \t =\t \"The Value\" \r\n", []string{`key("key")`, `assign()`, `string("The Value")`}}, + {"key1=\"value1\"key2=\"value2\"\r\nkey3=\"value3\"", []string{ + `key("key1")`, `assign()`, `string("value1")`, + `key("key2")`, `assign()`, `string("value2")`, + `key("key3")`, `assign()`, `string("value3")`}}, + {"with=\"comments\"# boring \nanother.cool =\"one\" \t # to the end\r\n", []string{ + `key("with")`, `assign()`, `string("comments")`, `comment("# boring ")`, + `key("another")`, `keydot()`, `key("cool")`, `assign()`, `string("one")`, `comment("# to the end")`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startKeyValuePair, test) + } } diff --git a/toml.go b/toml.go index 0732c1a..d13d073 100644 --- a/toml.go +++ b/toml.go @@ -1,23 +1,52 @@ package parser -import "git.makaay.nl/mauricem/go-parsekit" +import ( + "fmt" + "strings" -// Item types that are produced by this parser. + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +// Easy access to the parsekit.tokenize definitions. +var c, a, m, tok = tokenize.C, tokenize.A, tokenize.M, tokenize.T + +type cmdType string + +// Command types that are emitted by the parser. const ( - ItemComment parsekit.ItemType = iota // Comment string - ItemKey // Key of a key/value pair - ItemKeyDot // Dot for a dotted key - ItemAssignment // Value assignment coming up (=) - ItemString // A value of type string + cComment cmdType = "comment" // a # comment at the end of the line + cKey = "key" // set key name + cNewKeyLvl = "keydot" // new key stack level + cAssign = "assign" // assign a value + csetStrVal = "string" // set a string value ) -var ( - c, a, m = parsekit.C, parsekit.A, parsekit.M -) - -var parser = parsekit.NewParser(startKeyValuePair) - -// Parse starts the parser for the provided input string. -func Parse(input string) *parsekit.ParseRun { - return parser.Parse(input) +type parser struct { + commands []cmd + keyStack []string } + +type cmd struct { + command cmdType + args []interface{} +} + +func (cmd *cmd) String() string { + args := make([]string, len(cmd.args)) + for i, arg := range cmd.args { + args[i] = fmt.Sprintf("%q", arg) + } + return fmt.Sprintf("%s(%s)", cmd.command, strings.Join(args, ", ")) +} + +func (p *parser) emitCommand(command cmdType, args ...interface{}) { + c := cmd{command: command, args: args} + p.commands = append(p.commands, c) +} + +// Parse starts the parser for the provided input. +// func Parse(input interface{}) []cmd { +// p := &parser{} +// parse.New(p.startKeyValuePair)(input) +// return p.commands +// } diff --git a/toml_test.go b/toml_test.go index a1845f1..0437c2d 100644 --- a/toml_test.go +++ b/toml_test.go @@ -1,40 +1,34 @@ package parser_test -import ( - "testing" +// func TestEmptyInput(t *testing.T) { +// runStatesT(t, statesT{"empty string", "", "", ""}) +// } - toml "git.makaay.nl/mauricem/go-toml" -) +// func TestFullIncludesLineAndRowPosition(t *testing.T) { +// p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +") +// _, err := parseItemsToArray(p) +// actual := err.Error() +// expected := "unexpected input (expected end of file) at line 6, column 3" +// if actual != expected { +// t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual) +// } +// } -func TestEmptyInput(t *testing.T) { - runStatesT(t, statesT{"empty string", "", "", ""}) -} +// func TestInvalidUTF8Data(t *testing.T) { +// runStatesTs(t, []statesT{ +// {"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"}, +// {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"}, +// {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"}, +// {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"}, +// }) +// } -func TestErrorFullIncludesLineAndRowPosition(t *testing.T) { - p := toml.Parse("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +") - _, err := parseItemsToArray(p) - actual := err.ErrorFull() - expected := "unexpected character '+' (expected end of file) after line 6, column 3" - if actual != expected { - t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual) - } -} - -func TestInvalidUTF8Data(t *testing.T) { - runStatesTs(t, []statesT{ - {"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"}, - {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"}, - {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"}, - {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"}, - }) -} - -func TestWhiteSpaceAndNewlines(t *testing.T) { - runStatesTs(t, []statesT{ - {"space", " ", "", ""}, - {"tab", "\t", "", ""}, - {"newline", "\n", "", ""}, - {"all whitespace and newlines", " \t \t \r\n\n \n \t", "", ""}, - {"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"}, - }) -} +// func TestWhiteSpaceAndNewlines(t *testing.T) { +// runStatesTs(t, []statesT{ +// {"space", " ", "", ""}, +// {"tab", "\t", "", ""}, +// {"newline", "\n", "", ""}, +// {"all blanks and newlines", " \t \t \r\n\n \n \t", "", ""}, +// {"bare carriage return", "\r", "", "unexpected character '\\r' (expected end of file)"}, +// }) +// } diff --git a/value_string.go b/value_string.go index 172e8f1..4123a0a 100644 --- a/value_string.go +++ b/value_string.go @@ -1,6 +1,10 @@ package parser -import "git.makaay.nl/mauricem/go-parsekit" +import ( + "strings" + + "git.makaay.nl/mauricem/go-parsekit/parse" +) var ( // There are four ways to express strings: basic, multi-line basic, @@ -8,12 +12,12 @@ var ( // UTF-8 characters. * Multi-line basic strings are surrounded by three // quotation marks on each side. * Basic strings are surrounded by // quotation marks. - doubleQuote3 = c.Str(`"""`) + doubleQuote3 = a.Str(`"""`) // Any Unicode character may be used except those that must be escaped: // quotation mark, backslash, and the control characters (U+0000 to // U+001F, U+007F). - charThatMustBeEscaped = c.Any(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F')) + charThatMustBeEscaped = a.RuneRange('\u0000', '\u001F').Or(a.Rune('\u007F')) // For convenience, some popular characters have a compact escape sequence. // @@ -26,43 +30,21 @@ var ( // \\ - backslash (U+005C) // \uXXXX - unicode (U+XXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX) - validEscapeChar = c.Any(c.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash) + validEscapeChar = c.Any(a.Runes('b', 't', 'n', 'f', 'r'), a.DoubleQuote, a.Backslash) shortEscape = c.Seq(a.Backslash, validEscapeChar) - shortUTF8Escape = c.Seq(a.Backslash, c.Rune('u'), c.Rep(4, a.HexDigit)) - longUTF8Escape = c.Seq(a.Backslash, c.Rune('U'), c.Rep(8, a.HexDigit)) + shortUTF8Escape = c.Seq(a.Backslash, a.Rune('u'), a.HexDigit.Times(4)) + longUTF8Escape = c.Seq(a.Backslash, a.Rune('U'), a.HexDigit.Times(8)) validEscape = c.Any(shortEscape, shortUTF8Escape, longUTF8Escape) ) -func startString(p *parsekit.ParseAPI) { - p.Expects("a string value") +func (t *parser) startString(p *parse.API) { switch { - case p.On(doubleQuote3).Stay(): - p.RouteTo(startMultiLineBasicString) - case p.On(a.DoubleQuote).Stay(): - p.RouteTo(startBasicString) - } -} - -func startBasicString(p *parsekit.ParseAPI) { - p.Expects("a basic string") - if p.On(a.DoubleQuote).Skip() { - p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics) - } -} - -func parseBasicString(p *parsekit.ParseAPI) { - p.Expects("string contents") - switch { - case p.On(charThatMustBeEscaped).Stay(): - p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch) - case p.On(validEscape).Accept(): - p.RouteRepeat() - case p.On(a.Backslash).Stay(): - p.RouteReturn() - case p.On(a.DoubleQuote).Stay(): - p.RouteReturn() - case p.On(a.AnyRune).Accept(): - p.RouteRepeat() + case p.Peek(doubleQuote3): + p.Handle(t.startMultiLineBasicString) + case p.Peek(a.DoubleQuote): + p.Handle(t.startBasicString) + default: + p.Expected("a string value") } } @@ -71,20 +53,41 @@ func parseBasicString(p *parsekit.ParseAPI) { // * No additional \escape sequences are allowed. What the spec say about this: // "All other escape sequences [..] are reserved and, if used, TOML should // produce an error."" -func basicStringSpecifics(p *parsekit.ParseAPI) { - p.Expects("string contents") - switch { - case p.On(a.DoubleQuote).Skip(): - p.EmitInterpreted(ItemString) - p.RouteTo(startKeyValuePair) - case p.On(a.Backslash).Stay(): - p.EmitError("invalid escape sequence") +func (t *parser) startBasicString(p *parse.API) { + if !p.Accept(a.DoubleQuote) { + p.Expected("a basic string") + return + } + sb := &strings.Builder{} + for { + switch { + case p.Peek(charThatMustBeEscaped): + p.Error("invalid character in basic string: %q (must be escaped)", p.Result().Rune(0)) + return + case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))): + sb.WriteString(p.Result().Value(0).(string)) + case p.Peek(a.Backslash): + p.Error("invalid escape sequence") + return + case p.Accept(m.Drop(a.DoubleQuote)): + t.emitCommand(csetStrVal, sb.String()) + return + case p.Accept(a.ValidRune): + sb.WriteString(p.Result().String()) + case p.Peek(a.InvalidRune): + p.Error("invalid UTF8 rune") + return + default: + p.Expected("end of string") + return + } } } -func startMultiLineBasicString(p *parsekit.ParseAPI) { - p.Expects("a multi-line basic string") - if p.On(doubleQuote3).Skip() { - p.EmitError("not yet implemented") +func (t *parser) startMultiLineBasicString(p *parse.API) { + if p.Accept(doubleQuote3) { + p.Error("not yet implemented") + } else { + p.Expected("a multi-line basic string") } } diff --git a/value_string_test.go b/value_string_test.go index b3ff934..7b4da94 100644 --- a/value_string_test.go +++ b/value_string_test.go @@ -1,73 +1,49 @@ -package parser_test +package parser import ( "fmt" "testing" ) -func TestUnterminatedBasicString(t *testing.T) { - runStatesT(t, statesT{ - "missing closing quote", `a="value`, "[a]=", - "unexpected end of file (expected string contents)"}) -} - -func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { - runStatesTs(t, []statesT{ - {"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`}, - {"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`}, - {"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`}, - }) - - // No need to write all test cases for disallowed characters by hand. - for i := 0x00; i <= 0x1F; i++ { - name := fmt.Sprintf("control character %x", rune(i)) - runStatesT( - t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", - fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))}) +func TestString(t *testing.T) { + for _, test := range []parseTest{ + {``, []string{`Error: unexpected end of file (expected a string value) at start of file`}}, + {`no start quote"`, []string{`Error: unexpected input (expected a string value) at start of file`}}, + {`"simple string"`, []string{`string("simple string")`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startString, test) } } -func TestEmptyBasicString(t *testing.T) { - runStatesTs(t, []statesT{ - {"empty", `a=""`, "[a]=STR()", ""}, - {"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""}, - {"with whitespaces", ` a = "" `, "[a]=STR()", ""}, - {"dotted", ` a.b = "" `, "[a].[b]=STR()", ""}, - {"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""}, - {"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""}, - }) -} - func TestBasicString(t *testing.T) { - runStatesTs(t, []statesT{ - {"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""}, - {"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""}, - }) + for _, test := range []parseTest{ + {``, []string{`Error: unexpected end of file (expected a basic string) at start of file`}}, + {`no start quote"`, []string{`Error: unexpected input (expected a basic string) at start of file`}}, + {`"no end quote`, []string{`Error: unexpected end of file (expected end of string) at line 1, column 14`}}, + {`""`, []string{`string("")`}}, + {`"simple string"`, []string{`string("simple string")`}}, + {`"with\tsome\r\nvalid escapes\b"`, []string{`string("with\tsome\r\nvalid escapes\b")`}}, + {`"with an \invalid escape"`, []string{`Error: invalid escape sequence at line 1, column 10`}}, + {`"A cool UTF8 ƃuıɹʇs"`, []string{`string("A cool UTF8 ƃuıɹʇs")`}}, + {`"A string with UTF8 escape \u2318"`, []string{`string("A string with UTF8 escape ⌘")`}}, + {"\"Invalid character for UTF \xcd\"", []string{`Error: invalid UTF8 rune at line 1, column 28`}}, + {"\"Character that mus\t be escaped\"", []string{`Error: invalid character in basic string: '\t' (must be escaped) at line 1, column 20`}}, + {"\"Character that must be escaped \u0000\"", []string{`Error: invalid character in basic string: '\x00' (must be escaped) at line 1, column 33`}}, + {"\"Character that must be escaped \x7f\"", []string{`Error: invalid character in basic string: '\u007f' (must be escaped) at line 1, column 33`}}, + } { + p := &parser{} + testParseHandler(t, p, p.startBasicString, test) + } } -func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { - err := "invalid escape sequence" - runStatesTs(t, []statesT{ - {"invalid escape sequence", `a="\x"`, "[a]=", err}, - {"too short \\u UTF8", `a="\u123"`, "[a]=", err}, - {"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err}, - {"too short \\U UTF8", `a="\U1234567"`, "[a]=", err}, - {"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err}, - }) -} - -func TestBasicStringEscapes(t *testing.T) { - runStatesTs(t, []statesT{ - {"bell escape", `_="\b"`, "[_]=STR(\b)", ""}, - {"tab escape", `_="\t"`, "[_]=STR(\t)", ""}, - {"newline escape", `_="\n"`, "[_]=STR(\n)", ""}, - {"form feed escape", `_="\f"`, "[_]=STR(\f)", ""}, - {"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""}, - {"double quote escape", `_="\""`, `[_]=STR(")`, ""}, - {"backslash escape", `_="\\"`, `[_]=STR(\)`, ""}, - {"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""}, - {"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""}, - {"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""}, - {"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""}, - }) +func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { + // A quick check for almost all characters that must be escaped. + // The missing one (\x7f) is covered in the previous test. + for i := 0x00; i <= 0x1F; i++ { + p := &parser{} + input := fmt.Sprintf(`"%c"`, rune(i)) + expected := fmt.Sprintf(`Error: invalid character in basic string: %q (must be escaped) at line 1, column 2`, rune(i)) + testParseHandler(t, p, p.startString, parseTest{input, []string{expected}}) + } }