package lexer_test

import (
	"fmt"
	"strings"
	"testing"

	"github.com/mmakaay/toml/lexer"
	"github.com/mmakaay/toml/parser"
)

// TestErrorsIncludeLineAndRowPosition checks that the error produced for an
// invalid UTF8 byte carries the row and column at which it was found.
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
	_, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
	// Without this guard, a missing error would surface as a nil pointer
	// panic on the calls below instead of as a readable test failure.
	if err == nil {
		t.Fatal("Expected an error for invalid UTF8 input, but no error occurred")
	}
	t.Logf("Got error: %s", err.Error())
	if err.Row != 4 {
		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
	}
	if err.Column != 6 {
		t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6)
	}
}

// TestEmptyInput checks that empty input yields no items and no error.
func TestEmptyInput(t *testing.T) {
	runStatesT(t, statesT{"empty string", "", "", ""})
}

// TestInvalidUtf8Data checks that an invalid UTF8 byte is reported as an
// error at various positions in the input.
func TestInvalidUtf8Data(t *testing.T) {
	runStatesTs(t, []statesT{
		{"inside comment", "# \xbc", "", "invalid UTF8 character"},
		{"bare key 1", "\xbc", "", "invalid UTF8 character"},
		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
		{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
	})
}

// TestWhiteSpaceAndNewlines checks that input consisting only of whitespace
// and line endings yields no items and no error.
func TestWhiteSpaceAndNewlines(t *testing.T) {
	runStatesTs(t, []statesT{
		{"space", " ", "", ""},
		{"tab", "\t", "", ""},
		{"newline", "\n", "", ""},
		{"carriage return", "\r", "", ""},
		{"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""},
	})
}

// TestComments checks the comment items produced for various comment inputs.
func TestComments(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty comment", "#", "#()", ""},
		{"empty comment with spaces", "# \t \r\n", `#()`, ""},
		{"basic comment", "#chicken", "#(chicken)", ""},
		{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
		{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
		{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
		{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
		{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
		{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
		{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
	})
}

// TestKeyWithoutAssignment checks that a key that is not followed by an
// assignment produces the key items plus an end of file error.
func TestKeyWithoutAssignment(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare with whitespace", " a ", "[a]", err},
		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
		{"bare numbers", "0123456789", "[0123456789]", err},
		{"bare underscore", "_", "[_]", err},
		{"bare dash", "-", "[-]", err},
		{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
		{"bare dotted", "a._.c", "[a].[_].[c]", err},
		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
	})
}

// TestKeyWithAssignmentButNoValue checks that a key and assignment without a
// value produce the key and assignment items plus an error.
func TestKeyWithAssignmentButNoValue(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare", "a=", "[a]=", err},
		{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
		{"bare dotted", "a.b=", "[a].[b]=", err},
		{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
	})
}

// TestUnterminatedBasicString checks that a basic string without a closing
// quote is reported as an error.
func TestUnterminatedBasicString(t *testing.T) {
	runStatesT(t, statesT{
		"missing closing quote", `a="value`, "[a]=",
		"Unexpected end of file (expected basic string token)"})
}

// TestBasicStringWithUnescapedControlCharacters checks that unescaped control
// characters inside a basic string are reported as errors.
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
	runStatesTs(t, []statesT{
		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
	})

	// No need to write all test cases for disallowed characters by hand.
	for i := 0x00; i <= 0x1F; i++ {
		name := fmt.Sprintf("control character %x", rune(i))
		runStatesT(
			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
				fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
	}
}

// TestEmptyBasicString checks the items produced for empty basic strings.
func TestEmptyBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty", `a=""`, "[a]=STR()", ""},
		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
	})
}

// TestBasicString checks the items produced for non-empty basic strings.
func TestBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
		{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
	})
}

// TestBasicStringWithInvalidEscapeSequence checks that unknown or malformed
// escape sequences inside a basic string are reported as errors.
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
	err := "Invalid escape sequence in basic string"
	runStatesTs(t, []statesT{
		{"invalid escape sequence", `a="\x"`, "[a]=", err},
		{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
	})
}

// TestBasicStringEscapes checks that the supported escape sequences inside a
// basic string are translated into the characters they represent.
func TestBasicStringEscapes(t *testing.T) {
	runStatesTs(t, []statesT{
		// \b is the backspace character (U+0008), not the bell (U+0007).
		{"backspace escape", `_="\b"`, "[_]=STR(\b)", ""},
		{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
		{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
		{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
		{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
		{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
		{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
		{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
		{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
		{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
		{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
	})
}

// statesT describes a single table-driven test case.
type statesT struct {
	// name identifies the test case in failure messages.
	name string
	// in is the input that is passed to lexer.NewParser.
	in string
	// out is the expected output: either a string holding the concatenated
	// item representations, or a []string holding one representation per
	// item. Values of any other type are not checked.
	out interface{}
	// err is the expected error message, or "" when no error is expected.
	err string
}

// runStatesTs runs every test case from tests through runStatesT.
func runStatesTs(t *testing.T, tests []statesT) {
	t.Helper()
	for _, c := range tests {
		runStatesT(t, c)
	}
}

// runStatesT feeds c.in to the lexer and compares the resulting error and
// items against the expectations in c. Mismatches are reported through
// t.Errorf, so all checks for a case are always evaluated.
func runStatesT(t *testing.T, c statesT) {
	t.Helper()
	l, err := lexer.NewParser(c.in).ToArray()

	switch {
	case err == nil && c.err != "":
		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
	case err != nil && c.err == "":
		t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
	case err != nil && err.Error() != c.err:
		t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
	}

	switch expected := c.out.(type) {
	case []string:
		if len(expected) != len(l) {
			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
		}
		// Only compare the items that exist on both sides. The length
		// mismatch has been reported above and indexing past the end of l
		// would panic.
		for i, e := range expected {
			if i >= len(l) {
				break
			}
			if v := ParserItemToString(l[i]); v != e {
				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
			}
		}
	case string:
		var b strings.Builder
		for _, v := range l {
			b.WriteString(ParserItemToString(v))
		}
		if actual := b.String(); actual != expected {
			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
		}
	}
}

// ParserItemToString returns a string representation of the
// parser.Item. This is used for unit testing purposes.
// It panics for item types that have no representation defined here.
func ParserItemToString(i parser.Item) string {
	switch i.Type {
	case lexer.ItemComment:
		return fmt.Sprintf("#(%s)", i.Value)
	case lexer.ItemKey:
		return fmt.Sprintf("[%s]", i.Value)
	case lexer.ItemString:
		return fmt.Sprintf("STR(%s)", i.Value)
	case lexer.ItemKeyDot:
		return "."
	case lexer.ItemAssignment:
		return "="
	default:
		panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
	}
}