// go-toml/lexer/states_test.go

package lexer_test

import (
	"fmt"
	"strings"
	"testing"

	"github.com/mmakaay/toml/lexer"
)

// TestErrorsIncludeLineAndRowPosition lexes a multi-line input that ends in
// the invalid UTF8 byte \xbc and checks the position reported on the
// resulting error: LineNr is expected to be 4 and LinePos is expected to be 6.
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
	_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
	// Without an error there is no position to inspect. Stop here instead of
	// dereferencing a nil error in the checks below.
	if err == nil {
		t.Fatal("Expected an error for input with invalid UTF8 data, but no error occurred")
	}
	t.Logf("Got error: %s", err.Error())
	if err.LineNr != 4 {
		t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
	}
	if err.LinePos != 6 {
		t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
	}
}

func TestInvalidUtf8Data(t *testing.T) {
	runStatesTs(t, []statesT{
		{"inside comment", "# \xbc", "", "invalid UTF8 character"},
		{"bare key 1", "\xbc", "", "invalid UTF8 character"},
		{"bare key 2", "key\xbc", "", "invalid UTF8 character"},
		{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
	})
}

func TestEmptyInput(t *testing.T) {
	runStatesT(t, statesT{"empty string", "", "", ""})
}

func TestWhiteSpaceAndNewlines(t *testing.T) {
	runStatesTs(t, []statesT{
		{"space", " ", "", ""},
		{"tab", "\t", "", ""},
		{"newline", "\n", "", ""},
		{"carriage return", "\r", "", ""},
		{"all whitespace and newlines", " \t \t \r\r\n\n  \n \t", "", ""},
	})
}

func TestComments(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty comment", "#", "#()", ""},
		{"empty comment with spaces", "# \t \r\n", `#()`, ""},
		{"basic comment", "#chicken", "#(chicken)", ""},
		{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
		{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
		{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
		{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
		{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
	})
}

func TestKeyWithoutAssignment(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare with whitespace", " a ", "[a]", err},
		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
		{"bare numbers", "0123456789", "", err},
		{"bare underscore", "_", "", err},
		{"bare dash", "-", "", err},
		{"bare big mix", "-hey_good_Lookin123-", "", err},
		{"bare dotted", "a._.c", "[a].[_].", err},
		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
	})
}

func TestKeyWithAssignmentButNoValue(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare", "a=", "[a]=", err},
		{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
		{"bare dotted", "a.b=", "[a].[b]=", err},
		{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
	})
}

func TestUnterminatedBasicString(t *testing.T) {
	runStatesT(t, statesT{
		"missing closing quote", `a="value`, "[a]=",
		"Unexpected end of file (expected basic string token)"})
}

func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
	runStatesTs(t, []statesT{
		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`},
		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`},
		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`},
	})

	// No need to write all test cases for disallowed characters by hand.
	for i := 0x00; i <= 0x1F; i++ {
		name := fmt.Sprintf("control character %x", rune(i))
		runStatesT(
			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
				fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))})
	}
}

func TestEmptyBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty", `a=""`, "[a]=STR()", ""},
		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
	})
}

func TestBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
		{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
	})
}

func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
	err := "Invalid escape sequence in basic string"
	runStatesTs(t, []statesT{
		{"invalid escape sequence", `a="\x"`, "[a]=", err},
		{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
	})
}

// TestBasicStringEscapes checks that escape sequences inside a basic string
// are translated to the character they stand for. The input column uses raw
// string literals, so the lexer sees the backslash sequence itself; the output
// column holds the resulting character.
func TestBasicStringEscapes(t *testing.T) {
	runStatesTs(t, []statesT{
		// \b is backspace (U+0008), not bell (U+0007).
		{"backspace escape", `_="\b"`, "[_]=STR(\b)", ""},
		{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
		{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
		{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
		{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
		{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
		{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
		{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
		{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
		{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
		{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
	})
}

// statesT describes a single lexer test case as consumed by runStatesT.
type statesT struct {
	// name identifies the case in failure messages; in is the text that is
	// handed to the lexer.
	name, in string
	// out is the expected lexer output. A []string is compared item by item
	// against the String() form of each lexer item; a string is compared
	// against the concatenation of those String() forms. A value of any other
	// type (including nil) skips the output check.
	out interface{}
	// err is the expected error message; the empty string means that no error
	// is expected.
	err string
}

// runStatesTs runs each of the provided test cases through runStatesT, in the
// order in which they appear in tests.
func runStatesTs(t *testing.T, tests []statesT) {
	for i := 0; i < len(tests); i++ {
		runStatesT(t, tests[i])
	}
}

// runStatesT lexes c.in and compares the result against the expectations
// stored in c. All mismatches are reported through t.Errorf, prefixed with
// the name of the test case.
//
// Error check: an error is expected if and only if c.err is not empty, and
// when one is expected its message must be equal to c.err.
//
// Output check, depending on the dynamic type of c.out:
//   - []string: the number of lexer items must match and the String() form of
//     each item must be equal to the element at the same index;
//   - string: the concatenated String() forms of all items must be equal to it;
//   - anything else: the output is not checked.
func runStatesT(t *testing.T, c statesT) {
	l, err := lexer.Lex(c.in).ToArray()
	switch {
	case err == nil && c.err != "":
		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
	case err != nil && c.err == "":
		t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
	case err != nil && err.Error() != c.err:
		t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
	}

	switch expected := c.out.(type) {
	case []string:
		if len(expected) != len(l) {
			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
		}
		// Compare only the part that both slices have in common. The length
		// mismatch was reported above; indexing l beyond its length would
		// panic and hide that report.
		n := len(expected)
		if len(l) < n {
			n = len(l)
		}
		for i, e := range expected[:n] {
			if l[i].String() != e {
				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i])
			}
		}
	case string:
		// Length 0 with capacity len(l): append must fill the slice from the
		// start instead of after len(l) empty placeholder strings.
		a := make([]string, 0, len(l))
		for _, v := range l {
			a = append(a, v.String())
		}
		actual := strings.Join(a, "")
		if actual != expected {
			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
		}
	}
}