Code cleanup and refactoring run, covering both the functional code and the tests.

2019-05-16 14:17:06 +00:00 · 2019-05-16 14:17:06 +00:00 · cbc4f04179
parent 6636a7a672
commit cbc4f04179
7 changed files with 435 additions and 334 deletions
--- a/lexer/items.go
+++ b/lexer/items.go
@ -7,12 +7,13 @@ type itemType int
 // Definition of all the lexer item types for the TOML lexer.
 const (
-	ItemError   itemType = iota // An error occurred
+	ItemError      itemType = iota // An error occurred
-	ItemEOF                     // End of input reached
+	ItemEOF                        // End of input reached
-	ItemComment                 // Comment string, starts with # till en of line
+	ItemComment                    // Comment string, starts with # till end of line
-	ItemKey                     // Key of a key/value pair
+	ItemKey                        // Key of a key/value pair
-	ItemKeyDot                  // Dot for a dotted key
+	ItemKeyDot                     // Dot for a dotted key
-	ItemString                  // A value of type string
+	ItemAssignment                 // Value assignment coming up (=)
 	ItemString                     // A value of type string
 )
 // Item represents a lexer item returned from the scanner.
@ -26,26 +27,26 @@ func (i Item) String() string {
 	switch i.Type {
 	case ItemEOF:
 		return "EOF"
-	case ItemError:
+	case ItemKey:
-		return "Error: " + i.Value
+		return fmt.Sprintf("[%s]", i.Value)
 	case ItemKeyDot:
 		return "."
 	case ItemAssignment:
 		return "="
 	}
-	return fmt.Sprintf("%s(%q)", i.Type, i.Value)
+	return fmt.Sprintf("%s(%s)", i.Type, i.Value)
 }
 // String returns a string representation of the lexer item type.
 func (i itemType) String() string {
 	switch i {
 	case ItemError:
-		return "Error"
+		return "ERR"
 	case ItemComment:
-		return "Comment"
+		return "#"
 	case ItemKey:
 		return "Key"
 	case ItemKeyDot:
 		return "KeyDot"
 	case ItemString:
-		return "String"
+		return "STR"
 	default:
-		return fmt.Sprintf("<type id %d>", i)
+		panic(fmt.Sprintf("No translation available for type id %d", i))
 	}
 }
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@ -12,7 +12,6 @@ type Lexer struct {
 	input    string       // the scanned input string
 	state    stateFn      // a function that handles the current state
 	stack    []stateFn    // state function stack, for nested parsing
 	start    int          // start position of the currently scanned item
 	pos      int          // current scanning position in the input
 	width    int          // width of the last rune read, for supporting backup()
 	buffer   StringBuffer // an efficient buffer, used to build string values
@ -99,29 +98,44 @@ func (l *Lexer) popState() stateFn {
 	return tail
 }
-// TODO niet meer nodig?
+// atEndOfFile returns true when there is no more data available in the input.
 // getAcceptedString yields the part of the input that lies between the
 // start of the current item and the current scanning position.
 func (l *Lexer) getAcceptedString() string {
 	from, upto := l.start, l.pos
 	return l.input[from:upto]
 }
 // emit sends a scanned item of type t with value v to the client over the
 // items channel, then moves the item start up to the current position.
 func (l *Lexer) emit(t itemType, v string) {
 	item := Item{t, v}
 	l.items <- item
 	l.start = l.pos
 }
 // TODO no longer needed with the string builder?
 // ignore skips over the pending input before the current position.
 // It does so by moving the item start marker up to the scanning position.
 func (l *Lexer) ignore() {
 	l.start = l.pos
 }
 // atEndOfFile reports whether the scanning position has reached (or passed)
 // the end of the input, meaning that no more data is available.
 func (l *Lexer) atEndOfFile() bool {
 	remaining := len(l.input) - l.pos
 	return remaining <= 0
 }
 // emit sends a lexer item of type t, carrying the string s, to the client
 // over the items channel. Afterwards Reset is called on the string buffer.
 func (l *Lexer) emit(t itemType, s string) {
 	item := Item{t, s}
 	l.items <- item
 	l.buffer.Reset()
 }
 // emitLiteral emits a lexer item of type t whose value is the accumulated
 // string buffer content, taken as a literal string.
 func (l *Lexer) emitLiteral(t itemType) {
 	literal := l.buffer.AsLiteralString()
 	l.emit(t, literal)
 }
 // emitTrimmedLiteral emits a lexer item of type t whose value is the
 // accumulated string buffer content, taken as a literal string and
 // stripped of leading and trailing whitespace.
 func (l *Lexer) emitTrimmedLiteral(t itemType) {
 	literal := l.buffer.AsLiteralString()
 	trimmed := strings.TrimSpace(literal)
 	l.emit(t, trimmed)
 }
 // emitInterpreted emits a lexer item of type t whose value is the
 // accumulated string buffer content as an interpreted string (handling
 // escape codes like \n, \t, \uXXXX, etc.)
 // When the string buffer cannot be interpreted, the error from the buffer
 // is returned and no item is emitted.
 func (l *Lexer) emitInterpreted(t itemType) error {
 	interpreted, err := l.buffer.AsInterpretedString()
 	if err == nil {
 		l.emit(t, interpreted)
 		return nil
 	}
 	return err
 }
 // backup steps back one rune
 // Can be called only once per call of next.
 func (l *Lexer) backup() {
@ -129,16 +143,119 @@ func (l *Lexer) backup() {
 }
 // peek returns but does not advance to the next rune(s) in the input.
-func (l *Lexer) peek() rune {
+// Returns the rune, its width and a boolean. The boolean will be false in case
-	r := l.next()
+// no upcoming rune can be peeked (end of data or invalid UTF8 character).
-	l.backup()
+func (l *Lexer) peek() (rune, int, bool) {
-	return r
+	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 	switch {
 	case r == utf8.RuneError:
 		return utf8.RuneError, w, false
 	default:
 		return r, w, true
 	}
 }
-// TODO nog nodig met stringbuffer?
+// peekMulti takes a peek at multiple upcoming runes in the input.
-// accept consumes the next rune if it's from the valid set of runes.
+// Returns a slice of runes and a boolean. The boolean will be false in case
 // less upcoming runes can be peeked than the requested amount
 // (end of data or invalid UTF8 character).
 func (l *Lexer) peekMulti(amount int) ([]rune, bool) {
 	var peeked []rune
 	// offset is the byte position of the next rune to decode. The lexer's
 	// own position is left untouched, since this is only a peek.
 	offset := l.pos
 	for len(peeked) < amount {
 		r, w := utf8.DecodeRuneInString(l.input[offset:])
 		// utf8.RuneError is reported for end of data and for invalid
 		// UTF8 input. Note that the comparison also matches a literal
 		// U+FFFD character in the input.
 		if r == utf8.RuneError {
 			return peeked, false
 		}
 		peeked = append(peeked, r)
 		offset += w
 	}
 	return peeked, true
 }
 // acceptNext reads the next rune from the input and writes it to the
 // string buffer. When next() reports end of file or invalid UTF8 data,
 // nothing is written and false is returned.
 func (l *Lexer) acceptNext() bool {
 	switch r := l.next(); r {
 	case endOfFile, utf8.RuneError:
 		return false
 	default:
 		l.buffer.WriteRune(r)
 		return true
 	}
 }
 // acceptFrom reads the next rune from the input and, when it is one of
 // the provided runes, writes it to the string buffer and returns true.
 // On a mismatch the read is undone through backup() and false is returned.
 func (l *Lexer) acceptFrom(runes string) bool {
 	r := l.next()
 	if !strings.ContainsRune(runes, r) {
 		l.backup()
 		return false
 	}
 	l.buffer.WriteRune(r)
 	return true
 }
 // acceptRun keeps adding runes from the input to the string buffer for
 // as long as they match the provided runes. If no runes were added at
 // all, false is returned.
 func (l *Lexer) acceptRun(runes string) bool {
 	if !l.acceptFrom(runes) {
 		return false
 	}
 	// At least one rune was accepted; consume the remainder of the run.
 	for l.acceptFrom(runes) {
 	}
 	return true
 }
 // TODO meh... ugly rune.
 // endOfFile is the sentinel rune that next() returns when no more
 // input can be read.
 var endOfFile rune = -1
 // next reads the next rune from the input and advances the position.
 // At the end of the input endOfFile is returned, and for invalid UTF8 data
 // utf8.RuneError is returned. In both of these cases the position is not
 // moved and the recorded width is zero.
 func (l *Lexer) next() rune {
 	l.width = 0
 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 	if r != utf8.RuneError {
 		l.width = w
 		l.pos += w
 		return r
 	}
 	// A zero width tells end of data apart from undecodable data.
 	if w == 0 {
 		return endOfFile
 	}
 	return utf8.RuneError
 }
 // skip peeks at the upcoming rune and, when it is in the set of accepted
 // runes, moves the position past it without storing it.
 // Returns true when a rune was skipped.
 func (l *Lexer) skip(runes string) bool {
 	r, w, _ := l.peek()
 	if !strings.ContainsRune(runes, r) {
 		return false
 	}
 	l.pos += w
 	return true
 }
 // skipRun keeps skipping runes for as long as they are in the set of
 // accepted runes. Returns true when one or more runes were skipped.
 func (l *Lexer) skipRun(runes string) bool {
 	if !l.skip(runes) {
 		return false
 	}
 	// At least one rune was skipped; consume the remainder of the run.
 	for l.skip(runes) {
 	}
 	return true
 }
 // accept adds the next rune to the string buffer and returns true if it's
 // from the valid set of runes. Otherwise false is returned.
 func (l *Lexer) accept(runes string) bool {
-	if strings.IndexRune(runes, l.next()) >= 0 {
+	r := l.next()
 	if strings.IndexRune(runes, r) >= 0 {
 		return true
 	}
 	l.backup()
@ -187,34 +304,10 @@ func (l *Lexer) acceptWhile(runes string) bool {
 	return accepted
 }
 // skip consumes a run of runes from the set of accepted runes and, when
 // anything was consumed, drops it from the pending input.
 func (l *Lexer) skip(runes string) {
 	if !l.acceptWhile(runes) {
 		return
 	}
 	l.ignore()
 }
 // skipUntil skips a run of runes, until a rune from the set of
 // runes or EOF is reached.
 func (l *Lexer) skipUntil(runes string) {
-	if l.acceptUntil(runes) {
+	l.acceptUntil(runes)
 		l.ignore()
 	}
 }
 // TODO meh... ugly rune.
 // endOfFile is the sentinel rune that next() returns when the end of
 // the input has been reached.
 var endOfFile rune = -1
 // next decodes the rune at the current position, records its width and
 // moves the position past it. At the end of the input, the width is set
 // to zero and endOfFile is returned instead.
 func (l *Lexer) next() rune {
 	if !l.atEndOfFile() {
 		r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 		l.width = w
 		l.pos += w
 		return r
 	}
 	l.width = 0
 	return endOfFile // TODO phase out this bizarro rune?
 }
 // error returns an error token and terminates the scan
@ -227,15 +320,16 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
 	return nil
 }
-func (l *Lexer) unexpectedTokenError(expected string) stateFn {
+func (l *Lexer) unexpectedInputError(expected string) stateFn {
 	var actual string
 	switch {
-	case l.peek() == endOfFile:
+	case l.atEndOfFile(): // TODO maybe not hit anymore after refactoring?
 		actual = "end of file"
-	case !utf8.ValidString(l.input[l.start:]):
+	case !utf8.ValidString(l.input[l.pos:]):
 		actual = "non-UTF8 data"
 	default:
-		actual = fmt.Sprintf("token '%c'", l.peek())
+		r, _, _ := l.peek()
 		actual = fmt.Sprintf("token '%c'", r)
 	}
 	return l.errorf("Unexpected %s (expected %s)", actual, expected)
 }
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@ -1,175 +0,0 @@
 package lexer_test
 import (
 	"fmt"
 	"testing"
 	"github.com/mmakaay/toml/lexer"
 )
 // TestInvalidUtf8Data checks that input which is not valid UTF8 produces
 // no lexer items and fails with the non-UTF8 data error.
 func TestInvalidUtf8Data(t *testing.T) {
 	const input = "\xbc"
 	const wantErr = "Unexpected non-UTF8 data (expected end of file)"
 	assertFailureAndCheck(t, input, []string{}, wantErr)
 }
 // TestEmptyInput checks that lexing an empty string succeeds without
 // producing any lexer items.
 func TestEmptyInput(t *testing.T) {
 	noItems := []string{}
 	assertSuccessAndCheck(t, "", noItems)
 }
 // TestWhiteSpace checks that spaces and tabs on their own are lexed
 // successfully without producing any lexer items.
 func TestWhiteSpace(t *testing.T) {
 	for _, input := range []string{" ", "\t", " \t \t "} {
 		assertSuccessAndCheck(t, input, []string{})
 	}
 }
 // TestWhiteSpaceAndNewlines checks that newlines, also when mixed with
 // other whitespace, are lexed without producing any lexer items.
 func TestWhiteSpaceAndNewlines(t *testing.T) {
 	for _, input := range []string{"\n", "\n \t\r\n"} {
 		assertSuccessAndCheck(t, input, []string{})
 	}
 }
 // TestComments checks the Comment items that are produced for various
 // forms of comment input.
 func TestComments(t *testing.T) {
 	cases := []struct {
 		input    string
 		expected []string
 	}{
 		{"#", []string{`Comment("#")`}},
 		{" \t \t #", []string{`Comment("#")`}},
 		{" \t \t # not empty", []string{`Comment("# not empty")`}},
 		{" \t \t # not empty\r\r\r\n", []string{`Comment("# not empty")`}},
 		{"\n \t\r\n# AAP\r\n", []string{`Comment("# AAP")`}},
 		{
 			"# two lines\n# of comments\n",
 			[]string{`Comment("# two lines")`, `Comment("# of comments")`},
 		},
 		{
 			`# \tcomment\nwith escape-y chars`,
 			[]string{`Comment("# \\tcomment\\nwith escape-y chars")`},
 		},
 	}
 	for _, c := range cases {
 		assertSuccessAndCheck(t, c.input, c.expected)
 	}
 }
 // TestBareKeyWithoutValue checks that a bare key that is not followed by
 // an assignment yields the Key item and then fails at end of file.
 func TestBareKeyWithoutValue(t *testing.T) {
 	err := "Unexpected end of file (expected an '=' value assignment)"
 	cases := []struct {
 		input string
 		item  string
 	}{
 		{"a", `Key("a")`},
 		{"_", `Key("_")`},
 		{" a", `Key("a")`},
 		{" a ", `Key("a")`},
 		{"ab", `Key("ab")`},
 		{"Ab", `Key("Ab")`},
 		{"Ab1", `Key("Ab1")`},
 		{"_Ab1", `Key("_Ab1")`},
 		{"_-Ab1", `Key("_-Ab1")`},
 		{"_-Ab1_this-is_GOOD987", `Key("_-Ab1_this-is_GOOD987")`},
 	}
 	for _, c := range cases {
 		assertFailureAndCheck(t, c.input, []string{c.item}, err)
 	}
 }
 // TestDottedKey checks that dotted keys, with or without whitespace
 // around the dot, yield Key and KeyDot items before failing on the
 // missing assignment.
 func TestDottedKey(t *testing.T) {
 	err := "Unexpected end of file (expected an '=' value assignment)"
 	for _, input := range []string{"a.b", " a .\t\t b\t "} {
 		expected := []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}
 		assertFailureAndCheck(t, input, expected, err)
 	}
 }
 // TestKeyWithAssignmentButNoValue checks that a key with an assignment
 // but without a value yields the Key item and then fails at end of file.
 func TestKeyWithAssignmentButNoValue(t *testing.T) {
 	const input = "  some_cool_key   =  "
 	err := "Unexpected end of file (expected a value)"
 	expected := []string{`Key("some_cool_key")`}
 	assertFailureAndCheck(t, input, expected, err)
 }
 // TestUnterminatedBasicString checks that a basic string without a
 // closing quote fails with an end of file error.
 func TestUnterminatedBasicString(t *testing.T) {
 	const input = `key="value`
 	const wantErr = "Unexpected end of file (expected basic string token)"
 	assertFailure(t, input, wantErr)
 }
 // TestBasicStringWithNewline checks that a basic string containing
 // unescaped newlines makes the lexer fail.
 // NOTE(review): the expected error text looks like a placeholder - confirm.
 func TestBasicStringWithNewline(t *testing.T) {
 	const input = "key=\"value\nwith\nnewlines\""
 	const wantErr = "ohoh"
 	assertFailure(t, input, wantErr)
 }
 // TestEmptyBasicString checks the items produced for empty basic string
 // values in a number of surrounding contexts.
 func TestEmptyBasicString(t *testing.T) {
 	cases := []struct {
 		input    string
 		expected []string
 	}{
 		{`a=""`, []string{`Key("a")`, `String("")`}},
 		{`a=""#hi`, []string{`Key("a")`, `String("")`, `Comment("#hi")`}},
 		{`a = ""`, []string{`Key("a")`, `String("")`}},
 		{`a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `String("")`}},
 		{`a=""b=""`, []string{`Key("a")`, `String("")`, `Key("b")`, `String("")`}},
 	}
 	for _, c := range cases {
 		assertSuccessAndCheck(t, c.input, c.expected)
 	}
 }
 // TestBasicString checks the items produced for non-empty basic string
 // values, including a value with non-ASCII characters.
 func TestBasicString(t *testing.T) {
 	cases := []struct {
 		input    string
 		expected []string
 	}{
 		{
 			`_ = "b"`,
 			[]string{
 				`Key("_")`,
 				`String("b")`},
 		},
 		{
 			`thing = "A cool ʎǝʞ" # huh, it's up-side down!!`,
 			[]string{
 				`Key("thing")`,
 				`String("A cool ʎǝʞ")`,
 				`Comment("# huh, it's up-side down!!")`},
 		},
 	}
 	for _, c := range cases {
 		assertSuccessAndCheck(t, c.input, c.expected)
 	}
 }
 // TestInvalidEscapeSequence checks that an unsupported escape sequence
 // in a basic string makes the lexer fail.
 func TestInvalidEscapeSequence(t *testing.T) {
 	const input = `a="\x"`
 	const wantErr = `Invalid escape sequence \x in string value`
 	assertFailure(t, input, wantErr)
 }
 // TestBasicStringEscapes checks that the supported escape sequences in a
 // basic string are interpreted into the expected characters. The value
 // is taken from the second lexer item (the one following the key).
 func TestBasicStringEscapes(t *testing.T) {
 	escapes := map[string]string{
 		`\b`:              "\b",
 		`\t`:              "\t",
 		`\n`:              "\n",
 		`\f`:              "\f",
 		`\r`:              "\r",
 		`\"`:              "\"",
 		`\b\t\nhuh\f\r\"`: "\b\t\nhuh\f\r\"",
 		`\u2318`:          "⌘",
 		`\U0001014D`:      "𐅍",
 	}
 	for in, out := range escapes {
 		items := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
 		actual := items[1].Value
 		if out == actual {
 			continue
 		}
 		t.Fatalf("Unexpected result when parsing '%s'\nexpected: %q\nactual: %q", in, out, actual)
 	}
 }
 // func TestBasicStringUnicodeEscapes(t *testing.T) {
 // 	for in, out := range map[string]string{
 // 		`\u`: "\b",
 // 	} {
 // 		l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
 // 		s := l[2]
 // 		if out != s.Value {
 // 			t.Fatalf("Unexpected result when parsing '%s'", in)
 // 		}
 // 	}
 // }
 // TestTwoKeyValuePairs checks the items produced for two key/value pairs
 // on separate lines, each one followed by a comment.
 func TestTwoKeyValuePairs(t *testing.T) {
 	const input = "a=\"Hello\" #comment1\nb=\"World!\"#comment2\r\n"
 	expected := []string{
 		`Key("a")`,
 		`String("Hello")`,
 		`Comment("#comment1")`,
 		`Key("b")`,
 		`String("World!")`,
 		`Comment("#comment2")`,
 	}
 	assertSuccessAndCheck(t, input, expected)
 }
 func assertSuccessAndCheck(t *testing.T, input string, expected []string) {
 	l := assertSuccess(t, input)
 	assertItems(t, l, expected)
 }
 func assertFailureAndCheck(t *testing.T, input string, expected []string, expectedErr string) {
 	l := assertFailure(t, input, expectedErr)
 	assertItems(t, l, expected)
 }
 // assertFailure lexes the input and fails the test unless the lexer
 // returns an error whose message equals expectedErr. The lexer items
 // returned alongside the error are passed back to the caller.
 func assertFailure(t *testing.T, input string, expectedErr string) []lexer.Item {
 	items, err := lexer.Lex(input).ToArray()
 	switch {
 	case err == nil:
 		t.Fatalf("Expected lexer error '%s', but no error occurred", expectedErr)
 	case err.Error() != expectedErr:
 		t.Fatalf("Mismatch between expected and actual error:\nExpected: %s\nActual: %s\n", expectedErr, err)
 	}
 	return items
 }
 // assertSuccess lexes the input and fails the test when the lexer returns
 // an error. On success, the lexer items are returned.
 func assertSuccess(t *testing.T, input string) []lexer.Item {
 	items, err := lexer.Lex(input).ToArray()
 	if err == nil {
 		return items
 	}
 	t.Fatalf("Unexpected lexer error: %s", err)
 	return items
 }
 // assertItems fails the test when the number of lexer items differs from
 // the number of expected strings, or when the string representation of
 // an item differs from the expected string at the same index.
 func assertItems(t *testing.T, l []lexer.Item, expected []string) {
 	if want, got := len(expected), len(l); want != got {
 		t.Fatalf("Unexpected number of lexer items: %d (expected: %d)", got, want)
 	}
 	for i := range expected {
 		if l[i].String() == expected[i] {
 			continue
 		}
 		t.Fatalf("Unexpected lexer item at index %d: %s (expected: %s)", i, l[i], expected[i])
 	}
 }
--- a/lexer/states.go
+++ b/lexer/states.go
@ -1,33 +1,35 @@
 package lexer
-// stateFn represents the state of the scanner as a function
+// stateFn represents the state of the lexer as a function
 // that returns the next state.
 type stateFn func(*Lexer) stateFn
 const (
-	whitespace     string = " \t"
+	whitespace      string = " \t"
-	carriageReturn string = "\r"
+	carriageReturn  string = "\r"
-	newline        string = "\n"
+	newline         string = "\n"
-	hash           string = "#"
+	hash            string = "#"
-	equal          string = "="
+	equal           string = "="
-	lower          string = "abcdefghijklmnopqrstuvwxyz"
+	lower           string = "abcdefghijklmnopqrstuvwxyz"
-	upper          string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	upper           string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-	digits         string = "0123456789"
+	digits          string = "0123456789"
-	dot            string = "."
+	dot             string = "."
-	underscore     string = "_"
+	underscore      string = "_"
-	dash           string = "-"
+	dash            string = "-"
-	singleQuote    string = "'"
+	singleQuote     string = "'"
-	doubleQuote    string = "\""
+	doubleQuote     string = "\""
-	backslash      string = "\\"
+	backslash       string = "\\"
-	someQuote      string = singleQuote + doubleQuote
+	quoteChars      string = singleQuote + doubleQuote
-	bareKey        string = lower + upper + digits + underscore + dash
+	bareKeyChars    string = lower + upper + digits + underscore + dash
-	startOfKey     string = bareKey + someQuote
+	startOfKey      string = bareKeyChars + quoteChars
-	quotable       string = `btnfr\"`
+	escapeChars     string = `btnfr"\`
 	shortUtf8Escape string = "u"
 	longUtf8Escape  string = "U"
 )
 func stateKeyValuePair(l *Lexer) stateFn {
-	l.skip(whitespace + carriageReturn + newline)
+	l.skipRun(whitespace + carriageReturn + newline)
-	if l.upcoming(hash) {
+	if l.skip(hash) {
 		return stateComment
 	}
 	if l.upcoming(startOfKey) {
@ -38,36 +40,34 @@ func stateKeyValuePair(l *Lexer) stateFn {
 // A '#' hash symbol marks the rest of the line as a comment.
 func stateComment(l *Lexer) stateFn {
 	l.buffer.Reset()
 	for {
 		switch {
-		case l.atEndOfFile() || l.accept(newline):
+		case l.atEndOfFile() || l.skip(newline):
-			s := l.buffer.AsLiteralString()
+			l.emitTrimmedLiteral(ItemComment)
 			l.emit(ItemComment, s)
 			return stateKeyValuePair
 		case l.accept(carriageReturn):
 			l.ignore()
 		default:
-			l.buffer.WriteRune(l.next())
+			if !l.acceptNext() {
 				return nil
 			}
 		}
 	}
 }
 // A key may be either bare, quoted or dotted.
 func stateKey(l *Lexer) stateFn {
-	if l.upcoming(bareKey) {
+	if l.acceptFrom(bareKeyChars) {
-		return stateBareKey
+		return statebareKeyChars
 	}
-	return l.unexpectedTokenError("a valid key name")
+	return l.unexpectedInputError("a valid key name")
 }
 // Bare keys may only contain ASCII letters, ASCII digits,
 // underscores, and dashes (A-Za-z0-9_-). Note that bare
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
-func stateBareKey(l *Lexer) stateFn {
+func statebareKeyChars(l *Lexer) stateFn {
-	l.acceptWhile(bareKey)
+	l.acceptRun(bareKeyChars)
-	l.emit(ItemKey, l.getAcceptedString())
+	l.emitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }
@ -76,10 +76,10 @@ func stateBareKey(l *Lexer) stateFn {
 func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
 	// Whitespace around dot-separated parts is ignored, however,
 	// best practice is to not use any extraneous whitespace.
-	l.skip(whitespace)
+	l.skipRun(whitespace)
-	if l.accept(dot) {
+	if l.skip(dot) {
-		l.emit(ItemKeyDot, ".")
+		l.emit(ItemKeyDot, "")
-		l.skip(whitespace)
+		l.skipRun(whitespace)
 		return stateKey
 	}
 	return stateKeyAssignment
@ -90,70 +90,57 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
 // sign, and value must be on the same line (though some values can
 // be broken over multiple lines).
 func stateKeyAssignment(l *Lexer) stateFn {
-	l.skip(whitespace)
+	l.skipRun(whitespace)
-	if l.accept(equal) {
+	if l.skip(equal) {
-		l.skip(whitespace)
+		l.emit(ItemAssignment, "")
 		l.skipRun(whitespace)
 		return stateValue
 	}
-	return l.unexpectedTokenError("an '=' value assignment")
+	return l.unexpectedInputError("a value assignment")
 }
 // Values must be of the following types: String, Integer, Float, Boolean,
 // Datetime, Array, or Inline Table. Unspecified values are invalid.
 func stateValue(l *Lexer) stateFn {
-	l.skip(whitespace)
+	l.skipRun(whitespace)
-	if l.upcoming(someQuote) {
+	if l.upcoming(quoteChars) {
 		return stateStringValue
 	}
-	return l.unexpectedTokenError("a value")
+	return l.unexpectedInputError("a value")
 }
 // There are four ways to express strings: basic, multi-line basic, literal,
 // and multi-line literal. All strings must contain only valid UTF-8 characters.
 func stateStringValue(l *Lexer) stateFn {
-	if l.accept(doubleQuote) {
+	// Basic strings are surrounded by quotation marks.
 	if l.skip(doubleQuote) {
 		return stateBasicStringValue
 	}
-	return l.unexpectedTokenError("a string value")
+	return l.unexpectedInputError("a string value")
 }
 // Basic strings are surrounded by quotation marks. Any Unicode character
 // may be used except those that must be escaped: quotation mark, backslash,
 // and the control characters (U+0000 to U+001F, U+007F).
 //
 // For convenience, some popular characters have a compact escape sequence.
 //
 // \b         - backspace       (U+0008)
 // \t         - tab             (U+0009)
 // \n         - linefeed        (U+000A)
 // \f         - form feed       (U+000C)
 // \r         - carriage return (U+000D)
 // \"         - quote           (U+0022)
 // \\         - backslash       (U+005C)
 // \uXXXX     - unicode         (U+XXXX)
 // \UXXXXXXXX - unicode         (U+XXXXXXXX)
 //
 // Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
 // The escape codes must be valid Unicode scalar values.
 //
 // All other escape sequences not listed above are reserved and,
 // if used, TOML should produce an error.
 func stateBasicStringValue(l *Lexer) stateFn {
 	// Possibly a """ multi-line string start,
 	// possibly the end of an "" empty string.
-	if l.accept(doubleQuote) {
+	if l.skip(doubleQuote) {
 		// It's a """ multi-line string.
-		if l.accept(doubleQuote) {
+		if l.skip(doubleQuote) {
 			l.ignore()
 			return stateMultiLineBasicString
 		}
 		// It's an "" empty string.
 		l.ignore()
 		l.emit(ItemString, "")
 		return stateKeyValuePair
 	}
 	l.ignore()
 	return stateBasicString
 }
 // invalidBasicStringCharacters holds the control characters U+0000 up to
 // and including U+001F, plus U+007F. The basic string parser reports an
 // "Invalid character in basic string" error when one of them is upcoming.
 const invalidBasicStringCharacters string = "" +
 	"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
 	"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
 	"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
 	"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
 	"\u007F"
 func stateParseBasicString(l *Lexer) stateFn {
 	for {
 		switch {
@ -162,26 +149,47 @@ func stateParseBasicString(l *Lexer) stateFn {
 		case l.accept(doubleQuote):
 			return l.popState()
 		case l.accept(backslash):
-			if l.upcoming(quotable) {
+			// For convenience, some popular characters have a compact escape sequence.
 			// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
 			// The escape codes must be valid Unicode scalar values.
 			switch {
 			case l.upcoming(escapeChars):
 				// \b         - backspace       (U+0008)
 				// \t         - tab             (U+0009)
 				// \n         - linefeed        (U+000A)
 				// \f         - form feed       (U+000C)
 				// \r         - carriage return (U+000D)
 				// \"         - quote           (U+0022)
 				// \\         - backslash       (U+005C)
 				l.buffer.WriteRune('\\')
 				l.buffer.WriteRune(l.next())
-			} else {
+			case l.upcoming(shortUtf8Escape):
 				// \uXXXX     - unicode         (U+XXXX)
 				return l.errorf("Not yet implemented: short utf8")
 			case l.upcoming(longUtf8Escape):
 				// \UXXXXXXXX - unicode         (U+XXXXXXXX)
 				return l.errorf("Not yet implemented: long utf8")
 			default:
 				// All other escape sequences not listed above are reserved and,
 				// if used, TOML should produce an error.
 				return l.errorf("Invalid escape sequence \\%c in string value", l.next())
 			}
 		case l.upcoming(invalidBasicStringCharacters):
 			// Any Unicode character may be used except those that must be escaped:
 			// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
 			return l.errorf("Invalid character in basic string: %q", l.next())
 		default:
-			l.buffer.WriteRune(l.next())
+			l.acceptNext()
 		}
 	}
 }
 func stateBasicString(l *Lexer) stateFn {
 	l.buffer.Reset()
 	l.pushState(func(l *Lexer) stateFn {
-		s, err := l.buffer.AsInterpretedString()
+		err := l.emitInterpreted(ItemString)
 		if err != nil {
 			return l.errorf("Invalid data in string: %s", err)
 		}
 		l.emit(ItemString, s)
 		return stateKeyValuePair
 	})
 	return stateParseBasicString
@ -192,10 +200,9 @@ func stateMultiLineBasicString(l *Lexer) stateFn {
 }
 func stateEndOfFile(l *Lexer) stateFn {
-	i := l.peek()
+	if l.atEndOfFile() {
 	if i == endOfFile {
 		l.emit(ItemEOF, "EOF")
 		return nil
 	}
-	return l.unexpectedTokenError("end of file")
+	return l.unexpectedInputError("end of file")
 }
--- a/lexer/states_test.go
+++ b/lexer/states_test.go
@ -0,0 +1,174 @@
 package lexer_test
 import (
 	"fmt"
 	"strings"
 	"testing"
 	"github.com/mmakaay/toml/lexer"
 )
 func TestInvalidUtf8Data(t *testing.T) {
 	runStatesT(t, statesT{
 		"invalid UTF8 data", "\xbc", "",
 		"Unexpected non-UTF8 data (expected end of file)"})
 }
 func TestEmptyInput(t *testing.T) {
 	runStatesT(t, statesT{"empty string", "", "", ""})
 }
 func TestWhiteSpaceAndNewlines(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"space", " ", "", ""},
 		{"tab", "\t", "", ""},
 		{"newline", "\n", "", ""},
 		{"carriage return", "\r", "", ""},
 		{"all whitespace and newlines", " \t \t \r\r\n\n  \n \t", "", ""},
 	})
 }
 func TestComments(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"empty comment", "#", "#()", ""},
 		{"empty comment with spaces", "# \t \r\n", `#()`, ""},
 		{"basic comment", "#chicken", "#(chicken)", ""},
 		{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
 		{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
 		{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
 		{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
 		{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
 	})
 }
 func TestKeyWithoutAssignment(t *testing.T) {
 	err := "Unexpected end of file (expected a value assignment)"
 	runStatesTs(t, []statesT{
 		{"bare with whitespace", " a ", []string{"[a]"}, err},
 		{"bare lower", "abcdefghijklmnopqrstuvwxyz", []string{"[abcdefghijklmnopqrstuvwxyz]"}, err},
 		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", []string{"[ABCDEFGHIJKLMNOPQRSTUVWXYZ]"}, err},
 		{"bare numbers", "0123456789", []string{"[0123456789]"}, err},
 		{"bare underscore", "_", []string{"[_]"}, err},
 		{"bare dash", "-", []string{"[-]"}, err},
 		{"bare big mix", "-hey_good_Lookin123-", []string{"[-hey_good_Lookin123-]"}, err},
 		{"bare dotted", "a._.c", []string{"[a]", ".", "[_]", ".", "[c]"}, err},
 		{"bare dotted with whitespace", " a .\t\t b\t ", []string{"[a]", ".", "[b]"}, err},
 	})
 }
 func TestKeyWithAssignmentButNoValue(t *testing.T) {
 	err := "Unexpected end of file (expected a value)"
 	runStatesTs(t, []statesT{
 		{"bare", "a=", "[a]=", err},
 		{"double equal sign", "a==", "[a]=", "Unexpected token '=' (expected a value)"},
 		{"bare dotted", "a.b=", "[a].[b]=", err},
 		{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
 	})
 }
 func TestUnterminatedBasicString(t *testing.T) {
 	runStatesT(t, statesT{
 		"missing closing quote", `a="value`, "[a]=",
 		"Unexpected end of file (expected basic string token)"})
 }
 func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`},
 		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`},
 		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`},
 	})
 	// No need to write all test cases for disallowed characters by hand.
 	for i := 0x00; i <= 0x1F; i++ {
 		name := fmt.Sprintf("control character %x", rune(i))
 		runStatesT(
 			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
 				fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))})
 	}
 }
 func TestEmptyBasicString(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"empty", `a=""`, "[a]=STR()", ""},
 		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
 		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
 		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
 		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
 		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
 	})
 }
 func TestBasicString(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
 		{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
 	})
 }
 func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
 	runStatesT(t, statesT{
 		"invalid escape sequence", `a="\x"`, "[a]=", `Invalid escape sequence \x in string value`,
 	})
 }
 func TestBasicStringEscapes(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
 		{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
 		{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
 		{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
 		{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
 		{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
 		{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
 		{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
 		{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
 		{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
 	})
 }
 // statesT describes a single lexer test case, as executed by runStatesT.
 type statesT struct {
 	name string      // label of the test case, used as prefix in failure messages
 	in   string      // input that is fed to the lexer
 	out  interface{} // expected items: a []string (one string per item) or a string (all items joined)
 	err  string      // expected error message, or "" when no error is expected
 }
 // runStatesTs executes the provided test cases one by one, in order.
 func runStatesTs(t *testing.T, tests []statesT) {
 	for i := range tests {
 		runStatesT(t, tests[i])
 	}
 }
 // runStatesT executes a single lexer test case.
 //
 // The input c.in is lexed into an array of items. The resulting error is
 // compared against c.err (an empty c.err means that no error is expected).
 // The resulting items are compared against c.out, which is either:
 //
 //   - a []string, holding the string representation of every expected item
 //   - a string, holding the string representations of all items, joined
 //
 // All mismatches are reported using t.Errorf, so a single test case can
 // report multiple problems.
 func runStatesT(t *testing.T, c statesT) {
 	t.Helper()
 	l, err := lexer.Lex(c.in).ToArray()
 	switch {
 	case err == nil && c.err != "":
 		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
 	case err != nil && c.err == "":
 		t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
 	case err != nil && err.Error() != c.err:
 		t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
 	}

 	switch expected := c.out.(type) {
 	case nil:
 		// No expectation on the lexer output for this test case.
 	case []string:
 		if len(expected) != len(l) {
 			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
 		}
 		for i, e := range expected {
 			// The length mismatch above is not fatal, so guard against
 			// indexing beyond the items that were actually returned.
 			if i >= len(l) {
 				break
 			}
 			if l[i].String() != e {
 				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i])
 			}
 		}
 	case string:
 		// Start with an empty slice (capacity only); appending to a slice
 		// of length len(l) would prefix the result with empty strings.
 		a := make([]string, 0, len(l))
 		for _, v := range l {
 			a = append(a, v.String())
 		}
 		actual := strings.Join(a, "")
 		if actual != expected {
 			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
 		}
 	default:
 		t.Errorf("[%s] Unsupported type %T for the expected lexer output", c.name, c.out)
 	}
 }
--- a/lexer/stringbuf.go
+++ b/lexer/stringbuf.go
@ -19,7 +19,7 @@ func (b *StringBuffer) Reset() *StringBuffer {
 	return b
 }
-// AddString adds the runes of the input string to the string buffer.
+// WriteString adds the runes of the input string to the string buffer.
 func (b *StringBuffer) WriteString(s string) *StringBuffer {
 	for _, r := range s {
 		b.WriteRune(r)
--- a/lexer/stringbuf_test.go
+++ b/lexer/stringbuf_test.go
@ -23,7 +23,7 @@ func TestResetResetsBuffer(t *testing.T) {
 	}
 }
-type testCase struct {
+type stringbufT struct {
 	name          string
 	in            string
 	out           string
@ -37,7 +37,7 @@ const (
 func TestAsLiteralString(t *testing.T) {
 	b := lexer.StringBuffer{}
-	for _, c := range []testCase{
+	for _, c := range []stringbufT{
 		{"empty string", ``, ``, OK},
 		{"simple string", `Simple string!`, `Simple string!`, OK},
 		{"single quote", `'`, `'`, OK},
@ -57,7 +57,7 @@ func TestAsLiteralString(t *testing.T) {
 func TestAsInterpretedString(t *testing.T) {
 	b := lexer.StringBuffer{}
-	for _, c := range []testCase{
+	for _, c := range []stringbufT{
 		{"empty string", "", "", OK},
 		{"one character", "Simple string!", "Simple string!", OK},
 		{"escaped single quote", `\'`, "", FAIL},