Splitting off a more generic parser. It's fun getting to know a language, but you keep refactoring with all the new stuff that you learn :-)

2019-05-17 12:44:24 +00:00 · 2019-05-17 12:44:24 +00:00 · f86ef2b918
parent aeb48edc44
commit f86ef2b918
11 changed files with 720 additions and 234 deletions
--- a/lexer/items.go
+++ b/lexer/items.go
@ -1,48 +1,35 @@
 package lexer
-import "fmt"
+import (
 	"fmt"
-// itemType represents the type of lexer items.
+	"github.com/mmakaay/toml/parser"
-type itemType int
+)
 // Definition of all the lexer item types for the TOML lexer.
 const (
-	ItemError      itemType = iota // An error occurred
+	ItemComment    parser.ItemType = iota // An error occurred
 	ItemEOF                        // End of input reached
 	ItemComment                    // Comment string, starts with # till en of line
 	ItemKey                               // Key of a key/value pair
 	ItemKeyDot                            // Dot for a dotted key
 	ItemAssignment                        // Value assignment coming up (=)
 	ItemString                            // A value of type string
 )
-// Item represents a lexer item returned from the scanner.
+// ParserItemToString returns a string representation of the
-type Item struct {
+// parser.Item. This is used for unit testing purposes.
-	Type  itemType //Type, e.g. ItemComment, ItemString
+func ParserItemToString(i parser.Item) string {
 	Value string   // Value, e.g. "10.42", "["
 }
 // String returns a string representation of the lexer item.
 func (i Item) String() string {
 	switch i.Type {
 	case ItemComment:
 		return fmt.Sprintf("#(%s)", i.Value)
 	case ItemKey:
 		return fmt.Sprintf("[%s]", i.Value)
 	case ItemString:
 		return fmt.Sprintf("STR(%s)", i.Value)
 	case ItemKeyDot:
 		return "."
 	case ItemAssignment:
 		return "="
 	}
 	return fmt.Sprintf("%s(%s)", i.Type, i.Value)
 }
 // String returns a string representation of the lexer item type.
 func (i itemType) String() string {
 	switch i {
 	case ItemComment:
 		return "#"
 	case ItemString:
 		return "STR"
 	default:
-		panic(fmt.Sprintf("No translation available for type id %d", i))
+		panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
 	}
 }
--- a/lexer/lexer.no
+++ b/lexer/lexer.no
@ -4,21 +4,23 @@ import (
 	"fmt"
 	"strings"
 	"unicode/utf8"
 	"github.com/mmakaay/toml/parser"
 )
 // Lexer holds the state of the lexer.
 type Lexer struct {
-	input        string       // the scanned input string
+	input        string           // the scanned input
-	state        stateFn      // a function that handles the current state
+	state        parser.StateFn   // a function that handles the current state
-	stack        []stateFn    // state function stack, for nested parsing
+	stack        []parser.StateFn // state function stack, for nested parsing
 	len          int              // the total length of the input in bytes
 	pos          int              // current byte scanning position in the input
 	newline      bool             // keep track of when we have scanned a newline
 	cursorRow    int              // current row number in the input
 	cursorColumn int              // current column position in the input
 	width        int          // width of the last rune read, for supporting backup()
 	buffer       StringBuffer     // an efficient buffer, used to build string values
-	items        chan Item    // channel of resulting lexer items
+	items        chan parser.Item // channel of resulting lexer items
-	item         Item         // the current item as reached by Next() and retrieved by Get()
+	item         parser.Item      // the current item as reached by Next() and retrieved by Get()
 	err          *Error           // an error when lexing failed, retrieved by Error()
 }
@ -35,46 +37,45 @@ func (err *Error) Error() string {
 	return err.Message
 }
-// Lex takes an input string and initializes the TOML lexer for it.
+// New takes an input string and initializes the lexer for it.
-func Lex(input string) *Lexer {
+func New(input string) *Lexer {
 	return &Lexer{
 		input: input,
 		len:   len(input),
 		state: stateKeyValuePair,
-		items: make(chan Item, 2),
+		items: make(chan parser.Item, 2),
 	}
 }
 // Next advances to the next lexer item in the input string.
-// When a valid item was found, then the boolean return parameter is returned.
+// When a valid item was found, then the boolean return parameter will be true.
 // On error or when reaching the end of the input, false is returned.
-// When an error occurred, it will be set in the error return value.
+// When an error occurred, it will be set in the error return value, nil otherwise.
-func (l *Lexer) Next() (Item, *Error, bool) {
+func (l *Lexer) Next() (parser.Item, *Error, bool) {
 	if l.state == nil {
 		panic("This should not happen: nil state reached, but entering Next()")
 	}
 	for {
 		select {
 		case i := <-l.items:
-			if i.Type == ItemEOF {
+			switch {
 			case i.Type == ItemEOF:
 				return i, nil, false
-			}
+			case i.Type == ItemError:
 			if i.Type == ItemError {
 				l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
 				return i, l.err, false
-			}
+			default:
 				l.item = i
 				return i, nil, true
 			}
 		default:
 			l.state = l.state(l)
 		}
 	}
 }
-// ToArray returns lexer items as an array.
+// ToArray returns lexer items as an array (mainly intended for testing purposes)
 // When an error occurs during scanning, a partial result will be
 // returned, accompanied by the error that occurred.
-func (l *Lexer) ToArray() ([]Item, *Error) {
+func (l *Lexer) ToArray() ([]parser.Item, *Error) {
-	var items []Item
+	var items []parser.Item
 	for {
 		item, err, more := l.Next()
 		if !more {
@ -100,25 +101,25 @@ func (l *Lexer) popState() stateFn {
 // atEndOfFile returns true when there is no more data available in the input.
 func (l *Lexer) atEndOfFile() bool {
-	return l.pos >= len(l.input)
+	return l.pos >= l.len
 }
 // emit passes a lexer item back to the client, including the provided string.
-func (l *Lexer) emit(t itemType, s string) {
+func (l *Lexer) emit(t parser.ItemType, s string) {
-	l.items <- Item{t, s}
+	l.items <- parser.Item{Type: t, Value: s}
 	l.buffer.Reset()
 }
 // emitLiteral passes a lexer item back to the client, including the accumulated
 // string buffer data as a literal string.
-func (l *Lexer) emitLiteral(t itemType) {
+func (l *Lexer) emitLiteral(t parser.ItemType) {
 	l.emit(t, l.buffer.AsLiteralString())
 }
 // emitTrimmedLiteral passes a lexer item back to the client, including the
 // accumulated string buffer data as a literal string with whitespace
 // trimmed from it.
-func (l *Lexer) emitTrimmedLiteral(t itemType) {
+func (l *Lexer) emitTrimmedLiteral(t parser.ItemType) {
 	l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
 }
@ -127,7 +128,7 @@ func (l *Lexer) emitTrimmedLiteral(t itemType) {
 // codes like \n, \t, \uXXXX, etc.)
 // This method might return an error, in case there is data in the
 // string buffer that is not valid for string interpretation.
-func (l *Lexer) emitInterpreted(t itemType) error {
+func (l *Lexer) emitInterpreted(t parser.ItemType) error {
 	s, err := l.buffer.AsInterpretedString()
 	if err != nil {
 		return err
@ -137,15 +138,10 @@ func (l *Lexer) emitInterpreted(t itemType) error {
 }
 // emitError emits a lexer error item back to the client.
-func (l *Lexer) emitError(message string) {
+func (l *Lexer) emitError(format string, args ...interface{}) stateFn {
 	message := fmt.Sprintf(format, args...)
 	l.emit(ItemError, message)
-}
+	return nil
 // backup steps back one rune
 // Can be called only once per call of next.
 func (l *Lexer) backup() {
 	l.pos -= l.width
 	l.cursorColumn--
 }
 // peek returns but does not advance to the next rune(s) in the input.
@ -176,17 +172,40 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
 	return peeked, width, true
 }
-// acceptNext adds the specified amount of runes from the input to the string buffer.
+// acceptAny adds the next rune from the input to the string buffer.
-// If not enough runes could be read (end of file or invalid UTF8 data), then false is returned.
+// If no rune could be read (end of file or invalid UTF8 data), then
-func (l *Lexer) acceptNext(count int) bool {
+// false is returned.
-	for i := 0; i < count; i++ {
+func (l *Lexer) acceptAny() bool {
 	if r, ok := l.next(); ok {
 		l.buffer.WriteRune(r)
-		} else {
+		return true
 			return false
 	}
 	return false
 }
 // accept adds the next rune to the string buffer and returns true if it's
 // from the valid set of runes. Otherwise false is returned.
 func (l *Lexer) accept(matches ...string) bool {
 	return l.acceptPattern(matches...)
 }
 // AcceptMatching adds the next runes to the string buffer, but only
 // if the upcoming runes satisfy the provided pattern.
 // When runes were added then true is returned, false otherwise.
 func (l *Lexer) acceptPattern(pattern ...string) bool {
 	return l.progress(func(r rune) { l.buffer.WriteRune(r) }, pattern...)
 }
 func (l *Lexer) progress(callback func(rune), matches ...string) bool {
 	if runes, w, ok := l.match(matches...); ok {
 		l.pos += w
 		for _, r := range runes {
 			callback(r)
 			l.advanceCursor(r)
 		}
 		return true
 	}
 	return false
 }
 // acceptConsecutive adds consecutive runes from the input to the string
@ -200,27 +219,9 @@ func (l *Lexer) acceptConsecutive(match string) bool {
 	return accepted
 }
-// next returns the next rune from the input and a boolean indicating if
+// advanceCursor advances the rune cursor one position in the
-// reading the input was successful.
+// input data. While doing so, it keeps tracks of newlines,
-// When the end of input is reached, or an invalid UTF8 character is
+// so we can report on row + column positions on error.
 // read, then false is returned.
 func (l *Lexer) next() (rune, bool) {
 	r, w, ok := l.peek()
 	if ok {
 		l.width = w
 		l.pos += w
 		l.advanceCursor(r)
 		return r, true
 	}
 	l.width = 0
 	if r == utf8.RuneError && w == 0 {
 		l.emitError("unexpected end of file")
 	} else {
 		l.emitError("invalid UTF8 character")
 	}
 	return r, false
 }
 func (l *Lexer) advanceCursor(r rune) {
 	if l.newline {
 		l.cursorColumn = 0
@ -233,40 +234,20 @@ func (l *Lexer) advanceCursor(r rune) {
 // skip skips runes, but only when all provided matches are satisfied.
 // Returns true when one or more runes were skipped.
-func (l *Lexer) skipMatching(matches ...string) bool {
+func (l *Lexer) skipMatching(pattern ...string) bool {
-	if runes, w, ok := l.match(matches...); ok {
+	return l.progress(func(r rune) {}, pattern...)
 		l.pos += w
 		for _, r := range runes {
 			l.advanceCursor(r)
 		}
 		return true
 	}
 	return false
 }
 // skipConsecutive skips consecutive runes from the provided match.
 // Returns true when one or more runes were skipped.
-func (l *Lexer) skipConsecutive(match string) bool {
+func (l *Lexer) skipConsecutive(pattern string) bool {
 	didSkip := false
-	for l.skipMatching(match) {
+	for l.skipMatching(pattern) {
 		didSkip = true
 	}
 	return didSkip
 }
 // accept adds the next rune to the string buffer and returns true if it's
 // from the valid set of runes. Otherwise false is returned.
 func (l *Lexer) accept(match string) bool {
 	if r, ok := l.next(); ok {
 		if strings.IndexRune(match, r) >= 0 {
 			l.buffer.WriteRune(r)
 			return true
 		}
 	}
 	l.backup()
 	return false
 }
 // upcoming checks if the upcoming runes satisfy the provided rune matches.
 // This is a lot like the match method, with the difference that
 // this one only returns the boolean value.
@ -275,6 +256,25 @@ func (l *Lexer) upcoming(matches ...string) bool {
 	return ok
 }
 // next returns the next rune from the input and a boolean indicating if
 // reading the input was successful.
 // When the end of input is reached, or an invalid UTF8 character is
 // read, then false is returned.
 func (l *Lexer) next() (rune, bool) {
 	r, w, ok := l.peek()
 	if ok {
 		l.pos += w
 		l.advanceCursor(r)
 		return r, true
 	}
 	if r == utf8.RuneError && w == 0 {
 		l.emitError("unexpected end of file")
 	} else {
 		l.emitError("invalid UTF8 character")
 	}
 	return r, false
 }
 // match checks if the upcoming runes satisfy the provided rune matches.
 // It returns a slice of runes that were found, their total byte width
 // and a boolean indicating whether or not all provided matches matched
@ -292,24 +292,14 @@ func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
 	return peeked, width, false
 }
 // error returns an error token and terminates the scan
 // by returning nil to l.run.
 func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
 	l.items <- Item{
 		ItemError,
 		fmt.Sprintf(format, args...),
 	}
 	return nil
 }
 func (l *Lexer) unexpectedInputError(expected string) stateFn {
-	// next() takes care of error messages for ok == false.
+	// next() takes care of emitting errors for ok == false.
 	if r, ok := l.next(); ok {
-		l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
+		return l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
 	}
 	return nil
 }
 func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
-	return l.errorf("Unexpected end of file (expected %s)", expected)
+	return l.emitError("Unexpected end of file (expected %s)", expected)
 }
--- a/lexer/states.go
+++ b/lexer/states.go
@ -1,8 +1,6 @@
 package lexer
-// stateFn represents the state of the lexer as a function
+import "github.com/mmakaay/toml/parser"
 // that returns the next state.
 type stateFn func(*Lexer) stateFn
 const (
 	whitespace      string = " \t"
@ -28,59 +26,65 @@ const (
 	longUtf8Escape  string = "U"
 )
-func stateKeyValuePair(l *Lexer) stateFn {
+// NewParser creates a new parser, using the provided input string
-	l.skipConsecutive(whitespace + carriageReturn + newline)
+// as the data to parse.
-	if l.skipMatching(hash) {
+func NewParser(input string) *parser.Parser {
 	return parser.New(input, stateKeyValuePair)
 }
 func stateKeyValuePair(l *parser.Parser) parser.StateFn {
 	l.SkipConsecutive(whitespace + carriageReturn + newline)
 	if l.SkipMatching(hash) {
 		return stateComment
 	}
-	if l.upcoming(startOfKey) {
+	if l.Upcoming(startOfKey) {
 		return stateKey
 	}
 	return stateEndOfFile
 }
 // A '#' hash symbol marks the rest of the line as a comment.
-func stateComment(l *Lexer) stateFn {
+func stateComment(l *parser.Parser) parser.StateFn {
 	for {
 		switch {
-		case l.atEndOfFile() || l.skipMatching(newline):
+		case l.AtEndOfFile() || l.SkipMatching(newline):
-			l.emitTrimmedLiteral(ItemComment)
+			l.EmitLiteralTrim(ItemComment)
 			return stateKeyValuePair
 		default:
-			if !l.acceptNext(1) {
+			if !l.AcceptAny() {
-				return l.unexpectedInputError("comment")
+				return nil
 			}
 		}
 	}
 }
 // A key may be either bare, quoted or dotted.
-func stateKey(l *Lexer) stateFn {
+func stateKey(l *parser.Parser) parser.StateFn {
-	if l.accept(bareKeyChars) {
+	if l.AcceptMatching(bareKeyChars) {
 		return statebareKeyChars
 	}
-	return l.unexpectedInputError("a valid key name")
+	return l.UnexpectedInputError("a valid key name")
 }
 // Bare keys may only contain ASCII letters, ASCII digits,
 // underscores, and dashes (A-Za-z0-9_-). Note that bare
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
-func statebareKeyChars(l *Lexer) stateFn {
+func statebareKeyChars(l *parser.Parser) parser.StateFn {
-	l.acceptConsecutive(bareKeyChars)
+	l.AcceptConsecutive(bareKeyChars)
-	l.emitLiteral(ItemKey)
+	l.EmitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }
 // Dotted keys are a sequence of bare or quoted keys joined with a dot.
 // This allows for grouping similar properties together:
-func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
+func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
 	// Whitespace around dot-separated parts is ignored, however,
 	// best practice is to not use any extraneous whitespace.
-	l.skipConsecutive(whitespace)
+	l.SkipConsecutive(whitespace)
-	if l.skipMatching(dot) {
+	if l.SkipMatching(dot) {
-		l.emit(ItemKeyDot, "")
+		l.Emit(ItemKeyDot, "")
-		l.skipConsecutive(whitespace)
+		l.SkipConsecutive(whitespace)
 		return stateKey
 	}
 	return stateKeyAssignment
@ -90,62 +94,69 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
 // Whitespace is ignored around key names and values. The key, equals
 // sign, and value must be on the same line (though some values can
 // be broken over multiple lines).
-func stateKeyAssignment(l *Lexer) stateFn {
+func stateKeyAssignment(l *parser.Parser) parser.StateFn {
-	l.skipConsecutive(whitespace)
+	l.SkipConsecutive(whitespace)
-	if l.skipMatching(equal) {
+	if l.SkipMatching(equal) {
-		l.emit(ItemAssignment, "")
+		l.Emit(ItemAssignment, "")
-		l.skipConsecutive(whitespace)
+		l.SkipConsecutive(whitespace)
 		return stateValue
 	}
-	return l.unexpectedInputError("a value assignment")
+	return l.UnexpectedInputError("a value assignment")
 }
 // Values must be of the following types: String, Integer, Float, Boolean,
 // Datetime, Array, or Inline Table. Unspecified values are invalid.
-func stateValue(l *Lexer) stateFn {
+func stateValue(l *parser.Parser) parser.StateFn {
-	l.skipConsecutive(whitespace)
+	l.SkipConsecutive(whitespace)
-	if l.upcoming(quoteChars) {
+	if l.Upcoming(quoteChars) {
 		return stateStringValue
 	}
-	return l.unexpectedInputError("a value")
+	return l.UnexpectedInputError("a value")
 }
 // There are four ways to express strings: basic, multi-line basic, literal,
 // and multi-line literal. All strings must contain only valid UTF-8 characters.
-func stateStringValue(l *Lexer) stateFn {
+func stateStringValue(l *parser.Parser) parser.StateFn {
 	switch {
-	case l.skipMatching(doubleQuote, doubleQuote, doubleQuote):
+	case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote):
 		// Multi-line basic strings are surrounded by three quotation marks on each side.
 		return stateMultiLineBasicString
-	case l.skipMatching(doubleQuote):
+	case l.SkipMatching(doubleQuote):
 		// Basic strings are surrounded by quotation marks.
-		return stateBasicStringValue
+		return stateSingleLineBasicString
 	}
-	return l.unexpectedInputError("a string value")
+	return l.UnexpectedInputError("a string value")
 }
-func stateBasicStringValue(l *Lexer) stateFn {
+func stateSingleLineBasicString(l *parser.Parser) parser.StateFn {
-	if l.upcoming(doubleQuote, doubleQuote) {
+	if l.Upcoming(doubleQuote, doubleQuote) {
 		return stateMultiLineBasicString
 	}
 	return stateBasicString
 }
-const invalidBasicStringCharacters string = "" +
+func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
 	l.EmitError("Not yet implemented")
 	return nil
 }
 // Any Unicode character may be used except those that must be escaped:
 // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
 const invalidBasicStringCharacters string = "\"\\" +
 	"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
 	"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
 	"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
 	"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
 	"\u007F"
-func stateParseBasicString(l *Lexer) stateFn {
+func stateParseBasicString(l *parser.Parser) parser.StateFn {
 	for {
 		switch {
-		case l.atEndOfFile():
+		case l.AtEndOfFile():
-			return l.unexpectedEndOfFile("basic string token")
+			return l.UnexpectedEndOfFile("basic string token")
-		case l.skipMatching(doubleQuote):
+		case l.SkipMatching(doubleQuote):
-			return l.popState()
+			return l.PopState()
-		case l.upcoming(backslash, escapeChars):
+		case l.AcceptMatching(backslash, escapeChars):
 			// For convenience, some popular characters have a compact escape sequence.
 			// \b         - backspace       (U+0008)
 			// \t         - tab             (U+0009)
@ -154,50 +165,45 @@ func stateParseBasicString(l *Lexer) stateFn {
 			// \r         - carriage return (U+000D)
 			// \"         - quote           (U+0022)
 			// \\         - backslash       (U+005C)
-			l.acceptNext(2)
+		case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex):
 		case l.upcoming(backslash, shortUtf8Escape, hex, hex, hex, hex):
 			// \uXXXX     - unicode         (U+XXXX)
-			l.acceptNext(6)
+		case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
 		case l.upcoming(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
 			// \UXXXXXXXX - unicode         (U+XXXXXXXX)
-			l.acceptNext(10)
+		case l.Upcoming(backslash):
 		case l.upcoming(backslash):
 			// All other escape sequences not listed above are reserved and,
 			// if used, TOML should produce an error.
-			return l.errorf("Invalid escape sequence in basic string")
+			return l.EmitError("Invalid escape sequence in basic string")
-		case l.upcoming(invalidBasicStringCharacters):
+		case l.Upcoming(invalidBasicStringCharacters):
 			// Any Unicode character may be used except those that must be escaped:
 			// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
-			r, _ := l.next()
+			r, _, _ := l.Match(invalidBasicStringCharacters)
-			return l.errorf("Invalid character in basic string: %q", r)
+			l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 			return nil
 		default:
-			if !l.acceptNext(1) {
+			if !l.AcceptAny() {
-				return l.unexpectedInputError("string value")
+				return nil
 			}
 		}
 	}
 }
-func stateBasicString(l *Lexer) stateFn {
+func stateBasicString(l *parser.Parser) parser.StateFn {
-	l.pushState(func(l *Lexer) stateFn {
+	l.PushState(func(l *parser.Parser) parser.StateFn {
-		err := l.emitInterpreted(ItemString)
+		err := l.EmitInterpreted(ItemString)
 		if err != nil {
-			return l.errorf("Invalid data in string: %s", err)
+			l.EmitError("Invalid data in string: %s", err)
 			return nil
 		}
 		return stateKeyValuePair
 	})
 	return stateParseBasicString
 }
-func stateMultiLineBasicString(l *Lexer) stateFn {
+func stateEndOfFile(l *parser.Parser) parser.StateFn {
-	return l.errorf("Not yet implemented")
+	if l.AtEndOfFile() {
-}
+		l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser?
 func stateEndOfFile(l *Lexer) stateFn {
 	if l.atEndOfFile() {
 		l.emit(ItemEOF, "EOF")
 	} else {
-		l.unexpectedInputError("end of file")
+		l.UnexpectedInputError("end of file")
 	}
 	return nil
 }
--- a/lexer/states_test.go
+++ b/lexer/states_test.go
@ -9,7 +9,7 @@ import (
 )
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
+	_, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
 	if err.Row != 4 {
 		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
@ -19,21 +19,20 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 	}
 }
 func TestEmptyInput(t *testing.T) {
 	runStatesT(t, statesT{"empty string", "", "", ""})
 }
 func TestInvalidUtf8Data(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"inside comment", "# \xbc", "", "invalid UTF8 character"},
 		{"bare key 1", "\xbc", "", "invalid UTF8 character"},
-		{"bare key 2", "key\xbc", "", "invalid UTF8 character"},
+		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
 		{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
 		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
 		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
 	})
 }
 func TestEmptyInput(t *testing.T) {
 	runStatesT(t, statesT{"empty string", "", "", ""})
 }
 func TestWhiteSpaceAndNewlines(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"space", " ", "", ""},
@ -61,13 +60,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
 	err := "unexpected end of file"
 	runStatesTs(t, []statesT{
 		{"bare with whitespace", " a ", "[a]", err},
-		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
+		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
-		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
+		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
-		{"bare numbers", "0123456789", "", err},
+		{"bare numbers", "0123456789", "[0123456789]", err},
-		{"bare underscore", "_", "", err},
+		{"bare underscore", "_", "[_]", err},
-		{"bare dash", "-", "", err},
+		{"bare dash", "-", "[-]", err},
-		{"bare big mix", "-hey_good_Lookin123-", "", err},
+		{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
-		{"bare dotted", "a._.c", "[a].[_].", err},
+		{"bare dotted", "a._.c", "[a].[_].[c]", err},
 		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
 	})
 }
@ -90,9 +89,9 @@ func TestUnterminatedBasicString(t *testing.T) {
 func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
 	runStatesTs(t, []statesT{
-		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`},
+		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
-		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`},
+		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
-		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`},
+		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
 	})
 	// No need to write all test cases for disallowed characters by hand.
@ -100,7 +99,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
 		name := fmt.Sprintf("control character %x", rune(i))
 		runStatesT(
 			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
-				fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))})
+				fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
 	}
 }
@ -163,7 +162,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
 }
 func runStatesT(t *testing.T, c statesT) {
-	l, err := lexer.Lex(c.in).ToArray()
+	l, err := lexer.NewParser(c.in).ToArray()
 	if err == nil && c.err != "" {
 		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
 	}
@ -179,14 +178,15 @@ func runStatesT(t *testing.T, c statesT) {
 			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
 		}
 		for i, e := range expected {
-			if l[i].String() != e {
+			v := lexer.ParserItemToString(l[i])
-				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i])
+			if v != e {
 				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
 			}
 		}
 	case string:
 		a := make([]string, len(l))
 		for _, v := range l {
-			a = append(a, v.String())
+			a = append(a, lexer.ParserItemToString(v))
 		}
 		actual := strings.Join(a, "")
 		if actual != expected {
--- a/lexer/stringbuf.no
+++ b/lexer/stringbuf.no
--- a/lexer/stringbuf_test.no
+++ b/lexer/stringbuf_test.no
--- a/parser/parser.go
+++ b/parser/parser.go
@ -0,0 +1,261 @@
 package parser
 import (
 	"fmt"
 	"strings"
 	"unicode/utf8"
 )
 // New creates a Parser for the provided input string. The parser starts
 // out in startState, and its items channel has room for two items.
 func New(input string, startState StateFn) *Parser {
 	p := new(Parser)
 	p.input = input
 	p.len = len(input)
 	p.state = startState
 	p.items = make(chan Item, 2)
 	return p
 }
 // PushState puts the state function on top of the state stack.
 // The stack makes nested parsing possible: PopState hands back the
 // most recently pushed state function.
 func (l *Parser) PushState(state StateFn) {
 	stack := append(l.stack, state)
 	l.stack = stack
 }
 // PopState removes the most recently pushed state function from the
 // state stack and returns it.
 // Popping from an empty stack is a programming error in the calling
 // state machine (a PopState without a matching PushState). It panics
 // with a descriptive message, instead of an opaque slice-bounds error.
 func (l *Parser) PopState() StateFn {
 	last := len(l.stack) - 1
 	if last < 0 {
 		panic("parser: PopState called on an empty state stack")
 	}
 	state := l.stack[last]
 	// Clear the vacated slot, so the backing array does not keep the
 	// popped function (and whatever it captured) reachable.
 	l.stack[last] = nil
 	l.stack = l.stack[:last]
 	return state
 }
 // AtEndOfFile reports whether the read position is at (or beyond) the
 // end of the input, meaning that no more data is available.
 func (l *Parser) AtEndOfFile() bool {
 	remaining := l.len - l.pos
 	return remaining <= 0
 }
 // Emit sends an item of type t with value s to the client, over the
 // items channel. After sending, the string buffer is reset.
 func (l *Parser) Emit(t ItemType, s string) {
 	item := Item{Type: t, Value: s}
 	l.items <- item
 	l.buffer.Reset()
 }
 // EmitLiteral emits an item of type t to the client. The item value is
 // the accumulated string buffer data, taken as a literal string.
 func (l *Parser) EmitLiteral(t ItemType) {
 	literal := l.buffer.AsLiteralString()
 	l.Emit(t, literal)
 }
 // EmitLiteralTrim emits an item of type t to the client. The item value
 // is the accumulated string buffer data, taken as a literal string from
 // which the leading and trailing whitespace is trimmed.
 func (l *Parser) EmitLiteralTrim(t ItemType) {
 	literal := l.buffer.AsLiteralString()
 	l.Emit(t, strings.TrimSpace(literal))
 }
 // EmitInterpreted emits an item of type t to the client. The item value
 // is the accumulated string buffer data, interpreted as a Go double
 // quoted string (handling escape codes like \n, \t, \uXXXX, etc.)
 // When the string buffer holds data that is not valid for string
 // interpretation, no item is emitted and the error is returned.
 func (l *Parser) EmitInterpreted(t ItemType) error {
 	interpreted, err := l.buffer.AsInterpretedString()
 	if err == nil {
 		l.Emit(t, interpreted)
 	}
 	return err
 }
 // EmitError formats an error message (fmt.Sprintf style) and emits it
 // to the client as an ItemError item. It always returns a nil state
 // function, so state functions can use "return l.EmitError(...)".
 func (l *Parser) EmitError(format string, args ...interface{}) StateFn {
 	l.Emit(ItemError, fmt.Sprintf(format, args...))
 	return nil
 }
 // Match peeks at one upcoming rune per provided pattern, and checks if
 // every peeked rune occurs in the pattern string at the same position.
 // It returns the peeked runes, their total width in bytes and a boolean
 // that is true only when all patterns were satisfied. When not enough
 // runes could be peeked, the boolean is false and the runes and width
 // are those that could be peeked.
 func (l *Parser) Match(patterns ...string) ([]rune, int, bool) {
 	peeked, width, ok := l.peekMulti(len(patterns))
 	if !ok {
 		return peeked, width, false
 	}
 	for i := range patterns {
 		if !strings.ContainsRune(patterns[i], peeked[i]) {
 			return peeked, width, false
 		}
 	}
 	return peeked, width, true
 }
 // Upcoming reports whether the upcoming runes satisfy all provided
 // patterns. It works like Match, but only returns the boolean result.
 func (l *Parser) Upcoming(patterns ...string) bool {
 	_, _, satisfied := l.Match(patterns...)
 	return satisfied
 }
 // AcceptAny reads the next rune from the input and adds it to the
 // string buffer. It returns false when no rune could be read (end of
 // file or invalid UTF8 data). In that case, next() has emitted an
 // error to the client.
 func (l *Parser) AcceptAny() bool {
 	r, ok := l.next()
 	if !ok {
 		return false
 	}
 	l.buffer.WriteRune(r)
 	return true
 }
 // AcceptMatching moves forward in the input when the upcoming runes
 // satisfy all provided patterns, adding the matched runes to the
 // string buffer. It returns true when runes were added, false otherwise.
 func (l *Parser) AcceptMatching(patterns ...string) bool {
 	store := func(r rune) { l.buffer.WriteRune(r) }
 	return l.progress(store, patterns...)
 }
 // AcceptConsecutive keeps adding runes from the input to the string
 // buffer, for as long as the upcoming rune exists in the pattern.
 // It returns true when at least one rune was added, false otherwise.
 func (l *Parser) AcceptConsecutive(pattern string) bool {
 	count := 0
 	for l.AcceptMatching(pattern) {
 		count++
 	}
 	return count > 0
 }
 // SkipMatching moves forward in the input when the upcoming runes
 // satisfy all provided patterns, without storing the matched runes.
 // It returns true when runes were skipped, false otherwise.
 func (l *Parser) SkipMatching(patterns ...string) bool {
 	// Skipping is progressing with a callback that ignores the runes.
 	discard := func(rune) {}
 	return l.progress(discard, patterns...)
 }
 // SkipConsecutive keeps skipping runes from the input, for as long as
 // the upcoming rune exists in the pattern.
 // It returns true when at least one rune was skipped, false otherwise.
 func (l *Parser) SkipConsecutive(pattern string) bool {
 	count := 0
 	for l.SkipMatching(pattern) {
 		count++
 	}
 	return count > 0
 }
 // ============================================================================
 // EMIT DATA AND ERRORS
 // ============================================================================
 // UnexpectedInputError is used by a parser implementation to emit an
 // error item that tells the client that an unexpected rune was
 // encountered in the input.
 // The parameter 'expected' is used to provide some context to the error.
 // It always returns a nil state function.
 func (l *Parser) UnexpectedInputError(expected string) StateFn {
 	r, ok := l.next()
 	if !ok {
 		// next() has already emitted an error item for this case.
 		return nil
 	}
 	// The format and its arguments are handed to EmitError as-is.
 	// Formatting the message first and passing the result as the format
 	// would garble a '%' in the offending rune or in 'expected'.
 	return l.EmitError("unexpected character %q (expected %s)", r, expected)
 }
 // UnexpectedEndOfFile is used by a parser implementation to emit an
 // error item, telling the client that the input ended while more data
 // was expected.
 // The parameter 'expected' is used to provide some context to the error.
 func (l *Parser) UnexpectedEndOfFile(expected string) StateFn {
 	const format = "Unexpected end of file (expected %s)"
 	return l.EmitError(format, expected)
 }
 // ============================================================================
 // LEXER : our lexer is quite low level, it only returns UTF8 runes
 // ============================================================================
 // peek returns but does not advance to the next rune in the input.
 // Returns the rune, its width in bytes and a boolean. The boolean will be
 // false in case no upcoming rune can be peeked (end of data or invalid
 // UTF8 character).
 func (l *Parser) peek() (rune, int, bool) {
 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 	// DecodeRuneInString signals failure with utf8.RuneError and a width
 	// of 0 (no input left) or 1 (invalid encoding). A larger width means
 	// that the input holds a correctly encoded U+FFFD, which is valid.
 	ok := r != utf8.RuneError || w > 1
 	return r, w, ok
 }
 // peekMulti takes a peek at multiple upcoming runes in the input.
 // Returns a slice of runes, their total width in bytes and a boolean.
 // The boolean will be false in case less runes can be peeked than
 // the requested amount (end of data or invalid UTF8 character). In that
 // case, the runes and width cover what could be peeked successfully.
 func (l *Parser) peekMulti(amount int) ([]rune, int, bool) {
 	width := 0
 	var peeked []rune
 	for i := 0; i < amount; i++ {
 		r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
 		// Failure is utf8.RuneError with a width of 0 (no input left)
 		// or 1 (invalid encoding). A correctly encoded U+FFFD has a
 		// larger width and is a valid rune.
 		if r == utf8.RuneError && w <= 1 {
 			return peeked, width, false
 		}
 		width += w
 		peeked = append(peeked, r)
 	}
 	return peeked, width, true
 }
 // progress moves forward in the input, but only when the upcoming
 // runes satisfy all provided patterns (one rune per pattern).
 // When they do, the read position is moved past the matched runes and
 // for every matched rune, first the callback is called and then the
 // cursor is advanced. The callback decides what is done with the runes.
 // Returns true when the patterns were satisfied, false otherwise (in
 // which case nothing is changed).
 func (l *Parser) progress(callback func(rune), patterns ...string) bool {
 	matched, width, ok := l.Match(patterns...)
 	if !ok {
 		return false
 	}
 	l.pos += width
 	for i := range matched {
 		callback(matched[i])
 		l.advanceCursor(matched[i])
 	}
 	return true
 }
 // next reads the next rune from the input. On success, the read
 // position and the cursor are moved past the rune, and the rune is
 // returned together with true.
 // When the end of input is reached, or an invalid UTF8 character is
 // read, then false is returned. Both are considered error cases,
 // and for that reason an error is emitted to the client for them.
 func (l *Parser) next() (rune, bool) {
 	r, w, ok := l.peek()
 	switch {
 	case ok:
 		l.pos += w
 		l.advanceCursor(r)
 		return r, true
 	case r == utf8.RuneError && w == 0:
 		l.EmitError("unexpected end of file")
 	default:
 		l.EmitError("invalid UTF8 character")
 	}
 	return r, false
 }
 // advanceCursor updates the row and column bookkeeping for a rune that
 // was read from the input, so errors can be reported with a position.
 // When the previously read rune was a newline, the cursor moves to the
 // next row and the column is reset to 0. Otherwise, the column is
 // incremented.
 func (l *Parser) advanceCursor(r rune) {
 	switch {
 	case l.newline:
 		l.cursorRow++
 		l.cursorColumn = 0
 	default:
 		l.cursorColumn++
 	}
 	// Remember a newline, so the next rune starts the new row.
 	l.newline = (r == '\n')
 }
--- a/parser/stringbuf.go
+++ b/parser/stringbuf.go
@ -0,0 +1,62 @@
 package parser
 import (
 	"bytes"
 	"strconv"
 	"strings"
 )
 // StringBuffer accumulates runes that the parser reads from the input.
 // The accumulated runes can be turned into a string afterwards, either
 // verbatim (literal) or with escape sequences processed (interpreted).
 type StringBuffer struct {
 	buffer bytes.Buffer
 }
 // Reset empties the string buffer, so a new string can be built.
 // The string buffer is returned to allow for chaining method calls.
 func (b *StringBuffer) Reset() *StringBuffer {
 	b.buffer.Reset()
 	return b
 }
 // WriteString appends the input string to the string buffer, rune by rune.
 // The string buffer is returned to allow for chaining method calls.
 func (b *StringBuffer) WriteString(s string) *StringBuffer {
 	for _, r := range s {
 		b.buffer.WriteRune(r)
 	}
 	return b
 }
 // WriteRune appends a single rune to the string buffer.
 // The string buffer is returned to allow for chaining method calls.
 func (b *StringBuffer) WriteRune(r rune) *StringBuffer {
 	b.buffer.WriteRune(r)
 	return b
 }
 // AsLiteralString returns the contents of the string buffer as-is.
 // No escape sequences are processed. The buffer is left untouched.
 func (b *StringBuffer) AsLiteralString() string {
 	return b.buffer.String()
 }
 // AsInterpretedString returns the contents of the string buffer with its
 // escape sequences processed in the way that Go would process them inside
 // a double quoted string. For example "\n", "\t", "\uXXXX" and
 // "\UXXXXXXXX" are translated into the characters that they represent.
 // The buffer might hold invalid escape sequences, and for that reason an
 // error is returned as well. When the error is set, the returned string
 // holds the part of the buffer that could be interpreted successfully.
 // The buffer is left untouched.
 func (b *StringBuffer) AsInterpretedString() (string, error) {
 	var out strings.Builder
 	rest := b.buffer.String()
 	for rest != "" {
 		r, _, remaining, err := strconv.UnquoteChar(rest, '"')
 		if err != nil {
 			return out.String(), err
 		}
 		out.WriteRune(r)
 		rest = remaining
 	}
 	return out.String(), nil
 }
--- a/parser/stringbuf_test.go
+++ b/parser/stringbuf_test.go
@ -0,0 +1,90 @@
 package parser_test
 import (
 	"testing"
 	"github.com/mmakaay/toml/parser"
 )
 // TestGeneratingStringDoesNotResetBuffer checks that generating a string
 // from the buffer leaves the buffer contents intact, so a second string
 // can be generated from the same contents.
 func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
 	var b parser.StringBuffer
 	s1, err := b.WriteString(`hi\nthere`).AsInterpretedString()
 	if err != nil {
 		t.Fatalf("Unexpected error for try 1: %s", err)
 	}
 	s2 := b.AsLiteralString()
 	// Report the actual expectation in the failure message, instead of
 	// a fixed placeholder value.
 	if expected := "hi\nthere"; s1 != expected {
 		t.Fatalf("Did not get expected string %q for try 1, but %q", expected, s1)
 	}
 	if expected := "hi\\nthere"; s2 != expected {
 		t.Fatalf("Did not get expected string %q for try 2, but %q", expected, s2)
 	}
 }
 // TestResetResetsBuffer checks that no buffered data is left behind
 // after resetting the buffer.
 func TestResetResetsBuffer(t *testing.T) {
 	var b parser.StringBuffer
 	b.WriteRune('X')
 	b.Reset()
 	if got := b.AsLiteralString(); got != "" {
 		t.Fatalf("Did not get expected empty string, but %q", got)
 	}
 }
 func TestAsLiteralString(t *testing.T) {
 	b := parser.StringBuffer{}
 	for _, c := range []stringbufT{
 		{"empty string", ``, ``, OK},
 		{"simple string", `Simple string!`, `Simple string!`, OK},
 		{"single quote", `'`, `'`, OK},
 		{"double quote", `"`, `"`, OK},
 		{"escaped single quote", `\'`, `\'`, OK},
 		{"escaped double quote", `\"`, `\"`, OK},
 		{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
 		{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
 		{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
 	} {
 		s := b.Reset().WriteString(c.in).AsLiteralString()
 		if s != c.out {
 			t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
 		}
 	}
 }
 func TestAsInterpretedString(t *testing.T) {
 	b := parser.StringBuffer{}
 	for _, c := range []stringbufT{
 		{"empty string", "", "", OK},
 		{"one character", "Simple string!", "Simple string!", OK},
 		{"escaped single quote", `\'`, "", FAIL},
 		{"escaped double quote", `\"`, `"`, OK},
 		{"bare single quote", `'`, "'", OK},
 		{"string in single quotes", `'Hello'`, `'Hello'`, OK},
 		{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
 		{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
 		{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
 		{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
 		{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
 		{"example from spec",
 			`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
 			"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
 	} {
 		s, err := b.Reset().WriteString(c.in).AsInterpretedString()
 		if c.isSuccessCase && err != nil {
 			t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
 		}
 		if !c.isSuccessCase && err == nil {
 			t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
 		}
 		if s != c.out && c.isSuccessCase {
 			t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
 		}
 	}
 }
 // stringbufT describes a single test case for the table-driven
 // StringBuffer tests. The test tables fill it using positional struct
 // literals, so the order of the fields must not be changed.
 type stringbufT struct {
 	name          string // name of the test case, used in failure messages
 	in            string // the input that is written to the string buffer
 	out           string // the string that is expected as the output
 	isSuccessCase bool   // whether generating the string must succeed (OK) or fail (FAIL)
 }
 // OK and FAIL are readable aliases for the boolean values that are used
 // for the isSuccessCase field of stringbufT.
 const (
 	OK   bool = true
 	FAIL bool = false
 )
--- a/parser/types.go
+++ b/parser/types.go
@ -0,0 +1,51 @@
 package parser
 // Parser holds the internal state of a parser run: the input that is
 // being scanned, the position of the cursor in that input, the state
 // function that must be executed next and the results that were
 // produced so far.
 type Parser struct {
 	state        StateFn      // a function that handles the current state
 	stack        []StateFn    // state function stack, for nested parsing
 	input        string       // the scanned input
 	len          int          // the total length of the input in bytes
 	pos          int          // current byte scanning position in the input
 	newline      bool         // true when the last scanned rune was a newline
 	cursorRow    int          // current row number in the input
 	cursorColumn int          // current column position in the input
 	buffer       StringBuffer // an efficient buffer, used to build string values
 	items        chan Item    // channel of resulting Parser items, read by Next()
 	item         Item         // the current item as reached by Next() and retrieved by Get()
 	err          *Error       // an error when lexing failed, set by Next() and retrieved by Error()
 }
 // StateFn represents the state of the parser as a function
 // that returns the next state. The function is provided with the
 // Parser, giving it access to the input and the parser state.
 type StateFn func(*Parser) StateFn
 // ItemType identifies the kind of an Item that is produced by the parser.
 type ItemType int
 // Built-in parser item types. These use negative values.
 const (
 	// ItemEOF is used for flagging that the end of the input was reached.
 	ItemEOF ItemType = -1
 	// ItemError is used for flagging that an error has occurred
 	// during parsing.
 	ItemError ItemType = -2
 )
 // Item represents an item returned from the parser: the type of the item,
 // accompanied by its string value.
 type Item struct {
 	Type  ItemType
 	Value string
 }
 // Error is the error type that is used for reporting parsing errors.
 // Besides the error message, it carries the row and column position
 // to allow for useful error messages to the user.
 type Error struct {
 	Message string
 	Row     int
 	Column  int
 }
 // Error returns the error message, without the row and column position.
 // It makes *Error satisfy the built-in error interface.
 func (e *Error) Error() string {
 	return e.Message
 }
--- a/parser/user_api.go
+++ b/parser/user_api.go
@ -0,0 +1,39 @@
 package parser
 // Next retrieves the next parsed item.
 // When a valid item was found, then the boolean return parameter will be true.
 // On error or when successfully reaching the end of the input, false is returned.
 // When an error occurred, it will be set in the error return value, nil otherwise.
 //
 // As long as no item is available, the current state function is executed
 // to make the parser produce more items. When there is no state function
 // left to execute (for example when Next is called again after the parser
 // has stopped), the terminal result is returned: the stored error when
 // an error was seen before, an ItemEOF item otherwise.
 func (l *Parser) Next() (Item, *Error, bool) {
 	for {
 		select {
 		case i := <-l.items:
 			switch i.Type {
 			case ItemEOF:
 				return i, nil, false
 			case ItemError:
 				l.err = &Error{Message: i.Value, Row: l.cursorRow, Column: l.cursorColumn}
 				return i, l.err, false
 			default:
 				l.item = i
 				return i, nil, true
 			}
 		default:
 			// No item is waiting. Calling a nil state function would
 			// panic, so report the terminal result in that case.
 			if l.state == nil {
 				if l.err != nil {
 					return Item{Type: ItemError, Value: l.err.Message}, l.err, false
 				}
 				return Item{Type: ItemEOF}, nil, false
 			}
 			l.state = l.state(l)
 		}
 	}
 }
 // ToArray collects all Parser items into an array (mainly intended for
 // testing purposes). Items are retrieved until Next() reports that no
 // more items are available. When an error occurs during scanning, the
 // items that were collected so far are returned, accompanied by the
 // error that occurred.
 func (l *Parser) ToArray() ([]Item, *Error) {
 	var collected []Item
 	for {
 		item, err, ok := l.Next()
 		if ok {
 			collected = append(collected, item)
 			continue
 		}
 		return collected, err
 	}
 }