Ahhhh, found a name that clicks for the more general layer of the parser code: parsekit. It is short and tells me what it is: not a parser, but something to build parsers with. Now I can also name the actual parsing code the way I would like to, namely 'toml/parser'. So it feels like the structure is settling down.

2019-05-17 22:03:10 +00:00 · 2019-05-17 22:03:10 +00:00 · 666cff3af3
parent 3f638c59cd
commit 666cff3af3
23 changed files with 408 additions and 394 deletions
--- a/4
+++ b/4
@ -1,3 +1,3 @@
 test:
-	cd parser && go test
-	cd lexer && go test
+	@cd parsekit && go test
+	@cd parser && go test
--- a/lexer/syn_eof.go
+++ b/lexer/syn_eof.go
@ -1,12 +0,0 @@
-package lexer
-
-import "github.com/mmakaay/toml/parser"
-
-func stateEndOfFile(l *parser.Parser) parser.StateFn {
-	if l.AtEndOfFile() {
-		l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser?
-	} else {
-		l.UnexpectedInputError("end of file")
-	}
-	return nil
-}
--- a/lexer/syn_value.go
+++ b/lexer/syn_value.go
@ -1,13 +0,0 @@
-package lexer
-
-import "github.com/mmakaay/toml/parser"
-
-// Values must be of the following types: String, Integer, Float, Boolean,
-// Datetime, Array, or Inline Table. Unspecified values are invalid.
-func stateValue(l *parser.Parser) parser.StateFn {
-	l.SkipConsecutive(whitespace)
-	if l.Upcoming(quoteChars) {
-		return stateStringValue
-	}
-	return l.UnexpectedInputError("a value")
-}
--- a/parsekit/emitting.go
+++ b/parsekit/emitting.go
@ -0,0 +1,67 @@
+package parsekit
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Emit passes a Parser item to the client, including the provided string.
+func (p *P) Emit(t ItemType, s string) {
+	p.items <- Item{t, s}
+	p.buffer.reset()
+}
+
+// EmitLiteral passes a Parser item to the client, including the accumulated
+// string buffer data as a literal string.
+func (p *P) EmitLiteral(t ItemType) {
+	p.Emit(t, p.buffer.asLiteralString())
+}
+
+// EmitLiteralTrim passes a Parser item to the client, including the
+// accumulated string buffer data as a literal string with whitespace
+// trimmed from it.
+func (p *P) EmitLiteralTrim(t ItemType) {
+	p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
+}
+
+// EmitInterpreted passes a Parser item to the client, including the
+// accumulated string buffer data as a Go double-quoted interpreted string
+// (handling escape codes like \n, \t, \uXXXX, etc.)
+// This method might return an error, in case there is data in the
+// string buffer that is not valid for string interpretation.
+func (p *P) EmitInterpreted(t ItemType) error {
+	s, err := p.buffer.asInterpretedString()
+	if err != nil {
+		return err
+	}
+	p.Emit(t, s)
+	return nil
+}
+
+// EmitError emits a Parser error item to the client.
+func (p *P) EmitError(format string, args ...interface{}) StateFn {
+	message := fmt.Sprintf(format, args...)
+	p.Emit(ItemError, message)
+	return nil
+}
+
+// UnexpectedInput is used by a parser implementation to emit an
+// error item that tells the client that an unexpected rune was
+// encountered in the input.
+// The parameter 'expected' is used to provide some context to the error.
+func (p *P) UnexpectedInput(expected string) StateFn {
+	// next() emits its own error when ok == false. For the ok case, the
+	// verbs go to EmitError directly so a '%' in the data is not re-parsed.
+	if r, ok := p.next(); ok {
+		return p.EmitError("unexpected character %q (expected %s)", r, expected)
+	}
+	return nil
+}
+
+// UnexpectedEndOfFile is used by a parser implementation to emit an
+// error item that tells the client that more data was expected from
+// the input.
+// The parameter 'expected' is used to provide some context to the error.
+func (p *P) UnexpectedEndOfFile(expected string) StateFn {
+	return p.EmitError("Unexpected end of file (expected %s)", expected)
+}
--- a/parsekit/internals.go
+++ b/parsekit/internals.go
@ -0,0 +1,88 @@
+package parsekit
+
+import (
+	"unicode/utf8"
+)
+
+// next returns the next rune from the input and a boolean indicating if
+// reading the input was successful.
+// When the end of input is reached, or an invalid UTF8 character is
+// read, then false is returned. Both are considered error cases,
+// and for that reason these automatically emit an error to the client.
+func (p *P) next() (rune, bool) {
+	r, w, ok := p.peek()
+	if ok {
+		p.advanceCursor(r, w)
+		return r, true
+	}
+	if r == utf8.RuneError && w == 0 {
+		p.EmitError("unexpected end of file")
+	} else {
+		p.EmitError("invalid UTF8 character")
+	}
+	return r, false
+}
+
+// peek returns the upcoming rune without advancing the cursor.
+// Returns the rune, its width in bytes and a boolean. The boolean is false
+// when no rune can be peeked: end of data (width 0) or invalid UTF8 (width 1).
+// A correctly encoded U+FFFD in the input (width 3) is a valid rune.
+func (p *P) peek() (rune, int, bool) {
+	peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
+	return peeked, width, peeked != utf8.RuneError || width > 1
+}
+
+// peekMulti takes a peek at multiple upcoming runes in the input.
+// Returns a slice of runes, a slice of their respective widths in bytes
+// and a boolean. The boolean is false when fewer runes can be peeked than
+// requested: end of data (width 0) or invalid UTF8 (width 1). A correctly
+// encoded U+FFFD (width 3) counts as a valid rune.
+func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
+	var runes []rune
+	var widths []int
+	offset := 0
+	for i := 0; i < amount; i++ {
+		r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
+		switch {
+		case r == utf8.RuneError && w <= 1:
+			return runes, widths, false
+		default:
+			offset += w
+			runes = append(runes, r)
+			widths = append(widths, w)
+		}
+	}
+	return runes, widths, true
+}
+
+// progress moves the cursor forward in the input, returning one rune
+// for every specified pattern. The cursor will only be moved forward when
+// all requested patterns can be satisfied.
+// Returns true when all patterns were satisfied and the cursor was
+// moved forward, false otherwise.
+// A callback function can be provided to specify what to do with
+// the runes that are encountered in the input.
+func (p *P) progress(callback func(rune), patterns ...string) bool {
+	if runes, widths, ok := p.Match(patterns...); ok {
+		for i, r := range runes {
+			callback(r)
+			p.advanceCursor(r, widths[i])
+		}
+		return true
+	}
+	return false
+}
+
+// advanceCursor moves the input position forward by w bytes (the width of
+// rune r) and updates the row + column used for error reporting. The row
+// only increments on the rune that follows a '\n', via the newline flag.
+func (p *P) advanceCursor(r rune, w int) {
+	p.pos += w
+	if p.newline {
+		p.cursorColumn = 0
+		p.cursorRow++
+	} else {
+		p.cursorColumn++
+	}
+	p.newline = r == '\n'
+}
--- a/parsekit/matching.go
+++ b/parsekit/matching.go
@ -0,0 +1,120 @@
+package parsekit
+
+import (
+	"strings"
+)
+
+// AtEndOfFile returns true when there is no more data available in the input.
+func (p *P) AtEndOfFile() bool {
+	return p.pos >= p.len
+}
+
+// AtEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. The cursor is not moved to a new position
+// by this method.
+func (p *P) AtEndOfLine() bool {
+	return p.AtEndOfFile() ||
+		p.Upcoming("\r", "\n") ||
+		p.Upcoming("\n")
+}
+
+// SkipEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. Additionally, when not at the end of the file,
+// the cursor is moved forward to beyond the newline.
+func (p *P) SkipEndOfLine() bool {
+	return p.AtEndOfFile() ||
+		p.SkipMatching("\r", "\n") ||
+		p.SkipMatching("\n")
+}
+
+// AcceptEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. When not at the end of the file, a normalized
+// newline (only a '\n' character, even with '\r\n' on the input)
+// is added to the string buffer.
+func (p *P) AcceptEndOfLine() bool {
+	if p.AtEndOfFile() {
+		return true
+	}
+	if p.SkipEndOfLine() {
+		p.buffer.writeRune('\n')
+		return true
+	}
+	return false
+}
+
+// Match checks if the upcoming runes satisfy all provided patterns.
+// It returns a slice of runes that were found, a slice containing
+// their respective byte widths, and a boolean indicating whether
+// or not all provided patterns were satisfied by the input data.
+func (p *P) Match(patterns ...string) ([]rune, []int, bool) {
+	peeked, widths, ok := p.peekMulti(len(patterns))
+	if ok {
+		for i, r := range patterns {
+			if strings.IndexRune(r, peeked[i]) < 0 {
+				return peeked, widths, false
+			}
+		}
+		return peeked, widths, true
+	}
+	return peeked, widths, false
+}
+
+// Upcoming checks if the upcoming runes satisfy all provided patterns.
+// Returns true if all provided patterns are satisfied.
+// This is basically the same as the Match method, but with only
+// the boolean return parameter for programmer convenience.
+func (p *P) Upcoming(patterns ...string) bool {
+	_, _, ok := p.Match(patterns...)
+	return ok
+}
+
+// AcceptAny adds the next rune from the input to the string buffer.
+// If no rune could be read (end of file or invalid UTF8 data),
+// then false is returned.
+func (p *P) AcceptAny() bool {
+	if r, ok := p.next(); ok {
+		p.buffer.writeRune(r)
+		return true
+	}
+	return false
+}
+
+// AcceptMatching adds the next runes to the string buffer, but only
+// if the upcoming runes satisfy the provided patterns.
+// When runes were added then true is returned, false otherwise.
+func (p *P) AcceptMatching(patterns ...string) bool {
+	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
+}
+
+// AcceptConsecutive adds consecutive runes from the input to the string
+// buffer, as long as they exist in the pattern.
+// If any runes were added then true is returned, false otherwise.
+func (p *P) AcceptConsecutive(pattern string) bool {
+	accepted := false
+	for p.AcceptMatching(pattern) {
+		accepted = true
+	}
+	return accepted
+}
+
+// SkipMatching skips runes, but only when all provided patterns are satisfied.
+// Returns true when one or more runes were skipped.
+func (p *P) SkipMatching(patterns ...string) bool {
+	if runes, widths, ok := p.Match(patterns...); ok {
+		for i, r := range runes {
+			p.advanceCursor(r, widths[i])
+		}
+		return true
+	}
+	return false
+}
+
+// SkipConsecutive skips consecutive runes from the provided pattern.
+// Returns true when one or more runes were skipped.
+func (p *P) SkipConsecutive(pattern string) bool {
+	didSkip := false
+	for p.SkipMatching(pattern) {
+		didSkip = true
+	}
+	return didSkip
+}
--- a/parsekit/parsekit.go
+++ b/parsekit/parsekit.go
@ -1,25 +1,36 @@
-package parser
+package parsekit
+
+// New takes an input string and a start state,
+// and initializes the parser for it.
+func New(input string, startState StateFn) *P {
+	return &P{
+		input: input,
+		len:   len(input),
+		state: startState,
+		items: make(chan Item, 2),
+	}
+}

 // Next retrieves the next parsed item.
 // When a valid item was found, then the boolean return parameter will be true.
 // On error or when successfully reaching the end of the input, false is returned.
 // When an error occurred, it will be set in the error return value, nil otherwise.
-func (l *Parser) Next() (Item, *Error, bool) {
+func (p *P) Next() (Item, *Error, bool) {
 	for {
 		select {
-		case i := <-l.items:
+		case i := <-p.items:
 			switch {
 			case i.Type == ItemEOF:
 				return i, nil, false
 			case i.Type == ItemError:
-				l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
-				return i, l.err, false
+				p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
+				return i, p.err, false
 			default:
-				l.item = i
+				p.item = i
 				return i, nil, true
 			}
 		default:
-			l.state = l.state(l)
+			p.state = p.state(p)
 		}
 	}
 }
@ -27,10 +38,10 @@ func (l *Parser) Next() (Item, *Error, bool) {
 // ToArray returns Parser items as an array (mainly intended for testing purposes)
 // When an error occurs during scanning, a partial result will be
 // returned, accompanied by the error that occurred.
-func (l *Parser) ToArray() ([]Item, *Error) {
+func (p *P) ToArray() ([]Item, *Error) {
 	var items []Item
 	for {
-		item, err, more := l.Next()
+		item, err, more := p.Next()
 		if !more {
 			return items, err
 		}
--- a/parsekit/staterouting.go
+++ b/parsekit/staterouting.go
@ -1,6 +1,6 @@
-package parser
+package parsekit

-func (p *Parser) QueueStates(states ...StateFn) StateFn {
+func (p *P) QueueStates(states ...StateFn) StateFn {
 	first, followup := states[0], states[1:]
 	for reverse := range followup {
 		p.PushState(followup[len(followup)-reverse-1])
@ -8,24 +8,24 @@ func (p *Parser) QueueStates(states ...StateFn) StateFn {
 	return first
 }

-func (p *Parser) ToChildState(state StateFn) StateFn {
+func (p *P) ToChildState(state StateFn) StateFn {
 	p.PushState(p.state)
 	return state
 }

-func (p *Parser) ToParentState() StateFn {
+func (p *P) ToParentState() StateFn {
 	state := p.PopState()
 	return state
 }

 // PushState adds the state function to the state stack.
 // This is used for implementing nested parsing.
-func (p *Parser) PushState(state StateFn) {
+func (p *P) PushState(state StateFn) {
 	p.stack = append(p.stack, state)
 }

 // PopState pops the last pushed state from the state stack.
-func (p *Parser) PopState() StateFn {
+func (p *P) PopState() StateFn {
 	last := len(p.stack) - 1
 	head, tail := p.stack[:last], p.stack[last]
 	p.stack = head
--- a/parsekit/stringbuf.go
+++ b/parsekit/stringbuf.go
@ -1,4 +1,4 @@
-package parser
+package parsekit

 import (
 	"bytes"
--- a/parsekit/stringbuf_test.go
+++ b/parsekit/stringbuf_test.go
@ -1,4 +1,4 @@
-package parser
+package parsekit

 import (
 	"testing"
--- a/parsekit/types.go
+++ b/parsekit/types.go
@ -1,7 +1,7 @@
-package parser
+package parsekit

-// Parser holds the internal state of the Parser.
-type Parser struct {
+// P holds the internal state of the parser.
+type P struct {
 	state        StateFn      // a function that handles the current state
 	stack        []StateFn    // state function stack, for nested parsing
 	input        string       // the scanned input
@ -18,7 +18,7 @@ type Parser struct {

 // StateFn represents the state of the parser as a function
 // that returns the next state.
-type StateFn func(*Parser) StateFn
+type StateFn func(*P) StateFn

 // ItemType represents the type of a parser Item.
 type ItemType int
--- a/parser/definitions.go
+++ b/parser/definitions.go
@ -1,10 +1,10 @@
-package lexer
+package parser

-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"

-// Item types that are emitted by this parser.
+// Item types that are produced by this parser.
 const (
-	ItemComment    parser.ItemType = iota // An error occurred
+	ItemComment    parsekit.ItemType = iota // Comment string
 	ItemKey                                 // Key of a key/value pair
 	ItemKeyDot                              // Dot for a dotted key
 	ItemAssignment                          // Value assignment coming up (=)
@ -43,6 +43,6 @@ var (

 // NewParser creates a new parser, using the provided input string
 // as the data to parse.
-func NewParser(input string) *parser.Parser {
-	return parser.New(input, stateKeyValuePair)
+func NewParser(input string) *parsekit.P {
+	return parsekit.New(input, stateKeyValuePair)
 }
--- a/parser/helpers_test.go
+++ b/parser/helpers_test.go
@ -1,12 +1,12 @@
-package lexer_test
+package parser_test

 import (
 	"fmt"
 	"strings"
 	"testing"

-	"github.com/mmakaay/toml/lexer"
-	"github.com/mmakaay/toml/parser"
+	"github.com/mmakaay/toml/parsekit"
+	lexer "github.com/mmakaay/toml/parser"
 )

 type statesT struct {
@ -56,8 +56,8 @@ func runStatesT(t *testing.T, c statesT) {
 	}
 }

-// ParserItemToString returns a string representation of the parser.Item.
-func ParserItemToString(i parser.Item) string {
+// ParserItemToString returns a string representation of the parsekit.Item.
+func ParserItemToString(i parsekit.Item) string {
 	switch i.Type {
 	case lexer.ItemComment:
 		return fmt.Sprintf("#(%s)", i.Value)
@ -70,6 +70,6 @@ func ParserItemToString(i parser.Item) string {
 	case lexer.ItemAssignment:
 		return "="
 	default:
-		panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
+		panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
 	}
 }
--- a/parser/lexer_test.go
+++ b/parser/lexer_test.go
@ -1,13 +1,13 @@
-package lexer_test
+package parser_test

 import (
 	"testing"

-	"github.com/mmakaay/toml/lexer"
+	"github.com/mmakaay/toml/parser"
 )

 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
+	_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
 	if err.Row != 4 {
 		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
--- a/parser/parser.go
+++ b/parser/parser.go
@ -1,274 +0,0 @@
-package parser
-
-import (
-	"fmt"
-	"strings"
-	"unicode/utf8"
-)
-
-// New takes an input string and a start state,
-// and initializes the parser for it.
-func New(input string, startState StateFn) *Parser {
-	return &Parser{
-		input: input,
-		len:   len(input),
-		state: startState,
-		items: make(chan Item, 2),
-	}
-}
-
-// AtEndOfFile returns true when there is no more data available in the input.
-func (p *Parser) AtEndOfFile() bool {
-	return p.pos >= p.len
-}
-
-func (p *Parser) AtEndOfLine() bool {
-	return p.AtEndOfFile() ||
-		p.Upcoming("\r", "\n") ||
-		p.Upcoming("\n")
-}
-
-func (p *Parser) SkipEndOfLine() bool {
-	return p.AtEndOfFile() ||
-		p.SkipMatching("\r", "\n") ||
-		p.SkipMatching("\n")
-}
-
-func (p *Parser) AcceptEndOfLine() bool {
-	// No newline, but we're defintely at the end of the line here.
-	if p.AtEndOfFile() {
-		return true
-	}
-	// If we see some kind of end of line, then we accept a
-	// normalized newline, which is just a '\n'. This will normalize
-	// '\r\n' into '\n'.
-	if p.SkipEndOfLine() {
-		p.buffer.writeRune('\n')
-		return true
-	}
-	return false
-}
-
-// Emit passes a Parser item to the client, including the provided string.
-func (p *Parser) Emit(t ItemType, s string) {
-	p.items <- Item{t, s}
-	p.buffer.reset()
-}
-
-// EmitLiteral passes a Parser item to the client, including the accumulated
-// string buffer data as a literal string.
-func (p *Parser) EmitLiteral(t ItemType) {
-	p.Emit(t, p.buffer.asLiteralString())
-}
-
-// EmitLiteralTrim passes a Parser item to the client, including the
-// accumulated string buffer data as a literal string with whitespace
-// trimmed from it.
-func (p *Parser) EmitLiteralTrim(t ItemType) {
-	p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
-}
-
-// EmitInterpreted passes a Parser item to the client, including the
-// accumulated string buffer data a Go doubled quoted interpreted string
-// (handling escape codes like \n, \t, \uXXXX, etc.)
-// This method might return an error, in case there is data in the
-// string buffer that is not valid for string interpretation.
-func (p *Parser) EmitInterpreted(t ItemType) error {
-	s, err := p.buffer.asInterpretedString()
-	if err != nil {
-		return err
-	}
-	p.Emit(t, s)
-	return nil
-}
-
-// EmitError emits a Parser error item to the client.
-func (p *Parser) EmitError(format string, args ...interface{}) StateFn {
-	message := fmt.Sprintf(format, args...)
-	p.Emit(ItemError, message)
-	return nil
-}
-
-// Match checks if the upcoming runes satisfy all provided patterns.
-// It returns a slice of runes that were found, their total byte width
-// and a boolean indicating whether or not all provided patterns were
-// satisfied by the input data.
-func (p *Parser) Match(patterns ...string) ([]rune, int, bool) {
-	peeked, width, ok := p.peekMulti(len(patterns))
-	if ok {
-		for i, r := range patterns {
-			if strings.IndexRune(r, peeked[i]) < 0 {
-				return peeked, width, false
-			}
-		}
-		return peeked, width, true
-	}
-	return peeked, width, false
-}
-
-// Upcoming checks if the upcoming runes satisfy all provided patterns.
-// Returns true if all provided patterns are satisfied.
-func (p *Parser) Upcoming(patterns ...string) bool {
-	_, _, ok := p.Match(patterns...)
-	return ok
-}
-
-// AcceptAny adds the next rune from the input to the string buffer.
-// If no rune could be read (end of file or invalid UTF8 data),
-// then false is returned.
-func (p *Parser) AcceptAny() bool {
-	if r, ok := p.next(); ok {
-		p.buffer.writeRune(r)
-		return true
-	}
-	return false
-}
-
-// AcceptMatching adds the next runes to the string buffer, but only
-// if the upcoming runes satisfy the provided patterns.
-// When runes were added then true is returned, false otherwise.
-func (p *Parser) AcceptMatching(patterns ...string) bool {
-	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
-}
-
-// AcceptConsecutive adds consecutive runes from the input to the string
-// buffer, as long as they exist in the pattern.
-// If any runes were added then true is returned, false otherwise.
-func (p *Parser) AcceptConsecutive(pattern string) bool {
-	accepted := false
-	for p.AcceptMatching(pattern) {
-		accepted = true
-	}
-	return accepted
-}
-
-// SkipMatching skips runes, but only when all provided patterns are satisfied.
-// Returns true when one or more runes were skipped.
-func (p *Parser) SkipMatching(patterns ...string) bool {
-	if runes, w, ok := p.Match(patterns...); ok {
-		p.pos += w
-		for _, r := range runes {
-			p.advanceCursor(r)
-		}
-		return true
-	}
-	return false
-}
-
-// SkipConsecutive skips consecutive runes from the provided pattern.
-// Returns true when one or more runes were skipped.
-func (p *Parser) SkipConsecutive(pattern string) bool {
-	didSkip := false
-	for p.SkipMatching(pattern) {
-		didSkip = true
-	}
-	return didSkip
-}
-
-// ============================================================================
-// EMIT DATA AND ERRORS
-// ============================================================================
-
-// UnexpectedInputError is used by a parser implementation to emit an
-// error item that tells the client that an unexpected rune was
-// encountered in the input.
-// The parameter 'expected' is used to provide some context to the error.
-func (p *Parser) UnexpectedInputError(expected string) StateFn {
-	// next() takes care of error messages for ok == false.
-	if r, ok := p.next(); ok {
-		return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
-	}
-	return nil
-}
-
-// UnexpectedEndOfFile is used by a parser implementation to emit an
-// error item that tells the client that more data was expected from
-// the input.
-// The parameter 'expected' is used to provide some context to the error.
-func (p *Parser) UnexpectedEndOfFile(expected string) StateFn {
-	return p.EmitError("Unexpected end of file (expected %s)", expected)
-}
-
-// ============================================================================
-// LEXER : our lexer is quite low level, it only returns UTF8 runes
-// ============================================================================
-
-// peek returns but does not advance to the next rune(s) in the input.
-// Returns the rune, its width and a boolean. The boolean will be false in case
-// no upcoming rune can be peeked (end of data or invalid UTF8 character).
-func (p *Parser) peek() (rune, int, bool) {
-	peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
-	return peeked, width, peeked != utf8.RuneError
-}
-
-// peekMulti takes a peek at multiple upcoming runes in the input.
-// Returns a slice of runes, their total width in bytes and a boolean.
-// The boolean will be false in case less runes can be peeked than
-// the requested amount (end of data or invalid UTF8 character).
-func (p *Parser) peekMulti(amount int) ([]rune, int, bool) {
-	width := 0
-	var peeked []rune
-	for i := 0; i < amount; i++ {
-		r, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
-		switch {
-		case r == utf8.RuneError:
-			return peeked, width, false
-		default:
-			width += w
-			peeked = append(peeked, r)
-		}
-	}
-	return peeked, width, true
-}
-
-// progress moves the cursor forward in the input, returning one rune
-// for every specified pattern. The cursor is only moved forward when
-// all patterns are satisfied.
-// Returns true when all patterns were satisfied and the cursor was
-// moved forward, false otherwise.
-// A callback function can be provided to specify what to do with
-// the runes that are encountered in the input.
-func (p *Parser) progress(callback func(rune), patterns ...string) bool {
-	if runes, w, ok := p.Match(patterns...); ok {
-		p.pos += w
-		for _, r := range runes {
-			callback(r)
-			p.advanceCursor(r)
-		}
-		return true
-	}
-	return false
-}
-
-// next returns the next rune from the input and a boolean indicating if
-// reading the input was successful.
-// When the end of input is reached, or an invalid UTF8 character is
-// read, then false is returned. Both are considered error cases,
-// and for that reason these automatically emit an error to the client.
-func (p *Parser) next() (rune, bool) {
-	r, w, ok := p.peek()
-	if ok {
-		p.pos += w
-		p.advanceCursor(r)
-		return r, true
-	}
-	if r == utf8.RuneError && w == 0 {
-		p.EmitError("unexpected end of file")
-	} else {
-		p.EmitError("invalid UTF8 character")
-	}
-	return r, false
-}
-
-// advanceCursor advances the rune cursor one position in the
-// input data. While doing so, it keeps tracks of newlines,
-// so we can report on row + column positions on error.
-func (p *Parser) advanceCursor(r rune) {
-	if p.newline {
-		p.cursorColumn = 0
-		p.cursorRow++
-	} else {
-		p.cursorColumn++
-	}
-	p.newline = r == '\n'
-}
--- a/parser/syn_comments.go
+++ b/parser/syn_comments.go
@ -1,15 +1,17 @@
-package lexer
+package parser

-import "github.com/mmakaay/toml/parser"
+import (
+	"github.com/mmakaay/toml/parsekit"
+)

 // A '#' hash symbol marks the rest of the line as a comment.
-func stateCommentStart(p *parser.Parser) parser.StateFn {
+func stateCommentStart(p *parsekit.P) parsekit.StateFn {
 	p.SkipConsecutive(hash)
 	return stateCommentContent
 }

 // All characters up to the end of the line are included in the comment.
-func stateCommentContent(p *parser.Parser) parser.StateFn {
+func stateCommentContent(p *parsekit.P) parsekit.StateFn {
 	switch {
 	case p.AtEndOfLine():
 		p.EmitLiteralTrim(ItemComment)
--- a/parser/syn_comments_test.go
+++ b/parser/syn_comments_test.go
@ -1,4 +1,4 @@
-package lexer_test
+package parser_test

 import (
 	"testing"
--- a/parser/syn_eof.go
+++ b/parser/syn_eof.go
@ -0,0 +1,12 @@
+package parser
+
+import "github.com/mmakaay/toml/parsekit"
+
+func stateEndOfFile(p *parsekit.P) parsekit.StateFn {
+	if p.AtEndOfFile() {
+		p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
+	} else {
+		p.UnexpectedInput("end of file")
+	}
+	return nil
+}
--- a/parser/syn_key.go
+++ b/parser/syn_key.go
@ -1,15 +1,15 @@
-package lexer
+package parser

-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"

 // The primary building block of a TOML document is the key/value pair.
-func stateKeyValuePair(l *parser.Parser) parser.StateFn {
+func stateKeyValuePair(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.SkipConsecutive(whitespace + carriageReturn + newline):
+	case p.SkipConsecutive(whitespace + carriageReturn + newline):
 		return stateKeyValuePair
-	case l.Upcoming(hash):
-		return l.ToChildState(stateCommentStart)
-	case l.Upcoming(startOfKey):
+	case p.Upcoming(hash):
+		return p.ToChildState(stateCommentStart)
+	case p.Upcoming(startOfKey):
 		return stateKey
 	default:
 		return stateEndOfFile
@ -17,32 +17,32 @@ func stateKeyValuePair(l *parser.Parser) parser.StateFn {
 }

 // A key may be either bare, quoted or dotted.
-func stateKey(l *parser.Parser) parser.StateFn {
-	if l.AcceptMatching(bareKeyChars) {
+func stateKey(p *parsekit.P) parsekit.StateFn {
+	if p.AcceptMatching(bareKeyChars) {
 		return statebareKeyChars
 	}
-	return l.UnexpectedInputError("a valid key name")
+	return p.UnexpectedInput("a valid key name")
 }

 // Bare keys may only contain ASCII letters, ASCII digits,
 // underscores, and dashes (A-Za-z0-9_-). Note that bare
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
-func statebareKeyChars(l *parser.Parser) parser.StateFn {
-	l.AcceptConsecutive(bareKeyChars)
-	l.EmitLiteral(ItemKey)
+func statebareKeyChars(p *parsekit.P) parsekit.StateFn {
+	p.AcceptConsecutive(bareKeyChars)
+	p.EmitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }

 // Dotted keys are a sequence of bare or quoted keys joined with a dot.
 // This allows for grouping similar properties together:
-func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
+func stateEndOfKeyOrKeyDot(p *parsekit.P) parsekit.StateFn {
 	// Whitespace around dot-separated parts is ignored, however,
 	// best practice is to not use any extraneous whitespace.
-	l.SkipConsecutive(whitespace)
-	if l.SkipMatching(dot) {
-		l.Emit(ItemKeyDot, "")
-		l.SkipConsecutive(whitespace)
+	p.SkipConsecutive(whitespace)
+	if p.SkipMatching(dot) {
+		p.Emit(ItemKeyDot, "")
+		p.SkipConsecutive(whitespace)
 		return stateKey
 	}
 	return stateKeyAssignment
@ -52,12 +52,12 @@ func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
 // Whitespace is ignored around key names and values. The key, equals
 // sign, and value must be on the same line (though some values can
 // be broken over multiple lines).
-func stateKeyAssignment(l *parser.Parser) parser.StateFn {
-	l.SkipConsecutive(whitespace)
-	if l.SkipMatching(equal) {
-		l.Emit(ItemAssignment, "")
-		l.SkipConsecutive(whitespace)
+func stateKeyAssignment(p *parsekit.P) parsekit.StateFn {
+	p.SkipConsecutive(whitespace)
+	if p.SkipMatching(equal) {
+		p.Emit(ItemAssignment, "")
+		p.SkipConsecutive(whitespace)
 		return stateValue
 	}
-	return l.UnexpectedInputError("a value assignment")
+	return p.UnexpectedInput("a value assignment")
 }
--- a/parser/syn_key_test.go
+++ b/parser/syn_key_test.go
@ -1,4 +1,4 @@
-package lexer_test
+package parser_test

 import (
 	"testing"
--- a/parser/syn_strings.go
+++ b/parser/syn_strings.go
@ -1,19 +1,19 @@
-package lexer
+package parser

-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"

 // There are four ways to express strings: basic, multi-line basic, literal,
 // and multi-line literal. All strings must contain only valid UTF-8 characters.
 // * Multi-line basic strings are surrounded by three quotation marks on each side.
 // * Basic strings are surrounded by quotation marks.
-func stateStringValue(l *parser.Parser) parser.StateFn {
+func stateStringValue(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.SkipMatching(doubleQuote3...):
+	case p.SkipMatching(doubleQuote3...):
 		return stateMultiLineBasicString
-	case l.SkipMatching(doubleQuote):
-		return l.QueueStates(stateParseString, stateBasicStringSpecific)
+	case p.SkipMatching(doubleQuote):
+		return p.QueueStates(stateParseString, stateBasicStringSpecific)
 	}
-	return l.UnexpectedInputError("a string value")
+	return p.UnexpectedInput("a string value")
 }

 // Specific handling of input for basic strings.
@ -22,7 +22,7 @@ func stateStringValue(l *parser.Parser) parser.StateFn {
 //   "All other escape sequences [..] are reserved and, if used, TOML should
 //    produce an error.""

-func stateBasicStringSpecific(p *parser.Parser) parser.StateFn {
+func stateBasicStringSpecific(p *parsekit.P) parsekit.StateFn {
 	switch {
 	case p.SkipMatching(doubleQuote):
 		if err := p.EmitInterpreted(ItemString); err != nil {
@ -36,8 +36,8 @@ func stateBasicStringSpecific(p *parser.Parser) parser.StateFn {
 	}
 }

-func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
-	l.EmitError("Not yet implemented")
+func stateMultiLineBasicString(p *parsekit.P) parsekit.StateFn {
+	p.EmitError("Not yet implemented")
 	return nil
 }

@ -50,11 +50,11 @@ const invalidBasicStringCharacters string = "\"\\" +
 	"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
 	"\u007F"

-func stateParseString(l *parser.Parser) parser.StateFn {
+func stateParseString(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.AtEndOfFile():
-		return l.UnexpectedEndOfFile("basic string token")
-	case l.AcceptMatching(backslash, escapeChars):
+	case p.AtEndOfFile():
+		return p.UnexpectedEndOfFile("basic string token")
+	case p.AcceptMatching(backslash, escapeChars):
 		// For convenience, some popular characters have a compact escape sequence.
 		// \b         - backspace       (U+0008)
 		// \t         - tab             (U+0009)
@ -63,22 +63,22 @@ func stateParseString(l *parser.Parser) parser.StateFn {
 		// \r         - carriage return (U+000D)
 		// \"         - quote           (U+0022)
 		// \\         - backslash       (U+005C)
-	case l.AcceptMatching(shortUtf8Match...):
+	case p.AcceptMatching(shortUtf8Match...):
 		// \uXXXX     - unicode         (U+XXXX)
-	case l.AcceptMatching(longUtf8Match...):
+	case p.AcceptMatching(longUtf8Match...):
 		// \UXXXXXXXX - unicode         (U+XXXXXXXX)
-	case l.Upcoming(backslash) || l.Upcoming(doubleQuote):
+	case p.Upcoming(backslash) || p.Upcoming(doubleQuote):
 		// Returning to the parent state to have special cases handled,
 		// because there are differences between single and multi line strings.
-		return l.ToParentState()
-	case l.Upcoming(invalidBasicStringCharacters):
+		return p.ToParentState()
+	case p.Upcoming(invalidBasicStringCharacters):
 		// Any Unicode character may be used except those that must be escaped:
 		// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
-		r, _, _ := l.Match(invalidBasicStringCharacters)
-		l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
+		r, _, _ := p.Match(invalidBasicStringCharacters)
+		p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 		return nil
 	default:
-		l.AcceptAny()
+		p.AcceptAny()
 	}
 	return stateParseString
 }
--- a/parser/syn_strings_test.go
+++ b/parser/syn_strings_test.go
@ -1,4 +1,4 @@
-package lexer_test
+package parser_test

 import (
 	"fmt"
--- a/parser/syn_value.go
+++ b/parser/syn_value.go
@ -0,0 +1,13 @@
+package parser
+
+import "github.com/mmakaay/toml/parsekit"
+
+// Values must be of the following types: String, Integer, Float, Boolean,
+// Datetime, Array, or Inline Table. Unspecified values are invalid.
+func stateValue(p *parsekit.P) parsekit.StateFn {
+	p.SkipConsecutive(whitespace)
+	if p.Upcoming(quoteChars) {
+		return stateStringValue
+	}
+	return p.UnexpectedInput("a value")
+}