Phew, that was quite the update. I've now got a working implementation of a parser/combinator-like matching API, which saves us from having to specify everything in state functions. State functions are way too low-level for a lot of things. I'd rather have parser/combinator-style definitions for chunks of the input and keep the state functions for higher-level document structure parsing.

2019-05-19 23:35:03 +00:00 · 2019-05-19 23:35:03 +00:00 · e3e408dfdb
parent 55e23874f7
commit e3e408dfdb
16 changed files with 721 additions and 234 deletions
--- a/parsekit/emitting.go
+++ b/parsekit/emitting.go
@ -3,6 +3,7 @@ package parsekit
 import (
 	"fmt"
 	"strings"
 	"unicode/utf8"
 )
 // Emit passes a Parser item to the client, including the provided string.
@ -51,8 +52,16 @@ func (p *P) EmitError(format string, args ...interface{}) {
 func (p *P) UnexpectedInput(expected string) {
 	// next() takes care of error messages in cases where ok == false.
 	// Therefore, we only provide an error message for the ok case here.
-	if r, ok := p.next(); ok {
+	r, _, ok := p.peek(0)
-		p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
+	switch {
 	case ok:
 		p.EmitError("unexpected character %q (expected %s)", r, expected)
 	case r == EOF:
 		p.EmitError("unexpected end of file (expected %s)", expected)
 	case r == utf8.RuneError:
 		p.EmitError("invalid UTF8 character in input (expected %s)", expected)
 	default:
 		panic("Unhandled output from peek()")
 	}
 }
--- a/parsekit/internals.go
+++ b/parsekit/internals.go
@ -4,32 +4,13 @@ import (
 	"unicode/utf8"
 )
 // next returns the next rune from the input and a boolean indicating if
 // reading the input was successful.
 // When the end of input is reached, or an invalid UTF8 character is
 // read, then false is returned. Both are considered error cases,
 // and for that reason these automatically emit an error to the client.
 func (p *P) next() (rune, bool) {
 	r, w, ok := p.peek(0)
 	if ok {
 		p.advanceCursor(r, w)
 		return r, true
 	}
 	if r == utf8.RuneError && w == 0 {
 		p.EmitError("unexpected end of file")
 	} else {
 		p.EmitError("invalid UTF8 character")
 	}
 	return r, false
 }
 // peek returns but does not advance the cursor to the next rune(s) in the input.
 // Returns the rune, its width in bytes and a boolean.
 // The boolean will be false in case no upcoming rune can be peeked
 // (end of data or invalid UTF8 character).
 func (p *P) peek(offsetInBytes int) (rune, int, bool) {
-	peeked, width := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
+	r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
-	return peeked, width, peeked != utf8.RuneError
+	return handleRuneError(r, w)
 }
 // peekMulti takes a peek at multiple upcoming runes in the input.
@ -43,13 +24,12 @@ func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
 	offset := 0
 	for i := 0; i < amount; i++ {
 		r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
-		switch {
+		r, w, ok := handleRuneError(r, w)
-		case r == utf8.RuneError:
+		runes = append(runes, r)
 		widths = append(widths, w)
 		offset += w
 		if !ok {
 			return runes, widths, false
 		default:
 			offset += w
 			runes = append(runes, r)
 			widths = append(widths, w)
 		}
 	}
 	return runes, widths, true
@ -86,3 +66,21 @@ func (p *P) advanceCursor(r rune, w int) {
 	}
 	p.newline = r == '\n'
 }
 // handleRuneError normalizes the outcome of decoding a rune from the input.
 // The UTF8 decoder reports utf8.RuneError in three situations, which can
 // only be told apart by looking at the reported width:
 // * w == 0: end of file is reached
 // * w == 1: invalid UTF8 encoding on input
 // * w >= 2: a correctly encoded U+FFFD replacement character
 // For the first two cases, this function returns respectively the
 // package's own EOF or INVALID rune together with false, to make it easy
 // for client code to distinguish between them. Every other rune, including
 // a correctly encoded U+FFFD, is regular input and is returned unchanged
 // together with true.
 func handleRuneError(r rune, w int) (rune, int, bool) {
 	if r != utf8.RuneError {
 		return r, w, true
 	}
 	switch w {
 	case 0:
 		return EOF, 0, false
 	case 1:
 		return INVALID, 1, false
 	default:
 		// The input literally contains U+FFFD; that is not a decoding error.
 		return r, w, true
 	}
 }
--- a/parsekit/matchers.go
+++ b/parsekit/matchers.go
@ -0,0 +1,218 @@
 package parsekit
 import "unicode/utf8"
 // Not in need of it myself, but nice to have I guess:
 // - NotFollowedBy
 // - Discard
 // - Separated
 // MatchDialog is the object that is handed to a Matcher. The Matcher uses
 // it to read upcoming runes from the parser input (NextRune) and to try out
 // alternatives without committing to them (Fork and Join).
 type MatchDialog struct {
 	// p is the parser whose input is being inspected.
 	p        *P
 	// runes and widths hold the runes read so far and their sizes in bytes.
 	runes    []rune
 	widths   []int
 	// offset is the read position in bytes, relative to the parser cursor.
 	offset   int
 	// curRune and curWidth describe the rune most recently read by NextRune.
 	curRune  rune
 	curWidth int
 	// forked is true for a MatchDialog that was created using Fork.
 	forked   bool
 }
 // Fork returns a child MatchDialog that starts reading at the current
 // offset of m. The child collects its own runes and widths, which leaves m
 // untouched until the child is merged back using Join.
 func (m *MatchDialog) Fork() *MatchDialog {
 	return &MatchDialog{
 		forked: true,
 		offset: m.offset,
 		p:      m.p,
 	}
 }
 // Join merges the results of a forked MatchDialog into m: the runes and
 // widths collected by the fork are appended to those of m and m continues
 // at the offset of the fork. The fork is emptied afterwards, so it can be
 // reused for a next attempt. Join panics when fork was not created by
 // Fork; otherwise it always returns true.
 func (m *MatchDialog) Join(fork *MatchDialog) bool {
 	if fork.forked {
 		m.runes = append(m.runes, fork.runes...)
 		m.widths = append(m.widths, fork.widths...)
 		m.offset = fork.offset
 		fork.runes, fork.widths = []rune{}, []int{}
 		return true
 	}
 	panic("Cannot join a non-forked MatchDialog")
 }
 // NextRune reads the rune at the current offset of the MatchDialog and
 // moves the offset past it. The rune and its width are recorded in the
 // MatchDialog, also when reading fails.
 //
 // The returned boolean is false when no rune could be read, which is at
 // the end of the input (decoder width 0) or on an invalid UTF8 encoding
 // (decoder width 1). In both cases the returned rune is utf8.RuneError.
 // A correctly encoded U+FFFD in the input decodes to the same rune value,
 // but with a width of 3; that is regular input for which true is returned.
 //
 // After false was returned, NextRune must not be called again on the same
 // MatchDialog. Doing so results in a panic.
 func (m *MatchDialog) NextRune() (rune, bool) {
 	if m.curRune == utf8.RuneError && m.curWidth <= 1 {
 		panic("Matcher must not call NextRune() after it returned false")
 	}
 	r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
 	m.offset += w
 	m.curRune, m.curWidth = r, w
 	m.runes = append(m.runes, r)
 	m.widths = append(m.widths, w)
 	// Only the width tells a failed read apart from a literal U+FFFD.
 	failed := r == utf8.RuneError && w <= 1
 	return r, !failed
 }
 // Matcher is the interface that can be implemented to provide
 // a matching strategy for the match() function.
 // A MatchDialog is provided as input. This implements a
 // specific set of methods that a Matcher needs to retrieve data
 // from the parser and to report back results.
 // Match returns true when the input satisfies the Matcher.
 type Matcher interface {
 	Match(*MatchDialog) bool
 }
 // MatcherConstructors bundles a constructor function for each of the
 // Matcher implementations in this file. The package variable C holds
 // the populated instance.
 type MatcherConstructors struct {
 	Any        func() MatchAny
 	Rune       func(rune rune) MatchRune
 	RuneRange  func(start rune, end rune) MatchRuneRange
 	Runes      func(runes ...rune) MatchAnyOf
 	AnyOf      func(matchers ...Matcher) MatchAnyOf
 	Repeat     func(count int, matcher Matcher) MatchRepeat
 	Sequence   func(matchers ...Matcher) MatchSequence
 	ZeroOrMore func(matcher Matcher) MatchZeroOrMore
 	OneOrMore  func(matcher Matcher) MatchOneOrMore
 	Optional   func(matcher Matcher) MatchOptional
 }
 // C provides the constructor functions for all available Matchers, so
 // client code can compose them like C.Sequence(C.Rune('a'), C.Any()).
 var C = MatcherConstructors{
 	Any: func() MatchAny {
 		return MatchAny{}
 	},
 	Rune: func(r rune) MatchRune {
 		return MatchRune{match: r}
 	},
 	RuneRange: func(first rune, last rune) MatchRuneRange {
 		return MatchRuneRange{start: first, end: last}
 	},
 	// Runes is a shorthand for AnyOf with one Rune matcher per rune.
 	Runes: func(runes ...rune) MatchAnyOf {
 		alternatives := make([]Matcher, 0, len(runes))
 		for _, r := range runes {
 			alternatives = append(alternatives, MatchRune{match: r})
 		}
 		return MatchAnyOf{matcher: alternatives}
 	},
 	AnyOf: func(alternatives ...Matcher) MatchAnyOf {
 		return MatchAnyOf{matcher: alternatives}
 	},
 	Repeat: func(times int, repeated Matcher) MatchRepeat {
 		return MatchRepeat{count: times, matcher: repeated}
 	},
 	Sequence: func(steps ...Matcher) MatchSequence {
 		return MatchSequence{matchers: steps}
 	},
 	OneOrMore: func(repeated Matcher) MatchOneOrMore {
 		return MatchOneOrMore{matcher: repeated}
 	},
 	ZeroOrMore: func(repeated Matcher) MatchZeroOrMore {
 		return MatchZeroOrMore{matcher: repeated}
 	},
 	Optional: func(wrapped Matcher) MatchOptional {
 		return MatchOptional{matcher: wrapped}
 	},
 }
 // MatchAny is a Matcher that accepts any single rune that can be read
 // from the input.
 type MatchAny struct{}
 // Match reads one rune and succeeds when reading it was successful.
 func (c MatchAny) Match(m *MatchDialog) bool {
 	if _, ok := m.NextRune(); !ok {
 		return false
 	}
 	return true
 }
 // MatchRune is a Matcher that accepts one specific rune.
 type MatchRune struct {
 	match rune
 }
 // Match reads one rune and succeeds when it is the expected rune.
 func (c MatchRune) Match(m *MatchDialog) bool {
 	next, ok := m.NextRune()
 	if !ok {
 		return false
 	}
 	return next == c.match
 }
 // MatchRuneRange is a Matcher that accepts one rune from an inclusive
 // range of runes.
 type MatchRuneRange struct {
 	start rune
 	end   rune
 }
 // Match reads one rune and succeeds when start <= rune <= end.
 func (c MatchRuneRange) Match(m *MatchDialog) bool {
 	next, ok := m.NextRune()
 	if !ok || next < c.start {
 		return false
 	}
 	return next <= c.end
 }
 // MatchAnyOf is a Matcher that tries a list of alternative Matchers in
 // order and accepts the input for the first one that matches.
 type MatchAnyOf struct {
 	matcher []Matcher
 }
 // Match tries every alternative on its own fork, so a failed attempt
 // leaves no traces. The fork of the first matching alternative is joined.
 func (c MatchAnyOf) Match(m *MatchDialog) bool {
 	for i := range c.matcher {
 		attempt := m.Fork()
 		if !c.matcher[i].Match(attempt) {
 			continue
 		}
 		return m.Join(attempt)
 	}
 	return false
 }
 // MatchRepeat is a Matcher that requires another Matcher to match a
 // fixed number of times in a row.
 type MatchRepeat struct {
 	count   int
 	matcher Matcher
 }
 // Match applies the wrapped Matcher count times on a fork. The fork is
 // only joined when all repetitions matched.
 func (c MatchRepeat) Match(m *MatchDialog) bool {
 	attempt := m.Fork()
 	for remaining := c.count; remaining > 0; remaining-- {
 		if matched := c.matcher.Match(attempt); !matched {
 			return false
 		}
 	}
 	m.Join(attempt)
 	return true
 }
 // MatchSequence is a Matcher that requires a list of Matchers to match
 // one after the other.
 type MatchSequence struct {
 	matchers []Matcher
 }
 // Match applies all Matchers in order on a single fork. The fork is
 // only joined when every Matcher in the sequence matched.
 func (c MatchSequence) Match(m *MatchDialog) bool {
 	attempt := m.Fork()
 	for i := 0; i < len(c.matchers); i++ {
 		if c.matchers[i].Match(attempt) {
 			continue
 		}
 		return false
 	}
 	m.Join(attempt)
 	return true
 }
 // MatchOneOrMore is a Matcher that requires another Matcher to match at
 // least once, and that accepts as many consecutive matches as possible.
 type MatchOneOrMore struct {
 	matcher Matcher
 }
 // Match applies the wrapped Matcher repeatedly, joining the input of
 // every successful round into m. It returns true when at least one round
 // matched.
 //
 // The result is tracked per call and does not depend on runes that m
 // collected before this Matcher was invoked (e.g. by an earlier Matcher
 // in a sequence). Repetition stops when the wrapped Matcher succeeds
 // without consuming input, since repeating it could never end.
 func (c MatchOneOrMore) Match(m *MatchDialog) bool {
 	mc := m.Fork()
 	matched := false
 	for c.matcher.Match(mc) {
 		stalled := mc.offset == m.offset
 		m.Join(mc)
 		matched = true
 		if stalled {
 			break
 		}
 	}
 	return matched
 }
 // MatchZeroOrMore is a Matcher that accepts as many consecutive matches
 // of another Matcher as possible, including none at all.
 type MatchZeroOrMore struct {
 	matcher Matcher
 }
 // Match applies the wrapped Matcher repeatedly, joining the input of
 // every successful round into m. It always returns true.
 //
 // Repetition stops when the wrapped Matcher succeeds without consuming
 // input (e.g. an Optional that did not match), since repeating it at the
 // same position could never end.
 func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
 	mc := m.Fork()
 	for c.matcher.Match(mc) {
 		stalled := mc.offset == m.offset
 		m.Join(mc)
 		if stalled {
 			break
 		}
 	}
 	return true
 }
 // MatchOptional is a Matcher that accepts the input of another Matcher
 // when that one matches, but that does not require it to match.
 type MatchOptional struct {
 	matcher Matcher
 }
 // Match tries the wrapped Matcher on a fork and joins the fork when the
 // attempt succeeded. It returns true in either case.
 func (c MatchOptional) Match(m *MatchDialog) bool {
 	if attempt := m.Fork(); c.matcher.Match(attempt) {
 		m.Join(attempt)
 	}
 	return true
 }
--- a/parsekit/matchers_test.go
+++ b/parsekit/matchers_test.go
@ -0,0 +1,260 @@
 package parsekit_test
 import (
 	"testing"
 	p "github.com/mmakaay/toml/parsekit"
 )
 var c = p.C
 const TestItem p.ItemType = 1
 func newParser(input string, matcher p.Matcher) *p.P {
 	stateFn := func(p *p.P) {
 		if p.On(matcher).Accept() {
 			p.EmitLiteral(TestItem)
 			p.Repeat()
 		} else {
 			p.UnexpectedInput("MATCH")
 		}
 	}
 	return p.New(input, stateFn)
 }
 func TestMatchAny(t *testing.T) {
 	p := newParser("o", c.Any())
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if r.Type != TestItem {
 		t.Error("Parser item type not expected TestTitem")
 	}
 	if r.Value != "o" {
 		t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
 	}
 }
 func TestMatchAny_AtEndOfFile(t *testing.T) {
 	p := newParser("", c.Any())
 	_, err, ok := p.Next()
 	if ok {
 		t.Fatalf("Parsing unexpectedly succeeded")
 	}
 	expected := "unexpected end of file (expected MATCH)"
 	if err.Error() != expected {
 		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
 	}
 }
 func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
 	p := newParser("\xcd", c.Any())
 	_, err, ok := p.Next()
 	if ok {
 		t.Fatalf("Parsing unexpectedly succeeded")
 	}
 	expected := "invalid UTF8 character in input (expected MATCH)"
 	if err.Error() != expected {
 		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
 	}
 }
 func TestMatchRune(t *testing.T) {
 	p := newParser("xxx", c.Rune('x'))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if r.Type != TestItem {
 		t.Error("Parser item type not expected TestTitem")
 	}
 	if r.Value != "x" {
 		t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
 	}
 }
 func TestMatchRune_OnMismatch(t *testing.T) {
 	p := newParser("x   ", c.Rune(' '))
 	_, err, ok := p.Next()
 	if ok {
 		t.Fatalf("Parsing did not fail unexpectedly")
 	}
 	expected := "unexpected character 'x' (expected MATCH)"
 	if err.Error() != expected {
 		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
 	}
 }
 func TestMatchRuneRange(t *testing.T) {
 	m := c.RuneRange('b', 'y')
 	s := "mnopqrstuvwxybcdefghijkl"
 	p := newParser(s, m)
 	for i := 0; i < len(s); i++ {
 		r, err, ok := p.Next()
 		if !ok {
 			t.Fatalf("Parsing failed: %s", err)
 		}
 		if s[i] != r.Value[0] {
 			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
 		}
 	}
 	if _, _, ok := newParser("a", m).Next(); ok {
 		t.Fatalf("Unexpected parse success for input 'a'")
 	}
 	if _, _, ok := newParser("z", m).Next(); ok {
 		t.Fatalf("Unexpected parse success for input 'z'")
 	}
 }
 func TestMatchRunes(t *testing.T) {
 	m := c.Runes('+', '-', '*', '/')
 	s := "-+/*+++"
 	p := newParser(s, m)
 	for i := 0; i < len(s); i++ {
 		r, err, ok := p.Next()
 		if !ok {
 			t.Fatalf("Parsing failed: %s", err)
 		}
 		if s[i] != r.Value[0] {
 			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
 		}
 	}
 	if _, _, ok := newParser("^", m).Next(); ok {
 		t.Fatalf("Unexpected parse success for input '^'")
 	}
 	if _, _, ok := newParser("x", m).Next(); ok {
 		t.Fatalf("Unexpected parse success for input 'x'")
 	}
 }
 // TestMatchAnyOf checks that AnyOf accepts each of its alternatives: the
 // input starts with one rune for either alternative, so two consecutive
 // items are expected from the parser.
 func TestMatchAnyOf(t *testing.T) {
 	p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
 	for _, expected := range []string{"a", "b"} {
 		r, err, ok := p.Next()
 		if !ok {
 			t.Fatalf("Parsing failed: %s", err)
 		}
 		if r.Type != TestItem {
 			t.Error("Parser item type not expected TestTitem")
 		}
 		// Report the value that was actually expected in this round.
 		if r.Value != expected {
 			t.Errorf("Parser item value is %q instead of expected %q", r.Value, expected)
 		}
 	}
 }
 func TestMatchRepeat(t *testing.T) {
 	p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "xxxx" {
 		t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
 	}
 }
 func TestMatchRepeat_Mismatch(t *testing.T) {
 	p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
 	_, err, ok := p.Next()
 	if ok {
 		t.Fatalf("Parsing did not fail unexpectedly")
 	}
 	expected := "unexpected character 'x' (expected MATCH)"
 	if err.Error() != expected {
 		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
 	}
 }
 func TestMatchOneOrMore(t *testing.T) {
 	p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "xxxxxxxx" {
 		t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
 	}
 }
 func TestMatchSequence(t *testing.T) {
 	p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "10" {
 		t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
 	}
 }
 func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
 	p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "101010" {
 		t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
 	}
 }
 func TestSequence_WithRepeatedRunes(t *testing.T) {
 	whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
 	equal := c.Rune('=')
 	assignment := c.Sequence(whitespace, equal, whitespace)
 	p := newParser("  ==  10", assignment)
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "  =" {
 		t.Errorf("Parser item value is %q instead of expected \"  =\"", r.Value)
 	}
 }
 func TestMatchOptional(t *testing.T) {
 	p := newParser("xyz", c.Optional(c.Rune('x')))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "x" {
 		t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
 	}
 	p = newParser("xyz", c.Optional(c.Rune('y')))
 	r, err, ok = p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "" {
 		t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
 	}
 }
 // TestMixAndMatch combines several matchers into a matcher for escaped hex
 // bytes (like \x9a) and checks that exactly four of them are accepted
 // from the start of the input.
 func TestMixAndMatch(t *testing.T) {
 	hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
 	backslash := c.Rune('\\')
 	x := c.Rune('x')
 	hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
 	const expected = `\x9a\x01\xF0\xfC`
 	p := newParser(expected+`AndSomeMoreStuff`, c.Repeat(4, hexbyte))
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != expected {
 		// %q already quotes its argument; no extra quotes around it.
 		t.Errorf("Parser item value is %q instead of expected %q", r.Value, expected)
 	}
 }
--- a/parsekit/matching.go
+++ b/parsekit/matching.go
@ -64,6 +64,13 @@ func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
 			return runes, widths, false
 		}
 		switch pattern := pattern.(type) {
 		case Matcher:
 			m := &MatchDialog{p: p}
 			if pattern.Match(m) {
 				return m.runes, m.widths, true
 			} else {
 				return m.runes, m.widths, false
 			}
 		case []interface{}:
 			rs, ws, matched := p.match(offset, pattern...)
 			for i, r := range rs {
@ -98,17 +105,6 @@ func (p *P) Upcoming(patterns ...interface{}) bool {
 	return ok
 }
 // AcceptAny adds the next rune from the input to the string buffer.
 // If no rune could be read (end of file or invalid UTF8 data),
 // then false is returned.
 func (p *P) AcceptAny() bool {
 	if r, ok := p.next(); ok {
 		p.buffer.writeRune(r)
 		return true
 	}
 	return false
 }
 type action struct {
 	p      *P
 	runes  []rune
@ -129,6 +125,10 @@ func (a *action) Accept() bool {
 func (a *action) Skip() bool {
 	if a.ok {
 		for i, r := range a.runes {
 			type C struct {
 				Rune MatchRune
 			}
 			a.p.advanceCursor(r, a.widths[i])
 		}
 	}
@ -159,20 +159,10 @@ func (p *P) On(patterns ...interface{}) *action {
 // AcceptMatching adds the next runes to the string buffer, but only
 // if the upcoming runes satisfy the provided patterns.
 // When runes were added then true is returned, false otherwise.
-func (p *P) AcceptMatching(patterns ...interface{}) bool {
+// TODO not needed anymore
-	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
+// func (p *P) AcceptMatching(patterns ...interface{}) bool {
-}
+// 	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
-
+// }
 // AcceptConsecutive adds consecutive runes from the input to the string
 // buffer, as long as they exist in the pattern.
 // If any runes were added then true is returned, false otherwise.
 func (p *P) AcceptConsecutive(pattern string) bool {
 	accepted := false
 	for p.AcceptMatching(pattern) {
 		accepted = true
 	}
 	return accepted
 }
 // SkipMatching skips runes, but only when all provided patterns are satisfied.
 // Returns true when one or more runes were skipped.
@ -185,13 +175,3 @@ func (p *P) SkipMatching(patterns ...interface{}) bool {
 	}
 	return false
 }
 // SkipConsecutive skips consecutive runes from the provided pattern.
 // Returns true when one or more runes were skipped.
 func (p *P) SkipConsecutive(pattern string) bool {
 	didSkip := false
 	for p.SkipMatching(pattern) {
 		didSkip = true
 	}
 	return didSkip
 }
--- a/parsekit/types.go
+++ b/parsekit/types.go
@ -1,5 +1,9 @@
 package parsekit
 import (
 	"unicode/utf8"
 )
 // P holds the internal state of the parser.
 type P struct {
 	state        StateFn      // the function that handles the current state
@ -50,3 +54,14 @@ type Error struct {
 func (err *Error) Error() string {
 	return err.Message
 }
 // EOF is a special rune, which is used to indicate an end of file when
 // reading a character from the input.
 // It can be treated as a rune when writing parsing rules, so a valid way to
 // say 'I now expect the end of the file' is using something like:
 // if (p.On(c.Rune(EOF)).Skip()) { ... }
 const EOF rune = -1
 // INVALID is a special rune, which is used to indicate an invalid UTF8
 // rune on the input.
 const INVALID rune = utf8.RuneError
--- a/parser/helpers_test.go
+++ b/parser/helpers_test.go
@ -6,7 +6,7 @@ import (
 	"testing"
 	"github.com/mmakaay/toml/parsekit"
-	lexer "github.com/mmakaay/toml/parser"
+	"github.com/mmakaay/toml/parser"
 )
 type statesT struct {
@ -23,7 +23,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
 }
 func runStatesT(t *testing.T, c statesT) {
-	l, err := lexer.NewParser(c.in).ToArray()
+	l, err := parser.NewParser(c.in).ToArray()
 	if err == nil && c.err != "" {
 		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
 	}
@ -36,12 +36,12 @@ func runStatesT(t *testing.T, c statesT) {
 	switch expected := c.out.(type) {
 	case []string:
 		if len(expected) != len(l) {
-			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
+			t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
 		}
 		for i, e := range expected {
 			v := ParserItemToString(l[i])
 			if v != e {
-				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
+				t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
 			}
 		}
 	case string:
@ -51,7 +51,7 @@ func runStatesT(t *testing.T, c statesT) {
 		}
 		actual := strings.Join(a, "")
 		if actual != expected {
-			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
+			t.Errorf("[%s] Unexpected parser output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
 		}
 	}
 }
@ -59,15 +59,15 @@ func runStatesT(t *testing.T, c statesT) {
 // ParserItemToString returns a string representation of the parsekit.Item.
 func ParserItemToString(i parsekit.Item) string {
 	switch i.Type {
-	case lexer.ItemComment:
+	case parser.ItemComment:
 		return fmt.Sprintf("#(%s)", i.Value)
-	case lexer.ItemKey:
+	case parser.ItemKey:
 		return fmt.Sprintf("[%s]", i.Value)
-	case lexer.ItemString:
+	case parser.ItemString:
 		return fmt.Sprintf("STR(%s)", i.Value)
-	case lexer.ItemKeyDot:
+	case parser.ItemKeyDot:
 		return "."
-	case lexer.ItemAssignment:
+	case parser.ItemAssignment:
 		return "="
 	default:
 		panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
--- a/parser/parser.go
+++ b/parser/parser.go
@ -11,40 +11,28 @@ const (
 	ItemString                              // A value of type string
 )
 const (
 	whitespace       string = " \t"
 	carriageReturn   string = "\r"
 	newline          string = "\n"
 	hash             string = "#"
 	equal            string = "="
 	lower            string = "abcdefghijklmnopqrstuvwxyz"
 	upper            string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 	digits           string = "0123456789"
 	hex              string = digits + "abcdefABCDEF"
 	dot              string = "."
 	underscore       string = "_"
 	dash             string = "-"
 	singleQuote      string = "'"
 	doubleQuote      string = "\""
 	backslash        string = "\\"
 	quoteChars       string = singleQuote + doubleQuote
 	bareKeyChars     string = lower + upper + digits + underscore + dash
 	startOfKey       string = bareKeyChars + quoteChars
 	validEscapeChars string = `btnfr"\`
 	mustBeEscaped    string = "" +
 		"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
 		"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
 		"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
 		"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
 		"\u007F"
 )
 var (
-	keySeparatorDot = []interface{}{whitespace, dot, whitespace}
+	c                    = parsekit.C
-	doubleQuote3    = []interface{}{doubleQuote, doubleQuote, doubleQuote}
+	space                = c.Rune(' ')
-	hex4            = []interface{}{hex, hex, hex, hex}
+	tab                  = c.Rune('\t')
-	shortUtf8Match  = []interface{}{backslash, 'u', hex4}
+	carriageReturn       = c.Rune('\r')
-	longUtf8Match   = []interface{}{backslash, 'U', hex4, hex4}
+	lineFeed             = c.Rune('\n')
 	hash                 = c.Rune('#')
 	underscore           = c.Rune('_')
 	dash                 = c.Rune('-')
 	equal                = c.Rune('=')
 	dot                  = c.Rune('.')
 	singleQuote          = c.Rune('\'')
 	doubleQuote          = c.Rune('"')
 	any                  = c.Any()
 	anyQuote             = c.AnyOf(singleQuote, doubleQuote)
 	backslash            = c.Rune('\\')
 	lower                = c.RuneRange('a', 'z')
 	upper                = c.RuneRange('A', 'Z')
 	digit                = c.RuneRange('0', '9')
 	whitespace           = c.OneOrMore(c.AnyOf(space, tab))
 	whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
 	optionalWhitespace   = c.Optional(whitespace)
 )
 // NewParser creates a new parser, using the provided input string
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -6,6 +6,10 @@ import (
 	"github.com/mmakaay/toml/parser"
 )
 func TestEmptyInput(t *testing.T) {
 	runStatesT(t, statesT{"empty string", "", "", ""})
 }
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 	_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
@ -17,18 +21,13 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 	}
 }
 func TestEmptyInput(t *testing.T) {
 	runStatesT(t, statesT{"empty string", "", "", ""})
 }
 func TestInvalidUtf8Data(t *testing.T) {
 	runStatesTs(t, []statesT{
-		{"inside comment", "# \xbc", "", "invalid UTF8 character"},
+		{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
-		{"bare key 1", "\xbc", "", "invalid UTF8 character"},
+		{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
-		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
+		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
-		{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
+		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
-		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
+		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
 		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
 	})
 }
--- a/parser/syn_comments.go
+++ b/parser/syn_comments.go
@ -6,7 +6,7 @@ import (
 // A '#' hash symbol marks the rest of the line as a comment.
 func startComment(p *parsekit.P) {
-	p.SkipConsecutive(hash)
+	p.On(c.OneOrMore(hash)).Skip()
 	p.RouteTo(commentContents)
 }
@ -16,8 +16,9 @@ func commentContents(p *parsekit.P) {
 	case p.AtEndOfLine():
 		p.EmitLiteralTrim(ItemComment)
 		p.RouteReturn()
-	default:
+	case p.On(any).Accept():
 		p.AcceptAny()
 		p.Repeat()
 	default:
 		p.UnexpectedInput("comment contents")
 	}
 }
--- a/parser/syn_key.go
+++ b/parser/syn_key.go
@ -1,65 +0,0 @@
 package parser
 import "github.com/mmakaay/toml/parsekit"
 // The primary building block of a TOML document is the key/value pair.
 func startKeyValuePair(p *parsekit.P) {
 	switch {
 	case p.On(whitespace + carriageReturn + newline).Skip():
 		p.Repeat()
 	case p.On(hash).Stay():
 		p.RouteTo(startComment).ThenReturnHere()
 	case p.On(startOfKey).RouteTo(startKey):
 	default:
 		p.RouteTo(endOfFile)
 	}
 }
 // A key may be either bare, quoted or dotted.
 func startKey(p *parsekit.P) {
 	switch {
 	case p.On(bareKeyChars).RouteTo(startBareKey):
 	default:
 		p.UnexpectedInput("a valid key name")
 	}
 }
 // Bare keys may only contain ASCII letters, ASCII digits,
 // underscores, and dashes (A-Za-z0-9_-). Note that bare
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
 func startBareKey(p *parsekit.P) {
 	p.AcceptConsecutive(bareKeyChars) // TODO make a plan for adding this to After()
 	p.EmitLiteral(ItemKey)
 	p.RouteTo(endOfKeyOrDot)
 }
 // Dotted keys are a sequence of bare or quoted keys joined with a dot.
 // This allows for grouping similar properties together:
 func endOfKeyOrDot(p *parsekit.P) {
 	// Whitespace around dot-separated parts is ignored, however,
 	// best practice is to not use any extraneous whitespace.
 	p.SkipConsecutive(whitespace)
 	if p.On(dot).Accept() {
 		p.SkipConsecutive(whitespace)
 		p.EmitLiteral(ItemKeyDot)
 		p.RouteTo(startKey)
 	} else {
 		p.RouteTo(startKeyAssignment)
 	}
 }
 // Keys are on the left of the equals sign and values are on the right.
 // Whitespace is ignored around key names and values. The key, equals
 // sign, and value must be on the same line (though some values can
 // be broken over multiple lines).
 func startKeyAssignment(p *parsekit.P) {
 	p.SkipConsecutive(whitespace)
 	if p.On(equal).Accept() {
 		p.EmitLiteral(ItemAssignment)
 		p.SkipConsecutive(whitespace)
 		p.RouteTo(startValue)
 	} else {
 		p.UnexpectedInput("a value assignment")
 	}
 }
--- a/parser/syn_keyvaluepair.go
+++ b/parser/syn_keyvaluepair.go
@ -0,0 +1,88 @@
 package parser
 import "github.com/mmakaay/toml/parsekit"
 // The primary building block of a TOML document is the key/value pair.
 var (
 	// Keys are on the left of the equals sign and values are on the right.
 	// Whitespace is ignored around key names and values. The key, equals
 	// sign, and value must be on the same line (though some values can be
 	// broken over multiple lines).
 	keyAssignment = c.Sequence(optionalWhitespace, equal, optionalWhitespace)
 	// A key may be either bare, quoted or dotted.
 	// Bare keys may only contain ASCII letters, ASCII digits,
 	// underscores, and dashes (A-Za-z0-9_-). Note that bare
 	// keys are allowed to be composed of only ASCII digits,
 	// e.g. 1234, but are always interpreted as strings.
 	bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
 	bareKey     = c.OneOrMore(bareKeyRune)
 	// Quoted keys follow the exact same rules as either basic
 	// strings or literal strings and allow you to use a much broader
 	// set of key names. Best practice is to use bare keys except
 	// when absolutely necessary.
 	// A bare key must be non-empty, but an empty quoted key is
 	// allowed (though discouraged).
 	startOfKey = c.AnyOf(bareKeyRune, anyQuote)
 	// Dotted keys are a sequence of bare or quoted keys joined with a dot.
 	// This allows for grouping similar properties together.
 	// Whitespace around dot-separated parts is ignored, however, best
 	// practice is to not use any extraneous whitespace.
 	keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
 )
 func startKeyValuePair(p *parsekit.P) {
 	p.On(whitespaceOrNewlines).Skip()
 	switch {
 	case p.On(hash).Stay():
 		p.RouteTo(startComment).ThenReturnHere()
 	case p.On(startOfKey).RouteTo(startKey):
 	default:
 		p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
 	}
 }
 func startKey(p *parsekit.P) {
 	switch {
 	case p.On(bareKeyRune).RouteTo(startBareKey):
 	default:
 		p.UnexpectedInput("a valid key name")
 	}
 }
 func startBareKey(p *parsekit.P) {
 	p.On(bareKey).Accept()
 	p.EmitLiteral(ItemKey)
 	p.RouteTo(endOfKeyOrDot)
 }
 // endOfKeyOrDot handles the input directly after a key. When a key
 // separator dot is found, it is skipped, a dot item is emitted and parsing
 // continues with the next key. Otherwise, a key assignment is expected.
 func endOfKeyOrDot(p *parsekit.P) {
 	if !p.On(keySeparatordDot).Skip() {
 		p.RouteTo(startKeyAssignment)
 		return
 	}
 	p.Emit(ItemKeyDot, ".")
 	p.RouteTo(startKey)
 }
 // startKeyAssignment expects the assignment that separates a key from its
 // value. When found, it is skipped, an assignment item is emitted and
 // parsing continues with the value. Otherwise, an error is reported.
 func startKeyAssignment(p *parsekit.P) {
 	if !p.On(keyAssignment).Skip() {
 		p.UnexpectedInput("a value assignment")
 		return
 	}
 	p.Emit(ItemAssignment, "=")
 	p.RouteTo(startValue)
 }
 // Values must be of the following types: String, Integer, Float, Boolean,
 // Datetime, Array, or Inline Table. Unspecified values are invalid.
 func startValue(p *parsekit.P) {
 	switch {
 	case p.On(anyQuote).RouteTo(startString):
 	default:
 		p.UnexpectedInput("a value")
 	}
 }
--- a/parser/syn_keyvaluepair_test.go
+++ b/parser/syn_keyvaluepair_test.go
@ -5,9 +5,9 @@ import (
 )
 func TestKeyWithoutAssignment(t *testing.T) {
-	err := "unexpected end of file"
+	err := "unexpected end of file (expected a value assignment)"
 	runStatesTs(t, []statesT{
-		{"bare with whitespace", " a ", "[a]", err},
+		{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"},
 		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
 		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
 		{"bare numbers", "0123456789", "[0123456789]", err},
@ -18,15 +18,14 @@ func TestKeyWithoutAssignment(t *testing.T) {
 }
 func TestDottedKey(t *testing.T) {
 	err := "unexpected end of file"
 	runStatesTs(t, []statesT{
-		{"bare dotted", "a._.c", "[a].[_].[c]", err},
+		{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"},
-		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
+		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`},
 	})
 }
 func TestKeyWithAssignmentButNoValue(t *testing.T) {
-	err := "unexpected end of file"
+	err := "unexpected end of file (expected a value)"
 	runStatesTs(t, []statesT{
 		{"bare", "a=", "[a]=", err},
 		{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
--- a/parser/syn_strings.go
+++ b/parser/syn_strings.go
@ -2,10 +2,36 @@ package parser
 import "github.com/mmakaay/toml/parsekit"
-// There are four ways to express strings: basic, multi-line basic, literal,
+var (
-// and multi-line literal. All strings must contain only valid UTF-8 characters.
+	// There are four ways to express strings: basic, multi-line basic, literal,
-// * Multi-line basic strings are surrounded by three quotation marks on each side.
+	// and multi-line literal. All strings must contain only valid UTF-8 characters.
-// * Basic strings are surrounded by quotation marks.
+	// * Multi-line basic strings are surrounded by three quotation marks on each side.
 	// * Basic strings are surrounded by quotation marks.
 	doubleQuote3 = c.Repeat(3, doubleQuote)
 	// Any Unicode character may be used except those that must be escaped:
 	// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
 	charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
 	// For convenience, some popular characters have a compact escape sequence.
 	//
 	// \b         - backspace       (U+0008)
 	// \t         - tab             (U+0009)
 	// \n         - linefeed        (U+000A)
 	// \f         - form feed       (U+000C)
 	// \r         - carriage return (U+000D)
 	// \"         - quote           (U+0022)
 	// \\         - backslash       (U+005C)
 	// \uXXXX     - unicode         (U+XXXX)
 	// \UXXXXXXXX - unicode         (U+XXXXXXXX)
 	validEscapeChar = c.AnyOf(c.Runes('b', 't', 'n', 'f', 'r'), doubleQuote, backslash)
 	shortEscape     = c.Sequence(backslash, validEscapeChar)
 	hex             = c.AnyOf(digit, c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
 	shortUtf8Escape = c.Sequence(backslash, c.Rune('u'), c.Repeat(4, hex))
 	longUtf8Escape  = c.Sequence(backslash, c.Rune('U'), c.Repeat(8, hex))
 	validEscape     = c.AnyOf(shortEscape, shortUtf8Escape, longUtf8Escape)
 )
 func startString(p *parsekit.P) {
 	switch {
 	case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
@ -15,36 +41,21 @@ func startString(p *parsekit.P) {
 	}
 }
 // For convenience, some popular characters have a compact escape sequence.
 //
 // \b         - backspace       (U+0008)
 // \t         - tab             (U+0009)
 // \n         - linefeed        (U+000A)
 // \f         - form feed       (U+000C)
 // \r         - carriage return (U+000D)
 // \"         - quote           (U+0022)
 // \\         - backslash       (U+005C)
 // \uXXXX     - unicode         (U+XXXX)
 // \UXXXXXXXX - unicode         (U+XXXXXXXX)
 //
 // Any Unicode character may be used except those that must be escaped:
 // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
 func parseBasicString(p *parsekit.P) {
 	switch {
-	case p.AtEndOfFile():
+	case p.On(parsekit.EOF).Stay():
 		p.UnexpectedEndOfFile("basic string token")
-	case p.On(backslash, validEscapeChars).Accept() ||
+	case p.On(validEscape).Accept():
 		p.On(shortUtf8Match).Accept() ||
 		p.On(longUtf8Match).Accept():
 		p.Repeat()
-	case p.On(mustBeEscaped).Stay():
+	case p.On(charThatMustBeEscaped).Stay():
-		r, _, _ := p.Match(mustBeEscaped)
+		r, _, _ := p.Match(charThatMustBeEscaped)
 		p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 	case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
 		p.RouteReturn()
-	default:
+	case p.On(any).Accept():
 		p.AcceptAny()
 		p.Repeat()
 	default:
 		p.UnexpectedInput("string contents")
 	}
 }
@ -69,7 +80,7 @@ func basicStringSpecifics(p *parsekit.P) {
 	case p.On(backslash).Stay():
 		p.EmitError("Invalid escape sequence")
 	default:
-		p.RouteTo(startBasicString)
+		panic("String parsing should not have ended up here")
 	}
 }
--- a/parser/syn_strings_test.go
+++ b/parser/syn_strings_test.go
@ -33,8 +33,8 @@ func TestEmptyBasicString(t *testing.T) {
 		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
 		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
 		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
-		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
+		{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
-		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
+		{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
 	})
 }
--- a/parser/syn_value.go
+++ b/parser/syn_value.go
@ -1,14 +0,0 @@
 package parser
 import "github.com/mmakaay/toml/parsekit"
 // A value must be one of these types: String, Integer, Float, Boolean,
 // Datetime, Array, or Inline Table. Unspecified values are invalid.
 //
 // startValue skips consecutive whitespace and then routes to startString
 // when quoteChars are upcoming. Any other input is reported as unexpected.
 func startValue(p *parsekit.P) {
 	p.SkipConsecutive(whitespace)
 	if !p.Upcoming(quoteChars) {
 		p.UnexpectedInput("a value")
 		return
 	}
 	p.RouteTo(startString)
 }