Phew, that was quite the update. I've now got a working implementation of a parser/combinator-like matching API, which saves us from having to specify everything in state functions. That is way too low-level for a lot of things. I'd rather use parser/combinator-style definitions for chunks of the input and keep the state functions for higher-level document structure parsing.

2019-05-19 23:35:03 +00:00 · 2019-05-19 23:35:03 +00:00 · e3e408dfdb
parent 55e23874f7
commit e3e408dfdb
16 changed files with 721 additions and 234 deletions
--- a/parsekit/emitting.go
+++ b/parsekit/emitting.go
@ -3,6 +3,7 @@ package parsekit
 import (
 	"fmt"
 	"strings"
+	"unicode/utf8"
 )

 // Emit passes a Parser item to the client, including the provided string.
@ -51,8 +52,16 @@ func (p *P) EmitError(format string, args ...interface{}) {
 func (p *P) UnexpectedInput(expected string) {
 	// next() takes care of error messages in cases where ok == false.
 	// Therefore, we only provide an error message for the ok case here.
-	if r, ok := p.next(); ok {
-		p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
+	r, _, ok := p.peek(0)
+	switch {
+	case ok:
+		p.EmitError("unexpected character %q (expected %s)", r, expected)
+	case r == EOF:
+		p.EmitError("unexpected end of file (expected %s)", expected)
+	case r == utf8.RuneError:
+		p.EmitError("invalid UTF8 character in input (expected %s)", expected)
+	default:
+		panic("Unhandled output from peek()")
 	}
 }

--- a/parsekit/internals.go
+++ b/parsekit/internals.go
@ -4,32 +4,13 @@ import (
 	"unicode/utf8"
 )

-// next returns the next rune from the input and a boolean indicating if
-// reading the input was successful.
-// When the end of input is reached, or an invalid UTF8 character is
-// read, then false is returned. Both are considered error cases,
-// and for that reason these automatically emit an error to the client.
-func (p *P) next() (rune, bool) {
-	r, w, ok := p.peek(0)
-	if ok {
-		p.advanceCursor(r, w)
-		return r, true
-	}
-	if r == utf8.RuneError && w == 0 {
-		p.EmitError("unexpected end of file")
-	} else {
-		p.EmitError("invalid UTF8 character")
-	}
-	return r, false
-}
-
 // peek returns but does not advance the cursor to the next rune(s) in the input.
 // Returns the rune, its width in bytes and a boolean.
 // The boolean will be false in case no upcoming rune can be peeked
 // (end of data or invalid UTF8 character).
 func (p *P) peek(offsetInBytes int) (rune, int, bool) {
-	peeked, width := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
-	return peeked, width, peeked != utf8.RuneError
+	r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
+	return handleRuneError(r, w)
 }

 // peekMulti takes a peek at multiple upcoming runes in the input.
@ -43,13 +24,12 @@ func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
 	offset := 0
 	for i := 0; i < amount; i++ {
 		r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
-		switch {
-		case r == utf8.RuneError:
+		r, w, ok := handleRuneError(r, w)
+		runes = append(runes, r)
+		widths = append(widths, w)
+		offset += w
+		if !ok {
 			return runes, widths, false
-		default:
-			offset += w
-			runes = append(runes, r)
-			widths = append(widths, w)
 		}
 	}
 	return runes, widths, true
@ -86,3 +66,21 @@ func (p *P) advanceCursor(r rune, w int) {
 	}
 	p.newline = r == '\n'
 }
+
+// handleRuneError is used to normalize the rune value in case of errors.
+// When an error occurs, then utf8.RuneError will be in the rune.
+// This can however indicate one of two situations:
+// * w == 0: end of file is reached
+// * w == 1: invalid UTF8 character on input
+// This function lets these two cases return respectively the
+// package's own EOF or INVALID runes, to make it easy for client
+// code to distinguish between these two cases.
+func handleRuneError(r rune, w int) (rune, int, bool) {
+	if r == utf8.RuneError {
+		if w == 0 {
+			return EOF, 0, false
+		}
+		return INVALID, w, false
+	}
+	return r, w, true
+}
--- a/parsekit/matchers.go
+++ b/parsekit/matchers.go
@ -0,0 +1,218 @@
+package parsekit
+
+import "unicode/utf8"
+
+// Not in need of it myself, but nice to have I guess:
+// - NotFollowedBy
+// - Discard
+// - Separated
+
+type MatchDialog struct {
+	p        *P
+	runes    []rune
+	widths   []int
+	offset   int
+	curRune  rune
+	curWidth int
+	forked   bool
+}
+
+func (m *MatchDialog) Fork() *MatchDialog {
+	fork := &MatchDialog{
+		p:      m.p,
+		offset: m.offset,
+		forked: true,
+	}
+	return fork
+}
+
+func (m *MatchDialog) Join(fork *MatchDialog) bool {
+	if !fork.forked {
+		panic("Cannot join a non-forked MatchDialog")
+	}
+	m.runes = append(m.runes, fork.runes...)
+	m.widths = append(m.widths, fork.widths...)
+	m.offset = fork.offset
+	fork.runes = []rune{}
+	fork.widths = []int{}
+	return true
+}
+
+func (m *MatchDialog) NextRune() (rune, bool) {
+	if m.curRune == utf8.RuneError {
+		panic("Matcher must not call NextRune() after it returned false")
+	}
+	r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
+	m.offset += w
+	m.curRune = r
+	m.curWidth = w
+	m.runes = append(m.runes, r)
+	m.widths = append(m.widths, w)
+	return r, r != EOF && r != INVALID
+}
+
+// Matcher is the interface that can be implemented to provide
+// a matching strategy for the match() function.
+// A MatchDialog is provided as input. This implements a
+// specific set of methods that a Matcher needs to retrieve data
+// from the parser and to report back results.
+type Matcher interface {
+	Match(*MatchDialog) bool
+}
+
+type MatcherConstructors struct {
+	Any        func() MatchAny
+	Rune       func(rune rune) MatchRune
+	RuneRange  func(start rune, end rune) MatchRuneRange
+	Runes      func(runes ...rune) MatchAnyOf
+	AnyOf      func(matchers ...Matcher) MatchAnyOf
+	Repeat     func(count int, matcher Matcher) MatchRepeat
+	Sequence   func(matchers ...Matcher) MatchSequence
+	ZeroOrMore func(matcher Matcher) MatchZeroOrMore
+	OneOrMore  func(matcher Matcher) MatchOneOrMore
+	Optional   func(matcher Matcher) MatchOptional
+}
+
+var C = MatcherConstructors{
+	Any: func() MatchAny {
+		return MatchAny{}
+	},
+	Rune: func(rune rune) MatchRune {
+		return MatchRune{rune}
+	},
+	RuneRange: func(start rune, end rune) MatchRuneRange {
+		return MatchRuneRange{start, end}
+	},
+	Runes: func(runes ...rune) MatchAnyOf {
+		m := make([]Matcher, len(runes))
+		for i, r := range runes {
+			m[i] = MatchRune{r}
+		}
+		return MatchAnyOf{m}
+	},
+	AnyOf: func(matchers ...Matcher) MatchAnyOf {
+		return MatchAnyOf{matchers}
+	},
+	Repeat: func(count int, matcher Matcher) MatchRepeat {
+		return MatchRepeat{count, matcher}
+	},
+	Sequence: func(matchers ...Matcher) MatchSequence {
+		return MatchSequence{matchers}
+	},
+	OneOrMore: func(matcher Matcher) MatchOneOrMore {
+		return MatchOneOrMore{matcher}
+	},
+	ZeroOrMore: func(matcher Matcher) MatchZeroOrMore {
+		return MatchZeroOrMore{matcher}
+	},
+	Optional: func(matcher Matcher) MatchOptional {
+		return MatchOptional{matcher}
+	},
+}
+
+type MatchAny struct{}
+
+func (c MatchAny) Match(m *MatchDialog) bool {
+	_, ok := m.NextRune()
+	return ok
+}
+
+type MatchRune struct {
+	match rune
+}
+
+func (c MatchRune) Match(m *MatchDialog) bool {
+	r, ok := m.NextRune()
+	return ok && r == c.match
+}
+
+type MatchRuneRange struct {
+	start rune
+	end   rune
+}
+
+func (c MatchRuneRange) Match(m *MatchDialog) bool {
+	r, ok := m.NextRune()
+	return ok && r >= c.start && r <= c.end
+}
+
+type MatchAnyOf struct {
+	matcher []Matcher
+}
+
+func (c MatchAnyOf) Match(m *MatchDialog) bool {
+	for _, matcher := range c.matcher {
+		mc := m.Fork()
+		if matcher.Match(mc) {
+			return m.Join(mc)
+		}
+	}
+	return false
+}
+
+type MatchRepeat struct {
+	count   int
+	matcher Matcher
+}
+
+func (c MatchRepeat) Match(m *MatchDialog) bool {
+	mc := m.Fork()
+	for i := 0; i < c.count; i++ {
+		if !c.matcher.Match(mc) {
+			return false
+		}
+	}
+	m.Join(mc)
+	return true
+}
+
+type MatchSequence struct {
+	matchers []Matcher
+}
+
+func (c MatchSequence) Match(m *MatchDialog) bool {
+	mPart := m.Fork()
+	for _, matcher := range c.matchers {
+		if !matcher.Match(mPart) {
+			return false
+		}
+	}
+	m.Join(mPart)
+	return true
+}
+
+type MatchOneOrMore struct {
+	matcher Matcher
+}
+
+func (c MatchOneOrMore) Match(m *MatchDialog) bool {
+	mc := m.Fork()
+	for c.matcher.Match(mc) {
+		m.Join(mc)
+	}
+	return len(m.runes) > 0
+}
+
+type MatchZeroOrMore struct {
+	matcher Matcher
+}
+
+func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
+	mc := m.Fork()
+	for c.matcher.Match(mc) {
+		m.Join(mc)
+	}
+	return true
+}
+
+type MatchOptional struct {
+	matcher Matcher
+}
+
+func (c MatchOptional) Match(m *MatchDialog) bool {
+	mc := m.Fork()
+	if c.matcher.Match(mc) {
+		m.Join(mc)
+	}
+	return true
+}
--- a/parsekit/matchers_test.go
+++ b/parsekit/matchers_test.go
@ -0,0 +1,260 @@
+package parsekit_test
+
+import (
+	"testing"
+
+	p "github.com/mmakaay/toml/parsekit"
+)
+
+var c = p.C
+
+const TestItem p.ItemType = 1
+
+func newParser(input string, matcher p.Matcher) *p.P {
+	stateFn := func(p *p.P) {
+		if p.On(matcher).Accept() {
+			p.EmitLiteral(TestItem)
+			p.Repeat()
+		} else {
+			p.UnexpectedInput("MATCH")
+		}
+	}
+	return p.New(input, stateFn)
+}
+
+func TestMatchAny(t *testing.T) {
+	p := newParser("o", c.Any())
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s", err)
+	}
+	if r.Type != TestItem {
+		t.Error("Parser item type not expected TestTitem")
+	}
+	if r.Value != "o" {
+		t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
+	}
+}
+
+func TestMatchAny_AtEndOfFile(t *testing.T) {
+	p := newParser("", c.Any())
+	_, err, ok := p.Next()
+	if ok {
+		t.Fatalf("Parsing unexpectedly succeeded")
+	}
+	expected := "unexpected end of file (expected MATCH)"
+	if err.Error() != expected {
+		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
+	}
+}
+
+func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
+	p := newParser("\xcd", c.Any())
+	_, err, ok := p.Next()
+	if ok {
+		t.Fatalf("Parsing unexpectedly succeeded")
+	}
+	expected := "invalid UTF8 character in input (expected MATCH)"
+	if err.Error() != expected {
+		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
+	}
+}
+
+func TestMatchRune(t *testing.T) {
+	p := newParser("xxx", c.Rune('x'))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s", err)
+	}
+	if r.Type != TestItem {
+		t.Error("Parser item type not expected TestTitem")
+	}
+	if r.Value != "x" {
+		t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
+	}
+}
+
+func TestMatchRune_OnMismatch(t *testing.T) {
+	p := newParser("x   ", c.Rune(' '))
+	_, err, ok := p.Next()
+	if ok {
+		t.Fatalf("Parsing did not fail unexpectedly")
+	}
+	expected := "unexpected character 'x' (expected MATCH)"
+	if err.Error() != expected {
+		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
+	}
+}
+
+func TestMatchRuneRange(t *testing.T) {
+	m := c.RuneRange('b', 'y')
+	s := "mnopqrstuvwxybcdefghijkl"
+	p := newParser(s, m)
+	for i := 0; i < len(s); i++ {
+		r, err, ok := p.Next()
+		if !ok {
+			t.Fatalf("Parsing failed: %s", err)
+		}
+		if s[i] != r.Value[0] {
+			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
+		}
+	}
+	if _, _, ok := newParser("a", m).Next(); ok {
+		t.Fatalf("Unexpected parse success for input 'a'")
+	}
+	if _, _, ok := newParser("z", m).Next(); ok {
+		t.Fatalf("Unexpected parse success for input 'z'")
+	}
+}
+
+func TestMatchRunes(t *testing.T) {
+	m := c.Runes('+', '-', '*', '/')
+	s := "-+/*+++"
+	p := newParser(s, m)
+	for i := 0; i < len(s); i++ {
+		r, err, ok := p.Next()
+		if !ok {
+			t.Fatalf("Parsing failed: %s", err)
+		}
+		if s[i] != r.Value[0] {
+			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
+		}
+	}
+	if _, _, ok := newParser("^", m).Next(); ok {
+		t.Fatalf("Unexpected parse success for input '^'")
+	}
+	if _, _, ok := newParser("x", m).Next(); ok {
+		t.Fatalf("Unexpected parse success for input 'x'")
+	}
+}
+
+func TestMatchAnyOf(t *testing.T) {
+	p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s", err)
+	}
+	if r.Type != TestItem {
+		t.Error("Parser item type not expected TestTitem")
+	}
+	if r.Value != "a" {
+		t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
+	}
+
+	r, err, ok = p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s", err)
+	}
+	if r.Type != TestItem {
+		t.Error("Parser item type not expected TestTitem")
+	}
+	if r.Value != "b" {
+		t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
+	}
+}
+
+func TestMatchRepeat(t *testing.T) {
+	p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "xxxx" {
+		t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
+	}
+}
+
+func TestMatchRepeat_Mismatch(t *testing.T) {
+	p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
+	_, err, ok := p.Next()
+	if ok {
+		t.Fatalf("Parsing did not fail unexpectedly")
+	}
+	expected := "unexpected character 'x' (expected MATCH)"
+	if err.Error() != expected {
+		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
+	}
+}
+
+func TestMatchOneOrMore(t *testing.T) {
+	p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "xxxxxxxx" {
+		t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
+	}
+}
+
+func TestMatchSequence(t *testing.T) {
+	p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "10" {
+		t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
+	}
+}
+
+func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
+	p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "101010" {
+		t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
+	}
+}
+
+func TestSequence_WithRepeatedRunes(t *testing.T) {
+	whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
+	equal := c.Rune('=')
+	assignment := c.Sequence(whitespace, equal, whitespace)
+	p := newParser("  ==  10", assignment)
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "  =" {
+		t.Errorf("Parser item value is %q instead of expected \"  =\"", r.Value)
+	}
+}
+
+func TestMatchOptional(t *testing.T) {
+	p := newParser("xyz", c.Optional(c.Rune('x')))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "x" {
+		t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
+	}
+
+	p = newParser("xyz", c.Optional(c.Rune('y')))
+	r, err, ok = p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != "" {
+		t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
+	}
+}
+
+func TestMixAndMatch(t *testing.T) {
+	hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
+	backslash := c.Rune('\\')
+	x := c.Rune('x')
+	hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
+
+	p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
+	r, err, ok := p.Next()
+	if !ok {
+		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
+	}
+	if r.Value != `\x9a\x01\xF0\xfC` {
+		t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
+	}
+}
--- a/parsekit/matching.go
+++ b/parsekit/matching.go
@ -64,6 +64,13 @@ func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
 			return runes, widths, false
 		}
 		switch pattern := pattern.(type) {
+		case Matcher:
+			m := &MatchDialog{p: p}
+			if pattern.Match(m) {
+				return m.runes, m.widths, true
+			} else {
+				return m.runes, m.widths, false
+			}
 		case []interface{}:
 			rs, ws, matched := p.match(offset, pattern...)
 			for i, r := range rs {
@ -98,17 +105,6 @@ func (p *P) Upcoming(patterns ...interface{}) bool {
 	return ok
 }

-// AcceptAny adds the next rune from the input to the string buffer.
-// If no rune could be read (end of file or invalid UTF8 data),
-// then false is returned.
-func (p *P) AcceptAny() bool {
-	if r, ok := p.next(); ok {
-		p.buffer.writeRune(r)
-		return true
-	}
-	return false
-}
-
 type action struct {
 	p      *P
 	runes  []rune
@ -129,6 +125,10 @@ func (a *action) Accept() bool {
 func (a *action) Skip() bool {
 	if a.ok {
 		for i, r := range a.runes {
+			type C struct {
+				Rune MatchRune
+			}
+
 			a.p.advanceCursor(r, a.widths[i])
 		}
 	}
@ -159,20 +159,10 @@ func (p *P) On(patterns ...interface{}) *action {
 // AcceptMatching adds the next runes to the string buffer, but only
 // if the upcoming runes satisfy the provided patterns.
 // When runes were added then true is returned, false otherwise.
-func (p *P) AcceptMatching(patterns ...interface{}) bool {
-	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
-}
-
-// AcceptConsecutive adds consecutive runes from the input to the string
-// buffer, as long as they exist in the pattern.
-// If any runes were added then true is returned, false otherwise.
-func (p *P) AcceptConsecutive(pattern string) bool {
-	accepted := false
-	for p.AcceptMatching(pattern) {
-		accepted = true
-	}
-	return accepted
-}
+// TODO not needed anymore
+// func (p *P) AcceptMatching(patterns ...interface{}) bool {
+// 	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
+// }

 // SkipMatching skips runes, but only when all provided patterns are satisfied.
 // Returns true when one or more runes were skipped.
@ -185,13 +175,3 @@ func (p *P) SkipMatching(patterns ...interface{}) bool {
 	}
 	return false
 }
-
-// SkipConsecutive skips consecutive runes from the provided pattern.
-// Returns true when one or more runes were skipped.
-func (p *P) SkipConsecutive(pattern string) bool {
-	didSkip := false
-	for p.SkipMatching(pattern) {
-		didSkip = true
-	}
-	return didSkip
-}
--- a/parsekit/types.go
+++ b/parsekit/types.go
@ -1,5 +1,9 @@
 package parsekit

+import (
+	"unicode/utf8"
+)
+
 // P holds the internal state of the parser.
 type P struct {
 	state        StateFn      // the function that handles the current state
@ -50,3 +54,14 @@ type Error struct {
 func (err *Error) Error() string {
 	return err.Message
 }
+
+// EOF is a special rune, which is used to indicate an end of file when
+// reading a character from the input.
+// It can be treated as a rune when writing parsing rules, so a valid way to
+// say 'I now expect the end of the file' is using something like:
+// if (p.On(c.Rune(EOF)).Skip()) { ... }
+const EOF rune = -1
+
+// INVALID is a special rune, which is used to indicate an invalid UTF8
+// rune on the input.
+const INVALID rune = utf8.RuneError
--- a/parser/helpers_test.go
+++ b/parser/helpers_test.go
@ -6,7 +6,7 @@ import (
 	"testing"

 	"github.com/mmakaay/toml/parsekit"
-	lexer "github.com/mmakaay/toml/parser"
+	"github.com/mmakaay/toml/parser"
 )

 type statesT struct {
@ -23,7 +23,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
 }

 func runStatesT(t *testing.T, c statesT) {
-	l, err := lexer.NewParser(c.in).ToArray()
+	l, err := parser.NewParser(c.in).ToArray()
 	if err == nil && c.err != "" {
 		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
 	}
@ -36,12 +36,12 @@ func runStatesT(t *testing.T, c statesT) {
 	switch expected := c.out.(type) {
 	case []string:
 		if len(expected) != len(l) {
-			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
+			t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
 		}
 		for i, e := range expected {
 			v := ParserItemToString(l[i])
 			if v != e {
-				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
+				t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
 			}
 		}
 	case string:
@ -51,7 +51,7 @@ func runStatesT(t *testing.T, c statesT) {
 		}
 		actual := strings.Join(a, "")
 		if actual != expected {
-			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
+			t.Errorf("[%s] Unexpected parser output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
 		}
 	}
 }
@ -59,15 +59,15 @@ func runStatesT(t *testing.T, c statesT) {
 // ParserItemToString returns a string representation of the parsekit.Item.
 func ParserItemToString(i parsekit.Item) string {
 	switch i.Type {
-	case lexer.ItemComment:
+	case parser.ItemComment:
 		return fmt.Sprintf("#(%s)", i.Value)
-	case lexer.ItemKey:
+	case parser.ItemKey:
 		return fmt.Sprintf("[%s]", i.Value)
-	case lexer.ItemString:
+	case parser.ItemString:
 		return fmt.Sprintf("STR(%s)", i.Value)
-	case lexer.ItemKeyDot:
+	case parser.ItemKeyDot:
 		return "."
-	case lexer.ItemAssignment:
+	case parser.ItemAssignment:
 		return "="
 	default:
 		panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
--- a/parser/parser.go
+++ b/parser/parser.go
@ -11,40 +11,28 @@ const (
 	ItemString                              // A value of type string
 )

-const (
-	whitespace       string = " \t"
-	carriageReturn   string = "\r"
-	newline          string = "\n"
-	hash             string = "#"
-	equal            string = "="
-	lower            string = "abcdefghijklmnopqrstuvwxyz"
-	upper            string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-	digits           string = "0123456789"
-	hex              string = digits + "abcdefABCDEF"
-	dot              string = "."
-	underscore       string = "_"
-	dash             string = "-"
-	singleQuote      string = "'"
-	doubleQuote      string = "\""
-	backslash        string = "\\"
-	quoteChars       string = singleQuote + doubleQuote
-	bareKeyChars     string = lower + upper + digits + underscore + dash
-	startOfKey       string = bareKeyChars + quoteChars
-	validEscapeChars string = `btnfr"\`
-	mustBeEscaped    string = "" +
-		"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
-		"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
-		"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
-		"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
-		"\u007F"
-)
-
 var (
-	keySeparatorDot = []interface{}{whitespace, dot, whitespace}
-	doubleQuote3    = []interface{}{doubleQuote, doubleQuote, doubleQuote}
-	hex4            = []interface{}{hex, hex, hex, hex}
-	shortUtf8Match  = []interface{}{backslash, 'u', hex4}
-	longUtf8Match   = []interface{}{backslash, 'U', hex4, hex4}
+	c                    = parsekit.C
+	space                = c.Rune(' ')
+	tab                  = c.Rune('\t')
+	carriageReturn       = c.Rune('\r')
+	lineFeed             = c.Rune('\n')
+	hash                 = c.Rune('#')
+	underscore           = c.Rune('_')
+	dash                 = c.Rune('-')
+	equal                = c.Rune('=')
+	dot                  = c.Rune('.')
+	singleQuote          = c.Rune('\'')
+	doubleQuote          = c.Rune('"')
+	any                  = c.Any()
+	anyQuote             = c.AnyOf(singleQuote, doubleQuote)
+	backslash            = c.Rune('\\')
+	lower                = c.RuneRange('a', 'z')
+	upper                = c.RuneRange('A', 'Z')
+	digit                = c.RuneRange('0', '9')
+	whitespace           = c.OneOrMore(c.AnyOf(space, tab))
+	whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
+	optionalWhitespace   = c.Optional(whitespace)
 )

 // NewParser creates a new parser, using the provided input string
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -6,6 +6,10 @@ import (
 	"github.com/mmakaay/toml/parser"
 )

+func TestEmptyInput(t *testing.T) {
+	runStatesT(t, statesT{"empty string", "", "", ""})
+}
+
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 	_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
@ -17,18 +21,13 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 	}
 }

-func TestEmptyInput(t *testing.T) {
-	runStatesT(t, statesT{"empty string", "", "", ""})
-}
-
 func TestInvalidUtf8Data(t *testing.T) {
 	runStatesTs(t, []statesT{
-		{"inside comment", "# \xbc", "", "invalid UTF8 character"},
-		{"bare key 1", "\xbc", "", "invalid UTF8 character"},
-		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
-		{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
-		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
-		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
+		{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
+		{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
+		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
+		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
+		{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
 	})
 }

--- a/parser/syn_comments.go
+++ b/parser/syn_comments.go
@ -6,7 +6,7 @@ import (

 // A '#' hash symbol marks the rest of the line as a comment.
 func startComment(p *parsekit.P) {
-	p.SkipConsecutive(hash)
+	p.On(c.OneOrMore(hash)).Skip()
 	p.RouteTo(commentContents)
 }

@ -16,8 +16,9 @@ func commentContents(p *parsekit.P) {
 	case p.AtEndOfLine():
 		p.EmitLiteralTrim(ItemComment)
 		p.RouteReturn()
-	default:
-		p.AcceptAny()
+	case p.On(any).Accept():
 		p.Repeat()
+	default:
+		p.UnexpectedInput("comment contents")
 	}
 }
--- a/parser/syn_key.go
+++ b/parser/syn_key.go
@ -1,65 +0,0 @@
-package parser
-
-import "github.com/mmakaay/toml/parsekit"
-
-// The primary building block of a TOML document is the key/value pair.
-func startKeyValuePair(p *parsekit.P) {
-	switch {
-	case p.On(whitespace + carriageReturn + newline).Skip():
-		p.Repeat()
-	case p.On(hash).Stay():
-		p.RouteTo(startComment).ThenReturnHere()
-	case p.On(startOfKey).RouteTo(startKey):
-	default:
-		p.RouteTo(endOfFile)
-	}
-}
-
-// A key may be either bare, quoted or dotted.
-func startKey(p *parsekit.P) {
-	switch {
-	case p.On(bareKeyChars).RouteTo(startBareKey):
-	default:
-		p.UnexpectedInput("a valid key name")
-	}
-}
-
-// Bare keys may only contain ASCII letters, ASCII digits,
-// underscores, and dashes (A-Za-z0-9_-). Note that bare
-// keys are allowed to be composed of only ASCII digits,
-// e.g. 1234, but are always interpreted as strings.
-func startBareKey(p *parsekit.P) {
-	p.AcceptConsecutive(bareKeyChars) // TODO make a plan for adding this to After()
-	p.EmitLiteral(ItemKey)
-	p.RouteTo(endOfKeyOrDot)
-}
-
-// Dotted keys are a sequence of bare or quoted keys joined with a dot.
-// This allows for grouping similar properties together:
-func endOfKeyOrDot(p *parsekit.P) {
-	// Whitespace around dot-separated parts is ignored, however,
-	// best practice is to not use any extraneous whitespace.
-	p.SkipConsecutive(whitespace)
-	if p.On(dot).Accept() {
-		p.SkipConsecutive(whitespace)
-		p.EmitLiteral(ItemKeyDot)
-		p.RouteTo(startKey)
-	} else {
-		p.RouteTo(startKeyAssignment)
-	}
-}
-
-// Keys are on the left of the equals sign and values are on the right.
-// Whitespace is ignored around key names and values. The key, equals
-// sign, and value must be on the same line (though some values can
-// be broken over multiple lines).
-func startKeyAssignment(p *parsekit.P) {
-	p.SkipConsecutive(whitespace)
-	if p.On(equal).Accept() {
-		p.EmitLiteral(ItemAssignment)
-		p.SkipConsecutive(whitespace)
-		p.RouteTo(startValue)
-	} else {
-		p.UnexpectedInput("a value assignment")
-	}
-}
--- a/parser/syn_keyvaluepair.go
+++ b/parser/syn_keyvaluepair.go
@ -0,0 +1,88 @@
+package parser
+
+import "github.com/mmakaay/toml/parsekit"
+
+// The primary building block of a TOML document is the key/value pair.
+
+var (
+	// Keys are on the left of the equals sign and values are on the right.
+	// Whitespace is ignored around key names and values. The key, equals
+	// sign, and value must be on the same line (though some values can be
+	// broken over multiple lines).
+	keyAssignment = c.Sequence(optionalWhitespace, equal, optionalWhitespace)
+
+	// A key may be either bare, quoted or dotted.
+	// Bare keys may only contain ASCII letters, ASCII digits,
+	// underscores, and dashes (A-Za-z0-9_-). Note that bare
+	// keys are allowed to be composed of only ASCII digits,
+	// e.g. 1234, but are always interpreted as strings.
+	bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
+	bareKey     = c.OneOrMore(bareKeyRune)
+
+	// Quoted keys follow the exact same rules as either basic
+	// strings or literal strings and allow you to use a much broader
+	// set of key names. Best practice is to use bare keys except
+	// when absolutely necessary.
+	// A bare key must be non-empty, but an empty quoted key is
+	// allowed (though discouraged).
+	startOfKey = c.AnyOf(bareKeyRune, anyQuote)
+
+	// Dotted keys are a sequence of bare or quoted keys joined with a dot.
+	// This allows for grouping similar properties together.
+	// Whitespace around dot-separated parts is ignored, however, best
+	// practice is to not use any extraneous whitespace.
+	keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
+)
+
+func startKeyValuePair(p *parsekit.P) {
+	p.On(whitespaceOrNewlines).Skip()
+	switch {
+	case p.On(hash).Stay():
+		p.RouteTo(startComment).ThenReturnHere()
+	case p.On(startOfKey).RouteTo(startKey):
+	default:
+		p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
+	}
+}
+
+func startKey(p *parsekit.P) {
+	switch {
+	case p.On(bareKeyRune).RouteTo(startBareKey):
+	default:
+		p.UnexpectedInput("a valid key name")
+	}
+}
+
+func startBareKey(p *parsekit.P) {
+	p.On(bareKey).Accept()
+	p.EmitLiteral(ItemKey)
+	p.RouteTo(endOfKeyOrDot)
+}
+
+func endOfKeyOrDot(p *parsekit.P) {
+	if p.On(keySeparatordDot).Skip() {
+		p.Emit(ItemKeyDot, ".")
+		p.RouteTo(startKey)
+	} else {
+		p.RouteTo(startKeyAssignment)
+	}
+}
+
+func startKeyAssignment(p *parsekit.P) {
+	if p.On(keyAssignment).Skip() {
+		p.Emit(ItemAssignment, "=")
+		p.RouteTo(startValue)
+	} else {
+		p.UnexpectedInput("a value assignment")
+	}
+}
+
+// Values must be of the following types: String, Integer, Float, Boolean,
+// Datetime, Array, or Inline Table. Unspecified values are invalid.
+func startValue(p *parsekit.P) {
+	switch {
+	case p.On(anyQuote).RouteTo(startString):
+	default:
+		p.UnexpectedInput("a value")
+	}
+}
--- a/parser/syn_keyvaluepair_test.go
+++ b/parser/syn_keyvaluepair_test.go
@ -5,9 +5,9 @@ import (
 )

 func TestKeyWithoutAssignment(t *testing.T) {
-	err := "unexpected end of file"
+	err := "unexpected end of file (expected a value assignment)"
 	runStatesTs(t, []statesT{
-		{"bare with whitespace", " a ", "[a]", err},
+		{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"},
 		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
 		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
 		{"bare numbers", "0123456789", "[0123456789]", err},
@ -18,15 +18,14 @@ func TestKeyWithoutAssignment(t *testing.T) {
 }

+// TestDottedKey runs dotted bare keys that are not followed by an assignment
+// through runStatesTs. Each case presumably lists a name, the input, the
+// expected rendering of the emitted items and the expected error (verify
+// against the statesT definition). The expected errors differ per case: the
+// first input stops at end of file, the second at the tab that follows the
+// last key part.
 func TestDottedKey(t *testing.T) {
-	err := "unexpected end of file"
 	runStatesTs(t, []statesT{
-		{"bare dotted", "a._.c", "[a].[_].[c]", err},
-		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
+		{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"},
+		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`},
 	})
 }

 func TestKeyWithAssignmentButNoValue(t *testing.T) {
-	err := "unexpected end of file"
+	err := "unexpected end of file (expected a value)"
 	runStatesTs(t, []statesT{
 		{"bare", "a=", "[a]=", err},
 		{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
--- a/parser/syn_strings.go
+++ b/parser/syn_strings.go
@ -2,10 +2,36 @@ package parser

 import "github.com/mmakaay/toml/parsekit"

-// There are four ways to express strings: basic, multi-line basic, literal,
-// and multi-line literal. All strings must contain only valid UTF-8 characters.
-// * Multi-line basic strings are surrounded by three quotation marks on each side.
-// * Basic strings are surrounded by quotation marks.
+// Matchers for the building blocks of TOML strings.
+var (
+	// There are four ways to express strings: basic, multi-line basic, literal,
+	// and multi-line literal. All strings must contain only valid UTF-8 characters.
+	// * Multi-line basic strings are surrounded by three quotation marks on each side.
+	// * Basic strings are surrounded by quotation marks.
+	//
+	// doubleQuote3 matches three consecutive doubleQuote matches.
+	doubleQuote3 = c.Repeat(3, doubleQuote)
+
+	// Any Unicode character may be used except those that must be escaped:
+	// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
+	//
+	// Note that charThatMustBeEscaped only covers the control characters.
+	// The quotation mark and the backslash are not part of this matcher;
+	// they have their own matchers (doubleQuote and backslash).
+	charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
+
+	// For convenience, some popular characters have a compact escape sequence.
+	//
+	// \b         - backspace       (U+0008)
+	// \t         - tab             (U+0009)
+	// \n         - linefeed        (U+000A)
+	// \f         - form feed       (U+000C)
+	// \r         - carriage return (U+000D)
+	// \"         - quote           (U+0022)
+	// \\         - backslash       (U+005C)
+	// \uXXXX     - unicode         (U+XXXX)
+	// \UXXXXXXXX - unicode         (U+XXXXXXXX)
+	//
+	// validEscapeChar is the character that may follow the backslash in a
+	// compact escape; shortEscape is the backslash plus that character.
+	// hex is a single hexadecimal digit in either case. Despite the "Utf8"
+	// in their names, shortUtf8Escape and longUtf8Escape match the \uXXXX
+	// and \UXXXXXXXX escapes listed above: a backslash, the letter u or U,
+	// and exactly 4 or 8 hex digits. validEscape matches any of the three
+	// escape forms.
+	validEscapeChar = c.AnyOf(c.Runes('b', 't', 'n', 'f', 'r'), doubleQuote, backslash)
+	shortEscape     = c.Sequence(backslash, validEscapeChar)
+	hex             = c.AnyOf(digit, c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
+	shortUtf8Escape = c.Sequence(backslash, c.Rune('u'), c.Repeat(4, hex))
+	longUtf8Escape  = c.Sequence(backslash, c.Rune('U'), c.Repeat(8, hex))
+	validEscape     = c.AnyOf(shortEscape, shortUtf8Escape, longUtf8Escape)
+)
+
 func startString(p *parsekit.P) {
 	switch {
 	case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
@ -15,36 +41,21 @@ func startString(p *parsekit.P) {
 	}
 }

-// For convenience, some popular characters have a compact escape sequence.
-//
-// \b         - backspace       (U+0008)
-// \t         - tab             (U+0009)
-// \n         - linefeed        (U+000A)
-// \f         - form feed       (U+000C)
-// \r         - carriage return (U+000D)
-// \"         - quote           (U+0022)
-// \\         - backslash       (U+005C)
-// \uXXXX     - unicode         (U+XXXX)
-// \UXXXXXXXX - unicode         (U+XXXXXXXX)
-//
-// Any Unicode character may be used except those that must be escaped:
-// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
+// parseBasicString handles the contents of a basic string, one match per
+// invocation. The order of the cases matters:
+//
+//   - end of file is reported through UnexpectedEndOfFile;
+//   - a validEscape sequence is accepted, after which this state repeats;
+//   - a charThatMustBeEscaped match is reported through EmitError;
+//   - a backslash (which cannot start a valid escape here, because
+//     validEscape was tried first) or a double quote leaves this state
+//     through RouteReturn;
+//   - any other rune is accepted, after which this state repeats;
+//   - input that matches none of the above is reported through
+//     UnexpectedInput.
 func parseBasicString(p *parsekit.P) {
 	switch {
-	case p.AtEndOfFile():
+	case p.On(parsekit.EOF).Stay():
 		p.UnexpectedEndOfFile("basic string token")
-	case p.On(backslash, validEscapeChars).Accept() ||
-		p.On(shortUtf8Match).Accept() ||
-		p.On(longUtf8Match).Accept():
+	case p.On(validEscape).Accept():
 		p.Repeat()
-	case p.On(mustBeEscaped).Stay():
-		r, _, _ := p.Match(mustBeEscaped)
+	case p.On(charThatMustBeEscaped).Stay():
+		r, _, _ := p.Match(charThatMustBeEscaped)
 		p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 	case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
 		p.RouteReturn()
-	default:
-		p.AcceptAny()
+	case p.On(any).Accept():
 		p.Repeat()
+	default:
+		p.UnexpectedInput("string contents")
 	}
 }

@ -69,7 +80,7 @@ func basicStringSpecifics(p *parsekit.P) {
 	case p.On(backslash).Stay():
 		p.EmitError("Invalid escape sequence")
 	default:
-		p.RouteTo(startBasicString)
+		panic("String parsing should not have ended up here")
 	}
 }

--- a/parser/syn_strings_test.go
+++ b/parser/syn_strings_test.go
@ -33,8 +33,8 @@ func TestEmptyBasicString(t *testing.T) {
 		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
 		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
 		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
-		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
-		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
+		{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
+		{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
 	})
 }

--- a/parser/syn_value.go
+++ b/parser/syn_value.go
@ -1,14 +0,0 @@
-package parser
-
-import "github.com/mmakaay/toml/parsekit"
-
-// Values must be of the following types: String, Integer, Float, Boolean,
-// Datetime, Array, or Inline Table. Unspecified values are invalid.
-func startValue(p *parsekit.P) {
-	p.SkipConsecutive(whitespace)
-	if p.Upcoming(quoteChars) {
-		p.RouteTo(startString)
-	} else {
-		p.UnexpectedInput("a value")
-	}
-}