Added a load of parser/combinator implementation, the system seems feasible!

2019-05-20 22:40:59 +00:00 · 2019-05-20 22:40:59 +00:00 · d9d837fe6e
parent 3677ab18cb
commit d9d837fe6e
18 changed files with 502 additions and 331 deletions
--- a/parsekit/emitting.go
+++ b/parsekit/emitting.go
@ -5,26 +5,45 @@ import (
 	"strings"
 )
 // ItemType represents the type of a parser Item.
 type ItemType int
 // TODO private?
 // ItemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
 const ItemEOF ItemType = -1
 // TODO private?
 // ItemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
 const ItemError ItemType = -2
 // Item represents an item that can be emitted from the parser.
 type Item struct {
 	Type  ItemType
 	Value string
 }
 // Emit builds an Item from the provided type and string value, sends it
 // to the client over the items channel and then resets the string buffer.
 func (p *P) Emit(t ItemType, s string) {
 	item := Item{Type: t, Value: s}
 	p.items <- item
 	p.buffer.reset()
 }
-// EmitLiteral passes a Parser item to the client, including the accumulated
+// EmitLiteral passes a Parser item to the client, including accumulated
 // string buffer data as a literal string.
 func (p *P) EmitLiteral(t ItemType) {
 	p.Emit(t, p.buffer.asLiteralString())
 }
-// EmitLiteralTrim passes a Parser item to the client, including the
+// EmitLiteralTrim passes a Parser item to the client, including
 // accumulated string buffer data as a literal string with whitespace
 // trimmed from it.
 func (p *P) EmitLiteralTrim(t ItemType) {
 	// Take the buffered data as a literal string and strip the
 	// surrounding whitespace before handing it to Emit.
 	literal := p.buffer.asLiteralString()
 	p.Emit(t, strings.TrimSpace(literal))
 }
-// EmitInterpreted passes a Parser item to the client, including the
+// EmitInterpreted passes a Parser item to the client, including
 // accumulated string buffer data as a Go double quoted interpreted string
 // (handling escape codes like \n, \t, \uXXXX, etc.)
 // This method might return an error, in case there is data in the
@ -38,6 +57,19 @@ func (p *P) EmitInterpreted(t ItemType) error {
 	return nil
 }
 // Error is used as the error type when parsing errors occur.
 // The error includes some extra meta information to allow for useful
 // error messages to the user.
 type Error struct {
 	Message string
 	Row     int
 	Column  int
 }
 // Error returns the error message. Having this method makes *Error
 // satisfy the built-in error interface.
 func (err *Error) Error() string {
 	return err.Message
 }
 // EmitError emits a Parser error item to the client.
 func (p *P) EmitError(format string, args ...interface{}) {
 	message := fmt.Sprintf(format, args...)
@ -51,17 +83,17 @@ func (p *P) UnexpectedInput() {
 	r, _, ok := p.peek(0)
 	switch {
 	case ok:
-		p.EmitError("unexpected character %q%s", r, p.fmtExpects())
+		p.EmitError("unexpected character %q%s", r, fmtExpects(p))
 	case r == EOF:
-		p.EmitError("unexpected end of file%s", p.fmtExpects())
+		p.EmitError("unexpected end of file%s", fmtExpects(p))
 	case r == INVALID:
-		p.EmitError("invalid UTF8 character in input%s", p.fmtExpects())
+		p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
 	default:
 		panic("Unhandled output from peek()")
 	}
 }
-func (p *P) fmtExpects() string {
+func fmtExpects(p *P) string {
 	if p.expecting == "" {
 		return ""
 	}
--- a/parsekit/internals.go
+++ b/parsekit/internals.go
@ -1,95 +0,0 @@
 package parsekit
 import (
 	"unicode/utf8"
 )
 // P holds the internal state of the parser.
 type P struct {
 	state        StateFn      // the function that handles the current state
 	nextState    StateFn      // the function that will handle the next state
 	stack        []StateFn    // state function stack, for nested parsing
 	input        string       // the scanned input
 	len          int          // the total length of the input in bytes
 	pos          int          // current byte scanning position in the input
 	newline      bool         // keep track of when we have scanned a newline
 	cursorRow    int          // current row number in the input
 	cursorColumn int          // current column position in the input
 	expecting    string       // a description of what the current state expects to find
 	buffer       stringBuffer // an efficient buffer, used to build string values
 	items        chan Item    // channel of resulting Parser items
 	item         Item         // the current item as reached by Next() and retrieved by Get()
 	err          *Error       // an error when lexing failed, retrieved by Error()
 }
 // peek looks at the rune that starts offsetInBytes bytes after the current
 // cursor position, without moving the cursor.
 // It returns the rune, its width in bytes and a boolean, as produced by
 // handleRuneError for the decoded input.
 func (p *P) peek(offsetInBytes int) (rune, int, bool) {
 	upcoming := p.input[p.pos+offsetInBytes:]
 	decoded, width := utf8.DecodeRuneInString(upcoming)
 	return handleRuneError(decoded, width)
 }
 // handleRuneError is used to normalize the rune value in case of errors.
 // When decoding fails, then utf8.RuneError will be in the rune.
 // The width tells which one of two error situations occurred:
 // * w == 0: end of file is reached
 // * w == 1: invalid UTF8 encoding on input
 // This function lets these two cases return respectively the
 // package's own EOF or INVALID runes, together with a false boolean,
 // to make it easy for client code to distinguish between these two cases.
 // Any other width means that the input holds a correctly encoded rune
 // (this includes a literal U+FFFD replacement character, which is three
 // bytes wide). Such a rune is returned as-is, with a true boolean.
 func handleRuneError(r rune, w int) (rune, int, bool) {
 	if r != utf8.RuneError {
 		return r, w, true
 	}
 	switch w {
 	case 0:
 		return EOF, 0, false
 	case 1:
 		return INVALID, w, false
 	default:
 		// The input really contains an encoded U+FFFD: not an error.
 		return r, w, true
 	}
 }
 // EOF is a special rune, which is used to indicate an end of file when
 // reading a character from the input.
 // It can be treated as a rune when writing parsing rules, so a valid way to
 // say 'I now expect the end of the file' is using something like:
 // if (p.On(c.Rune(EOF)).Skip()) { ... }
 const EOF rune = -1
 // INVALID is a special rune, which is used to indicate an invalid UTF8
 // rune on the input.
 const INVALID rune = utf8.RuneError
 // StateFn defines the type of function that can be used to
 // handle a parser state.
 type StateFn func(*P)
 // ItemType represents the type of a parser Item.
 type ItemType int
 // ItemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
 const ItemEOF ItemType = -1
 // ItemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
 const ItemError ItemType = -2
 // Item represents an item returned from the parser.
 type Item struct {
 	Type  ItemType
 	Value string
 }
 // Error is used as the error type when parsing errors occur.
 // The error includes some extra meta information to allow for useful
 // error messages to the user.
 type Error struct {
 	Message string
 	Row     int
 	Column  int
 }
 func (err *Error) Error() string {
 	return err.Message
 }
--- a/parsekit/matchers.go
+++ b/parsekit/matchers.go
@ -6,8 +6,7 @@ import (
 )
 // Not in need of it myself, but nice to have I guess:
-// - NotFollowedBy
+// - LookAhead
 // - Separated
 // MatchDialog is used by Matcher implementations as a means
 // to retrieve data to match against and to report back
@ -92,6 +91,7 @@ type Matcher interface {
 }
 type matcherConstructors struct {
 	EndOfFile    func() MatchEndOfFile
 	Any          func() MatchAny
 	Rune         func(rune) MatchRune
 	RuneRange    func(rune, rune) MatchRuneRange
@ -99,20 +99,28 @@ type matcherConstructors struct {
 	String       func(string) MatchSequence
 	StringNoCase func(string) MatchSequence
 	AnyOf        func(...Matcher) MatchAnyOf
-	Repeat       func(int, Matcher) MatchRepeat
+	Not          func(Matcher) MatchNot
 	Sequence     func(...Matcher) MatchSequence
 	ZeroOrMore   func(Matcher) MatchZeroOrMore
 	OneOrMore    func(Matcher) MatchOneOrMore
 	Optional     func(Matcher) MatchOptional
 	Sequence     func(...Matcher) MatchSequence
 	Repeat       func(int, Matcher) MatchRepeat
 	Min          func(int, Matcher) MatchRepeat
 	Max          func(int, Matcher) MatchRepeat
 	Bounded      func(int, int, Matcher) MatchRepeat
 	ZeroOrMore   func(Matcher) MatchRepeat
 	OneOrMore    func(Matcher) MatchRepeat
 	Separated    func(Matcher, Matcher) MatchSeparated
 	Drop         func(Matcher) MatchDrop
 }
 // C provides access to a wide range of parser/combinator
-// constructors that can be used to build matching expressions.
+// constructors that can be used to build matching expressions.
 // When using C in your own parser, then it is advised to create
 // an alias in your own package for easy reference:
 // var c = parsekit.C
 var C = matcherConstructors{
 	EndOfFile: func() MatchEndOfFile {
 		return MatchEndOfFile{}
 	},
 	Any: func() MatchAny {
 		return MatchAny{}
 	},
@ -130,44 +138,73 @@ var C = matcherConstructors{
 		return MatchAnyOf{m}
 	},
 	String: func(s string) MatchSequence {
-		m := make([]Matcher, len(s))
+		var m = []Matcher{}
-		for i, r := range s {
+		for _, r := range s {
-			m[i] = MatchRune{r}
+			m = append(m, MatchRune{r})
 		}
 		return MatchSequence{m}
 	},
 	StringNoCase: func(s string) MatchSequence {
-		m := make([]Matcher, len(s))
+		var m = []Matcher{}
-		for i, r := range s {
+		for _, r := range s {
 			u := MatchRune{unicode.ToUpper(r)}
 			l := MatchRune{unicode.ToLower(r)}
-			m[i] = MatchAnyOf{[]Matcher{u, l}}
+			m = append(m, MatchAnyOf{[]Matcher{u, l}})
 		}
 		return MatchSequence{m}
 	},
 	AnyOf: func(matchers ...Matcher) MatchAnyOf {
 		return MatchAnyOf{matchers}
 	},
 	Repeat: func(count int, matcher Matcher) MatchRepeat {
 		return MatchRepeat{count, matcher}
 	},
 	Sequence: func(matchers ...Matcher) MatchSequence {
 		return MatchSequence{matchers}
 	},
 	OneOrMore: func(matcher Matcher) MatchOneOrMore {
 		return MatchOneOrMore{matcher}
 	},
 	ZeroOrMore: func(matcher Matcher) MatchZeroOrMore {
 		return MatchZeroOrMore{matcher}
 	},
 	Optional: func(matcher Matcher) MatchOptional {
 		return MatchOptional{matcher}
 	},
 	Not: func(matcher Matcher) MatchNot {
 		return MatchNot{matcher}
 	},
 	AnyOf: func(matchers ...Matcher) MatchAnyOf {
 		return MatchAnyOf{matchers}
 	},
 	Sequence: func(matchers ...Matcher) MatchSequence {
 		return MatchSequence{matchers}
 	},
 	Repeat: func(count int, matcher Matcher) MatchRepeat {
 		return MatchRepeat{count, count, matcher}
 	},
 	Min: func(min int, matcher Matcher) MatchRepeat {
 		return MatchRepeat{min, -1, matcher}
 	},
 	Max: func(max int, matcher Matcher) MatchRepeat {
 		return MatchRepeat{-1, max, matcher}
 	},
 	Bounded: func(min int, max int, matcher Matcher) MatchRepeat {
 		return MatchRepeat{min, max, matcher}
 	},
 	OneOrMore: func(matcher Matcher) MatchRepeat {
 		return MatchRepeat{1, -1, matcher}
 	},
 	ZeroOrMore: func(matcher Matcher) MatchRepeat {
 		return MatchRepeat{0, -1, matcher}
 	},
 	Separated: func(separator Matcher, matcher Matcher) MatchSeparated {
 		return MatchSeparated{separator, matcher}
 	},
 	Drop: func(matcher Matcher) MatchDrop {
 		return MatchDrop{matcher}
 	},
 }
 // MatchEndOfFile is a Matcher that matches on the end of the input.
 type MatchEndOfFile struct{}
 // Match reads the next rune from the dialog and reports true when no
 // rune could be read and the returned rune is the EOF marker.
 func (c MatchEndOfFile) Match(m *MatchDialog) bool {
 	if r, ok := m.NextRune(); !ok {
 		return r == EOF
 	}
 	return false
 }
 // MatchInvalidRune is a Matcher that matches on an invalid rune in the input.
 type MatchInvalidRune struct{}
 // Match reads the next rune from the dialog and reports true when no
 // rune could be read and the returned rune is the INVALID marker.
 func (c MatchInvalidRune) Match(m *MatchDialog) bool {
 	if r, ok := m.NextRune(); !ok {
 		return r == INVALID
 	}
 	return false
 }
 type MatchAny struct{}
 func (c MatchAny) Match(m *MatchDialog) bool {
@ -175,6 +212,31 @@ func (c MatchAny) Match(m *MatchDialog) bool {
 	return ok
 }
 // MatchNot is a Matcher that inverts the result of the wrapped Matcher.
 type MatchNot struct {
 	matcher Matcher
 }
 // Match runs the wrapped Matcher against a fork of the dialog.
 // When the wrapped Matcher matches, false is returned. Otherwise, the
 // fork is merged and true is returned.
 func (c MatchNot) Match(m *MatchDialog) bool {
 	probe := m.Fork()
 	if c.matcher.Match(probe) {
 		return false
 	}
 	probe.Merge()
 	return true
 }
 // MatchOptional is a Matcher that makes the wrapped Matcher optional.
 type MatchOptional struct {
 	matcher Matcher
 }
 // Match runs the wrapped Matcher against a fork of the dialog and merges
 // the fork when it matches. It always returns true.
 func (c MatchOptional) Match(m *MatchDialog) bool {
 	if attempt := m.Fork(); c.matcher.Match(attempt) {
 		attempt.Merge()
 	}
 	return true
 }
 type MatchRune struct {
 	match rune
 }
@ -209,18 +271,41 @@ func (c MatchAnyOf) Match(m *MatchDialog) bool {
 }
 type MatchRepeat struct {
-	count   int
+	min     int
 	max     int
 	matcher Matcher
 }
 func (c MatchRepeat) Match(m *MatchDialog) bool {
 	child := m.Fork()
-	for i := 0; i < c.count; i++ {
+	if c.min >= 0 && c.max >= 0 && c.min > c.max {
 		panic("MatchRepeat definition error: max must not be < min")
 	}
 	total := 0
 	// Specified min: check for the minimal required amount of matches.
 	for total < c.min {
 		total++
 		if !c.matcher.Match(child) {
 			return false
 		}
 	}
 	// No specified max: include the rest of the available matches.
 	if c.max < 0 {
 		child.Merge()
 		for c.matcher.Match(child) {
 			child.Merge()
 		}
 		return true
 	}
 	// Specified max: include the rest of the available matches, up to the max.
 	child.Merge()
 	for total < c.max {
 		total++
 		if !c.matcher.Match(child) {
 			break
 		}
 		child.Merge()
 	}
 	return true
 }
@ -239,40 +324,14 @@ func (c MatchSequence) Match(m *MatchDialog) bool {
 	return true
 }
-type MatchOneOrMore struct {
+type MatchSeparated struct {
-	matcher Matcher
+	separator Matcher
 	matcher   Matcher
 }
-func (c MatchOneOrMore) Match(m *MatchDialog) bool {
+func (c MatchSeparated) Match(m *MatchDialog) bool {
-	child := m.Fork()
+	seq := C.Sequence(c.matcher, C.ZeroOrMore(C.Sequence(c.separator, c.matcher)))
-	for c.matcher.Match(child) {
+	return seq.Match(m)
 		child.Merge()
 	}
 	return len(m.runes) > 0
 }
 // MatchZeroOrMore is a Matcher that applies the wrapped Matcher repeatedly.
 type MatchZeroOrMore struct {
 	matcher Matcher
 }
 // Match keeps running the wrapped Matcher against a fork of the dialog,
 // merging the fork after every successful match. It returns true as soon
 // as the wrapped Matcher no longer matches.
 func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
 	fork := m.Fork()
 	for {
 		if !c.matcher.Match(fork) {
 			return true
 		}
 		fork.Merge()
 	}
 }
 // MatchOptional is a Matcher that makes the wrapped Matcher optional.
 type MatchOptional struct {
 	matcher Matcher
 }
 // Match tries the wrapped Matcher on a fork of the dialog. The fork is
 // merged on a match. The result is true, whether or not a match was found.
 func (c MatchOptional) Match(m *MatchDialog) bool {
 	fork := m.Fork()
 	matched := c.matcher.Match(fork)
 	if matched {
 		fork.Merge()
 	}
 	return true
 }
 type MatchDrop struct {
--- a/parsekit/matchers_test.go
+++ b/parsekit/matchers_test.go
@ -15,7 +15,7 @@ func newParser(input string, matcher p.Matcher) *p.P {
 		p.Expects("MATCH")
 		if p.On(matcher).Accept() {
 			p.EmitLiteral(TestItem)
-			p.Repeat()
+			p.RouteRepeat()
 		}
 	}
 	return p.New(input, stateFn)
@ -120,20 +120,19 @@ func TestMatchString(t *testing.T) {
 	}
 }
-// TODO
+func TestMatchStringNoCase(t *testing.T) {
-// func TestMatchStringNoCase(t *testing.T) {
+	p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
-// 	p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
+	r, err, ok := p.Next()
-// 	r, err, ok := p.Next()
+	if !ok {
-// 	if !ok {
+		t.Fatalf("Parsing failed: %s", err)
-// 		t.Fatalf("Parsing failed: %s", err)
+	}
-// 	}
+	if r.Type != TestItem {
-// 	if r.Type != TestItem {
+		t.Error("Parser item type not expected TestTitem")
-// 		t.Error("Parser item type not expected TestTitem")
+	}
-// 	}
+	if r.Value != "HellÖ" {
-// 	if r.Value != "Hello" {
+		t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value)
-// 		t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
+	}
-// 	}
+}
 // }
 func TestMatchRunes(t *testing.T) {
 	m := c.Runes('+', '-', '*', '/')
@ -156,6 +155,29 @@ func TestMatchRunes(t *testing.T) {
 	}
 }
 // TestMatchNot checks that Not(Rune('b')) accepts the leading 'a'
 // of the input "aabc".
 func TestMatchNot(t *testing.T) {
 	subject := newParser("aabc", c.Not(c.Rune('b')))
 	item, err, ok := subject.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if got := item.Value; got != "a" {
 		t.Errorf("Parser item value is %q instead of expected \"a\"", got)
 	}
 }
 // TestMatchNot_Mismatch checks that Not(Rune('a')) refuses the input
 // "aabc" and that the parser reports the unexpected character.
 func TestMatchNot_Mismatch(t *testing.T) {
 	p := newParser("aabc", c.Not(c.Rune('a')))
 	_, err, ok := p.Next()
 	if ok {
 		t.Fatalf("Parsing unexpectedly succeeded")
 	}
 	expected := "unexpected character 'a' (expected MATCH)"
 	if err.Error() != expected {
 		t.Fatalf("Unexpected error from parser:\nexpected: %s\nactual: %s\n", expected, err.Error())
 	}
 }
 func TestMatchAnyOf(t *testing.T) {
 	p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
 	r, err, ok := p.Next()
@ -192,6 +214,30 @@ func TestMatchRepeat(t *testing.T) {
 	}
 }
 // TestMatchRepeat_Min checks that Min(4, ...) takes all six available
 // '1' runes from the input.
 func TestMatchRepeat_Min(t *testing.T) {
 	p := newParser("1111112345", c.Min(4, c.Rune('1')))
 	r, err, ok := p.Next()
 	// Fail early on a parse error, so it is not misreported as a
 	// value mismatch below.
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if r.Value != "111111" {
 		t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value)
 	}
 }
 // TestMatchRepeat_Max checks that Max(4, ...) takes no more than four
 // of the available '1' runes from the input.
 func TestMatchRepeat_Max(t *testing.T) {
 	p := newParser("1111112345", c.Max(4, c.Rune('1')))
 	r, err, ok := p.Next()
 	// Fail early on a parse error, so it is not misreported as a
 	// value mismatch below.
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if r.Value != "1111" {
 		t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value)
 	}
 }
 // TestMatchRepeat_Bounded checks that Bounded(3, 5, ...) takes five
 // of the six available '1' runes from the input.
 func TestMatchRepeat_Bounded(t *testing.T) {
 	p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1')))
 	r, err, ok := p.Next()
 	// Fail early on a parse error, so it is not misreported as a
 	// value mismatch below.
 	if !ok {
 		t.Fatalf("Parsing failed: %s", err)
 	}
 	if r.Value != "11111" {
 		t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value)
 	}
 }
 func TestMatchRepeat_Mismatch(t *testing.T) {
 	p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
 	_, err, ok := p.Next()
@ -282,6 +328,21 @@ func TestMatchDrop(t *testing.T) {
 		t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
 	}
 }
 // TestMatchSeparated checks that Separated matches a list of numbers
 // (one to three digits each) that are divided by '|', ';' or ','
 // and that matching stops before the trailing ",abc".
 func TestMatchSeparated(t *testing.T) {
 	number := c.Bounded(1, 3, c.RuneRange('0', '9'))
 	separators := c.Runes('|', ';', ',')
 	separatedNumbers := c.Separated(separators, number)
 	p := newParser("1,2;3|44,55|66;777,abc", separatedNumbers)
 	r, err, ok := p.Next()
 	if !ok {
 		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
 	}
 	if r.Value != "1,2;3|44,55|66;777" {
 		t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
 	}
 }
 func TestMixAndMatch(t *testing.T) {
 	hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
 	backslash := c.Rune('\\')
--- a/parsekit/matching.go
+++ b/parsekit/matching.go
@ -3,7 +3,7 @@ package parsekit
 // Expects is used to let a state function describe what input it is expecting.
 // This expectation is used in error messages to make them more descriptive.
 //
-// Also, when defining an expectation inside a StateFn, you do not need
+// Also, when defining an expectation inside a StateHandler, you do not need
 // to handle unexpected input yourself. When the end of the function is
 // reached without setting the next state, an automatic error will be
 // emitted. This error differentiates between issues:
@ -14,47 +14,18 @@ func (p *P) Expects(description string) {
 	p.expecting = description
 }
-// AtEndOfFile returns true when there is no more data available in the input.
+// On checks if the current input matches the provided Matcher.
-func (p *P) AtEndOfFile() bool {
+// It returns a MatchAction struct, which provides methods that
-	return p.pos >= p.len
+// can be used to tell the parser what to do with a match.
-}
+//
-
+// The intended way to use this, is by chaining some methods,
-// AtEndOfLine returns true when the cursor is either at the end of the line
+// for example: p.On(...).Accept()
-// or at the end of the file. The cursor is not moved to a new position
+// The chained methods will as a whole return a boolean value,
-// by this method.
+// indicating whether or not a match was found and processed.
-func (p *P) AtEndOfLine() bool {
+func (p *P) On(m Matcher) *MatchAction {
-	return p.AtEndOfFile() ||
+	runes, widths, ok := p.match(m)
-		p.On(C.String("\r\n")).Stay() ||
+	p.LastMatch = string(runes)
-		p.On(C.Rune('\n')).Stay()
+	return &MatchAction{
 }
 // SkipEndOfLine reports whether the cursor is at the end of the file or
 // at a newline ("\r\n" or "\n"). The checks are done in that order and
 // a newline that is found is skipped using p.On(...).Skip().
 func (p *P) SkipEndOfLine() bool {
 	if p.AtEndOfFile() {
 		return true
 	}
 	if p.On(C.String("\r\n")).Skip() {
 		return true
 	}
 	return p.On(C.Rune('\n')).Skip()
 }
 // AcceptEndOfLine reports whether the cursor is at the end of the file
 // or at the end of a line. At the end of the file, nothing is written.
 // When a newline is skipped by SkipEndOfLine, a single '\n' rune is
 // written to the string buffer (also when the input contained "\r\n").
 func (p *P) AcceptEndOfLine() bool {
 	if p.AtEndOfFile() {
 		return true
 	}
 	if !p.SkipEndOfLine() {
 		return false
 	}
 	p.buffer.writeRune('\n')
 	return true
 }
 func (p *P) On(m Matcher) *action {
 	runes, widths, ok := p.Match(m)
 	return &action{
 		p:      p,
 		runes:  runes,
 		widths: widths,
@ -62,24 +33,29 @@ func (p *P) On(m Matcher) *action {
 	}
 }
-func (p *P) Match(matcher Matcher) ([]rune, []int, bool) {
+// Match checks if the provided Matcher matches the current input.
-	return p.match(0, matcher)
+// Returns a slice of matching runes, a slice of their respective
-}
+// byte widths and a boolean.
-
+// The boolean will be false and the slices will be empty in case
-func (p *P) match(offset int, matcher Matcher) ([]rune, []int, bool) {
+// the input did not match.
 func (p *P) match(matcher Matcher) ([]rune, []int, bool) {
 	// The dialog collects the runes and widths while the matcher runs.
 	dialog := &MatchDialog{p: p}
 	matched := matcher.Match(dialog)
 	return dialog.runes, dialog.widths, matched
 }
-type action struct {
+type MatchAction struct {
 	p      *P
 	runes  []rune
 	widths []int
 	ok     bool
 }
-func (a *action) Accept() bool {
+// Accept tells the parser to move the cursor past a match that was found,
 // and to store the input that matched in the string buffer.
 // Returns true in case a match was found.
 // When no match was found, then no action is taken and false is returned.
 func (a *MatchAction) Accept() bool {
 	if a.ok {
 		for i, r := range a.runes {
 			a.p.buffer.writeRune(r)
@ -89,7 +65,11 @@ func (a *action) Accept() bool {
 	return a.ok
 }
-func (a *action) Skip() bool {
+// Skip tells the parser to move the cursor past a match that was found,
 // without storing the actual match in the string buffer.
 // Returns true in case a match was found.
 // When no match was found, then no action is taken and false is returned.
 func (a *MatchAction) Skip() bool {
 	if a.ok {
 		for i, r := range a.runes {
 			type C struct {
@ -102,13 +82,31 @@ func (a *action) Skip() bool {
 	return a.ok
 }
-func (a *action) Stay() bool {
+// Stay tells the parser to not move the cursor after finding a match.
 // Returns true in case a match was found, false otherwise.
 func (a *MatchAction) Stay() bool {
 	return a.ok
 }
-// advanceCursor advances the rune cursor one position in the
+// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...).
-// input data. While doing so, it keeps tracks of newlines,
+func (a *MatchAction) RouteTo(state StateHandler) bool {
-// so we can report on row + column positions on error.
+	if a.ok {
 		a.p.RouteTo(state)
 	}
 	return a.ok
 }
 // RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn().
 // The route is only set when a match was found. The return value
 // tells whether or not that was the case.
 func (a *MatchAction) RouteReturn() bool {
 	if !a.ok {
 		return false
 	}
 	a.p.RouteReturn()
 	return true
 }
 // advanceCursor advances the rune cursor one position in the input data.
 // While doing so, it keeps track of newlines, so we can report on
 // row + column positions on error.
 func (p *P) advanceCursor(r rune, w int) {
 	p.pos += w
 	if p.newline {
@ -119,17 +117,3 @@ func (p *P) advanceCursor(r rune, w int) {
 	}
 	p.newline = r == '\n'
 }
 // RouteTo routes the parser to the provided state, but only when
 // a match was found. It returns whether or not a match was found.
 func (a *action) RouteTo(state StateFn) bool {
 	if !a.ok {
 		return false
 	}
 	a.p.RouteTo(state)
 	return true
 }
 // RouteReturn calls RouteReturn on the parser, but only when a match
 // was found. It returns whether or not a match was found.
 func (a *action) RouteReturn() bool {
 	if !a.ok {
 		return false
 	}
 	a.p.RouteReturn()
 	return true
 }
--- a/parsekit/parsekit.go
+++ b/parsekit/parsekit.go
@ -6,13 +6,36 @@ import (
 	"runtime"
 )
 // P holds the internal state of the parser.
 type P struct {
 	state        StateHandler   // the function that handles the current state
 	nextState    StateHandler   // the function that will handle the next state
 	stack        []StateHandler // state function stack, for nested parsing
 	input        string         // the scanned input
 	len          int            // the total length of the input in bytes
 	pos          int            // current byte scanning position in the input
 	newline      bool           // keep track of when we have scanned a newline
 	cursorRow    int            // current row number in the input
 	cursorColumn int            // current column position in the input
 	expecting    string         // a description of what the current state expects to find
 	buffer       stringBuffer   // an efficient buffer, used to build string values
 	LastMatch    string         // a string representation of the last matched input data
 	items        chan Item      // channel of resulting Parser items
 	item         Item           // the current item as reached by Next() and retrieved by Get()
 	err          *Error         // an error when lexing failed, retrieved by Error()
 }
 // StateHandler defines the type of function that can be used to
 // handle a parser state.
 type StateHandler func(*P)
 // New takes an input string and a start state,
 // and initializes the parser for it.
-func New(input string, startState StateFn) *P {
+func New(input string, start StateHandler) *P {
 	return &P{
 		input:     input,
 		len:       len(input),
-		nextState: startState,
+		nextState: start,
 		items:     make(chan Item, 2),
 	}
 }
@ -25,51 +48,80 @@ func (p *P) Next() (Item, *Error, bool) {
 	for {
 		select {
 		case i := <-p.items:
-			switch {
+			return p.makeReturnValues(i)
 			case i.Type == ItemEOF:
 				return i, nil, false
 			case i.Type == ItemError:
 				p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
 				return i, p.err, false
 			default:
 				p.item = i
 				return i, nil, true
 			}
 		default:
-			// When implementing a parser, a state function must provide
+			p.runStatusHandler()
 			// a routing decision in every state function execution.
 			// When no route is specified, then it is considered a bug
 			// in the parser implementation.
 			// An exception is when a function specified its expectation
 			// using the Expects() method. In that case, an unexpected
 			// input error is emitted.
 			if p.nextState == nil {
 				if p.expecting != "" {
 					p.UnexpectedInput()
 					continue
 				} else {
 					name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
 					panic(fmt.Sprintf("StateFn implementation bug: %s did not set next state or input expectation", name))
 				}
 			}
 			p.state = p.nextState
 			p.nextState = nil
 			p.expecting = ""
 			p.state(p)
 		}
 	}
 }
-// ToArray returns Parser items as an array (mainly intended for testing purposes)
+// runStatusHandler moves the parser, which is basically a state machine,
-// When an error occurs during scanning, a partial result will be
+// to its next status. It does so by invoking a function of the
-// returned, accompanied by the error that occurred.
+// type StateHandler. This function represents the current status.
-func (p *P) ToArray() ([]Item, *Error) {
+func (p *P) runStatusHandler() {
-	var items []Item
+	if state, ok := p.getNextStateHandler(); ok {
-	for {
+		p.invokeNextStatusHandler(state)
-		item, err, more := p.Next()
+	}
-		if !more {
+}
-			return items, err
+
-		}
+// getNextStateHandler determines the next StateHandler to invoke in order
-		items = append(items, item)
+// to move the parsing state machine one step further.
 //
 // When implementing a parser, the StateHandler functions must provide
 // a routing decision in every invocation. A routing decision is one
 // of the following:
 //
 // * A route is specified explicitly, which means that the next StatusHandler
 //   function to invoke is registered during the StateHandler function
 //   invocation. For example: p.RouteTo(nextStatus)
 //
 // * A route is specified implicitly, which means that a previous StateHandler
 //   invocation has registered the followup route for the current state.
 //   For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
 //   In this example, the nextStatus StateHandler will not have to specify
 //   a route explicitly, but otherStatus will be used implicitly after
 //   the nextStatus function has returned.
 //
 // * An expectation is registered by the StatusHandler.
 //   For example: p.Expects("a cool thing")
 //   When the StatusHandler returns without having specified a route, this
 //   expectation is used to generate an "unexpected input" error message.
 //
 // When no routing decision is provided by a StateHandler, then this is
 // considered a bug in the state handler, and the parser will panic.
 func (p *P) getNextStateHandler() (StateHandler, bool) {
 	// An explicitly registered route always wins.
 	if next := p.nextState; next != nil {
 		return next, true
 	}
 	// Otherwise, fall back to a route that was pushed on the stack.
 	if len(p.stack) > 0 {
 		return p.popState(), true
 	}
 	// No route at all, but an expectation was set: report the
 	// unexpected input and tell the caller there is no handler to run.
 	if p.expecting != "" {
 		p.UnexpectedInput()
 		return nil, false
 	}
 	// No routing decision whatsoever: this is a bug in the state handler.
 	handler := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
 	panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", handler))
 }
 // invokeNextStatusHandler makes the provided StateHandler the current
 // state, clears the registered next state and expectation, and then
 // invokes the StateHandler function.
 func (p *P) invokeNextStatusHandler(state StateHandler) {
 	p.state, p.nextState, p.expecting = state, nil, ""
 	state(p)
 }
 // makeReturnValues translates an Item into the values that are returned
 // by Next(): the item itself, an optional error and a boolean.
 // For an ItemEOF the boolean is false and there is no error.
 // For an ItemError the error is built from the item value and the
 // current cursor position, stored in p.err, and the boolean is false.
 // Any other item is stored in p.item and returned with a true boolean.
 func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
 	switch i.Type {
 	case ItemEOF:
 		return i, nil, false
 	case ItemError:
 		p.err = &Error{Message: i.Value, Row: p.cursorRow, Column: p.cursorColumn}
 		return i, p.err, false
 	}
 	p.item = i
 	return i, nil, true
 }
--- a/parsekit/peek.go
+++ b/parsekit/peek.go
@ -0,0 +1,43 @@
 package parsekit
 import (
 	"unicode/utf8"
 )
 // peek looks at the rune that starts offsetInBytes bytes after the current
 // cursor position, without moving the cursor.
 // It returns the rune, its width in bytes and a boolean, as produced by
 // handleRuneError for the decoded input.
 func (p *P) peek(offsetInBytes int) (rune, int, bool) {
 	upcoming := p.input[p.pos+offsetInBytes:]
 	decoded, width := utf8.DecodeRuneInString(upcoming)
 	return handleRuneError(decoded, width)
 }
 // handleRuneError is used to normalize the rune value in case of errors.
 // When decoding fails, then utf8.RuneError will be in the rune.
 // The width tells which one of two error situations occurred:
 // * w == 0: end of file is reached
 // * w == 1: invalid UTF8 encoding on input
 // This function lets these two cases return respectively the
 // package's own EOF or INVALID runes, together with a false boolean,
 // to make it easy for client code to distinguish between these two cases.
 // Any other width means that the input holds a correctly encoded rune
 // (this includes a literal U+FFFD replacement character, which is three
 // bytes wide). Such a rune is returned as-is, with a true boolean.
 func handleRuneError(r rune, w int) (rune, int, bool) {
 	if r != utf8.RuneError {
 		return r, w, true
 	}
 	switch w {
 	case 0:
 		return EOF, 0, false
 	case 1:
 		return INVALID, w, false
 	default:
 		// The input really contains an encoded U+FFFD: not an error.
 		return r, w, true
 	}
 }
 // EOF is a special rune, which is used to indicate an end of file when
 // reading a character from the input.
 // It can be treated as a rune when writing parsing rules, so a valid way to
 // say 'I now expect the end of the file' is using something like:
 // if (p.On(c.Rune(EOF)).Skip()) { ... }
 const EOF rune = -1
 // INVALID is a special rune, which is used to indicate an invalid UTF8
 // rune on the input.
 const INVALID rune = utf8.RuneError
--- a/parsekit/staterouting.go
+++ b/parsekit/staterouting.go
@ -1,40 +1,58 @@
 package parsekit
-func (p *P) Repeat() {
+// RouteRepeat indicates that on the next parsing cycle,
-	p.nextState = p.state
+// the current StateHandler must be invoked again.
-	return
+func (p *P) RouteRepeat() {
 	p.RouteTo(p.state)
 }
-func (p *P) RouteTo(state StateFn) *routeFollowup {
+// RouteTo tells the parser what StateHandler function to invoke
 // in the next parsing cycle.
 func (p *P) RouteTo(state StateHandler) *RouteFollowup {
 	p.nextState = state
-	return &routeFollowup{p}
+	return &RouteFollowup{p}
 }
-type routeFollowup struct {
+// RouteFollowup chains parsing routes.
 // It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
 type RouteFollowup struct {
 	p *P
 }
-func (r *routeFollowup) ThenTo(state StateFn) *routeFollowup {
+// ThenTo schedules a StateHandler that must be invoked
 // after the RouteTo StateHandler has been completed.
 // For example: p.RouteTo(handlerA).ThenTo(handlerB)
 func (r *RouteFollowup) ThenTo(state StateHandler) *RouteFollowup {
 	// Put the followup handler on the state stack of the parser.
 	r.p.pushState(state)
 	// Return the receiver, so calls can be chained.
 	return r
 }
-func (r *routeFollowup) ThenReturnHere() {
+// ThenReturnHere schedules the current StateHandler to be
 // invoked after the RouteTo StateHandler has been completed.
 // For example: p.RouteTo(handlerA).ThenReturnHere()
 func (r *RouteFollowup) ThenReturnHere() {
 	r.p.pushState(r.p.state)
 }
 // RouteReturn tells the parser that on the next cycle the
 // next scheduled route must be invoked.
 // Using this method is optional. When implementing a
 // StateHandler that is used as a sort of subroutine (using
 // constructions like p.RouteTo(subroutine).ThenReturnHere()),
 // then you can refrain from providing a routing decision
 // from that handler. The parser will automatically assume
 // a RouteReturn in that case.
 func (p *P) RouteReturn() {
 	p.nextState = p.popState()
 }
 // pushState adds the state function to the state stack.
 // This is used for implementing nested parsing.
-func (p *P) pushState(state StateFn) {
+func (p *P) pushState(state StateHandler) {
 	p.stack = append(p.stack, state)
 }
 // popState pops the last pushed state from the state stack.
-func (p *P) popState() StateFn {
+func (p *P) popState() StateHandler {
 	last := len(p.stack) - 1
 	head, tail := p.stack[:last], p.stack[last]
 	p.stack = head
--- a/parser/syn_comments.go
+++ b/parser/syn_comments.go
@ -6,18 +6,20 @@ import (
 // A '#' hash symbol marks the rest of the line as a comment.
 func startComment(p *parsekit.P) {
-	p.On(c.OneOrMore(hash)).Skip()
+	p.Expects("start of comment")
-	p.RouteTo(commentContents)
+	if p.On(c.OneOrMore(hash)).Skip() {
 		p.RouteTo(commentContents)
 	}
 }
 // All characters up to the end of the line are included in the comment.
 func commentContents(p *parsekit.P) {
 	p.Expects("comment contents")
 	switch {
-	case p.AtEndOfLine() || p.On(endOfLine).Skip(): // TODO drop AtEndOfLine support
+	case p.On(endOfLine).Skip():
 		p.EmitLiteralTrim(ItemComment)
 		p.RouteReturn()
 	case p.On(any).Accept():
-		p.Repeat()
+		p.RouteRepeat()
 	}
 }
--- a/parser/syn_comments_test.go
+++ b/parser/syn_comments_test.go
--- a/parser/syn_eof.go
+++ b/parser/syn_eof.go
@ -2,9 +2,10 @@ package parser
 import "github.com/mmakaay/toml/parsekit"
 // TODO move into parsekit
 func endOfFile(p *parsekit.P) {
 	p.Expects("end of file")
-	if p.AtEndOfFile() {
+	if p.On(c.EndOfFile()).Stay() {
 		p.Emit(parsekit.ItemEOF, "EOF")
 	}
 }
--- a/parser/helpers_test.go
+++ b/parser/helpers_test.go
@ -22,8 +22,23 @@ func runStatesTs(t *testing.T, tests []statesT) {
 	}
 }
 // parseItemsToArray drains the parser by calling p.Next() until it
 // reports that no more items are available, collecting every item
 // received before that point.
 // The returned error is the one reported by the final p.Next() call;
 // when scanning fails, the items collected so far are returned with it.
 func parseItemsToArray(p *parsekit.P) ([]parsekit.Item, *parsekit.Error) {
 	var collected []parsekit.Item
 	item, err, more := p.Next()
 	for more {
 		collected = append(collected, item)
 		item, err, more = p.Next()
 	}
 	return collected, err
 }
 func runStatesT(t *testing.T, c statesT) {
-	l, err := parser.NewParser(c.in).ToArray()
+	p := parser.NewParser(c.in)
 	l, err := parseItemsToArray(p)
 	if err == nil && c.err != "" {
 		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
 	}
--- a/parser/syn_keyvaluepair.go
+++ b/parser/syn_keyvaluepair.go
--- a/parser/syn_keyvaluepair_test.go
+++ b/parser/syn_keyvaluepair_test.go
--- a/parser/parser.go
+++ b/parser/parser.go
@ -33,7 +33,7 @@ var (
 	whitespace           = c.OneOrMore(c.AnyOf(space, tab))
 	whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
 	optionalWhitespace   = c.Optional(whitespace)
-	endOfLine            = c.AnyOf(lineFeed, c.Rune(parsekit.EOF))
+	endOfLine            = c.AnyOf(lineFeed, c.EndOfFile())
 )
 // NewParser creates a new parser, using the provided input string
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@ -11,7 +11,8 @@ func TestEmptyInput(t *testing.T) {
 }
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
+	p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc")
 	_, err := parseItemsToArray(p)
 	t.Logf("Got error: %s", err.Error())
 	if err.Row != 4 {
 		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
@ -23,7 +24,7 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
 func TestInvalidUtf8Data(t *testing.T) {
 	runStatesTs(t, []statesT{
-		{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
+		{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected end of file)"},
 		{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
 		{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
 		{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
--- a/parser/value_string.go
+++ b/parser/value_string.go
@ -42,21 +42,6 @@ func startString(p *parsekit.P) {
 	}
 }
 // parseBasicString handles one step of parsing string contents.
 // The first matching case decides what happens next: an error is
 // emitted for a character that must be escaped, a backslash or double
 // quote leads to RouteReturn, and any other match leads to Repeat.
 func parseBasicString(p *parsekit.P) {
 	p.Expects("string contents")
 	switch {
 	case p.On(charThatMustBeEscaped).Stay():		
 		// Match the same pattern once more to obtain the offending input
 		// for the error message.
 		r, _, _ := p.Match(charThatMustBeEscaped)
 		p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 	case p.On(validEscape).Accept():
 		p.Repeat()
 	case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
 		// A backslash that reaches this case was not matched by validEscape
 		// above. NOTE(review): presumably the handler that is returned to
 		// reports it as an invalid escape — confirm.
 		p.RouteReturn()
 	case p.On(any).Accept():
 		p.Repeat()
 	}
 }
 func startBasicString(p *parsekit.P) {
 	p.Expects("a basic string")
 	if p.On(doubleQuote).Skip() {
@ -64,12 +49,27 @@ func startBasicString(p *parsekit.P) {
 	}
 }
 // parseBasicString handles one step of parsing string contents.
 // The checks run in a fixed order and the first one that matches
 // decides the routing; when none matches, no routing decision is made.
 func parseBasicString(p *parsekit.P) {
 	p.Expects("string contents")

 	// A character that must be escaped is reported as an error.
 	if p.On(charThatMustBeEscaped).Stay() {
 		p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch)
 		return
 	}

 	// A valid escape sequence is accepted and this handler runs again.
 	if p.On(validEscape).Accept() {
 		p.RouteRepeat()
 		return
 	}

 	// A backslash (not part of a valid escape, given the check above) or a
 	// double quote routes back; the backslash is tried first.
 	if p.On(backslash).RouteReturn() || p.On(doubleQuote).RouteReturn() {
 		return
 	}

 	// Any other match is accepted and this handler runs again.
 	if p.On(any).Accept() {
 		p.RouteRepeat()
 	}
 }
 // Specific handling of input for basic strings.
 // * A double quote ends the string
 // * No additional \escape sequences are allowed. What the spec says about this:
 //   "All other escape sequences [..] are reserved and, if used, TOML should
 //    produce an error."
 func basicStringSpecifics(p *parsekit.P) {
 	p.Expects("string contents")
 	switch {
 	case p.On(doubleQuote).Skip():
 		if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
@ -79,8 +79,6 @@ func basicStringSpecifics(p *parsekit.P) {
 		}
 	case p.On(backslash).Stay():
 		p.EmitError("Invalid escape sequence")
 	default:
 		panic("String parsing should not have ended up here")
 	}
 }
--- a/parser/value_tring_test.go
+++ b/parser/value_tring_test.go
@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) {
 func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
 	runStatesTs(t, []statesT{
-		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
+		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`},
-		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
+		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`},
-		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
+		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`},
 	})
 	// No need to write all test cases for disallowed characters by hand.
@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
 		name := fmt.Sprintf("control character %x", rune(i))
 		runStatesT(
 			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
-				fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
+				fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
 	}
 }