Pfff, okay, so today I found a rather nice way of writing a parser that can hold some internal state, while still holding on to the basic principle of having a function signature that applies to every parse handler in the system. By using methods instead of stand-alone functions, it is possible to let the accompanying struct hold on to the required state. Implemented this principle for the two calculator example projects. The rest is quite broken now, sorry :-p

2019-05-27 23:24:07 +00:00 · 2019-05-27 23:24:07 +00:00 · 41f8733e99
parent f7d1e28fa1
commit 41f8733e99
16 changed files with 619 additions and 269 deletions
--- a/example_basiccalculator1_test.go
+++ b/example_basiccalculator1_test.go
@ -0,0 +1,109 @@
 // Let's write a parser for a really basic calculator.
 // The calculator understands input that looks like:
 //
 //     10 + 20 - 8+4
 //
 // So positive numbers that can be either added or subtracted, and whitespace
 // is ignored.
 package parsekit_test
 import (
 	"fmt"
 	"strconv"
 	"git.makaay.nl/mauricem/go-parsekit"
 )
 func Example_basicCalculator1() {
 	// Every case pairs an input calculation with the outcome that is expected
 	// when parsing succeeds. For inputs that fail to parse, only the error is
 	// printed and the expected value is not compared.
 	type testCase struct {
 		input    string
 		expected int64
 	}
 	cases := []testCase{
 		{"0", 0},
 		{"1", 1},
 		{"1+2+3", 6},
 		{" 10 + \t20 - 3 + 7   -10 ", 24},
 		{"", 0},
 		{" \t ", 0},
 		{"+", 0},
 		{"10.8 + 12", 0},
 		{"42+ ", 0},
 	}
 	for _, tc := range cases {
 		output, err := ComputeSimple(tc.input)
 		if err == nil {
 			fmt.Printf("Input: %q, got outcome: %d, correct = %t\n", tc.input, output, output == tc.expected)
 			continue
 		}
 		fmt.Printf("Input: %q, got error: %s\n", tc.input, err)
 	}
 	// Output:
 	// Input: "0", got outcome: 0, correct = true
 	// Input: "1", got outcome: 1, correct = true
 	// Input: "1+2+3", got outcome: 6, correct = true
 	// Input: " 10 + \t20 - 3 + 7   -10 ", got outcome: 24, correct = true
 	// Input: "", got error: unexpected end of file (expected integer number)
 	// Input: " \t ", got error: unexpected character ' ' (expected integer number)
 	// Input: "+", got error: unexpected character '+' (expected integer number)
 	// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
 	// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
 }
 // ---------------------------------------------------------------------------
 // Implementation of the calculator
 // ---------------------------------------------------------------------------
 // ComputeSimple interprets a simple calculation that consists of only
 // integers and add or subtract operators. It returns the accumulated result
 // of the calculation, together with the error from the first step of the
 // parse run (nil when the calculation was parsed successfully).
 func ComputeSimple(calculation string) (int64, *parsekit.Error) {
 	// The first number on the input must be added, hence the initial +1.
 	calc := simpleCalculator{op: +1}
 	run := parsekit.NewParser(calc.number).Parse(calculation)
 	_, err, _ := run.Next()
 	return calc.Result, err
 }
 // simpleCalculator defines the parsing state machine. We do this using methods
 // on a struct, so the parser can make use of state data inside that struct
 // during the parsing.
 //
 // The methods number and operatorOrEndOfFile are the parse handlers. They
 // hand over control to each other by means of ParseAPI.Handle, updating the
 // fields below along the way. ComputeSimple creates the struct with op set
 // to +1, so the first number on the input is added to Result.
 type simpleCalculator struct {
 	Result int64 // holds the resulting outcome of the computation
 	op     int64 // represents operation for next term (+1 = add, -1 = subtract)
 }
 // number is the parse handler that expects an integer number on the input,
 // optionally surrounded by whitespace. On a match, the number is applied to
 // c.Result using the pending operation c.op (+1 = add, -1 = subtract), after
 // which parsing continues with operatorOrEndOfFile. When no integer is found,
 // an "unexpected input" error is emitted that mentions the expectation
 // "integer number".
 func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
 	// A definition of integer, which conveniently drops surrounding whitespace.
 	pc, a, m := parsekit.C, parsekit.A, parsekit.M
 	whitespace := m.Drop(pc.Opt(a.Whitespace))
 	integer := pc.Seq(whitespace, a.Integer, whitespace)

 	if !p.On(integer).Accept() {
 		p.Expects("integer number")
 		p.UnexpectedInput()
 		return
 	}

 	// Keep the accepted literal around: it is needed for the error message,
 	// and the buffer must be cleared before the next handler accepts input.
 	literal := p.BufLiteral()
 	p.BufClear()

 	value, err := strconv.ParseInt(literal, 10, 64)
 	if err != nil {
 		// Report the offending input text. The parsed value is useless on
 		// error, and %q on an int64 would render it as a character literal.
 		p.EmitError("invalid value %q: %s", literal, err)
 		return
 	}
 	c.Result += c.op * value
 	p.Handle(c.operatorOrEndOfFile)
 }
 // operatorOrEndOfFile is the parse handler that follows a number. It accepts
 // either an operator or the end of the input:
 //
 //   - on '+' or '-', the operation for the next term is stored in c.op and
 //     parsing continues with the number handler;
 //   - at the end of the file, the end of file is confirmed through
 //     ExpectEndOfFile;
 //   - on anything else, an "unexpected input" error is emitted that mentions
 //     the expected operators.
 func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
 	atoms := parsekit.A

 	if p.On(atoms.Add).Skip() {
 		c.op = +1
 		p.Handle(c.number)
 		return
 	}
 	if p.On(atoms.Subtract).Skip() {
 		c.op = -1
 		p.Handle(c.number)
 		return
 	}
 	if p.On(atoms.EndOfFile).Stay() {
 		p.ExpectEndOfFile()
 		return
 	}
 	p.Expects("operator, '+' or '-'")
 	p.UnexpectedInput()
 }
--- a/example_basiccalculator2_test.go
+++ b/example_basiccalculator2_test.go
@ -0,0 +1,213 @@
 // Let's write the hello world of parsers: a calculator that can interpret
 // calculations that look like:
 //
 //     " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5  "
 //
 // More formally, a calculation is defined as:
 //
 //     calculation : expr EOF
 //     expr        : term ((ADD|SUB) term)*
 //     term        : factor ((MUL|DIV) factor)*
 //     space       : (SPACE|TAB)*
 //     factor      : space (FLOAT | LPAREN expr RPAREN) space
 package parsekit_test
 import (
 	"fmt"
 	"math"
 	"strconv"
 	"git.makaay.nl/mauricem/go-parsekit"
 )
 func Example_basicCalculator2() {
 	// Every case pairs an input calculation with the outcome that is expected
 	// when parsing succeeds. For inputs that fail to parse, only the error is
 	// printed and the expected value is not compared.
 	type testCase struct {
 		input    string
 		expected float64
 	}
 	cases := []testCase{
 		{"1", 1},
 		{"(123.10)", 123.10},
 		{"1 + 2 + 3 + 4 + 5", 15},
 		{"1 * 2 * 3 * 4 * 5 * 0.6", 72},
 		{"(3.05+2)*(4.3+5.12)", 47.571},
 		{"8.10 + 999/233", 12.387554},
 		{" -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5  ", -20.168},
 		{"", 0},
 		{"(", 0},
 		{"10+20-", 0},
 		{"10+20-(4*10))", 0},
 		{"10+20-((4*10) + 17", 0},
 	}
 	for _, tc := range cases {
 		raw, err := Compute(tc.input)
 		// Round to six decimals, to make the expectation comparisons usable.
 		output := math.Round(raw*1000000) / 1000000
 		if err == nil {
 			fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", tc.input, output, output == tc.expected)
 			continue
 		}
 		fmt.Printf("Input: %q, got error: %s\n", tc.input, err)
 	}
 	// Output:
 	// Input: "1", got outcome: 1.000000, correct = true
 	// Input: "(123.10)", got outcome: 123.100000, correct = true
 	// Input: "1 + 2 + 3 + 4 + 5", got outcome: 15.000000, correct = true
 	// Input: "1 * 2 * 3 * 4 * 5 * 0.6", got outcome: 72.000000, correct = true
 	// Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true
 	// Input: "8.10 + 999/233", got outcome: 12.387554, correct = true
 	// Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5  ", got outcome: -20.168000, correct = true
 	// Input: "", got error: unexpected end of file
 	// Input: "(", got error: unexpected end of file
 	// Input: "10+20-", got error: unexpected end of file
 	// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file)
 	// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')')
 }
 // ---------------------------------------------------------------------------
 // Implementation of the calculator
 // ---------------------------------------------------------------------------
 // calculator implements a recursive descent parser that is responsible for parsing
 // the input computation string according to the grammar.
 // It offloads the actual computation to a separate interpreter.
 //
 // The methods computation, expr, term and factor are the parse handlers.
 // They feed values and operators to the interpreter while parsing.
 type calculator struct {
 	interpreter interpreter // evaluates the values and operators pushed by the parse handlers
 	result      float64     // outcome of the calculation, set by computation() from the interpreter
 }
 // Compute parses the calculation in the input string and returns the
 // interpreted result value. The returned error comes from the first step of
 // the parse run and is nil when the calculation was parsed successfully.
 func Compute(input string) (float64, *parsekit.Error) {
 	var calc calculator
 	run := parsekit.NewParser(calc.computation).Parse(input)
 	_, err, _ := run.Next()
 	return calc.result, err
 }
 // computation is the parse handler that Compute registers as the start of the
 // parser. It opens the outermost interpreter frame, parses an expression,
 // checks for the end of the file and stores the value that is popped from
 // the interpreter in c.result.
 //
 // calculation : expr EOF
 func (c *calculator) computation(p *parsekit.ParseAPI) {
 	c.interpreter.push()
 	p.Handle(c.expr)
 	p.ExpectEndOfFile()
 	// NOTE(review): the grammar ends at EOF, so handling another factor after
 	// the end of file check looks like a leftover. Confirm whether this call
 	// can be removed.
 	p.Handle(c.factor)
 	c.result = c.interpreter.pop()
 }
 // expr parses one term, followed by any number of "operator term" pairs in
 // which the operator is an addition or a subtraction. The handler works in
 // its own interpreter frame: every matched operator (taken from p.LastMatch)
 // is pushed to the interpreter, and the frame is evaluated after the term
 // that follows it.
 //
 // expr : term ((ADD|SUB) term)*
 func (c *calculator) expr(p *parsekit.ParseAPI) {
 	c.interpreter.push()
 	p.Handle(c.term)
 	for {
 		addOrSubtract := parsekit.C.Any(parsekit.A.Add, parsekit.A.Subtract)
 		if !p.On(addOrSubtract).Skip() {
 			break
 		}
 		c.interpreter.pushOperator(p.LastMatch)
 		p.Handle(c.term)
 		c.interpreter.eval()
 	}
 	c.interpreter.pop()
 }
 // term parses one factor, followed by any number of "operator factor" pairs
 // in which the operator is a multiplication or a division. The handler works
 // in its own interpreter frame: every matched operator (taken from
 // p.LastMatch) is pushed to the interpreter, and the frame is evaluated after
 // the factor that follows it.
 //
 // term : factor ((MUL|DIV) factor)*
 func (c *calculator) term(p *parsekit.ParseAPI) {
 	c.interpreter.push()
 	p.Handle(c.factor)
 	for {
 		multiplyOrDivide := parsekit.C.Any(parsekit.A.Multiply, parsekit.A.Divide)
 		if !p.On(multiplyOrDivide).Skip() {
 			break
 		}
 		c.interpreter.pushOperator(p.LastMatch)
 		p.Handle(c.factor)
 		c.interpreter.eval()
 	}
 	c.interpreter.pop()
 }
 // factor parses either a signed floating point number or a parenthesized
 // expression. Whitespace before and after the factor is skipped. A number is
 // pushed to the interpreter as a value; a parenthesized expression is handed
 // to the expr handler, after which a closing ')' is required.
 //
 // factor : space (FLOAT | LPAREN expr RPAREN) space
 func (c *calculator) factor(p *parsekit.ParseAPI) {
 	atoms := parsekit.A
 	p.On(atoms.Whitespace).Skip()

 	if p.On(parsekit.C.Signed(atoms.Float)).Accept() {
 		// Take the literal out of the buffer right away, so the buffer is
 		// empty again for the next Accept().
 		literal := p.BufLiteral()
 		p.BufClear()
 		if value, err := strconv.ParseFloat(literal, 64); err == nil {
 			c.interpreter.pushValue(value)
 		} else {
 			p.EmitError("invalid number %s: %s", literal, err)
 		}
 	} else if p.On(atoms.LeftParen).Skip() {
 		p.Handle(c.expr)
 		if !p.On(atoms.RightParen).Skip() {
 			p.Expects("')'")
 			p.UnexpectedInput()
 		}
 	} else {
 		p.UnexpectedInput()
 	}

 	p.On(atoms.Whitespace).Skip()
 }
 // ---------------------------------------------------------------------------
 // The computational interpreter, used by the calculator.
 // ---------------------------------------------------------------------------
 // stackFrame holds the state for one nesting level of the computation: the
 // last two values that were pushed (a is the older one, b the most recent
 // one) and the operator function that eval() applies to them.
 type stackFrame struct {
 	a  float64
 	b  float64
 	op func(a, b float64) float64
 }

 // interpreter is a small stack machine. The zero value is ready for use;
 // push() must be called before values or operators are pushed.
 type interpreter struct {
 	stack []*stackFrame // all open frames, innermost frame last
 	top   *stackFrame   // the innermost frame, nil when no frame is open
 }

 // push opens a new frame, makes it the active one and initializes its
 // operator to "VAL". The new frame is returned.
 func (i *interpreter) push() *stackFrame {
 	frame := &stackFrame{}
 	i.top = frame
 	i.stack = append(i.stack, frame)
 	i.pushOperator("VAL")
 	return frame
 }

 // pop evaluates the active frame and closes it. When a parent frame exists,
 // it becomes the active frame and receives the evaluated value as its most
 // recent value. The evaluated value is returned.
 func (i *interpreter) pop() float64 {
 	result := i.eval()
 	remaining := len(i.stack) - 1
 	i.stack = i.stack[:remaining]
 	if remaining == 0 {
 		i.top = nil
 		return result
 	}
 	i.top = i.stack[remaining-1]
 	i.pushValue(result)
 	return result
 }

 // pushValue shifts the most recent value of the active frame to a, and
 // stores the provided value as the new most recent value b.
 func (i *interpreter) pushValue(value float64) {
 	frame := i.top
 	frame.a = frame.b
 	frame.b = value
 }

 // pushOperator sets the operator of the active frame. Supported operator
 // names are "+", "-", "*", "/" and the internal "VAL", which simply yields
 // the most recent value. Any other name makes the function panic.
 func (i *interpreter) pushOperator(op string) {
 	var fn func(a, b float64) float64
 	switch op {
 	case "VAL":
 		fn = func(_, b float64) float64 { return b }
 	case "+":
 		fn = func(a, b float64) float64 { return a + b }
 	case "-":
 		fn = func(a, b float64) float64 { return a - b }
 	case "*":
 		fn = func(a, b float64) float64 { return a * b }
 	case "/":
 		fn = func(a, b float64) float64 { return a / b }
 	default:
 		panic(fmt.Sprintf("Unhandled op name: %s", op))
 	}
 	i.top.op = fn
 }

 // eval applies the operator of the active frame to its two values. The
 // outcome is pushed back as the most recent value of the frame, and the
 // operator is reset to "VAL". The outcome is returned.
 func (i *interpreter) eval() float64 {
 	frame := i.top
 	result := frame.op(frame.a, frame.b)
 	i.pushValue(result)
 	i.pushOperator("VAL")
 	return result
 }
--- a/example_basiccalculator_test.go
+++ b/example_basiccalculator_test.go
@ -1,142 +0,0 @@
 // Let's write a small example for parsing a really basic calculator.
 // The calculator understands input that looks like:
 //
 //     10 + 20 - 8+4
 //
 // So positive numbers that can be either added or substracted, and whitespace
 // is ignored.
 package parsekit_test
 import (
 	"fmt"
 	"strconv"
 	"git.makaay.nl/mauricem/go-parsekit"
 )
 // When writing a parser, it's a good start to use the parser/combinator
 // functionality of parsekit to create some TokenHandler functions. These functions
 // can later be used in the parser state machine to check for matching strings
 // on the input data.
 //
 // For the calculator, we only need a definition of "number, surrounded by
 // optional whitespace". Skipping whitespace could be a part of the StateHandler
 // functions below too, but including it in a TokenHandler makes things really
 // practical.
 func createNumberMatcher() parsekit.TokenHandler {
 	// Easy access to parsekit definition.
 	c, a, m := parsekit.C, parsekit.A, parsekit.M
 	whitespace := m.Drop(c.Opt(a.Whitespace))
 	return c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
 }
 var calcNumber = createNumberMatcher()
 // We need to define the ItemTypes that we will use for emitting Items
 // during the parsing process.
 const (
 	numberType parsekit.ItemType = iota
 	addType
 	subtractType
 )
 // We also need to define the state machine for parsing the input.
 // The state machine is built up from functions that match the StateHandler
 // signature: func(*parsekit.ParseAPI)
 // The ParseAPI struct holds the internal state for the parser and it provides
 // some methods that form the API for your StateHandler implementation.
 // State: expect a number. When a number is found on the input,
 // it is accepted in the parser's string buffer, after which that buffer is
 // emitted as a numberType item. Then we tell the state machine to continue
 // with the calcWaitForOperatorOrEndOfInput state.
 // When no number is found, the parser will emit an error, explaining that
 // "a number" was expected.
 func calcWaitForNumber(p *parsekit.ParseAPI) {
 	p.Expects("a number")
 	if p.On(calcNumber).Accept() {
 		p.EmitLiteral(numberType)
 		p.RouteTo(calcWaitForOperatorOrEndOfInput)
 	}
 }
 // State: expect a plus or minus operator. When one of those is found, the
 // appropriate Item is emitted and the parser is sent back to the
 // numberHandler to find the next number on the input. When no operator is
 // found, then the parser is told to expect the end of the input. When more
 // input data are available (which are obviously wrong data since they do
 // not match our syntax), the parser will emit an error.
 func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) {
 	switch {
 	case p.On(a.Plus).Accept():
 		p.EmitLiteral(addType)
 		p.RouteTo(calcWaitForNumber)
 	case p.On(a.Minus).Accept():
 		p.EmitLiteral(subtractType)
 		p.RouteTo(calcWaitForNumber)
 	default:
 		p.ExpectEndOfFile()
 	}
 }
 // All is ready for our parser. We now can create a new Parser struct.
 // We need to tell it what StateHandler to start with. In our case, it is the
 // calcWaitForNumber state, since the calculation must start with a number.
 var calcParser = parsekit.NewParser(calcWaitForNumber)
 func Example_basicCalculator() {
 	// Let's feed the parser some input to work with. This provides us with
 	// a parse run for that input.
 	run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
 	// We can now step through the results of the parsing process by repeated
 	// calls to run.Next(). Next() returns either the next parse item, a parse
 	// error or an end of file. Let's dump the parse results and handle the
 	// computation while we're at it.
 	// TODO this in convoluted for people using the parser code I think. Maybe use three output data types instead?
 	sum := 0
 	op := +1
 	for {
 		item, err, ok := run.Next()
 		switch {
 		case !ok && err == nil:
 			fmt.Println("End of file reached")
 			fmt.Println("Outcome of computation:", sum)
 			return
 		case !ok:
 			fmt.Printf("Error: %s\n", err)
 			return
 		default:
 			fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
 			switch {
 			case item.Type == addType:
 				op = +1
 			case item.Type == subtractType:
 				op = -1
 			case item.Type == numberType:
 				nr, err := strconv.Atoi(item.Value)
 				if err != nil {
 					fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
 					return
 				}
 				sum += op * nr
 			}
 		}
 	}
 	// Output:
 	// Type: 0, Value: "153"
 	// Type: 1, Value: "+"
 	// Type: 0, Value: "22"
 	// Type: 1, Value: "+"
 	// Type: 0, Value: "31"
 	// Type: 2, Value: "-"
 	// Type: 0, Value: "4"
 	// Type: 2, Value: "-"
 	// Type: 0, Value: "6"
 	// Type: 1, Value: "+"
 	// Type: 0, Value: "42"
 	// End of file reached
 	// Outcome of computation: 238
 }
--- a/example_dutchpostcode_test.go
+++ b/example_dutchpostcode_test.go
@ -1,4 +1,4 @@
-// In this example, a Paparserrser is created which can parse and normalize Dutch postcodes
+// In this example, a Parser is created that can parse and normalize Dutch postcodes
 // The implementation uses only TokenHandler functions and does not implement a
 // full-fledged state-based Parser for it.
 package parsekit_test
@ -26,7 +26,7 @@ func createPostcodeMatcher() *parsekit.Matcher {
 	space := m.Replace(c.Opt(a.Whitespace), " ")
 	postcode := c.Seq(pcDigits, space, pcLetters)
-	// Create a Matcher, which wraps the 'postcode' TokenHandler and allows
+	// Create a Matcher that wraps the 'postcode' TokenHandler and allows
 	// us to match some input against that handler.
 	return parsekit.NewMatcher(postcode, "a Dutch postcode")
 }
--- a/example_hellomatcher_test.go
+++ b/example_hellomatcher_test.go
@ -26,7 +26,7 @@ func createHelloMatcher() *parsekit.Matcher {
 	name := c.OneOrMore(c.Not(a.Excl))
 	greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
-	// Create a Matcher, which wraps the 'greeting' TokenHandler and allows
+	// Create a Matcher that wraps the 'greeting' TokenHandler and allows
 	// us to match some input against that handler.
 	return parsekit.NewMatcher(greeting, "a friendly greeting")
 }
--- a/examples_test.go
+++ b/examples_test.go
@ -24,7 +24,7 @@ func ExampleItem() {
 	// You define your own item types for your specific parser.
 	const QuestionItem = parsekit.ItemType(42)
-	// A StateHandler function can use the defined item type by means of
+	// A ParseHandler function can use the defined item type by means of
 	// the p.Emit* methods on parsekit.P.
 	// When errors occur, or the end of the file is reached, then the built-in
 	// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
--- a/parsehandler.go
+++ b/parsehandler.go
@ -2,20 +2,20 @@ package parsekit
 import "unicode/utf8"
-// StateHandler defines the type of function that must be implemented to handle
+// ParseHandler defines the type of function that must be implemented to handle
 // a parsing state in a Parser state machine.
 //
-// A StateHandler function gets a ParseAPI struct as its input. This struct holds
+// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
 // all the internal state for the parsing state machine and provides the
-// interface that the StateHandler uses to interact with the parser.
+// interface that the ParseHandler uses to interact with the parser.
-type StateHandler func(*ParseAPI)
+type ParseHandler func(*ParseAPI)
 // ParseAPI holds the internal state of a parse run and provides an API to
-// StateHandler methods to communicate with the parser.
+// ParseHandler methods to communicate with the parser.
 type ParseAPI struct {
-	state        StateHandler   // the function that handles the current state
+	state        ParseHandler   // the function that handles the current state
-	nextState    StateHandler   // the function that will handle the next state
+	nextState    ParseHandler   // the function that will handle the next state
-	routeStack   []StateHandler // route stack, for handling nested parsing
+	routeStack   []ParseHandler // route stack, for handling nested parsing
 	input        string         // the input that is being scanned by the parser
 	inputPos     int            // current byte cursor position in the input
 	cursorLine   int            // current rune cursor row number in the input
@ -26,7 +26,7 @@ type ParseAPI struct {
 	buffer       stringBuffer   // an efficient buffer, used to build string values (see P.Accept())
 	items        []Item         // a slice of resulting Parser items (see P.Emit())
 	item         Item           // the current item as reached by Next() and retrieved by Get()
-	err          *Error         // an error when lexing failed, retrieved by Error()
+	err          *Error         // an error when parsing failed, can be retrieved by Error()
 	LastMatch string // a string representation of the last matched input data
 }
@ -42,11 +42,11 @@ func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
 	return handleRuneError(r, w)
 }
-// eofRune is a special rune, which is used to indicate an end of file when
+// eofRune is a special rune that is used to indicate an end of file when
 // reading a character from the input.
 const eofRune rune = -1
-// invalidRune is a special rune, which is used to indicate an invalid UTF8
+// invalidRune is a special rune that is used to indicate an invalid UTF8
 // rune on the input.
 const invalidRune rune = utf8.RuneError
--- a/parsehandler_emit.go
+++ b/parsehandler_emit.go
@ -4,7 +4,7 @@ import (
 	"fmt"
 )
-// Item represents an item that can be emitted from the parser.
+// Item represents an item that can be emitted from a ParseHandler function.
 type Item struct {
 	Type  ItemType
 	Value string
@ -50,6 +50,11 @@ func (p *ParseAPI) EmitLiteral(t ItemType) {
 	p.Emit(t, p.BufLiteral())
 }
 // BufClear clears the contents of the parser string buffer, by resetting
 // the underlying buffer. Use it after reading the buffer (for example with
 // BufLiteral) when the buffered runes must not end up in a later result.
 func (p *ParseAPI) BufClear() {
 	p.buffer.reset()
 }
 // BufInterpreted retrieves the contents of the parser's string buffer (all
 // the runes that were added to it using ParseAPI.Accept()) as an
 // interpreted string.
@ -118,9 +123,18 @@ func (p *ParseAPI) EmitError(format string, args ...interface{}) {
 	p.Emit(ItemError, message)
 }
-// UnexpectedInput is used by a StateHandler function to emit an error item
+// EmitEOF emits an EOF to the client. In effect, this will stop the parsing process.
 func (p *ParseAPI) EmitEOF() {
 	p.Emit(ItemEOF, "EOF")
 }
 // UnexpectedInput is used by a ParseHandler function to emit an error item
 // that tells the client that an unexpected rune was encountered in the input.
 func (p *ParseAPI) UnexpectedInput() {
 	// When some previous parsing step yielded an error, skip this operation.
 	if p.err != nil {
 		return
 	}
 	r, _, ok := p.peek(0)
 	switch {
 	case ok:
--- a/parsehandler_expects.go
+++ b/parsehandler_expects.go
@ -1,9 +1,9 @@
 package parsekit
-// Expects is used to let a StateHandler function describe what input it is expecting.
+// Expects is used to let a ParseHandler function describe what input it is expecting.
 // This expectation is used in error messages to make them more descriptive.
 //
-// When defining an expectation inside a StateHandler, you do not need to
+// When defining an expectation inside a ParseHandler, you do not need to
 // handle unexpected input yourself. When the end of the function is reached
 // without setting the next state, an automatic error will be emitted.
 // This error can differentiate between the following issues:
@ -14,5 +14,7 @@ package parsekit
 //
 // 3) the end of the file was reached.
 func (p *ParseAPI) Expects(description string) {
 	// TODO make this into some debugging tool?
 	// fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn)
 	p.expecting = description
 }
--- a/parsehandler_on.go
+++ b/parsehandler_on.go
@ -1,7 +1,7 @@
 package parsekit
 // On checks if the input at the current cursor position matches the provided
-// TokenHandler. On must be chained with another method, which tells the parser
+// TokenHandler. On must be chained with another method that tells the parser
 // what action to perform when a match was found:
 //
 // 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
@ -14,7 +14,7 @@ package parsekit
 //
 //     p.On(parsekit.A.Whitespace).Skip()
 //
-// The chain as a whole returns a boolean, which indicates whether or not at match
+// The chain as a whole returns a boolean that indicates whether or not a match
 // was found. When no match was found, false is returned and Skip() and Accept()
 // will have no effect. Because of this, typical use of an On() chain is as
 // expression for a conditional expression (if, switch/case, for). E.g.:
@ -37,6 +37,15 @@ package parsekit
 //         p.Emit(SomeItemType, p.BufLiteral())
 //     }
 func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
 	// When some previous parsing step yielded an error, skip this operation.
 	if p.err != nil {
 		return &MatchAction{
 			p:  p,
 			ok: false,
 		}
 	}
 	// Perform the matching operation.
 	m := &TokenAPI{p: p}
 	if tokenHandler == nil {
 		panic("internal parser error: tokenHandler argument for On() is nil")
--- a/parsehandler_route.go
+++ b/parsehandler_route.go
@ -0,0 +1,125 @@
 package parsekit
 // Handle executes the provided ParseHandler functions, one after the other,
 // from within your own ParseHandler function. As soon as an error is
 // registered for the parse run, the remaining handlers are not invoked.
 func (p *ParseAPI) Handle(handlers ...ParseHandler) {
 	for i := range handlers {
 		// When some previous parsing step yielded an error, skip this operation.
 		if p.err != nil {
 			return
 		}
 		handlers[i](p)
 	}
 }
 // RouteTo tells the parser what ParseHandler function to invoke on
 // the next parse cycle.
 //
 // The returned RouteFollowupAction can be used to schedule what must happen
 // after that handler has completed (see ThenTo and ThenReturnHere). The
 // return value can be ignored when no followup is needed.
 func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
 	p.nextState = handler
 	return &RouteFollowupAction{p}
 }
 // RouteRepeat tells the parser that on the next parsing cycle, the current
 // ParseHandler must be reinvoked. It is a shorthand for routing to the
 // handler that is registered as the current state.
 func (p *ParseAPI) RouteRepeat() {
 	p.RouteTo(p.state)
 }
 // RouteReturn tells the parser that on the next cycle the last ParseHandler
 // that was pushed on the route stack must be invoked.
 //
 // Using this method is optional. When implementing a ParseHandler that
 // is used as a sort of subroutine (using constructions like
 // p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
 // providing an explicit routing decision from that handler. The parser will
 // automatically assume a RouteReturn() in that case.
 //
 // NOTE(review): popRoute does not check for an empty route stack, so calling
 // this method when no route was pushed presumably panics. Confirm whether
 // callers can rely on a route being available.
 func (p *ParseAPI) RouteReturn() {
 	p.nextState = p.popRoute()
 }
 // RouteFollowupAction chains parsing routes.
 // It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
 // Values of this type are returned by ParseAPI.RouteTo.
 type RouteFollowupAction struct {
 	p *ParseAPI // the parse run on which the followup route is registered
 }
 // ThenTo schedules a ParseHandler that must be invoked after the RouteTo
 // ParseHandler has been completed. The handler is pushed on the route stack
 // of the parse run.
 // For example:
 //
 //    p.RouteTo(handlerA).ThenTo(handlerB)
 func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
 	a.p.pushRoute(state)
 }
 // ThenReturnHere schedules the current ParseHandler to be invoked after
 // the RouteTo ParseHandler has been completed. The current handler is
 // pushed on the route stack of the parse run.
 // For example:
 //
 //    p.RouteTo(handlerA).ThenReturnHere()
 func (a *RouteFollowupAction) ThenReturnHere() {
 	a.p.pushRoute(a.p.state)
 }
 // pushRoute adds the ParseHandler to the end of the route stack.
 // This is used for implementing nested parsing: popRoute returns the
 // handlers in reverse order of pushing.
 func (p *ParseAPI) pushRoute(state ParseHandler) {
 	p.routeStack = append(p.routeStack, state)
 }
 // popRoute removes the most recently pushed ParseHandler from the route
 // stack and returns it. The route stack must not be empty.
 func (p *ParseAPI) popRoute() ParseHandler {
 	depth := len(p.routeStack)
 	remaining := p.routeStack[:depth-1]
 	handler := p.routeStack[depth-1]
 	p.routeStack = remaining
 	return handler
 }
 // ExpectEndOfFile checks if the input is at the end of file. When it is, an
 // EOF is emitted. Otherwise, an "unexpected input" error is emitted that
 // mentions the expectation "end of file". When an error was already
 // registered for the parse run, the check is skipped.
 // Intended use:
 //
 //    func yourParseHandler(p *parsekit.ParseAPI) {
 //        ...
 //        p.ExpectEndOfFile()
 //    }
 //
 // This will execute the end of file test right away. If you want to
 // use the end of file check as a ParseHandler instead, you can also
 // make use of another form, for example:
 //
 //    func yourParseHandler(p *parsekit.ParseAPI) {
 //        p.RouteTo(yourHandler).ThenTo(parsekit.ExpectEndOfFile)
 //    }
 func (p *ParseAPI) ExpectEndOfFile() {
 	// When some previous parsing step yielded an error, skip this operation.
 	if p.err != nil {
 		return
 	}
 	if !p.On(A.EndOfFile).Stay() {
 		p.Expects("end of file")
 		p.UnexpectedInput()
 		return
 	}
 	p.EmitEOF()
 }
 // ExpectEndOfFile can be scheduled as a ParseHandler function.
 // It makes sure that the input is at the end of file.
 // Intended use:
 //
 //    func yourParseHandler(p *parsekit.ParseAPI) {
 //        ...
 //        p.RouteTo(parsekit.ExpectEndOfFile)
 //    }
 //
 // It is not mandatory to use this ParseHandler. You can take care of EOF
 // yourself too. Simply emit an ItemEOF when the end of the input was reached
 // to stop the parser loop:
 //
 //     p.EmitEOF()
 //
 // When the input is not at the end of file, this function only registers
 // the expectation "end of file" and returns without a routing decision.
 // Reporting the unexpected input is then presumably left to the parser's
 // automatic error handling for handlers that registered an expectation.
 //
 // TODO meh, get rid of this one, once we don't use state scheduling anymore.
 func ExpectEndOfFile(p *ParseAPI) {
 	p.Expects("end of file")
 	if p.On(A.EndOfFile).Stay() {
 		p.EmitEOF()
 	}
 }
--- a/parsekit.go
+++ b/parsekit.go
@ -9,24 +9,24 @@ import (
 // Parser is the top-level struct that holds the configuration for a parser.
 // The Parser can be instantiated using the parsekit.NewParser() method.
 type Parser struct {
-	startState StateHandler // the function that handles the very first state
+	startState ParseHandler // the function that handles the very first state
 }
 // NewParser instantiates a new Parser.
 //
 // The Parser is a state machine-style recursive descent parser, in which
-// StateHandler functions are used to move the state machine forward during
+// ParseHandler functions are used to move the state machine forward during
-// parsing. This style of parser is typically used for parsing languages and
+// parsing. This style of parser is typically used for parsing programming
-// structured data formats (like json, toml, etc.)
+// languages and structured data formats (like json, xml, toml, etc.)
 //
 // To start parsing input data, use the method Parser.Parse().
-func NewParser(startState StateHandler) *Parser {
+func NewParser(startState ParseHandler) *Parser {
 	return &Parser{startState: startState}
 }
 // ParseRun represents a single parse run for a Parser.
 type ParseRun struct {
-	p *ParseAPI // holds the internal state of a parse run
+	p *ParseAPI // holds parser state and provides an API to ParseHandler functions
 }
 // Parse starts a parse run on the provided input data.
@ -60,7 +60,7 @@ func (run *ParseRun) Next() (Item, *Error, bool) {
 			return run.makeReturnValues(item)
 		}
 		// Otherwise, the next state handler is looked up and invoked.
-		run.runNextStateHandler()
+		run.runNextParseHandler()
 	}
 }
@ -77,43 +77,43 @@ func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
 	}
 }
-// runNextStateHandler moves the parser, which is bascially a state machine,
+// runNextParseHandler moves the parser, which is basically a state machine,
 // to its next status. It does so by invoking a function of the
-// type StateHandler. This function represents the current status and
+// type ParseHandler. This function represents the current status and
 // is responsible for moving the parser to its next status, depending
 // on the parsed input data.
-func (run *ParseRun) runNextStateHandler() {
+func (run *ParseRun) runNextParseHandler() {
-	if state, ok := run.getNextStateHandler(); ok {
+	if state, ok := run.getNextParseHandler(); ok {
-		run.invokeNextStateHandler(state)
+		run.invokeNextParseHandler(state)
 	}
 }
-// getNextStateHandler determines the next StateHandler to invoke in order
+// getNextParseHandler determines the next ParseHandler to invoke in order
 // to move the parsing state machine one step further.
 //
-// When implementing a parser, the StateHandler functions must provide
+// When implementing a parser, the ParseHandler functions must provide
 // a routing decision in every invocation. A routing decision is one
 // of the following:
 //
-// * A route is specified explicitly, which means that the next StateHandler
+// * A route is specified explicitly, which means that the next ParseHandler
-//   function to invoke is registered during the StateHandler function
+//   function to invoke is registered during the ParseHandler function
 //   invocation. For example: p.RouteTo(nextStatus)
 //
-// * A route is specified implicitly, which means that a previous StateHandler
+// * A route is specified implicitly, which means that a previous ParseHandler
 //   invocation has registered the followup route for the current state.
 //   For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
-//   In this example, the nextStatus StateHandler will not have to specify
+//   In this example, the nextStatus ParseHandler will not have to specify
 //   a route explicitly, but otherStatus will be used implicitly after
 //   the nextStatus function has returned.
 //
-// * An expectation is registered by the StateHandler.
+// * An expectation is registered by the ParseHandler.
 //   For example: p.Expects("a cool thing")
-//   When the StateHandler returns without having specified a route, this
+//   When the ParseHandler returns without having specified a route, this
 //   expectation is used to generate an "unexpected input" error message.
 //
-// When no routing decision is provided by a StateHandler, then this is
+// When no routing decision is provided by a ParseHandler, then this is
 // considered a bug in the state handler, and the parser will panic.
-func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
+func (run *ParseRun) getNextParseHandler() (ParseHandler, bool) {
 	switch {
 	case run.p.nextState != nil:
 		return run.p.nextState, true
@ -124,13 +124,13 @@ func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
 		return nil, false
 	default:
 		name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
-		panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
+		panic(fmt.Sprintf("internal parser error: ParseHandler %s did not provide a routing decision", name))
 	}
 }
-// invokeNextStateHandler moves the parser state to the provided state
+// invokeNextParseHandler moves the parser state to the provided state
-// and invokes the StateHandler function.
+// and invokes the ParseHandler function.
-func (run *ParseRun) invokeNextStateHandler(state StateHandler) {
+func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
 	run.p.state = state
 	run.p.nextState = nil
 	run.p.expecting = ""
--- a/statehandler_route.go
+++ b/statehandler_route.go
@ -1,76 +0,0 @@
 package parsekit
 // RouteTo registers state as the StateHandler function that the parser
 // must invoke on the next parse cycle. The returned RouteFollowupAction
 // can be used to chain a followup route, for example:
 //
 //    p.RouteTo(handlerA).ThenTo(handlerB)
 func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction {
 	followup := &RouteFollowupAction{p: p}
 	p.nextState = state
 	return followup
 }
 // RouteRepeat schedules the currently active StateHandler to be invoked
 // again on the next parsing cycle.
 func (p *ParseAPI) RouteRepeat() {
 	current := p.state
 	p.RouteTo(current)
 }
 // RouteReturn schedules the StateHandler that was most recently pushed on
 // the route stack as the handler to invoke on the next cycle.
 //
 // Calling this method is optional. When implementing a StateHandler that
 // acts as a kind of subroutine (using constructions like
 // p.RouteTo(subroutine).ThenReturnHere()), that handler may omit an
 // explicit routing decision. The parser will automatically assume a
 // RouteReturn() in that case.
 func (p *ParseAPI) RouteReturn() {
 	next := p.popRoute()
 	p.nextState = next
 }
 // RouteFollowupAction chains parsing routes.
 // It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
 // A value of this type is returned by ParseAPI.RouteTo; its methods push
 // the followup handler on the route stack of the wrapped ParseAPI.
 type RouteFollowupAction struct {
 	p *ParseAPI
 }
 // ThenTo pushes state on the route stack, so that it is invoked once the
 // StateHandler that was registered through RouteTo has been completed.
 // For example:
 //
 //    p.RouteTo(handlerA).ThenTo(handlerB)
 func (a *RouteFollowupAction) ThenTo(state StateHandler) {
 	api := a.p
 	api.pushRoute(state)
 }
 // ThenReturnHere pushes the currently active StateHandler on the route
 // stack, so that it is invoked again once the StateHandler that was
 // registered through RouteTo has been completed.
 // For example:
 //
 //    p.RouteTo(handlerA).ThenReturnHere()
 func (a *RouteFollowupAction) ThenReturnHere() {
 	api := a.p
 	api.pushRoute(api.state)
 }
 // pushRoute appends state to the end of the route stack.
 // The route stack is what makes nested parsing possible.
 func (p *ParseAPI) pushRoute(state StateHandler) {
 	stack := p.routeStack
 	p.routeStack = append(stack, state)
 }
 // popRoute removes the most recently pushed StateHandler from the route
 // stack and returns it. The route stack must not be empty: with an empty
 // stack the slice expression below panics.
 func (p *ParseAPI) popRoute() StateHandler {
 	top := len(p.routeStack) - 1
 	remaining := p.routeStack[:top]
 	handler := p.routeStack[top]
 	p.routeStack = remaining
 	return handler
 }
 // ExpectEndOfFile can be called from a StateHandler function to signal
 // that the parser expects to have reached the end of the file. It routes
 // to a parsekit-provided StateHandler that performs the actual check: the
 // handler registers the expectation "end of file" and emits an ItemEOF
 // item when the end of file is matched.
 func (p *ParseAPI) ExpectEndOfFile() {
 	checkEOF := func(api *ParseAPI) {
 		api.Expects("end of file")
 		if !api.On(A.EndOfFile).Stay() {
 			return
 		}
 		api.Emit(ItemEOF, "EOF")
 	}
 	p.RouteTo(checkEOF)
 }
--- a/stringbuf.go
+++ b/stringbuf.go
@ -6,7 +6,7 @@ import (
 	"strings"
 )
-// stringBuffer is a string buffer implementation, which is used by the parser
+// stringBuffer is a string buffer implementation that is used by the parser
 // to efficiently accumulate runes from the input and eventually turn these
 // into a string, either literal or interpreted.
 type stringBuffer struct {
--- a/tokenhandlers_builtin.go
+++ b/tokenhandlers_builtin.go
@ -33,6 +33,7 @@ var C = struct {
 	MinMax     func(min int, max int, handler TokenHandler) TokenHandler
 	Separated  func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
 	Except     func(except TokenHandler, handler TokenHandler) TokenHandler
 	Signed     func(TokenHandler) TokenHandler
 }{
 	Rune:       MatchRune,
 	Runes:      MatchRunes,
@ -51,6 +52,7 @@ var C = struct {
 	MinMax:     MatchMinMax,
 	Separated:  MatchSeparated,
 	Except:     MatchExcept,
 	Signed:     MatchSigned,
 }
 // MatchRune creates a TokenHandler function that checks if the next rune from
@ -293,6 +295,16 @@ func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
 	}
 }
 // MatchSigned creates a TokenHandler that matches the provided TokenHandler,
 // optionally preceded by a '+' or '-' sign. This can be used to turn numeric
 // atoms into a signed version, e.g.
 //
 //     C.Signed(A.Integer)
 func MatchSigned(handler TokenHandler) TokenHandler {
 	plus, minus := MatchRune('+'), MatchRune('-')
 	optionalSign := MatchOpt(MatchAny(plus, minus))
 	return MatchSeq(optionalSign, handler)
 }
 // A provides convenient access to a range of atoms that can be used to
 // build TokenHandlers or parser rules.
 //
@ -320,18 +332,26 @@ var A = struct {
 	Amp                   TokenHandler
 	SingleQuote           TokenHandler
 	RoundOpen             TokenHandler
 	LeftParen             TokenHandler
 	RoundClose            TokenHandler
 	RightParen            TokenHandler
 	Asterisk              TokenHandler
 	Multiply              TokenHandler
 	Plus                  TokenHandler
 	Add                   TokenHandler
 	Comma                 TokenHandler
 	Minus                 TokenHandler
 	Subtract              TokenHandler
 	Dot                   TokenHandler
 	Slash                 TokenHandler
 	Divide                TokenHandler
 	Colon                 TokenHandler
 	Semicolon             TokenHandler
 	AngleOpen             TokenHandler
 	LessThan              TokenHandler
 	Equal                 TokenHandler
 	AngleClose            TokenHandler
 	GreaterThan           TokenHandler
 	Question              TokenHandler
 	At                    TokenHandler
 	SquareOpen            TokenHandler
@ -349,6 +369,10 @@ var A = struct {
 	WhitespaceAndNewlines TokenHandler
 	EndOfLine             TokenHandler
 	Digit                 TokenHandler
 	DigitNotZero          TokenHandler
 	Digits                TokenHandler
 	Float                 TokenHandler
 	Integer               TokenHandler
 	ASCII                 TokenHandler
 	ASCIILower            TokenHandler
 	ASCIIUpper            TokenHandler
@ -369,18 +393,26 @@ var A = struct {
 	Amp:                   C.Rune('&'),
 	SingleQuote:           C.Rune('\''),
 	RoundOpen:             C.Rune('('),
 	LeftParen:             C.Rune('('),
 	RoundClose:            C.Rune(')'),
 	RightParen:            C.Rune(')'),
 	Asterisk:              C.Rune('*'),
 	Multiply:              C.Rune('*'),
 	Plus:                  C.Rune('+'),
 	Add:                   C.Rune('+'),
 	Comma:                 C.Rune(','),
 	Minus:                 C.Rune('-'),
 	Subtract:              C.Rune('-'),
 	Dot:                   C.Rune('.'),
 	Slash:                 C.Rune('/'),
 	Divide:                C.Rune('/'),
 	Colon:                 C.Rune(':'),
 	Semicolon:             C.Rune(';'),
 	AngleOpen:             C.Rune('<'),
 	LessThan:              C.Rune('<'),
 	Equal:                 C.Rune('='),
 	AngleClose:            C.Rune('>'),
 	GreaterThan:           C.Rune('>'),
 	Question:              C.Rune('?'),
 	At:                    C.Rune('@'),
 	SquareOpen:            C.Rune('['),
@ -396,7 +428,11 @@ var A = struct {
 	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
 	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
 	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
-	Digit:                 C.RuneRange('0', '9'),
+	Digit:                 MatchDigit(),
 	DigitNotZero:          MatchDigitNotZero(),
 	Digits:                MatchDigits(),
 	Integer:               MatchInteger(),
 	Float:                 MatchFloat(),
 	ASCII:                 C.RuneRange('\x00', '\x7F'),
 	ASCIILower:            C.RuneRange('a', 'z'),
 	ASCIIUpper:            C.RuneRange('A', 'Z'),
@ -428,6 +464,42 @@ func MatchAnyRune() TokenHandler {
 	}
 }
 // MatchDigit creates a TokenHandler that matches a single digit in the
 // range '0' through '9' from the input.
 func MatchDigit() TokenHandler {
 	const first, last = '0', '9'
 	return MatchRuneRange(first, last)
 }
 // MatchDigits creates a TokenHandler that matches a run of one or more
 // digits from the input.
 func MatchDigits() TokenHandler {
 	digit := MatchDigit()
 	return MatchOneOrMore(digit)
 }
 // MatchDigitNotZero creates a TokenHandler that matches a single digit in
 // the range '1' through '9' from the input, i.e. any digit except zero.
 func MatchDigitNotZero() TokenHandler {
 	const first, last = '1', '9'
 	return MatchRuneRange(first, last)
 }
 // MatchInteger creates a TokenHandler function that checks if a valid integer
 // can be read from the input. In line with Go, an integer cannot start with
 // a zero, since a leading zero is used to indicate other bases, like octal
 // or hexadecimal. The handler therefore matches either a non-zero digit
 // followed by any number of digits, or a single '0'.
 func MatchInteger() TokenHandler {
 	leading := MatchDigitNotZero()
 	trailing := MatchZeroOrMore(MatchDigit())
 	nonZero := C.Seq(leading, trailing)
 	return MatchAny(nonZero, MatchRune('0'))
 }
 // MatchFloat creates a TokenHandler function that checks if a valid float
 // value can be read from the input. The fractional part (a '.' followed by
 // one or more digits) is optional, so both "123" and "123.123" will match.
 func MatchFloat() TokenHandler {
 	digits := MatchDigits()
 	dot := MatchRune('.')
 	fraction := MatchOpt(MatchSeq(dot, digits))
 	return MatchSeq(digits, fraction)
 }
 // M provides convenient access to a range of modifiers (which in their nature are
 // parser/combinators) that can be used when creating TokenHandler functions.
 //
--- a/tokenhandlers_builtin_test.go
+++ b/tokenhandlers_builtin_test.go
@ -96,19 +96,27 @@ func TestAtoms(t *testing.T) {
 		{"%", a.Percent, true, "%"},
 		{"&", a.Amp, true, "&"},
 		{"'", a.SingleQuote, true, "'"},
 		{"(", a.LeftParen, true, "("},
 		{"(", a.RoundOpen, true, "("},
 		{")", a.RightParen, true, ")"},
 		{")", a.RoundClose, true, ")"},
 		{"*", a.Asterisk, true, "*"},
 		{"*", a.Multiply, true, "*"},
 		{"+", a.Plus, true, "+"},
 		{"+", a.Add, true, "+"},
 		{",", a.Comma, true, ","},
 		{"-", a.Minus, true, "-"},
 		{"-", a.Subtract, true, "-"},
 		{".", a.Dot, true, "."},
 		{"/", a.Slash, true, "/"},
 		{"/", a.Divide, true, "/"},
 		{":", a.Colon, true, ":"},
 		{";", a.Semicolon, true, ";"},
 		{"<", a.AngleOpen, true, "<"},
 		{"<", a.LessThan, true, "<"},
 		{"=", a.Equal, true, "="},
 		{">", a.AngleClose, true, ">"},
 		{">", a.GreaterThan, true, ">"},
 		{"?", a.Question, true, "?"},
 		{"@", a.At, true, "@"},
 		{"[", a.SquareOpen, true, "["},
@ -154,6 +162,22 @@ func TestAtoms(t *testing.T) {
 		{"F", a.HexDigit, true, "F"},
 		{"g", a.HexDigit, false, "g"},
 		{"G", a.HexDigit, false, "G"},
 		{"0", a.Integer, true, "0"},
 		{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
 		{"1", a.Integer, true, "1"},
 		{"-10X", a.Integer, false, ""},
 		{"+10X", a.Integer, false, ""},
 		{"-10X", c.Signed(a.Integer), true, "-10"},
 		{"+10X", c.Signed(a.Integer), true, "+10"},
 		{"+10.1X", c.Signed(a.Integer), true, "+10"},
 		{"0X", a.Float, true, "0"},
 		{"0X", a.Float, true, "0"},
 		{"1X", a.Float, true, "1"},
 		{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
 		{"123.321X", a.Float, true, "123.321"},
 		{"-3.14X", a.Float, false, ""},
 		{"-3.14X", c.Signed(a.Float), true, "-3.14"},
 		{"-003.0014X", c.Signed(a.Float), true, "-003.0014"},
 	})
 }
@ -174,8 +198,8 @@ func TestModifiers(t *testing.T) {
 func TestSequenceOfRunes(t *testing.T) {
 	sequence := c.Seq(
-		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
+		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
-		a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
+		a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
 		a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
 		a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
 		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,