New implementation for performance.

parent 7795588fe6
commit 48d7fda9f8

@@ -0,0 +1,374 @@
package tokenize2

import (
    "git.makaay.nl/mauricem/go-parsekit/read"
)

// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when it is no longer needed (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// API must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported, is by forking an API struct by
// calling the method Fork(). This will return a forked child API, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory;
// garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach to lookaheads, by using
// peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient, but in my opinion not very intuitive to read. It can also be
// tedious to get the cursor back to the correct position, which can lead to
// hard-to-track bugs. I much prefer this forking method, since no bookkeeping
// has to be implemented when writing a parser.
type API struct {
    reader      *read.Buffer // the input data reader
    lastRune    rune         // the rune as retrieved by the last NextRune() call
    lastRuneErr error        // the error for the last NextRune() call
    runeRead    bool         // whether or not a rune was read using NextRune()
    runes       []rune       // the rune stack
    tokens      []Token      // the token stack
    stackFrames []stackFrame // the stack frames, containing stack level-specific data
    stackLevel  int          // the current stack level
    stackFrame  *stackFrame  // the current stack frame
}

type stackFrame struct {
    offset     int // current rune offset relative to the Reader's sliding window
    runeStart  int // start of this frame's window into the API's runes slice
    runeEnd    int // end of this frame's window into the API's runes slice
    tokenStart int // start of this frame's window into the API's tokens slice
    tokenEnd   int // end of this frame's window into the API's tokens slice
    cursor     Cursor

    // TODO
    err error // can be used by a Handler to report a specific issue with the input
}

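// Design note (added for illustration; not part of the original commit):
// all stack frames share the single API.runes and API.tokens slices. Each
// frame only records its own [runeStart:runeEnd) and [tokenStart:tokenEnd)
// windows into those slices, so Fork() allocates no new result buffers and
// Merge() can often just widen the parent's window. This slice sharing is
// presumably where the "new implementation for performance" gets its speed.
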
const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10

// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
    api := &API{
        reader:      read.New(input),
        runes:       make([]rune, 0, initialRuneDepth),
        tokens:      make([]Token, 0, initialTokenDepth),
        stackFrames: make([]stackFrame, 1, initialStackDepth),
    }
    api.stackFrame = &api.stackFrames[0]

    return api
}

// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
    if i.runeRead {
        callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
            "without a prior call to Accept()")
    }

    readRune, err := i.reader.RuneAt(i.stackFrame.offset)
    i.lastRune = readRune
    i.lastRuneErr = err
    i.runeRead = true

    return readRune, err
}

// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such a case will result in a panic.
func (i *API) Accept() {
    if !i.runeRead {
        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
            "without first calling NextRune()")
    } else if i.lastRuneErr != nil {
        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
            "but the prior call to NextRune() failed")
    }

    i.runes = append(i.runes, i.lastRune)
    i.stackFrame.runeEnd++
    i.stackFrame.cursor.moveByRune(i.lastRune)
    i.stackFrame.offset++
    i.runeRead = false
}

// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the Handler was successful and you want to add the results
// to the parent API, you can call Merge() on the forked child.
// This will add the results to the results of the parent (runes, tokens).
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory;
// garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
    newStackLevel := i.stackLevel + 1
    newStackSize := newStackLevel + 1

    // Grow the stack frames capacity when needed.
    if cap(i.stackFrames) < newStackSize {
        newFrames := make([]stackFrame, newStackSize, newStackSize*2)
        copy(newFrames, i.stackFrames)
        i.stackFrames = newFrames
    } else {
        i.stackFrames = i.stackFrames[0:newStackSize]
    }

    parent := i.stackFrame
    i.stackLevel++
    i.stackFrame = &i.stackFrames[i.stackLevel]
    *i.stackFrame = *parent
    i.stackFrame.runeStart = parent.runeEnd
    i.stackFrame.tokenStart = parent.tokenEnd
    i.runeRead = false

    return i.stackLevel
}

// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset, so the child can
// immediately be reused for performing another match. This means that all
// Result data are cleared, but the read cursor position is kept at its
// current position. This allows a child to feed results in chunks to its
// parent.
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (i *API) Merge(stackLevel int) {
    if stackLevel == 0 {
        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on the top-level API stack level 0")
    }
    if stackLevel != i.stackLevel {
        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on API stack level %d, but the current stack level is %d "+
            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
    }

    parent := &i.stackFrames[stackLevel-1]

    if parent.runeEnd == i.stackFrame.runeStart {
        // The end of the parent slice aligns with the start of the child slice.
        // Because of this, to merge, the parent slice can simply be expanded
        // to include the child slice.
        // parent: |----------|
        // child:             |------|
        // After merge operation:
        // parent: |-----------------|
        // child:                    |---> continue reading from here
        parent.runeEnd = i.stackFrame.runeEnd
        i.stackFrame.runeStart = i.stackFrame.runeEnd
    } else {
        // The end of the parent slice does not align with the start of the
        // child slice. The child slice has to be copied onto the end of
        // the parent slice.
        // parent: |----------|
        // child:                |------|
        // After merge operation:
        // parent: |-----------------|
        // child:                    |---> continue reading from here
        i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
        parent.runeEnd = len(i.runes)
        i.stackFrame.runeStart = parent.runeEnd
        i.stackFrame.runeEnd = parent.runeEnd
    }

    // The same logic applies to tokens.
    if parent.tokenEnd == i.stackFrame.tokenStart {
        parent.tokenEnd = i.stackFrame.tokenEnd
        i.stackFrame.tokenStart = i.stackFrame.tokenEnd
    } else {
        i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
        parent.tokenEnd = len(i.tokens)
        i.stackFrame.tokenStart = parent.tokenEnd
        i.stackFrame.tokenEnd = parent.tokenEnd
    }

    parent.offset = i.stackFrame.offset
    parent.cursor = i.stackFrame.cursor

    i.stackFrame.err = nil
    i.runeRead = false
}

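// Sketch (added for illustration; not part of the original commit): the
// chunked feeding that the Merge() documentation describes. A child may
// call Merge() repeatedly before being disposed, handing its results to
// the parent piece by piece while keeping its read cursor:
//
//    child := api.Fork()
//    for moreChunksWanted() { // hypothetical condition
//        api.NextRune()
//        api.Accept()
//        api.Merge(child) // hand this chunk to the parent; keep reading
//    }
//    api.Dispose(child)
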
// Dispose disposes of the forked child API at the provided stack level and
// makes its parent the active API again. Any unmerged child results are
// dropped by truncating the shared rune and token slices back to the
// parent's boundaries.
func (i *API) Dispose(stackLevel int) {
    if stackLevel == 0 {
        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on the top-level API stack level 0")
    }
    if stackLevel != i.stackLevel {
        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on API stack level %d, but the current stack level is %d "+
            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
    }

    i.runeRead = false
    i.stackLevel = stackLevel - 1
    i.stackFrames = i.stackFrames[:stackLevel]
    i.stackFrame = &i.stackFrames[stackLevel-1]
    i.runes = i.runes[0:i.stackFrame.runeEnd]
    i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}

// Reset clears the results for the current stack level, while keeping the
// read cursor at its current position.
func (i *API) Reset() {
    i.runeRead = false
    i.stackFrame.runeStart = i.stackFrame.runeEnd
    i.stackFrame.tokenStart = i.stackFrame.tokenEnd
    i.stackFrame.err = nil
}

// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own Handler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
    if i.stackFrame.offset > 0 {
        i.reader.Flush(i.stackFrame.offset)
        i.stackFrame.offset = 0
        return true
    }
    return false
}
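
// Sketch (added for illustration; not part of the original commit): what the
// automatic flushing amounts to when scanning a large input at the top level.
// Flushing drops already accepted input from the read buffer, keeping memory
// usage bounded; the collected result runes are unaffected:
//
//    for {
//        r, err := api.NextRune()
//        if err != nil || r == '\n' {
//            break
//        }
//        api.Accept()
//        api.FlushInput() // discard buffered input up to the read offset
//    }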

// String returns the result runes for the current stack level as a string.
func (i *API) String() string {
    return string(i.Runes())
}

// Runes returns the result runes for the current stack level.
func (i *API) Runes() []rune {
    return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}

// Rune returns the result rune at the provided offset for the current stack level.
func (i *API) Rune(offset int) rune {
    return i.runes[i.stackFrame.runeStart+offset]
}

// ClearRunes clears the result runes for the current stack level.
func (i *API) ClearRunes() {
    i.runes = i.runes[:i.stackFrame.runeStart]
    i.stackFrame.runeEnd = i.stackFrame.runeStart
}

// SetRunes replaces the result runes for the current stack level.
func (i *API) SetRunes(runes ...rune) {
    i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
    i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
}

// AddRunes appends runes to the result runes for the current stack level.
func (i *API) AddRunes(runes ...rune) {
    i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
    i.stackFrame.runeEnd += len(runes)
}

// AddString appends the runes of the provided string to the result runes
// for the current stack level.
func (i *API) AddString(s string) {
    i.AddRunes([]rune(s)...)
}

// SetString replaces the result runes for the current stack level with the
// runes of the provided string.
func (i *API) SetString(s string) {
    i.SetRunes([]rune(s)...)
}

// Cursor returns the current read cursor position.
func (i *API) Cursor() Cursor {
    return i.stackFrame.cursor
}

// Tokens returns the result tokens for the current stack level.
func (i *API) Tokens() []Token {
    return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}

// Token returns the result token at the provided offset for the current stack level.
func (i *API) Token(offset int) Token {
    return i.tokens[i.stackFrame.tokenStart+offset]
}

// TokenValue returns the value of the result token at the provided offset
// for the current stack level.
func (i *API) TokenValue(offset int) interface{} {
    return i.tokens[i.stackFrame.tokenStart+offset].Value
}

// ClearTokens clears the result tokens for the current stack level.
func (i *API) ClearTokens() {
    i.tokens = i.tokens[:i.stackFrame.tokenStart]
    i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}

// SetTokens replaces the result tokens for the current stack level.
func (i *API) SetTokens(tokens ...Token) {
    i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
    i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
}

// AddTokens appends tokens to the result tokens for the current stack level.
func (i *API) AddTokens(tokens ...Token) {
    i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
    i.stackFrame.tokenEnd += len(tokens)
}

@@ -0,0 +1,330 @@

package tokenize2_test

import (
    "fmt"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func ExampleNewAPI() {
    tokenize.NewAPI("The input that the API will handle")

    // Output:
}

func ExampleAPI_NextRune() {
    api := tokenize.NewAPI("The input that the API will handle")
    r, err := api.NextRune()
    fmt.Printf("Rune read from input: %c\n", r)
    fmt.Printf("The error: %v\n", err)
    fmt.Printf("API results: %q\n", api.String())

    // Output:
    // Rune read from input: T
    // The error: <nil>
    // API results: ""
}

func ExampleAPI_Accept() {
    api := tokenize.NewAPI("The input that the API will handle")
    api.NextRune() // reads 'T'
    api.Accept()   // adds 'T' to the API results
    api.NextRune() // reads 'h'
    api.Accept()   // adds 'h' to the API results
    api.NextRune() // reads 'e', but it is not added to the API results

    fmt.Printf("API results: %q\n", api.String())

    // Output:
    // API results: "Th"
}

func ExampleAPI_modifyingResults() {
    api := tokenize.NewAPI("")

    api.AddString("Some runes")
    api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
    api.AddRunes(' ', 'i', 'n', ' ')
    api.AddString("various ways")
    fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
    fmt.Printf("API result runes as string: %q\n", api.String())

    api.SetString("new ")
    api.AddString("set ")
    api.AddString("of ")
    api.AddRunes('r', 'u', 'n', 'e', 's')
    fmt.Printf("API result runes as string: %q\n", api.String())
    fmt.Printf("API result runes: %q\n", api.Runes())
    fmt.Printf("API third rune: %q\n", api.Rune(2))

    api.AddTokens(tokenize.Token{
        Type:  42,
        Value: "towel"})
    api.AddTokens(tokenize.Token{
        Type:  73,
        Value: "Zaphod"})
    fmt.Printf("API result tokens: %v\n", api.Tokens())
    fmt.Printf("API second result token: %v\n", api.Token(1))

    // Output:
    // API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
    // API result runes as string: "Some runes added in various ways"
    // API result runes as string: "new set of runes"
    // API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
    // API third rune: 'w'
    // API result tokens: [42("towel") 73("Zaphod")]
    // API second result token: 73("Zaphod")
}

func ExampleAPI_Reset() {
    api := tokenize.NewAPI("Very important input!")

    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    // Reset clears the results, but keeps the cursor position.
    api.Reset()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    // Output:
    // API results: "Ve" at line 1, column 3
    // API results: "" at line 1, column 3
    // API results: "ry" at line 1, column 5
}

func ExampleAPI_Fork() {
    // This custom Handler checks for input 'a', 'b' or 'c'.
    abcHandler := func(t *tokenize.API) bool {
        a := tokenize.A
        for _, r := range []rune{'a', 'b', 'c'} {
            child := t.Fork() // fork, so we won't change parent t
            if a.Rune(r)(t) {
                t.Merge(child)   // merge the child's results into the parent
                t.Dispose(child) // return to the parent level
                return true      // and report a successful match
            }
            t.Dispose(child) // return to the parent level
        }
        // If we get here, then no match was found. Return false to communicate
        // this to the caller.
        return false
    }

    // Note: a custom Handler is normally not what you need.
    // You can make use of the parser/combinator tooling to make the
    // implementation a lot simpler and to take care of forking at
    // the appropriate places. The handler from above can be replaced with:
    simpler := tokenize.A.RuneRange('a', 'c')

    result, err := tokenize.New(abcHandler)("another test")
    fmt.Println(result, err)
    result, err = tokenize.New(simpler)("curious")
    fmt.Println(result, err)
    result, err = tokenize.New(abcHandler)("bang on!")
    fmt.Println(result, err)
    result, err = tokenize.New(abcHandler)("not a match")
    fmt.Println(result, err)

    // Output:
    // a <nil>
    // c <nil>
    // b <nil>
    // <nil> mismatch at start of file
}

func ExampleAPI_Merge() {
    tokenHandler := func(t *tokenize.API) bool {
        child1 := t.Fork()
        t.NextRune() // reads 'H'
        t.Accept()
        t.NextRune() // reads 'i'
        t.Accept()

        child2 := t.Fork()
        t.NextRune() // reads ' '
        t.Accept()
        t.NextRune() // reads 'm'
        t.Accept()
        t.Dispose(child2)

        t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
        t.Dispose(child1) // and clean up child1 to return to the parent
        return true
    }

    result, _ := tokenize.New(tokenHandler)("Hi mister X!")
    fmt.Println(result.String())

    // Output:
    // Hi
}

func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
    api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

    // Fork a few levels.
    child1 := api.Fork()
    child2 := api.Fork()
    child3 := api.Fork()
    child4 := api.Fork()

    // Read a rune 'a' from child4.
    r, _ := api.NextRune()
    AssertEqual(t, 'a', r, "child4 rune 1")
    api.Accept()
    AssertEqual(t, "a", api.String(), "child4 runes after rune 1")

    // Read another rune 'b' from child4.
    r, _ = api.NextRune()
    AssertEqual(t, 'b', r, "child4 rune 2")
    api.Accept()
    AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")

    // Merge "ab" from child4 to child3.
    api.Merge(child4)
    AssertEqual(t, "", api.String(), "child4 runes after first merge")

    // Read some more from child4.
    r, _ = api.NextRune()
    AssertEqual(t, 'c', r, "child4 rune 3")
    api.Accept()
    AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")

    // Merge "c" from child4 to child3.
    api.Merge(child4)

    // And dispose of child4, making child3 the active stack level.
    api.Dispose(child4)

    // Child3 should now have the combined results "abc" from child4's work.
    AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")

    // Now read some data from child3.
    r, _ = api.NextRune()
    AssertEqual(t, 'd', r, "child3 rune 4")
    api.Accept()

    r, _ = api.NextRune()
    AssertEqual(t, 'e', r, "child3 rune 5")
    api.Accept()

    r, _ = api.NextRune()
    AssertEqual(t, 'f', r, "child3 rune 6")
    api.Accept()

    AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")

    // Temporarily create some new forks from here, but don't use their outcome.
    child3sub1 := api.Fork()
    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    child3sub2 := api.Fork()
    api.NextRune()
    api.Accept()
    api.Merge(child3sub2)   // do merge sub2 down to sub1
    api.Dispose(child3sub2) // and dispose of sub2
    api.Dispose(child3sub1) // but dispose of sub1 without merging

    // Instead, merge the results from before this forking detour from child3
    // to child2 and dispose of child3.
    api.Merge(child3)
    api.Dispose(child3)

    AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
    AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")

    // Merge child2 to child1 and dispose of it.
    api.Merge(child2)
    api.Dispose(child2)

    // Merge child1 a few times to the top level api.
    api.Merge(child1)
    api.Merge(child1)
    api.Merge(child1)
    api.Merge(child1)

    // And dispose of it.
    api.Dispose(child1)

    // Read some data from the top level api.
    r, _ = api.NextRune()
    api.Accept()

    AssertEqual(t, "abcdefg", api.String(), "api string end result")
    AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}

func TestClearRunes(t *testing.T) {
    api := tokenize.NewAPI("Laphroaig")
    api.NextRune()   // Read 'L'
    api.Accept()     // Add to runes
    api.NextRune()   // Read 'a'
    api.Accept()     // Add to runes
    api.ClearRunes() // Clear the runes
    api.NextRune()   // Read 'p'
    api.Accept()     // Add to runes
    api.NextRune()   // Read 'h'
    api.Accept()     // Add to runes

    AssertEqual(t, "ph", api.String(), "api string end result")
}

func TestMergeScenariosForTokens(t *testing.T) {
    api := tokenize.NewAPI("")

    token1 := tokenize.Token{Value: 1}
    token2 := tokenize.Token{Value: 2}
    token3 := tokenize.Token{Value: 3}
    token4 := tokenize.Token{Value: 4}

    api.SetTokens(token1)
    tokens := api.Tokens()
    AssertEqual(t, 1, len(tokens), "Tokens 1")

    child := api.Fork()

    tokens = api.Tokens()
    AssertEqual(t, 0, len(tokens), "Tokens 2")

    api.AddTokens(token2)

    // Here we can merge by expanding the token slice on the parent,
    // because the end of the parent slice and the start of the child
    // slice align.
    api.Merge(child)
    api.Dispose(child)

    tokens = api.Tokens()
    AssertEqual(t, 2, len(tokens), "Tokens 3")

    child = api.Fork()
    api.AddTokens(token3)
    api.Reset()
    api.AddTokens(token4)

    // Here the merge means that token4 will be copied to the end of
    // the token slice of the parent, since there's a gap at the place
    // where token3 used to be.
    api.Merge(child)
    api.Dispose(child)

    tokens = api.Tokens()
    AssertEqual(t, 3, len(tokens), "Tokens 4")
    AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
    AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
    AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}

@@ -0,0 +1,118 @@

package tokenize2_test

// This file contains some tools that are used for writing tests.

import (
    "regexp"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
    if expected != actual {
        t.Errorf(
            "Unexpected value for %s:\nexpected: %q\nactual: %q",
            forWhat, expected, actual)
    }
}

func AssertTrue(t *testing.T, b bool, assertion string) {
    if !b {
        t.Errorf("Assertion %s is false", assertion)
    }
}

type PanicT struct {
    Function func()
    Regexp   bool
    Expect   string
}

func AssertPanics(t *testing.T, testSet []PanicT) {
    for _, test := range testSet {
        AssertPanic(t, test)
    }
}

func AssertPanic(t *testing.T, p PanicT) {
    defer func() {
        if r := recover(); r != nil {
            mismatch := false
            if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
                mismatch = true
            }
            if !p.Regexp && p.Expect != r.(string) {
                mismatch = true
            }
            if mismatch {
                t.Errorf(
                    "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
                    p.Expect, r)
            }
        } else {
            t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
        }
    }()
    p.Function()
}

type HandlerT struct {
    Input     string
    Handler   tokenize.Handler
    MustMatch bool
    Expected  string
}

func AssertHandlers(t *testing.T, testSet []HandlerT) {
    for _, test := range testSet {
        AssertHandler(t, test)
    }
}

func AssertHandler(t *testing.T, test HandlerT) {
    result, err := tokenize.New(test.Handler)(test.Input)
    if test.MustMatch {
        if err != nil {
            t.Errorf("Test %q failed with error: %s", test.Input, err)
        } else if output := result.String(); output != test.Expected {
            t.Errorf("Test %q failed: unexpected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
        }
    } else {
        if err == nil {
            t.Errorf("Test %q failed: should not match, but it did", test.Input)
        }
    }
}

type TokenMakerT struct {
    Input    string
    Handler  tokenize.Handler
    Expected []tokenize.Token
}

func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
    for _, test := range testSet {
        AssertTokenMaker(t, test)
    }
}

func AssertTokenMaker(t *testing.T, test TokenMakerT) {
    result, err := tokenize.New(test.Handler)(test.Input)
    if err != nil {
        t.Errorf("Test %q failed with error: %s", test.Input, err)
    } else {
        if len(result.Tokens()) != len(test.Expected) {
            t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
        }
        for i, expected := range test.Expected {
            actual := result.Token(i)
            if expected.Type != actual.Type {
                t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
            }
            if expected.Value != actual.Value {
                t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
            }
        }
    }
}

@@ -0,0 +1,33 @@

package tokenize2

import (
    "fmt"
    "runtime"
    "strings"
)

// callerPanic panics with a message that is formatted from the provided
// format string and data. In the resulting message, the placeholder {name}
// is replaced with the provided function name, and {caller} with the
// file:line position of the code that called that function.
func callerPanic(name, f string, data ...interface{}) {
    filepos := callerBefore(name)
    m := fmt.Sprintf(f, data...)
    m = strings.Replace(m, "{caller}", filepos, -1)
    m = strings.Replace(m, "{name}", name, -1)
    panic(m)
}

// callerBefore walks up the call stack, looking for the function with the
// provided name, and returns the file:line position of its caller.
func callerBefore(name string) string {
    found := false
    for i := 1; ; i++ {
        pc, file, line, ok := runtime.Caller(i)
        if found {
            return fmt.Sprintf("%s:%d", file, line)
        }
        if !ok {
            return "unknown caller"
        }
        f := runtime.FuncForPC(pc)

        if strings.HasSuffix(f.Name(), "."+name) {
            found = true
        }
    }
}
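
// For illustration (added; not part of the original commit): given the call
// in API.NextRune(),
//
//    callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
//        "without a prior call to Accept()")
//
// the resulting panic message looks like the following, where the path and
// line number depend on the calling code:
//
//    tokenize.API.NextRune(): NextRune() called at /path/to/yourparser.go:42
//    without a prior call to Accept()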

@@ -0,0 +1,35 @@

package tokenize2

import (
    "strings"
    "testing"
)

func SomeFunc1() {
    SomeFunc2()
}

func SomeFunc2() {
    SomeFunc3()
}

func SomeFunc3() {
    callerPanic("SomeFunc2", "{name} was called from {caller}")
}

func TestCallerPanic(t *testing.T) {
    defer func() {
        r := recover()
        err := r.(string)

        if !strings.Contains(err, "SomeFunc2 was called from") || !strings.Contains(err, "callerinfo_test.go:") {
            t.Fatalf("Unexpected error message: %s", err)
        }
    }()
    SomeFunc1()
}

func TestCallerBefore_WithFunctionNameNotInStack(t *testing.T) {
    caller := callerBefore("NotExistingAtAll")
    AssertEqual(t, "unknown caller", caller, "result for name not in stack")
}

@@ -0,0 +1,45 @@

package tokenize2

import (
    "fmt"
    "unicode/utf8"
)

// Cursor represents the position of a cursor in various ways.
type Cursor struct {
    Byte   int // The cursor offset in bytes
    Rune   int // The cursor offset in UTF8 runes
    Column int // The column at which the cursor is (0-indexed)
    Line   int // The line at which the cursor is (0-indexed)
}

// String produces a string representation of the cursor position.
func (c Cursor) String() string {
    if c.Line == 0 && c.Column == 0 {
        return "start of file"
    }
    return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}

// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
    for _, r := range input {
        c.moveByRune(r)
    }
    return c
}

// moveByRune updates the position of the cursor, based on a single input rune.
func (c *Cursor) moveByRune(r rune) *Cursor {
    c.Byte += utf8.RuneLen(r)
    c.Rune++
    if r == '\n' {
        c.Column = 0
        c.Line++
    } else {
        c.Column++
    }
    return c
}
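
// Worked example (added for illustration; not part of the original commit):
// moving a fresh Cursor over "ab\n⌘" yields Byte 6 (1+1+1+3; '⌘' is a
// three-byte UTF8 rune), Rune 4, Line 1 and Column 1. String() displays
// these 1-based, so it reports "line 2, column 2".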

@@ -0,0 +1,69 @@

package tokenize2

import (
    "fmt"
    "testing"
)

func ExampleCursor_move() {
    c := Cursor{}
    fmt.Printf("after initialization : %s\n", c)
    fmt.Printf("after 'some words' : %s\n", c.move("some words"))
    fmt.Printf("after '\\n' : %s\n", c.move("\n"))
    fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))

    // Output:
    // after initialization : start of file
    // after 'some words' : line 1, column 11
    // after '\n' : line 2, column 1
    // after '\r\nskip\nlines' : line 4, column 6
}

func ExampleCursor_String() {
    c := Cursor{}
    fmt.Println(c.String())

    c.move("\nfoobar")
    fmt.Println(c.String())

    // Output:
    // start of file
    // line 2, column 7
}

func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
    for _, test := range []struct {
        name   string
        input  []string
        byte   int
        rune   int
        line   int
        column int
    }{
        {"No input at all", []string{""}, 0, 0, 0, 0},
        {"One ASCII char", []string{"a"}, 1, 1, 0, 1},
        {"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
        {"One newline", []string{"\n"}, 1, 1, 1, 0},
        {"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
        {"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
        {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
        {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
    } {
        c := Cursor{}
        for _, s := range test.input {
            c.move(s)
        }
        if c.Byte != test.byte {
            t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
        }
        if c.Rune != test.rune {
            t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
        }
        if c.Line != test.line {
            t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
        }
        if c.Column != test.column {
            t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
        }
    }
}

@@ -0,0 +1,53 @@

package tokenize2

// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool

// Match is syntactic sugar that allows you to write a construction like
// NewTokenizer(handler).Execute(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
    tokenizer := New(handler)
    return tokenizer(input)
}

// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
    return MatchAny(handler, otherHandler)
}

// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
    return MatchRep(n, handler)
}

// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
    return MatchSeq(handler, otherHandler)
}

// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
    return MatchSeparated(separator, handler)
}

// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
    return MatchOptional(handler)
}

// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
    return MatchExcept(handler, exceptHandler)
}

@@ -0,0 +1,101 @@

package tokenize2_test

import (
    "fmt"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestSyntacticSugar(t *testing.T) {
    var a = tokenize.A
    AssertHandlers(t, []HandlerT{
        {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
        {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
        {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
        {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
        {"cccccc", a.Rune('c').Optional(), true, "c"},
        {"dddddd", a.Rune('c').Optional(), true, ""},
        {"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
        {"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
        {"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
        {"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
        {"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
    })
}

func ExampleHandler_Times() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))

    fmt.Println(phoneNumber.Match("0201234567"))
    // Output:
    // 0201234567 <nil>
}

func ExampleHandler_Then() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))

    fmt.Println(phoneNumber.Match("0208888888"))
    // Output:
    // 0208888888 <nil>
}

func ExampleHandler_Or() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))

    fmt.Println(phoneNumber.Match("+31209876543"))
    fmt.Println(phoneNumber.Match("0031209876543"))
    fmt.Println(phoneNumber.Match("0031020991234"))
    fmt.Println(phoneNumber.Match("0031201234"))
    // Output:
    // +31209876543 <nil>
    // 0031209876543 <nil>
    // <nil> mismatch at start of file
    // <nil> mismatch at start of file
}

func ExampleHandler_SeparatedBy() {
    a, t := tokenize.A, tokenize.T
    csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)

    r, _ := csv.Match("123,456,7,8,9")
    for i, token := range r.Tokens() {
        fmt.Printf("[%d] %v\n", i, token)
    }
    // Output:
    // [0] number((int)123)
    // [1] number((int)456)
    // [2] number((int)7)
    // [3] number((int)8)
    // [4] number((int)9)
}

func ExampleHandler_Optional() {
    c, a := tokenize.C, tokenize.A

    spanish := c.Seq(
        a.Rune('¿').Optional(),
        c.OneOrMore(a.AnyRune.Except(a.Question)),
        a.Rune('?').Optional())

    fmt.Println(spanish.Match("¿Habla español María?"))
    fmt.Println(spanish.Match("Sí, María habla español."))
    // Output:
    // ¿Habla español María? <nil>
    // Sí, María habla español. <nil>
}

func ExampleHandler_Match() {
    r, err := tokenize.A.IPv4.Match("001.002.003.004")
    fmt.Println(r, err)

    r, err = tokenize.A.IPv4.Match("1.2.3")
    fmt.Println(r, err)

    // Output:
    // 1.2.3.4 <nil>
    // <nil> mismatch at start of file
}

(File diff suppressed because it is too large.)

@@ -0,0 +1,445 @@

package tokenize2_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
|
||||
)
|
||||
|
||||
func TestCombinatorsTempDebug(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
// {"024", a.IPv4CIDRMask, true, "24"},
|
||||
// {"024", a.Octet, true, "24"},
|
||||
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestCombinators(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"abc not", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd not", c.Not(a.Rune('b')), false, ""},
|
||||
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
|
||||
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
|
||||
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
|
||||
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
|
||||
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
|
||||
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, a.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, a.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
|
||||
{"", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
|
||||
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
|
||||
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
|
||||
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
AssertPanics(t, []PanicT{
|
||||
{func() { a.RuneRange('z', 'a') }, true,
|
||||
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
|
||||
{func() { c.MinMax(-1, 1, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.MinMax(1, -1, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { c.MinMax(10, 5, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
|
||||
{func() { c.Min(-10, a.Space) }, true,
|
||||
`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.Max(-42, a.Space) }, true,
|
||||
`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { a.IntegerBetween(10, -10) }, true,
|
||||
`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
{"Hello, world!", a.Str("Hello"), true, "Hello"},
|
||||
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
|
||||
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"xxx", a.Rune('x'), true, "x"},
|
||||
{"x ", a.Rune(' '), false, ""},
|
||||
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||
{"", a.EndOfFile, true, ""},
|
||||
{"⌘", a.AnyRune, true, "⌘"},
|
||||
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
|
||||
{"", a.AnyRune, false, ""},
|
||||
{"⌘", a.ValidRune, true, "⌘"},
|
||||
{"\xbc with ValidRune", a.ValidRune, false, "<22>"},
|
||||
{"", a.ValidRune, false, ""},
|
||||
{" ", a.Space, true, " "},
|
||||
{"X", a.Space, false, ""},
|
||||
{"\t", a.Tab, true, "\t"},
|
||||
{"\r", a.CR, true, "\r"},
|
||||
{"\n", a.LF, true, "\n"},
|
||||
{"!", a.Excl, true, "!"},
|
||||
{"\"", a.DoubleQuote, true, "\""},
|
||||
{"#", a.Hash, true, "#"},
|
||||
{"$", a.Dollar, true, "$"},
|
||||
{"%", a.Percent, true, "%"},
|
||||
{"&", a.Amp, true, "&"},
|
||||
{"'", a.SingleQuote, true, "'"},
|
||||
{"(", a.LeftParen, true, "("},
|
||||
{"(", a.RoundOpen, true, "("},
|
||||
{")", a.RightParen, true, ")"},
|
||||
{")", a.RoundClose, true, ")"},
|
||||
{"*", a.Asterisk, true, "*"},
|
||||
{"*", a.Multiply, true, "*"},
|
||||
{"+", a.Plus, true, "+"},
|
||||
{"+", a.Add, true, "+"},
|
||||
{",", a.Comma, true, ","},
|
||||
{"-", a.Minus, true, "-"},
|
||||
{"-", a.Subtract, true, "-"},
|
||||
{".", a.Dot, true, "."},
|
||||
{"/", a.Slash, true, "/"},
|
||||
{"/", a.Divide, true, "/"},
|
||||
{":", a.Colon, true, ":"},
|
||||
{";", a.Semicolon, true, ";"},
|
||||
{"<", a.AngleOpen, true, "<"},
|
||||
{"<", a.LessThan, true, "<"},
|
||||
{"=", a.Equal, true, "="},
|
||||
{">", a.AngleClose, true, ">"},
|
||||
{">", a.GreaterThan, true, ">"},
|
||||
{"?", a.Question, true, "?"},
|
||||
{"@", a.At, true, "@"},
|
||||
{"[", a.SquareOpen, true, "["},
|
||||
{"\\", a.Backslash, true, "\\"},
|
||||
{"]", a.SquareClose, true, "]"},
|
||||
{"^", a.Caret, true, "^"},
|
||||
{"_", a.Underscore, true, "_"},
|
||||
{"`", a.Backquote, true, "`"},
|
||||
{"{", a.CurlyOpen, true, "{"},
|
||||
{"|", a.Pipe, true, "|"},
|
||||
{"}", a.CurlyClose, true, "}"},
|
||||
{"~", a.Tilde, true, "~"},
|
||||
{"\t \t \r\n", a.Blank, true, "\t"},
|
||||
{" \t \t \r\n", a.Blanks, true, " \t \t "},
|
||||
{"xxx", a.Whitespace, false, ""},
|
||||
{" ", a.Whitespace, true, " "},
|
||||
{"\t", a.Whitespace, true, "\t"},
|
||||
{"\n", a.Whitespace, true, "\n"},
|
||||
{"\r\n", a.Whitespace, true, "\r\n"},
|
||||
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
|
||||
{"xxx", a.UnicodeSpace, false, ""},
|
||||
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
|
||||
{"", a.EndOfLine, true, ""},
|
||||
{"\r\n", a.EndOfLine, true, "\r\n"},
|
||||
{"\n", a.EndOfLine, true, "\n"},
|
||||
{"0", a.Digit, true, "0"},
|
||||
{"1", a.Digit, true, "1"},
|
||||
{"2", a.Digit, true, "2"},
|
||||
{"3", a.Digit, true, "3"},
|
||||
{"4", a.Digit, true, "4"},
|
||||
{"5", a.Digit, true, "5"},
|
||||
{"6", a.Digit, true, "6"},
|
||||
{"7", a.Digit, true, "7"},
|
||||
{"8", a.Digit, true, "8"},
|
||||
{"9", a.Digit, true, "9"},
|
||||
{"X", a.Digit, false, ""},
|
||||
{"a", a.ASCIILower, true, "a"},
|
||||
{"z", a.ASCIILower, true, "z"},
|
||||
{"A", a.ASCIILower, false, ""},
|
||||
{"Z", a.ASCIILower, false, ""},
|
||||
{"A", a.ASCIIUpper, true, "A"},
|
||||
{"Z", a.ASCIIUpper, true, "Z"},
|
||||
{"a", a.ASCIIUpper, false, ""},
|
||||
{"z", a.ASCIIUpper, false, ""},
|
||||
{"1", a.Letter, false, ""},
|
||||
{"a", a.Letter, true, "a"},
|
||||
{"Ø", a.Letter, true, "Ø"},
|
||||
{"Ë", a.Lower, false, ""},
|
||||
{"ë", a.Lower, true, "ë"},
|
||||
{"ä", a.Upper, false, "ä"},
|
||||
{"Ä", a.Upper, true, "Ä"},
|
||||
{"0", a.HexDigit, true, "0"},
|
||||
{"9", a.HexDigit, true, "9"},
|
||||
{"a", a.HexDigit, true, "a"},
|
||||
{"f", a.HexDigit, true, "f"},
|
||||
{"A", a.HexDigit, true, "A"},
|
||||
{"F", a.HexDigit, true, "F"},
|
||||
{"g", a.HexDigit, false, "g"},
|
||||
{"G", a.HexDigit, false, "G"},
|
||||
{"0", a.Integer, true, "0"},
|
||||
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
|
||||
{"1", a.Integer, true, "1"},
|
||||
{"-10X", a.Integer, false, ""},
|
||||
{"+10X", a.Integer, false, ""},
|
||||
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"1X", a.Float, true, "1"},
|
||||
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||
{"123.321X", a.Float, true, "123.321"},
|
||||
{"-3.14X", a.Float, false, ""},
|
||||
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
||||
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
||||
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||
{"10", a.IntegerBetween(-10, 10), true, "10"},
|
||||
{"11", a.IntegerBetween(0, 10), false, ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestIPv4Atoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
// Not normalized octet.
|
||||
{"0X", tokenize.MatchOctet(false), true, "0"},
|
||||
{"00X", tokenize.MatchOctet(false), true, "00"},
|
||||
{"000X", tokenize.MatchOctet(false), true, "000"},
|
||||
{"10X", tokenize.MatchOctet(false), true, "10"},
|
||||
{"010X", tokenize.MatchOctet(false), true, "010"},
|
||||
{"255123", tokenize.MatchOctet(false), true, "255"},
|
||||
{"256123", tokenize.MatchOctet(false), false, ""},
|
||||
{"300", tokenize.MatchOctet(false), false, ""},
|
||||
|
||||
// Normalized octet.
|
||||
{"0X", a.Octet, true, "0"},
|
||||
{"00X", a.Octet, true, "0"},
|
||||
{"000X", a.Octet, true, "0"},
|
||||
{"10X", a.Octet, true, "10"},
|
||||
{"010X", a.Octet, true, "10"},
|
||||
{"255123", a.Octet, true, "255"},
|
||||
{"256123", a.Octet, false, ""},
|
||||
{"300", a.Octet, false, ""},
|
||||
|
||||
// IPv4 address.
|
||||
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
|
||||
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
|
||||
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
|
||||
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
|
||||
{"256.255.255.255", a.IPv4, false, ""},
|
||||
|
||||
// IPv4 CIDR netmask.
|
||||
{"0", a.IPv4CIDRMask, true, "0"},
|
||||
{"00", a.IPv4CIDRMask, true, "0"},
|
||||
{"000", a.IPv4CIDRMask, true, "0"},
|
||||
{"32", a.IPv4CIDRMask, true, "32"},
|
||||
{"032", a.IPv4CIDRMask, true, "32"},
|
||||
{"33", a.IPv4CIDRMask, false, ""},
|
||||
|
||||
// IPv4 netmask in dotted quad format.
|
||||
{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
|
||||
{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
|
||||
{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
|
||||
{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
|
||||
|
||||
// IPv4 address + CIDR or dotted quad netmask.
|
||||
{"192.168.6.123", a.IPv4Net, false, ""},
|
||||
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
|
||||
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
|
||||
{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
|
||||
})
|
||||
}
|
||||
|
||||
func TestIPv6Atoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"", a.IPv6, false, ""},
|
||||
{"::", a.IPv6, true, "::"},
|
||||
{"1::", a.IPv6, true, "1::"},
|
||||
{"1::1", a.IPv6, true, "1::1"},
|
||||
{"::1", a.IPv6, true, "::1"},
|
||||
{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
|
||||
{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
|
||||
{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
|
||||
{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
|
||||
{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
|
||||
{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
|
||||
{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
|
||||
{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
|
||||
{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
|
||||
{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
|
||||
{"0", a.IPv6CIDRMask, true, "0"},
|
||||
{"128", a.IPv6CIDRMask, true, "128"},
|
||||
{"129", a.IPv6CIDRMask, false, ""},
|
||||
{"::1/128", a.IPv6Net, true, "::1/128"},
|
||||
{"::1/129", a.IPv6Net, false, ""},
|
||||
{"1.1.1.1/24", a.IPv6Net, false, ""},
|
||||
{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
|
||||
{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
|
||||
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
|
||||
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
|
||||
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
|
||||
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
||||
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
||||
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
|
||||
})
|
||||
}
|
||||
|
||||
// When a TokenMaker encounters an error, this is considered a programmer error.
|
||||
// A TokenMaker should not be called, unless the input is already validated to
|
||||
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||
// input cannot be processed successfully.
|
||||
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||
var a, tok = tokenize.A, tokenize.T
|
||||
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||
tokenizer := tokenize.New(invalid)
|
||||
AssertPanic(t, PanicT{
|
||||
func() { tokenizer("no") }, false,
|
||||
`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
|
||||
})
|
||||
}
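
// By contrast, here is an illustrative sketch (an editor's addition, not part
// of the original commit) of the safe construction implied by the comment
// above: a.Boolean only matches input that strconv.ParseBool accepts, so the
// Boolean TokenMaker below can never be fed an unparseable value and cannot
// panic. The function name demoSafeBooleanToken is hypothetical.
func demoSafeBooleanToken() {
	var a, tok = tokenize.A, tokenize.T
	safe := tok.Boolean("BOOL", a.Boolean) // a.Boolean pre-validates the input
	tokenizer := tokenize.New(safe)
	if result, err := tokenizer("true"); err == nil {
		fmt.Println(result.Tokens()) // expected to print: [BOOL(true)]
	}
}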

func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},

		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},

		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},

		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},

		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},

		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},

		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},
	})
}

// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	demonic := c.Seq(
		c.Optional(a.SquareOpen),
		m.Trim(
			c.Seq(
				c.Optional(a.Blanks),
				c.Repeated(3, a.AngleClose),
				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
					return fmt.Sprintf("%d", len(s))
				}),
				m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
				m.ToUpper(c.Min(1, a.ASCIILower)),
				m.Drop(a.Excl),
				c.Repeated(3, a.AngleOpen),
				c.Optional(a.Blanks),
			),
			" \t",
		),
		c.Optional(a.SquareClose),
	)

	AssertHandlers(t, []HandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	})
}
@@ -0,0 +1,47 @@
package tokenize2

import (
	"fmt"
)

// Token defines a lexical token as produced by tokenize.Handlers.
//
// Both the Type and Value fields are optional and can be filled
// with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
	Type  interface{} // optional token type, can be any type that a parser author sees fit
	Value interface{} // optional token value, of any type as well
}

// String returns a string representation of the Token in the form
// <type>(<value>). String values are quoted, nested token lists are
// rendered as-is, and other value types are prefixed with their Go
// type, e.g. (int8)24.
func (t Token) String() string {
	tokenType := ""
	if t.Type != nil {
		tokenType = fmt.Sprintf("%v", t.Type)
	}

	value := ""
	if t.Value != nil {
		switch t.Value.(type) {
		case []*Token:
			// A nested token list renders each token using its own String().
			return fmt.Sprintf("%v%v", tokenType, t.Value)
		case string:
			// Strings are quoted.
			value = fmt.Sprintf("%q", t.Value)
		case rune:
			value = fmt.Sprintf("%v", t.Value)
		case bool:
			value = fmt.Sprintf("%v", t.Value)
		default:
			// Other types are prefixed with their Go type.
			value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
		}
	}

	return fmt.Sprintf("%v(%s)", tokenType, value)
}
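
// Illustrative sketch (an editor's addition, not part of the original commit),
// showing how String() renders a few Value types, following the switch above.
// The function name demoTokenString is hypothetical.
func demoTokenString() {
	fmt.Println(Token{Type: "STR", Value: "hello"})  // prints: STR("hello") — strings are quoted
	fmt.Println(Token{Type: "INT", Value: int8(42)}) // prints: INT((int8)42) — via the default case
	fmt.Println(Token{Type: "ON", Value: true})      // prints: ON(true)
}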
@@ -0,0 +1,41 @@
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize2

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns an
// API struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*API, error)

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler
// function that must be executed at the start of the tokenizing process.
// From there on, other tokenize.Handler functions can be invoked recursively
// to implement the tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*API, error) {
		api := NewAPI(input)
		ok := tokenHandler(api)

		if !ok {
			// A failed run currently reports the zero cursor position.
			err := fmt.Errorf("mismatch at %s", Cursor{})
			return nil, err
		}
		return api, nil
	}
}
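
// Illustrative sketch (an editor's addition, not part of the original commit):
// wrapping a single handler with New and running it against a string input.
// It assumes the package-level A atoms and the API.String() method that the
// tests in this commit use; the function name demoNew is hypothetical.
func demoNew() {
	tokenizer := New(A.Str("hello"))
	if api, err := tokenizer("hello world"); err == nil {
		fmt.Println(api.String()) // expected to print: hello
	}
}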
@@ -0,0 +1,223 @@
package tokenize2_test

import (
	"fmt"
	"io"
	"strings"
	"testing"
	"unicode/utf8"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

// TODO For error handling, it would be really cool if, for example, the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleNew() {
	// Build the tokenizer for ip/mask.
	var c, a, t = tokenize.C, tokenize.A, tokenize.T
	ip := t.Str("ip", a.IPv4)
	mask := t.Int8("mask", a.IPv4CIDRMask)
	cidr := c.Seq(ip, a.Slash, mask)
	tokenizer := tokenize.New(cidr)

	for _, input := range []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	} {
		// The tokenizer returns an API struct and an error, which is nil on success.
		result, err := tokenizer(input)

		if err == nil {
			fmt.Printf("Result: %s\n", result.Tokens())
		} else {
			fmt.Printf("Error: %s\n", err)
		}
	}
	// Output:
	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}

func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	api := makeTokenizeAPI()
	r, _ := api.NextRune()
	AssertEqual(t, 'T', r, "first rune")
}

func TestInputCanAcceptRunesFromReader(t *testing.T) {
	i := makeTokenizeAPI()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "Tes", i.String(), "i.String()")
}

func TestCallingNextRuneTwice_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.NextRune()
		},
		Regexp: true,
		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
			`without a prior call to Accept\(\)`,
	})
}

func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	api := makeTokenizeAPI()
	AssertPanic(t, PanicT{
		Function: api.Accept,
		Regexp:   true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
			`without first calling NextRune\(\)`,
	})
}

func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
	api := tokenize.NewAPI("")
	AssertPanic(t, PanicT{
		Function: func() {
			api.NextRune()
			api.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
			`, but the prior call to NextRune\(\) failed`,
	})
}

func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Merge(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Merge(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Dispose(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Dispose(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Fork()
			g := i.Fork()
			i.Fork()
			i.Merge(g)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestForkingInput_ClearsLastRune(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

func TestAccept_UpdatesCursor(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
	i.NextRune() // read "\n", cursor ends up at start of new line
	i.Accept()
	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}

func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}

func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	i.Dispose(child)      // brings the read offset back to the start
	r, err = i.NextRune() // so here we should see the first rune again
	AssertEqual(t, 'X', r, "returned rune from 3rd NextRune()")
	AssertEqual(t, true, err == nil, "returned error from 3rd NextRune()")
}

func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}
@@ -0,0 +1,110 @@
package tokenize2

import (
	"testing"
)

func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.String(), "accepted rune in input")
	// Fork
	child := i.Fork()
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "child offset")
	// Accept two runes via fork.
	i.NextRune()
	i.Accept() // e
	i.NextRune()
	i.Accept() // s
	AssertEqual(t, "es", i.String(), "result runes in fork")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "child offset")
	// Merge fork back into parent
	i.Merge(child)
	i.Dispose(child)
	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}

func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	i.NextRune()
	i.Accept()
	f2 := i.Fork()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "s", i.String(), "f2 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f2 offset")
	i.Merge(f2)
	i.Dispose(f2)
	AssertEqual(t, "es", i.String(), "f1 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1 offset")
	i.Merge(f1)
	i.Dispose(f1)
	AssertEqual(t, "Tes", i.String(), "top-level API String()")
	AssertEqual(t, 3, i.stackFrame.offset, "top-level API offset")
}

func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := NewAPI("Testing")
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
	i.Accept()
	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}

func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Flushing without any read data is okay. FlushInput() will return
	// false in this case, and nothing else happens.
	AssertTrue(t, api.FlushInput() == false, "flush input at start")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")

	// Read offset is now zero, but reading should continue after "co".
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertEqual(t, "cool", api.String(), "end result")
}

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}