Further code cleaning for the interaction between ParseAPI and TokenAPI. Extra atoms added, including one based on a callback, which accepts single runes for which that callback function returns true.

2019-06-07 15:48:49 +00:00 · 2019-06-07 15:48:49 +00:00 · 9a5bf8b9af
parent 98d2db0374
commit 9a5bf8b9af
11 changed files with 286 additions and 220 deletions
--- a/examples/example_basiccalculator1_test.go
+++ b/examples/example_basiccalculator1_test.go
@ -3,8 +3,8 @@
 //
 //     10 + 20 - 8+4
 //
-// So positive numbers that can be either added or substracted, and whitespace
-// is ignored.
+// So positive numbers that can be either added or subtracted, and blanks
+// around numbers are ignored.
 package examples

 import (
@ -69,9 +69,9 @@ type simpleCalculator struct {
 	op     int64 // represents operation for next term (+1 = add, -1 = subtract)
 }

-// A definition of an int64, which conveniently drops surrounding whitespace.
-var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
-var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
+// A definition of an int64, which conveniently drops surrounding blanks.
+var dropBlank = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Blank))
+var bareInteger = parsekit.C.Seq(dropBlank, parsekit.A.Integer, dropBlank)
 var int64Token = parsekit.T.Int64(nil, bareInteger)

 func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
--- a/examples/example_basiccalculator2_test.go
+++ b/examples/example_basiccalculator2_test.go
@ -130,7 +130,7 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
 // <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
 func (c *calculator) factor(p *parsekit.ParseAPI) {
 	var A, T = parsekit.A, parsekit.T
-	p.On(A.Whitespace).Skip()
+	p.On(A.Blank).Skip()
 	switch {
 	case p.On(T.Float64(nil, A.Signed(A.Float))).Accept():
 		value := p.Result().Value(0).(float64)
@ -147,7 +147,7 @@ func (c *calculator) factor(p *parsekit.ParseAPI) {
 		p.UnexpectedInput("factor or (expression)")
 		return
 	}
-	p.On(A.Whitespace).Skip()
+	p.On(A.Blank).Skip()
 }

 // ---------------------------------------------------------------------------
--- a/examples/example_dutchpostcode_test.go
+++ b/examples/example_dutchpostcode_test.go
@ -65,7 +65,7 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
 	pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
 	pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
 	pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
-	space := M.Replace(C.Opt(A.Whitespace), " ")
+	space := M.Replace(C.Opt(A.Blank), " ")
 	postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)

 	// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
--- a/examples/example_helloManyStateParser_test.go
+++ b/examples/example_helloManyStateParser_test.go
@ -90,7 +90,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
 func (h *helloparser1) comma(p *parsekit.ParseAPI) {
 	a := parsekit.A
 	switch {
-	case p.On(a.Whitespace).Skip():
+	case p.On(a.Blank).Skip():
 		p.Handle(h.comma)
 	case p.On(a.Comma).Skip():
 		p.Handle(h.startName)
@ -102,7 +102,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) {
 func (h *helloparser1) startName(p *parsekit.ParseAPI) {
 	c, a := parsekit.C, parsekit.A
 	switch {
-	case p.On(a.Whitespace).Skip():
+	case p.On(a.Blank).Skip():
 		p.Handle(h.startName)
 	case p.On(c.Not(a.Excl)).Stay():
 		p.Handle(h.name)
--- a/examples/example_helloParserCombinator_test.go
+++ b/examples/example_helloParserCombinator_test.go
@ -54,8 +54,8 @@ func createHelloTokenizer() *parsekit.Tokenizer {
 	// that does all the work. The 'greeting' TokenHandler matches the whole input and
 	// drops all but the name from it.
 	hello := a.StrNoCase("hello")
-	comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
-	separator := c.Any(comma, a.Whitespace)
+	comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
+	separator := c.Any(comma, a.Blank)
 	name := c.OneOrMore(c.Not(a.Excl))
 	greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)

--- a/examples/example_helloSingleStateParser_test.go
+++ b/examples/example_helloSingleStateParser_test.go
@ -84,7 +84,7 @@ func (h *helloparser2) start(p *parsekit.ParseAPI) {
 		p.Error("the greeting is not being friendly")
 		return
 	}
-	if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
+	if !p.On(c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))).Skip() {
 		p.Error("the greeting is not properly separated")
 		return
 	}
--- a/parseapi.go
+++ b/parseapi.go
@ -66,7 +66,7 @@ func (p *ParseAPI) checkForLoops() {
 //
 // So an example chain could look like this:
 //
-//     p.On(parsekit.A.Whitespace).Skip()
+//     p.On(parsekit.A.Blank).Skip()
 //
 // The chain as a whole returns a boolean that indicates whether or not at match
 // was found. When no match was found, false is returned and Skip() and Accept()
@ -103,31 +103,36 @@ func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
 	ok := tokenHandler(child)

 	return &ParseAPIOnAction{
-		parseAPI: p,
-		tokenAPI: child,
-		ok:       ok,
+		parseAPI:       p,
+		tokenAPI:       p.tokenAPI,
+		forkedTokenAPI: child,
+		ok:             ok,
 	}
 }

 // ParseAPIOnAction is a struct that is used for building the On()-method chain.
 // The On() method will return an initialized struct of this type.
 type ParseAPIOnAction struct {
-	parseAPI *ParseAPI
-	tokenAPI *TokenAPI
-	ok       bool
+	parseAPI       *ParseAPI
+	tokenAPI       *TokenAPI
+	forkedTokenAPI *TokenAPI
+	ok             bool
 }

 // Accept tells the parser to move the read cursor past a match that was
-// found, and to make the TokenHandlerResult from the TokenAPI available in the
-// ParseAPI through the ParseAPI.Result() method.
+// found by a TokenHandler, and to make the TokenHandlerResult from the
+// TokenAPI available in the ParseAPI through the ParseAPI.Result() method.
 //
 // Returns true in case a match was found.
 // When no match was found, then no action is taken and false is returned.
 func (a *ParseAPIOnAction) Accept() bool {
 	if a.ok {
-		a.tokenAPI.Merge()
-		a.flushReader()
-		a.parseAPI.result = a.tokenAPI.root.result
+		a.forkedTokenAPI.Merge()
+		a.parseAPI.result = a.tokenAPI.Result()
+		a.tokenAPI.detachChilds()
+		if a.tokenAPI.flushReader() {
+			a.parseAPI.initLoopCheck()
+		}
 	}
 	return a.ok
 }
@ -145,10 +150,12 @@ func (a *ParseAPIOnAction) Accept() bool {
 func (a *ParseAPIOnAction) Skip() bool {
 	if a.ok {
 		a.parseAPI.result = nil
-		a.tokenAPI.clearResults()
-		a.tokenAPI.syncCursorTo(a.tokenAPI.root)
+		a.forkedTokenAPI.clearResults()
 		a.tokenAPI.detachChilds()
-		a.flushReader()
+		a.forkedTokenAPI.syncCursorTo(a.tokenAPI)
+		if a.tokenAPI.flushReader() {
+			a.parseAPI.initLoopCheck()
+		}
 	}
 	return a.ok
 }
@ -170,14 +177,6 @@ func (a *ParseAPIOnAction) Stay() bool {
 	return a.ok
 }

-func (a *ParseAPIOnAction) flushReader() {
-	if a.tokenAPI.result.offset > 0 {
-		a.tokenAPI.root.reader.Flush(a.tokenAPI.root.result.offset)
-		a.tokenAPI.root.result.offset = 0
-		a.parseAPI.initLoopCheck()
-	}
-}
-
 // Result returns a TokenHandlerResult struct, containing results as produced by the
 // last ParseAPI.On().Accept() call.
 func (p *ParseAPI) Result() *TokenHandlerResult {
--- a/parser_test.go
+++ b/parser_test.go
@ -298,7 +298,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
 //    p.On(c.Max(5, a.AnyRune))
 //
 // The problem here is that Max(5, ...) will also match when there is
-// no more input, since Max(5, ---) is actually MinMax(0, 5, ...).
+// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
 // Therefore the loop will never stop. Solving the loop was simple:
 //
 //    p.On(c.MinMax(1, 5, a.AnyRune))
--- a/tokenapi.go
+++ b/tokenapi.go
@ -8,15 +8,15 @@ import (
 )

 // TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
-// the reader and to report back tokenizing results. For easy lookahead support,
-// a forking strategy is provided.
+// a parsekit.reader.Reader and to report back tokenizing results. For easy
+// lookahead support, a forking strategy is provided.
 //
 // BASIC OPERATION:
 //
 // To retrieve the next rune from the TokenAPI, call the NextRune() method.
 //
 // When the rune is to be accepted as input, call the method Accept(). The rune
-// is then added to the results of the TokenAPI and the read cursor is moved
+// is then added to the result runes of the TokenAPI and the read cursor is moved
 // forward.
 //
 // By invoking NextRune() + Accept() multiple times, the result can be extended
@ -63,7 +63,6 @@ import (
 // no bookkeeping has to be implemented when implementing a parser.
 type TokenAPI struct {
 	reader *reader.Reader
-	root   *TokenAPI           // the root TokenAPI
 	parent *TokenAPI           // parent TokenAPI in case this TokenAPI is a fork child
 	child  *TokenAPI           // child TokenAPI in case this TokenAPI is a fork parent
 	result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
@ -75,7 +74,6 @@ func NewTokenAPI(r io.Reader) *TokenAPI {
 		reader: reader.New(r),
 		result: newTokenHandlerResult(),
 	}
-	input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
 	return input
 }

@ -141,7 +139,6 @@ func (i *TokenAPI) Fork() *TokenAPI {
 	// Create the new fork.
 	child := &TokenAPI{
 		reader: i.reader,
-		root:   i.root,
 		parent: i,
 	}
 	child.result = newTokenHandlerResult()
@ -200,6 +197,15 @@ func (i *TokenAPI) detachChildsRecurse() {
 	i.parent = nil
 }

+func (i *TokenAPI) flushReader() bool {
+	if i.result.offset > 0 {
+		i.reader.Flush(i.result.offset)
+		i.result.offset = 0
+		return true
+	}
+	return false
+}
+
 // Result returns the TokenHandlerResult data for the TokenAPI. The returned struct
 // can be used to retrieve and to modify result data.
 func (i *TokenAPI) Result() *TokenHandlerResult {
--- a/tokenhandlers_builtin.go
+++ b/tokenhandlers_builtin.go
@ -62,160 +62,168 @@ var C = struct {
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var A = struct {
-	Rune                  func(rune) TokenHandler
-	Runes                 func(...rune) TokenHandler
-	RuneRange             func(rune, rune) TokenHandler
-	Str                   func(string) TokenHandler
-	StrNoCase             func(string) TokenHandler
-	EndOfFile             TokenHandler
-	AnyRune               TokenHandler
-	ValidRune             TokenHandler
-	Space                 TokenHandler
-	Tab                   TokenHandler
-	CR                    TokenHandler
-	LF                    TokenHandler
-	CRLF                  TokenHandler
-	Excl                  TokenHandler
-	DoubleQuote           TokenHandler
-	Hash                  TokenHandler
-	Dollar                TokenHandler
-	Percent               TokenHandler
-	Amp                   TokenHandler
-	SingleQuote           TokenHandler
-	RoundOpen             TokenHandler
-	LeftParen             TokenHandler
-	RoundClose            TokenHandler
-	RightParen            TokenHandler
-	Asterisk              TokenHandler
-	Multiply              TokenHandler
-	Plus                  TokenHandler
-	Add                   TokenHandler
-	Comma                 TokenHandler
-	Minus                 TokenHandler
-	Subtract              TokenHandler
-	Dot                   TokenHandler
-	Slash                 TokenHandler
-	Divide                TokenHandler
-	Colon                 TokenHandler
-	Semicolon             TokenHandler
-	AngleOpen             TokenHandler
-	LessThan              TokenHandler
-	Equal                 TokenHandler
-	AngleClose            TokenHandler
-	GreaterThan           TokenHandler
-	Question              TokenHandler
-	At                    TokenHandler
-	SquareOpen            TokenHandler
-	Backslash             TokenHandler
-	SquareClose           TokenHandler
-	Caret                 TokenHandler
-	Underscore            TokenHandler
-	Backquote             TokenHandler
-	CurlyOpen             TokenHandler
-	Pipe                  TokenHandler
-	CurlyClose            TokenHandler
-	Tilde                 TokenHandler
-	Newline               TokenHandler
-	Whitespace            TokenHandler
-	WhitespaceAndNewlines TokenHandler
-	EndOfLine             TokenHandler
-	Digit                 TokenHandler
-	DigitNotZero          TokenHandler
-	Digits                TokenHandler
-	Float                 TokenHandler
-	Boolean               TokenHandler
-	Integer               TokenHandler
-	Signed                func(TokenHandler) TokenHandler
-	IntegerBetween        func(min int64, max int64) TokenHandler
-	ASCII                 TokenHandler
-	ASCIILower            TokenHandler
-	ASCIIUpper            TokenHandler
-	HexDigit              TokenHandler
-	Octet                 TokenHandler
-	IPv4                  TokenHandler
-	IPv4CIDRMask          TokenHandler
-	IPv4Netmask           TokenHandler
-	IPv4Net               TokenHandler
-	IPv6                  TokenHandler
-	IPv6CIDRMask          TokenHandler
-	IPv6Net               TokenHandler
+	Rune           func(rune) TokenHandler
+	Runes          func(...rune) TokenHandler
+	RuneRange      func(rune, rune) TokenHandler
+	Str            func(string) TokenHandler
+	StrNoCase      func(string) TokenHandler
+	EndOfFile      TokenHandler
+	AnyRune        TokenHandler
+	ValidRune      TokenHandler
+	Space          TokenHandler
+	Tab            TokenHandler
+	CR             TokenHandler
+	LF             TokenHandler
+	CRLF           TokenHandler
+	Excl           TokenHandler
+	DoubleQuote    TokenHandler
+	Hash           TokenHandler
+	Dollar         TokenHandler
+	Percent        TokenHandler
+	Amp            TokenHandler
+	SingleQuote    TokenHandler
+	RoundOpen      TokenHandler
+	LeftParen      TokenHandler
+	RoundClose     TokenHandler
+	RightParen     TokenHandler
+	Asterisk       TokenHandler
+	Multiply       TokenHandler
+	Plus           TokenHandler
+	Add            TokenHandler
+	Comma          TokenHandler
+	Minus          TokenHandler
+	Subtract       TokenHandler
+	Dot            TokenHandler
+	Slash          TokenHandler
+	Divide         TokenHandler
+	Colon          TokenHandler
+	Semicolon      TokenHandler
+	AngleOpen      TokenHandler
+	LessThan       TokenHandler
+	Equal          TokenHandler
+	AngleClose     TokenHandler
+	GreaterThan    TokenHandler
+	Question       TokenHandler
+	At             TokenHandler
+	SquareOpen     TokenHandler
+	Backslash      TokenHandler
+	SquareClose    TokenHandler
+	Caret          TokenHandler
+	Underscore     TokenHandler
+	Backquote      TokenHandler
+	CurlyOpen      TokenHandler
+	Pipe           TokenHandler
+	CurlyClose     TokenHandler
+	Tilde          TokenHandler
+	Newline        TokenHandler
+	Blank          TokenHandler
+	Blanks         TokenHandler
+	Whitespace     TokenHandler
+	EndOfLine      TokenHandler
+	Digit          TokenHandler
+	DigitNotZero   TokenHandler
+	Digits         TokenHandler
+	Float          TokenHandler
+	Boolean        TokenHandler
+	Integer        TokenHandler
+	Signed         func(TokenHandler) TokenHandler
+	IntegerBetween func(min int64, max int64) TokenHandler
+	ASCII          TokenHandler
+	ASCIILower     TokenHandler
+	ASCIIUpper     TokenHandler
+	Letter         TokenHandler
+	Lower          TokenHandler
+	Upper          TokenHandler
+	HexDigit       TokenHandler
+	Octet          TokenHandler
+	IPv4           TokenHandler
+	IPv4CIDRMask   TokenHandler
+	IPv4Netmask    TokenHandler
+	IPv4Net        TokenHandler
+	IPv6           TokenHandler
+	IPv6CIDRMask   TokenHandler
+	IPv6Net        TokenHandler
 }{
-	Rune:                  MatchRune,
-	Runes:                 MatchRunes,
-	RuneRange:             MatchRuneRange,
-	Str:                   MatchStr,
-	StrNoCase:             MatchStrNoCase,
-	EndOfFile:             MatchEndOfFile(),
-	AnyRune:               MatchAnyRune(),
-	ValidRune:             MatchValidRune(),
-	Space:                 MatchRune(' '),
-	Tab:                   MatchRune('\t'),
-	CR:                    MatchRune('\r'),
-	LF:                    MatchRune('\n'),
-	CRLF:                  MatchStr("\r\n"),
-	Excl:                  MatchRune('!'),
-	DoubleQuote:           MatchRune('"'),
-	Hash:                  MatchRune('#'),
-	Dollar:                MatchRune('$'),
-	Percent:               MatchRune('%'),
-	Amp:                   MatchRune('&'),
-	SingleQuote:           MatchRune('\''),
-	RoundOpen:             MatchRune('('),
-	LeftParen:             MatchRune('('),
-	RoundClose:            MatchRune(')'),
-	RightParen:            MatchRune(')'),
-	Asterisk:              MatchRune('*'),
-	Multiply:              MatchRune('*'),
-	Plus:                  MatchRune('+'),
-	Add:                   MatchRune('+'),
-	Comma:                 MatchRune(','),
-	Minus:                 MatchRune('-'),
-	Subtract:              MatchRune('-'),
-	Dot:                   MatchRune('.'),
-	Slash:                 MatchRune('/'),
-	Divide:                MatchRune('/'),
-	Colon:                 MatchRune(':'),
-	Semicolon:             MatchRune(';'),
-	AngleOpen:             MatchRune('<'),
-	LessThan:              MatchRune('<'),
-	Equal:                 MatchRune('='),
-	AngleClose:            MatchRune('>'),
-	GreaterThan:           MatchRune('>'),
-	Question:              MatchRune('?'),
-	At:                    MatchRune('@'),
-	SquareOpen:            MatchRune('['),
-	Backslash:             MatchRune('\\'),
-	SquareClose:           MatchRune(']'),
-	Caret:                 MatchRune('^'),
-	Underscore:            MatchRune('_'),
-	Backquote:             MatchRune('`'),
-	CurlyOpen:             MatchRune('{'),
-	Pipe:                  MatchRune('|'),
-	CurlyClose:            MatchRune('}'),
-	Tilde:                 MatchRune('~'),
-	Whitespace:            MatchWhitespace(),
-	WhitespaceAndNewlines: MatchWhitespaceAndNewlines(),
-	EndOfLine:             MatchEndOfLine(),
-	Digit:                 MatchDigit(),
-	DigitNotZero:          MatchDigitNotZero(),
-	Digits:                MatchDigits(),
-	Integer:               MatchInteger(),
-	Signed:                MatchSigned,
-	IntegerBetween:        MatchIntegerBetween,
-	Float:                 MatchFloat(),
-	Boolean:               MatchBoolean(),
-	ASCII:                 MatchASCII(),
-	ASCIILower:            MatchASCIILower(),
-	ASCIIUpper:            MatchASCIIUpper(),
-	HexDigit:              MatchHexDigit(),
-	Octet:                 MatchOctet(false),
-	IPv4:                  MatchIPv4(true),
-	IPv4CIDRMask:          MatchIPv4CIDRMask(true),
-	IPv4Netmask:           MatchIPv4Netmask(true),
-	IPv4Net:               MatchIPv4Net(true),
-	IPv6:                  MatchIPv6(true),
-	IPv6CIDRMask:          MatchIPv6CIDRMask(true),
-	IPv6Net:               MatchIPv6Net(true),
+	Rune:           MatchRune,
+	Runes:          MatchRunes,
+	RuneRange:      MatchRuneRange,
+	Str:            MatchStr,
+	StrNoCase:      MatchStrNoCase,
+	EndOfFile:      MatchEndOfFile(),
+	AnyRune:        MatchAnyRune(),
+	ValidRune:      MatchValidRune(),
+	Space:          MatchRune(' '),
+	Tab:            MatchRune('\t'),
+	CR:             MatchRune('\r'),
+	LF:             MatchRune('\n'),
+	CRLF:           MatchStr("\r\n"),
+	Excl:           MatchRune('!'),
+	DoubleQuote:    MatchRune('"'),
+	Hash:           MatchRune('#'),
+	Dollar:         MatchRune('$'),
+	Percent:        MatchRune('%'),
+	Amp:            MatchRune('&'),
+	SingleQuote:    MatchRune('\''),
+	RoundOpen:      MatchRune('('),
+	LeftParen:      MatchRune('('),
+	RoundClose:     MatchRune(')'),
+	RightParen:     MatchRune(')'),
+	Asterisk:       MatchRune('*'),
+	Multiply:       MatchRune('*'),
+	Plus:           MatchRune('+'),
+	Add:            MatchRune('+'),
+	Comma:          MatchRune(','),
+	Minus:          MatchRune('-'),
+	Subtract:       MatchRune('-'),
+	Dot:            MatchRune('.'),
+	Slash:          MatchRune('/'),
+	Divide:         MatchRune('/'),
+	Colon:          MatchRune(':'),
+	Semicolon:      MatchRune(';'),
+	AngleOpen:      MatchRune('<'),
+	LessThan:       MatchRune('<'),
+	Equal:          MatchRune('='),
+	AngleClose:     MatchRune('>'),
+	GreaterThan:    MatchRune('>'),
+	Question:       MatchRune('?'),
+	At:             MatchRune('@'),
+	SquareOpen:     MatchRune('['),
+	Backslash:      MatchRune('\\'),
+	SquareClose:    MatchRune(']'),
+	Caret:          MatchRune('^'),
+	Underscore:     MatchRune('_'),
+	Backquote:      MatchRune('`'),
+	CurlyOpen:      MatchRune('{'),
+	Pipe:           MatchRune('|'),
+	CurlyClose:     MatchRune('}'),
+	Tilde:          MatchRune('~'),
+	Blank:          MatchBlank(),
+	Blanks:         MatchBlanks(),
+	Whitespace:     MatchWhitespace(),
+	EndOfLine:      MatchEndOfLine(),
+	Digit:          MatchDigit(),
+	DigitNotZero:   MatchDigitNotZero(),
+	Digits:         MatchDigits(),
+	Integer:        MatchInteger(),
+	Signed:         MatchSigned,
+	IntegerBetween: MatchIntegerBetween,
+	Float:          MatchFloat(),
+	Boolean:        MatchBoolean(),
+	ASCII:          MatchASCII(),
+	ASCIILower:     MatchASCIILower(),
+	ASCIIUpper:     MatchASCIIUpper(),
+	Letter:         MatchUnicodeLetter(),
+	Lower:          MatchUnicodeLower(),
+	Upper:          MatchUnicodeUpper(),
+	HexDigit:       MatchHexDigit(),
+	Octet:          MatchOctet(false),
+	IPv4:           MatchIPv4(true),
+	IPv4CIDRMask:   MatchIPv4CIDRMask(true),
+	IPv4Netmask:    MatchIPv4Netmask(true),
+	IPv4Net:        MatchIPv4Net(true),
+	IPv6:           MatchIPv6(true),
+	IPv6CIDRMask:   MatchIPv6CIDRMask(true),
+	IPv6Net:        MatchIPv6Net(true),
 }

 // M provides convenient access to a range of modifiers (which in their nature are
@ -352,20 +360,45 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
 	}
 }

-// MatchWhitespace creates a TokenHandler that matches the input against one
-// or more whitespace characters, meansing tabs and spaces.
+// MatchBlank creates a TokenHandler that matches one rune from the input
+// against blank characters, meaning tabs and spaces.
 //
-// When you need whitespace matching to also include newlines, then make use
-// of MatchWhitespaceAndNewlines().
-func MatchWhitespace() TokenHandler {
-	return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t')))
+// When you need whitespace matching, which also includes characters like
+// newlines, then take a look at MatchWhitespace().
+func MatchBlank() TokenHandler {
+	return MatchAny(MatchRune(' '), MatchRune('\t'))
 }

-// MatchWhitespaceAndNewlines creates a TokenHandler that matches the input
-// against one or more whitespace and/or newline characters, meaning tabs,
-// spaces and newlines ("\r\n" and "\n").
-func MatchWhitespaceAndNewlines() TokenHandler {
-	return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n')))
+// MatchBlanks creates a TokenHandler that matches the input against one
+// or more blank characters, meaning tabs and spaces.
+//
+// When you need whitespace matching, which also includes characters like
+// newlines, then make use of MatchWhitespace().
+func MatchBlanks() TokenHandler {
+	return MatchOneOrMore(MatchBlank())
+}
+
+// MatchWhitespace creates a TokenHandler that matches the input against one or more
+// whitespace characters, as defined by unicode.
+func MatchWhitespace() TokenHandler {
+	return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
+}
+
+// MatchRuneByCallback creates a TokenHandler that matches a single rune from the
+// input against the provided callback function. When the callback returns true,
+// it is considered a match.
+//
+// Note that the callback function matches the signature of the unicode.Is* functions,
+// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
+func MatchRuneByCallback(callback func(rune) bool) TokenHandler {
+	return func(t *TokenAPI) bool {
+		input, err := t.NextRune()
+		if err == nil && callback(input) {
+			t.Accept()
+			return true
+		}
+		return false
+	}
 }

 // MatchEndOfLine creates a TokenHandler that matches a newline ("\r\n" or "\n") or EOF.
@ -649,7 +682,7 @@ func MatchDigit() TokenHandler {
 // MatchDigits creates a TokenHandler that checks if one or more digits can be read
 // from the input.
 func MatchDigits() TokenHandler {
-	return MatchOneOrMore(MatchRuneRange('0', '9'))
+	return MatchOneOrMore(MatchDigit())
 }

 // MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal
@ -707,6 +740,24 @@ func MatchASCIIUpper() TokenHandler {
 	return MatchRuneRange('A', 'Z')
 }

+// MatchUnicodeLetter creates a TokenHandler function that matches against any
+// unicode letter on the input (see unicode.IsLetter(rune)).
+func MatchUnicodeLetter() TokenHandler {
+	return MatchRuneByCallback(unicode.IsLetter)
+}
+
+// MatchUnicodeUpper creates a TokenHandler function that matches against any
+// upper case unicode letter on the input (see unicode.IsUpper(rune)).
+func MatchUnicodeUpper() TokenHandler {
+	return MatchRuneByCallback(unicode.IsUpper)
+}
+
+// MatchUnicodeLower creates a TokenHandler function that matches against any
+// lower case unicode letter on the input (see unicode.IsLower(rune)).
+func MatchUnicodeLower() TokenHandler {
+	return MatchRuneByCallback(unicode.IsLower)
+}
+
 // MatchHexDigit creates a TokenHandler function that check if a single hexadecimal
 // digit can be read from the input.
 func MatchHexDigit() TokenHandler {
@ -908,15 +959,15 @@ func MatchIPv6Net(normalize bool) TokenHandler {
 //
 // Note that if the TokenHandler does not apply, a mismatch will be reported back,
 // even though we would have dropped the output anyway. So if you would like
-// to drop optional whitespace, then use something like:
+// to drop optional blanks (spaces and tabs), then use something like:
 //
-//     M.Drop(C.Opt(A.Whitespace))
+//     M.Drop(C.Opt(A.Blank))
 //
 // instead of:
 //
-//     M.Drop(A.Whitespace)
+//     M.Drop(A.Blank)
 //
-// Since whitespace is defined as "1 or more spaces and/or tabs", the input
+// Since A.Blank is defined as "a single space or tab", the input
 // string "bork" would not match against the second form, but " bork" would.
 // In both cases, it would match the first form.
 func ModifyDrop(handler TokenHandler) TokenHandler {
@ -960,8 +1011,8 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo
 }

 // ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
-// If it does, then its output is taken and all leading and trailing whitespace charcters,
-// as defined by Unicode (spaces, tabs, carriage returns and newlines) are removed from it.
+// If it does, then its output is taken and all leading and trailing whitespace characters,
+// as defined by Unicode, are removed from it.
 func ModifyTrimSpace(handler TokenHandler) TokenHandler {
 	return ModifyByCallback(handler, strings.TrimSpace)
 }
--- a/tokenhandlers_builtin_test.go
+++ b/tokenhandlers_builtin_test.go
@ -157,9 +157,12 @@ func TestAtoms(t *testing.T) {
 		{"|", a.Pipe, true, "|"},
 		{"}", a.CurlyClose, true, "}"},
 		{"~", a.Tilde, true, "~"},
-		{" \t \t \r\n", a.Whitespace, true, " \t \t "},
-		{"\r", a.WhitespaceAndNewlines, false, ""},
-		{" \t\r\n \r", a.WhitespaceAndNewlines, true, " \t\r\n "},
+		{"\t \t \r\n", a.Blank, true, "\t"},
+		{" \t \t \r\n", a.Blanks, true, " \t \t "},
+		{"xxx", a.Whitespace, false, ""},
+		{" ", a.Whitespace, true, " "},
+		{"\t", a.Whitespace, true, "\t"},
+		{" \t\r\n \r\v\f ", a.Whitespace, true, " \t\r\n \r\v\f "},
 		{"", a.EndOfLine, true, ""},
 		{"\r\n", a.EndOfLine, true, "\r\n"},
 		{"\n", a.EndOfLine, true, "\n"},
@ -182,6 +185,13 @@ func TestAtoms(t *testing.T) {
 		{"Z", a.ASCIIUpper, true, "Z"},
 		{"a", a.ASCIIUpper, false, ""},
 		{"z", a.ASCIIUpper, false, ""},
+		{"1", a.Letter, false, ""},
+		{"a", a.Letter, true, "a"},
+		{"Ø", a.Letter, true, "Ø"},
+		{"Ë", a.Lower, false, ""},
+		{"ë", a.Lower, true, "ë"},
+		{"ä", a.Upper, false, "ä"},
+		{"Ä", a.Upper, true, "Ä"},
 		{"0", a.HexDigit, true, "0"},
 		{"9", a.HexDigit, true, "9"},
 		{"a", a.HexDigit, true, "a"},
@ -403,16 +413,16 @@ func TestCombination(t *testing.T) {
 		c.Opt(a.SquareOpen),
 		m.Trim(
 			c.Seq(
-				c.Opt(a.Whitespace),
+				c.Opt(a.Blanks),
 				c.Rep(3, a.AngleClose),
 				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
 					return fmt.Sprintf("%d", len(s))
 				}),
-				m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
+				m.Replace(c.Separated(a.Comma, c.Opt(a.Blanks)), ", "),
 				m.ToUpper(c.Min(1, a.ASCIILower)),
 				m.Drop(a.Excl),
 				c.Rep(3, a.AngleOpen),
-				c.Opt(a.Whitespace),
+				c.Opt(a.Blanks),
 			),
 			" \t",
 		),