Further code cleanup for the interaction between ParseAPI and TokenAPI. Extra atoms were added, including one that accepts a single rune based on a provided callback function.

This commit is contained in:
Maurice Makaay 2019-06-07 15:48:49 +00:00
parent 98d2db0374
commit 9a5bf8b9af
11 changed files with 286 additions and 220 deletions

View File

@ -3,8 +3,8 @@
//
// 10 + 20 - 8+4
//
// So positive numbers that can be either added or substracted, and whitespace
// is ignored.
// So positive numbers that can be either added or subtracted, and blanks
// around numbers are ignored.
package examples
import (
@ -69,9 +69,9 @@ type simpleCalculator struct {
op int64 // represents operation for next term (+1 = add, -1 = subtract)
}
// A definition of an int64, which conveniently drops surrounding whitespace.
var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
// A definition of an int64, which conveniently drops surrounding blanks.
var dropBlank = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Blank))
var bareInteger = parsekit.C.Seq(dropBlank, parsekit.A.Integer, dropBlank)
var int64Token = parsekit.T.Int64(nil, bareInteger)
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {

View File

@ -130,7 +130,7 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
func (c *calculator) factor(p *parsekit.ParseAPI) {
var A, T = parsekit.A, parsekit.T
p.On(A.Whitespace).Skip()
p.On(A.Blank).Skip()
switch {
case p.On(T.Float64(nil, A.Signed(A.Float))).Accept():
value := p.Result().Value(0).(float64)
@ -147,7 +147,7 @@ func (c *calculator) factor(p *parsekit.ParseAPI) {
p.UnexpectedInput("factor or (expression)")
return
}
p.On(A.Whitespace).Skip()
p.On(A.Blank).Skip()
}
// ---------------------------------------------------------------------------

View File

@ -65,7 +65,7 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
space := M.Replace(C.Opt(A.Whitespace), " ")
space := M.Replace(C.Opt(A.Blank), " ")
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows

View File

@ -90,7 +90,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
a := parsekit.A
switch {
case p.On(a.Whitespace).Skip():
case p.On(a.Blank).Skip():
p.Handle(h.comma)
case p.On(a.Comma).Skip():
p.Handle(h.startName)
@ -102,7 +102,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) {
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
c, a := parsekit.C, parsekit.A
switch {
case p.On(a.Whitespace).Skip():
case p.On(a.Blank).Skip():
p.Handle(h.startName)
case p.On(c.Not(a.Excl)).Stay():
p.Handle(h.name)

View File

@ -54,8 +54,8 @@ func createHelloTokenizer() *parsekit.Tokenizer {
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := a.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
separator := c.Any(comma, a.Whitespace)
comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
separator := c.Any(comma, a.Blank)
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)

View File

@ -84,7 +84,7 @@ func (h *helloparser2) start(p *parsekit.ParseAPI) {
p.Error("the greeting is not being friendly")
return
}
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
if !p.On(c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))).Skip() {
p.Error("the greeting is not properly separated")
return
}

View File

@ -66,7 +66,7 @@ func (p *ParseAPI) checkForLoops() {
//
// So an example chain could look like this:
//
// p.On(parsekit.A.Whitespace).Skip()
// p.On(parsekit.A.Blank).Skip()
//
// The chain as a whole returns a boolean that indicates whether or not a match
// was found. When no match was found, false is returned and Skip() and Accept()
@ -103,31 +103,36 @@ func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
ok := tokenHandler(child)
return &ParseAPIOnAction{
parseAPI: p,
tokenAPI: child,
ok: ok,
parseAPI: p,
tokenAPI: p.tokenAPI,
forkedTokenAPI: child,
ok: ok,
}
}
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
// The On() method will return an initialized struct of this type.
type ParseAPIOnAction struct {
parseAPI *ParseAPI
tokenAPI *TokenAPI
ok bool
parseAPI *ParseAPI
tokenAPI *TokenAPI
forkedTokenAPI *TokenAPI
ok bool
}
// Accept tells the parser to move the read cursor past a match that was
// found, and to make the TokenHandlerResult from the TokenAPI available in the
// ParseAPI through the ParseAPI.Result() method.
// found by a TokenHandler, and to make the TokenHandlerResult from the
// TokenAPI available in the ParseAPI through the ParseAPI.Result() method.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Accept() bool {
if a.ok {
a.tokenAPI.Merge()
a.flushReader()
a.parseAPI.result = a.tokenAPI.root.result
a.forkedTokenAPI.Merge()
a.parseAPI.result = a.tokenAPI.Result()
a.tokenAPI.detachChilds()
if a.tokenAPI.flushReader() {
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
@ -145,10 +150,12 @@ func (a *ParseAPIOnAction) Accept() bool {
func (a *ParseAPIOnAction) Skip() bool {
if a.ok {
a.parseAPI.result = nil
a.tokenAPI.clearResults()
a.tokenAPI.syncCursorTo(a.tokenAPI.root)
a.forkedTokenAPI.clearResults()
a.tokenAPI.detachChilds()
a.flushReader()
a.forkedTokenAPI.syncCursorTo(a.tokenAPI)
if a.tokenAPI.flushReader() {
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
@ -170,14 +177,6 @@ func (a *ParseAPIOnAction) Stay() bool {
return a.ok
}
// flushReader calls Flush on the reader of the root TokenAPI, passing the
// root result's offset, then resets that offset to zero and re-initializes
// the loop check of the ParseAPI. Nothing is done when the result offset of
// a.tokenAPI is not greater than zero.
//
// NOTE(review): the guard inspects a.tokenAPI.result.offset, while the flush
// uses a.tokenAPI.root.result.offset. Presumably the caller has brought these
// in agreement (merge or cursor sync) before calling -- confirm.
func (a *ParseAPIOnAction) flushReader() {
if a.tokenAPI.result.offset > 0 {
a.tokenAPI.root.reader.Flush(a.tokenAPI.root.result.offset)
a.tokenAPI.root.result.offset = 0
a.parseAPI.initLoopCheck()
}
}
// Result returns a TokenHandlerResult struct, containing results as produced by the
// last ParseAPI.On().Accept() call.
func (p *ParseAPI) Result() *TokenHandlerResult {

View File

@ -298,7 +298,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
// p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ---) is actually MinMax(0, 5, ...).
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
// p.On(c.MinMax(1, 5, a.AnyRune))

View File

@ -8,15 +8,15 @@ import (
)
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
// the reader and to report back tokenizing results. For easy lookahead support,
// a forking strategy is provided.
// a parsekit.reader.Reader and to report back tokenizing results. For easy
// lookahead support, a forking strategy is provided.
//
// BASIC OPERATION:
//
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the results of the TokenAPI and the read cursor is moved
// is then added to the result runes of the TokenAPI and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
@ -63,7 +63,6 @@ import (
// no bookkeeping has to be implemented when implementing a parser.
type TokenAPI struct {
reader *reader.Reader
root *TokenAPI // the root TokenAPI
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
@ -75,7 +74,6 @@ func NewTokenAPI(r io.Reader) *TokenAPI {
reader: reader.New(r),
result: newTokenHandlerResult(),
}
input.root = input // TODO remove this one from root input, input.root == nil is also a good check for "is root?".
return input
}
@ -141,7 +139,6 @@ func (i *TokenAPI) Fork() *TokenAPI {
// Create the new fork.
child := &TokenAPI{
reader: i.reader,
root: i.root,
parent: i,
}
child.result = newTokenHandlerResult()
@ -200,6 +197,15 @@ func (i *TokenAPI) detachChildsRecurse() {
i.parent = nil
}
// flushReader hands the current result offset to the underlying reader's
// Flush method and resets the offset to zero.
//
// It returns true when a flush was performed, and false when the offset
// was not greater than zero (in which case nothing is changed).
func (i *TokenAPI) flushReader() bool {
	// Nothing was consumed since the last flush, so there is nothing to do.
	if i.result.offset <= 0 {
		return false
	}
	i.reader.Flush(i.result.offset)
	i.result.offset = 0
	return true
}
// Result returns the TokenHandlerResult data for the TokenAPI. The returned struct
// can be used to retrieve and to modify result data.
func (i *TokenAPI) Result() *TokenHandlerResult {

View File

@ -62,160 +62,168 @@ var C = struct {
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
Rune func(rune) TokenHandler
Runes func(...rune) TokenHandler
RuneRange func(rune, rune) TokenHandler
Str func(string) TokenHandler
StrNoCase func(string) TokenHandler
EndOfFile TokenHandler
AnyRune TokenHandler
ValidRune TokenHandler
Space TokenHandler
Tab TokenHandler
CR TokenHandler
LF TokenHandler
CRLF TokenHandler
Excl TokenHandler
DoubleQuote TokenHandler
Hash TokenHandler
Dollar TokenHandler
Percent TokenHandler
Amp TokenHandler
SingleQuote TokenHandler
RoundOpen TokenHandler
LeftParen TokenHandler
RoundClose TokenHandler
RightParen TokenHandler
Asterisk TokenHandler
Multiply TokenHandler
Plus TokenHandler
Add TokenHandler
Comma TokenHandler
Minus TokenHandler
Subtract TokenHandler
Dot TokenHandler
Slash TokenHandler
Divide TokenHandler
Colon TokenHandler
Semicolon TokenHandler
AngleOpen TokenHandler
LessThan TokenHandler
Equal TokenHandler
AngleClose TokenHandler
GreaterThan TokenHandler
Question TokenHandler
At TokenHandler
SquareOpen TokenHandler
Backslash TokenHandler
SquareClose TokenHandler
Caret TokenHandler
Underscore TokenHandler
Backquote TokenHandler
CurlyOpen TokenHandler
Pipe TokenHandler
CurlyClose TokenHandler
Tilde TokenHandler
Newline TokenHandler
Whitespace TokenHandler
WhitespaceAndNewlines TokenHandler
EndOfLine TokenHandler
Digit TokenHandler
DigitNotZero TokenHandler
Digits TokenHandler
Float TokenHandler
Boolean TokenHandler
Integer TokenHandler
Signed func(TokenHandler) TokenHandler
IntegerBetween func(min int64, max int64) TokenHandler
ASCII TokenHandler
ASCIILower TokenHandler
ASCIIUpper TokenHandler
HexDigit TokenHandler
Octet TokenHandler
IPv4 TokenHandler
IPv4CIDRMask TokenHandler
IPv4Netmask TokenHandler
IPv4Net TokenHandler
IPv6 TokenHandler
IPv6CIDRMask TokenHandler
IPv6Net TokenHandler
Rune func(rune) TokenHandler
Runes func(...rune) TokenHandler
RuneRange func(rune, rune) TokenHandler
Str func(string) TokenHandler
StrNoCase func(string) TokenHandler
EndOfFile TokenHandler
AnyRune TokenHandler
ValidRune TokenHandler
Space TokenHandler
Tab TokenHandler
CR TokenHandler
LF TokenHandler
CRLF TokenHandler
Excl TokenHandler
DoubleQuote TokenHandler
Hash TokenHandler
Dollar TokenHandler
Percent TokenHandler
Amp TokenHandler
SingleQuote TokenHandler
RoundOpen TokenHandler
LeftParen TokenHandler
RoundClose TokenHandler
RightParen TokenHandler
Asterisk TokenHandler
Multiply TokenHandler
Plus TokenHandler
Add TokenHandler
Comma TokenHandler
Minus TokenHandler
Subtract TokenHandler
Dot TokenHandler
Slash TokenHandler
Divide TokenHandler
Colon TokenHandler
Semicolon TokenHandler
AngleOpen TokenHandler
LessThan TokenHandler
Equal TokenHandler
AngleClose TokenHandler
GreaterThan TokenHandler
Question TokenHandler
At TokenHandler
SquareOpen TokenHandler
Backslash TokenHandler
SquareClose TokenHandler
Caret TokenHandler
Underscore TokenHandler
Backquote TokenHandler
CurlyOpen TokenHandler
Pipe TokenHandler
CurlyClose TokenHandler
Tilde TokenHandler
Newline TokenHandler
Blank TokenHandler
Blanks TokenHandler
Whitespace TokenHandler
EndOfLine TokenHandler
Digit TokenHandler
DigitNotZero TokenHandler
Digits TokenHandler
Float TokenHandler
Boolean TokenHandler
Integer TokenHandler
Signed func(TokenHandler) TokenHandler
IntegerBetween func(min int64, max int64) TokenHandler
ASCII TokenHandler
ASCIILower TokenHandler
ASCIIUpper TokenHandler
Letter TokenHandler
Lower TokenHandler
Upper TokenHandler
HexDigit TokenHandler
Octet TokenHandler
IPv4 TokenHandler
IPv4CIDRMask TokenHandler
IPv4Netmask TokenHandler
IPv4Net TokenHandler
IPv6 TokenHandler
IPv6CIDRMask TokenHandler
IPv6Net TokenHandler
}{
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
Str: MatchStr,
StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
Space: MatchRune(' '),
Tab: MatchRune('\t'),
CR: MatchRune('\r'),
LF: MatchRune('\n'),
CRLF: MatchStr("\r\n"),
Excl: MatchRune('!'),
DoubleQuote: MatchRune('"'),
Hash: MatchRune('#'),
Dollar: MatchRune('$'),
Percent: MatchRune('%'),
Amp: MatchRune('&'),
SingleQuote: MatchRune('\''),
RoundOpen: MatchRune('('),
LeftParen: MatchRune('('),
RoundClose: MatchRune(')'),
RightParen: MatchRune(')'),
Asterisk: MatchRune('*'),
Multiply: MatchRune('*'),
Plus: MatchRune('+'),
Add: MatchRune('+'),
Comma: MatchRune(','),
Minus: MatchRune('-'),
Subtract: MatchRune('-'),
Dot: MatchRune('.'),
Slash: MatchRune('/'),
Divide: MatchRune('/'),
Colon: MatchRune(':'),
Semicolon: MatchRune(';'),
AngleOpen: MatchRune('<'),
LessThan: MatchRune('<'),
Equal: MatchRune('='),
AngleClose: MatchRune('>'),
GreaterThan: MatchRune('>'),
Question: MatchRune('?'),
At: MatchRune('@'),
SquareOpen: MatchRune('['),
Backslash: MatchRune('\\'),
SquareClose: MatchRune(']'),
Caret: MatchRune('^'),
Underscore: MatchRune('_'),
Backquote: MatchRune('`'),
CurlyOpen: MatchRune('{'),
Pipe: MatchRune('|'),
CurlyClose: MatchRune('}'),
Tilde: MatchRune('~'),
Whitespace: MatchWhitespace(),
WhitespaceAndNewlines: MatchWhitespaceAndNewlines(),
EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
Integer: MatchInteger(),
Signed: MatchSigned,
IntegerBetween: MatchIntegerBetween,
Float: MatchFloat(),
Boolean: MatchBoolean(),
ASCII: MatchASCII(),
ASCIILower: MatchASCIILower(),
ASCIIUpper: MatchASCIIUpper(),
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(true),
IPv4Netmask: MatchIPv4Netmask(true),
IPv4Net: MatchIPv4Net(true),
IPv6: MatchIPv6(true),
IPv6CIDRMask: MatchIPv6CIDRMask(true),
IPv6Net: MatchIPv6Net(true),
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
Str: MatchStr,
StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
Space: MatchRune(' '),
Tab: MatchRune('\t'),
CR: MatchRune('\r'),
LF: MatchRune('\n'),
CRLF: MatchStr("\r\n"),
Excl: MatchRune('!'),
DoubleQuote: MatchRune('"'),
Hash: MatchRune('#'),
Dollar: MatchRune('$'),
Percent: MatchRune('%'),
Amp: MatchRune('&'),
SingleQuote: MatchRune('\''),
RoundOpen: MatchRune('('),
LeftParen: MatchRune('('),
RoundClose: MatchRune(')'),
RightParen: MatchRune(')'),
Asterisk: MatchRune('*'),
Multiply: MatchRune('*'),
Plus: MatchRune('+'),
Add: MatchRune('+'),
Comma: MatchRune(','),
Minus: MatchRune('-'),
Subtract: MatchRune('-'),
Dot: MatchRune('.'),
Slash: MatchRune('/'),
Divide: MatchRune('/'),
Colon: MatchRune(':'),
Semicolon: MatchRune(';'),
AngleOpen: MatchRune('<'),
LessThan: MatchRune('<'),
Equal: MatchRune('='),
AngleClose: MatchRune('>'),
GreaterThan: MatchRune('>'),
Question: MatchRune('?'),
At: MatchRune('@'),
SquareOpen: MatchRune('['),
Backslash: MatchRune('\\'),
SquareClose: MatchRune(']'),
Caret: MatchRune('^'),
Underscore: MatchRune('_'),
Backquote: MatchRune('`'),
CurlyOpen: MatchRune('{'),
Pipe: MatchRune('|'),
CurlyClose: MatchRune('}'),
Tilde: MatchRune('~'),
Blank: MatchBlank(),
Blanks: MatchBlanks(),
Whitespace: MatchWhitespace(),
EndOfLine: MatchEndOfLine(),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
Integer: MatchInteger(),
Signed: MatchSigned,
IntegerBetween: MatchIntegerBetween,
Float: MatchFloat(),
Boolean: MatchBoolean(),
ASCII: MatchASCII(),
ASCIILower: MatchASCIILower(),
ASCIIUpper: MatchASCIIUpper(),
Letter: MatchUnicodeLetter(),
Lower: MatchUnicodeLower(),
Upper: MatchUnicodeUpper(),
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(true),
IPv4Netmask: MatchIPv4Netmask(true),
IPv4Net: MatchIPv4Net(true),
IPv6: MatchIPv6(true),
IPv6CIDRMask: MatchIPv6CIDRMask(true),
IPv6Net: MatchIPv6Net(true),
}
// M provides convenient access to a range of modifiers (which in their nature are
@ -352,20 +360,45 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
}
}
// MatchWhitespace creates a TokenHandler that matches the input against one
// or more whitespace characters, meansing tabs and spaces.
// MatchBlank creates a TokenHandler that matches one rune from the input
// against blank characters, meaning tabs and spaces.
//
// When you need whitespace matching to also include newlines, then make use
// of MatchWhitespaceAndNewlines().
func MatchWhitespace() TokenHandler {
return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t')))
// When you need whitespace matching, which also includes characters like
// newlines, then take a look at MatchWhitespace().
func MatchBlank() TokenHandler {
	// A blank is either a single space or a single tab.
	space, tab := MatchRune(' '), MatchRune('\t')
	return MatchAny(space, tab)
}
// MatchWhitespaceAndNewlines creates a TokenHandler that matches the input
// against one or more whitespace and/or newline characters, meaning tabs,
// spaces and newlines ("\r\n" and "\n").
func MatchWhitespaceAndNewlines() TokenHandler {
return MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n')))
// MatchBlanks creates a TokenHandler that matches the input against one
// or more blank characters, meaning tabs and spaces.
//
// When you need whitespace matching, which also includes characters like
// newlines, then make use of MatchWhitespace().
func MatchBlanks() TokenHandler {
return MatchOneOrMore(MatchBlank())
}
// MatchWhitespace creates a TokenHandler that matches one or more consecutive
// runes from the input for which unicode.IsSpace reports true.
func MatchWhitespace() TokenHandler {
	whitespaceRune := MatchRuneByCallback(unicode.IsSpace)
	return MatchOneOrMore(whitespaceRune)
}
// MatchRuneByCallback creates a TokenHandler that reads a single rune from
// the input and hands it to the provided callback function. The rune is
// accepted, and a match is reported, only when reading succeeded and the
// callback returned true for the rune.
//
// The callback has the same signature as the unicode.Is* functions, which
// means that those can be passed in directly, for example:
// MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) TokenHandler {
	return func(t *TokenAPI) bool {
		r, err := t.NextRune()
		// The callback is only consulted when a rune could actually be read.
		if err != nil || !callback(r) {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchEndOfLine creates a TokenHandler that matches a newline ("\r\n" or "\n") or EOF.
@ -649,7 +682,7 @@ func MatchDigit() TokenHandler {
// MatchDigits creates a TokenHandler that checks if one or more digits can be read
// from the input.
func MatchDigits() TokenHandler {
return MatchOneOrMore(MatchRuneRange('0', '9'))
return MatchOneOrMore(MatchDigit())
}
// MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal
@ -707,6 +740,24 @@ func MatchASCIIUpper() TokenHandler {
return MatchRuneRange('A', 'Z')
}
// MatchUnicodeLetter creates a TokenHandler that matches a single rune from
// the input when that rune is a unicode letter, as reported by
// unicode.IsLetter(rune).
func MatchUnicodeLetter() TokenHandler {
return MatchRuneByCallback(unicode.IsLetter)
}
// MatchUnicodeUpper creates a TokenHandler that matches a single rune from
// the input when that rune is an upper case unicode letter, as reported by
// unicode.IsUpper(rune).
func MatchUnicodeUpper() TokenHandler {
return MatchRuneByCallback(unicode.IsUpper)
}
// MatchUnicodeLower creates a TokenHandler that matches a single rune from
// the input when that rune is a lower case unicode letter, as reported by
// unicode.IsLower(rune).
func MatchUnicodeLower() TokenHandler {
return MatchRuneByCallback(unicode.IsLower)
}
// MatchHexDigit creates a TokenHandler function that checks if a single hexadecimal
// digit can be read from the input.
func MatchHexDigit() TokenHandler {
@ -908,15 +959,15 @@ func MatchIPv6Net(normalize bool) TokenHandler {
//
// Note that if the TokenHandler does not apply, a mismatch will be reported back,
// even though we would have dropped the output anyway. So if you would like
// to drop optional whitespace, then use something like:
// to drop optional blanks (spaces and tabs), then use something like:
//
// M.Drop(C.Opt(A.Whitespace))
// M.Drop(C.Opt(A.Blank))
//
// instead of:
//
// M.Drop(A.Whitespace)
// M.Drop(A.Blank)
//
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
// Since A.Blank is defined as "a single space or tab", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler TokenHandler) TokenHandler {
@ -960,8 +1011,8 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo
}
// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and all leading and trailing whitespace charcters,
// as defined by Unicode (spaces, tabs, carriage returns and newlines) are removed from it.
// If it does, then its output is taken and all leading and trailing whitespace characters,
// as defined by Unicode, are removed from it.
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
return ModifyByCallback(handler, strings.TrimSpace)
}

View File

@ -157,9 +157,12 @@ func TestAtoms(t *testing.T) {
{"|", a.Pipe, true, "|"},
{"}", a.CurlyClose, true, "}"},
{"~", a.Tilde, true, "~"},
{" \t \t \r\n", a.Whitespace, true, " \t \t "},
{"\r", a.WhitespaceAndNewlines, false, ""},
{" \t\r\n \r", a.WhitespaceAndNewlines, true, " \t\r\n "},
{"\t \t \r\n", a.Blank, true, "\t"},
{" \t \t \r\n", a.Blanks, true, " \t \t "},
{"xxx", a.Whitespace, false, ""},
{" ", a.Whitespace, true, " "},
{"\t", a.Whitespace, true, "\t"},
{" \t\r\n \r\v\f ", a.Whitespace, true, " \t\r\n \r\v\f "},
{"", a.EndOfLine, true, ""},
{"\r\n", a.EndOfLine, true, "\r\n"},
{"\n", a.EndOfLine, true, "\n"},
@ -182,6 +185,13 @@ func TestAtoms(t *testing.T) {
{"Z", a.ASCIIUpper, true, "Z"},
{"a", a.ASCIIUpper, false, ""},
{"z", a.ASCIIUpper, false, ""},
{"1", a.Letter, false, ""},
{"a", a.Letter, true, "a"},
{"Ø", a.Letter, true, "Ø"},
{"Ë", a.Lower, false, ""},
{"ë", a.Lower, true, "ë"},
{"ä", a.Upper, false, "ä"},
{"Ä", a.Upper, true, "Ä"},
{"0", a.HexDigit, true, "0"},
{"9", a.HexDigit, true, "9"},
{"a", a.HexDigit, true, "a"},
@ -403,16 +413,16 @@ func TestCombination(t *testing.T) {
c.Opt(a.SquareOpen),
m.Trim(
c.Seq(
c.Opt(a.Whitespace),
c.Opt(a.Blanks),
c.Rep(3, a.AngleClose),
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s))
}),
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
m.Replace(c.Separated(a.Comma, c.Opt(a.Blanks)), ", "),
m.ToUpper(c.Min(1, a.ASCIILower)),
m.Drop(a.Excl),
c.Rep(3, a.AngleOpen),
c.Opt(a.Whitespace),
c.Opt(a.Blanks),
),
" \t",
),