Added some more modifiers (ModifyReplace and ModifyByCallback).

This commit is contained in:
Maurice Makaay 2019-05-24 15:57:54 +00:00
parent c164f320cb
commit 6fe3c16a6d
5 changed files with 255 additions and 176 deletions

View File

@ -51,145 +51,6 @@ var C = struct {
Separated: MatchSeparated,
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// In parsekit, an atom is defined as a ready to go Matcher function.
//
// When using A in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
	EndOfFile             Matcher
	AnyRune               Matcher
	Space                 Matcher
	Tab                   Matcher
	CR                    Matcher
	LF                    Matcher
	CRLF                  Matcher
	Excl                  Matcher
	DoubleQuote           Matcher
	Hash                  Matcher
	Dollar                Matcher
	Percent               Matcher
	Amp                   Matcher
	SingleQuote           Matcher
	RoundOpen             Matcher
	RoundClose            Matcher
	Asterisk              Matcher
	Plus                  Matcher
	Comma                 Matcher
	Minus                 Matcher
	Dot                   Matcher
	Slash                 Matcher
	Colon                 Matcher
	Semicolon             Matcher
	AngleOpen             Matcher
	Equal                 Matcher
	AngleClose            Matcher
	Question              Matcher
	At                    Matcher
	SquareOpen            Matcher
	Backslash             Matcher
	SquareClose           Matcher
	Caret                 Matcher
	Underscore            Matcher
	Backquote             Matcher
	CurlyOpen             Matcher
	Pipe                  Matcher
	CurlyClose            Matcher
	Tilde                 Matcher
	Newline               Matcher
	Whitespace            Matcher
	WhitespaceAndNewlines Matcher
	EndOfLine             Matcher
	Digit                 Matcher
	ASCII                 Matcher
	ASCIILower            Matcher
	ASCIIUpper            Matcher
	HexDigit              Matcher
}{
	EndOfFile:             MatchEndOfFile(),
	AnyRune:               MatchAnyRune(),
	Space:                 C.Rune(' '),
	Tab:                   C.Rune('\t'),
	CR:                    C.Rune('\r'),
	LF:                    C.Rune('\n'),
	CRLF:                  C.Str("\r\n"),
	Excl:                  C.Rune('!'),
	DoubleQuote:           C.Rune('"'),
	Hash:                  C.Rune('#'),
	Dollar:                C.Rune('$'),
	Percent:               C.Rune('%'),
	Amp:                   C.Rune('&'),
	SingleQuote:           C.Rune('\''),
	RoundOpen:             C.Rune('('),
	RoundClose:            C.Rune(')'),
	Asterisk:              C.Rune('*'),
	Plus:                  C.Rune('+'),
	Comma:                 C.Rune(','),
	Minus:                 C.Rune('-'),
	Dot:                   C.Rune('.'),
	Slash:                 C.Rune('/'),
	Colon:                 C.Rune(':'),
	Semicolon:             C.Rune(';'),
	AngleOpen:             C.Rune('<'),
	Equal:                 C.Rune('='),
	AngleClose:            C.Rune('>'),
	Question:              C.Rune('?'),
	At:                    C.Rune('@'),
	SquareOpen:            C.Rune('['),
	Backslash:             C.Rune('\\'),
	SquareClose:           C.Rune(']'),
	Caret:                 C.Rune('^'),
	Underscore:            C.Rune('_'),
	Backquote:             C.Rune('`'),
	CurlyOpen:             C.Rune('{'),
	Pipe:                  C.Rune('|'),
	CurlyClose:            C.Rune('}'),
	Tilde:                 C.Rune('~'),
	// Newline was declared in the struct type but never initialized, which
	// left it a nil Matcher that panics when invoked. It matches a single
	// line ending: either "\r\n" or "\n".
	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// M provides convenient access to a range of modifiers that can be
// used when creating Matcher functions.
//
// In parsekit, a modifier is defined as a Matcher function that modifies the
// resulting output of another Matcher in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
// Drop discards the output of the wrapped Matcher completely.
Drop func(Matcher) Matcher
// Trim, TrimLeft and TrimRight take a cutset string and strip those
// characters from both sides, the left side or the right side of the
// wrapped Matcher's output respectively.
Trim func(Matcher, string) Matcher
TrimLeft func(Matcher, string) Matcher
TrimRight func(Matcher, string) Matcher
// ToLower and ToUpper convert the wrapped Matcher's output using
// strings.ToLower and strings.ToUpper.
ToLower func(Matcher) Matcher
ToUpper func(Matcher) Matcher
}{
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
}
// MatchRune creates a Matcher function that checks if the next rune from
// the input matches the provided rune.
func MatchRune(expected rune) Matcher {
@ -417,6 +278,116 @@ func MatchSeparated(separated Matcher, separator Matcher) Matcher {
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// In parsekit, an atom is defined as a ready to go Matcher function.
//
// When using A in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
	EndOfFile             Matcher
	AnyRune               Matcher
	Space                 Matcher
	Tab                   Matcher
	CR                    Matcher
	LF                    Matcher
	CRLF                  Matcher
	Excl                  Matcher
	DoubleQuote           Matcher
	Hash                  Matcher
	Dollar                Matcher
	Percent               Matcher
	Amp                   Matcher
	SingleQuote           Matcher
	RoundOpen             Matcher
	RoundClose            Matcher
	Asterisk              Matcher
	Plus                  Matcher
	Comma                 Matcher
	Minus                 Matcher
	Dot                   Matcher
	Slash                 Matcher
	Colon                 Matcher
	Semicolon             Matcher
	AngleOpen             Matcher
	Equal                 Matcher
	AngleClose            Matcher
	Question              Matcher
	At                    Matcher
	SquareOpen            Matcher
	Backslash             Matcher
	SquareClose           Matcher
	Caret                 Matcher
	Underscore            Matcher
	Backquote             Matcher
	CurlyOpen             Matcher
	Pipe                  Matcher
	CurlyClose            Matcher
	Tilde                 Matcher
	Newline               Matcher
	Whitespace            Matcher
	WhitespaceAndNewlines Matcher
	EndOfLine             Matcher
	Digit                 Matcher
	ASCII                 Matcher
	ASCIILower            Matcher
	ASCIIUpper            Matcher
	HexDigit              Matcher
}{
	EndOfFile:             MatchEndOfFile(),
	AnyRune:               MatchAnyRune(),
	Space:                 C.Rune(' '),
	Tab:                   C.Rune('\t'),
	CR:                    C.Rune('\r'),
	LF:                    C.Rune('\n'),
	CRLF:                  C.Str("\r\n"),
	Excl:                  C.Rune('!'),
	DoubleQuote:           C.Rune('"'),
	Hash:                  C.Rune('#'),
	Dollar:                C.Rune('$'),
	Percent:               C.Rune('%'),
	Amp:                   C.Rune('&'),
	SingleQuote:           C.Rune('\''),
	RoundOpen:             C.Rune('('),
	RoundClose:            C.Rune(')'),
	Asterisk:              C.Rune('*'),
	Plus:                  C.Rune('+'),
	Comma:                 C.Rune(','),
	Minus:                 C.Rune('-'),
	Dot:                   C.Rune('.'),
	Slash:                 C.Rune('/'),
	Colon:                 C.Rune(':'),
	Semicolon:             C.Rune(';'),
	AngleOpen:             C.Rune('<'),
	Equal:                 C.Rune('='),
	AngleClose:            C.Rune('>'),
	Question:              C.Rune('?'),
	At:                    C.Rune('@'),
	SquareOpen:            C.Rune('['),
	Backslash:             C.Rune('\\'),
	SquareClose:           C.Rune(']'),
	Caret:                 C.Rune('^'),
	Underscore:            C.Rune('_'),
	Backquote:             C.Rune('`'),
	CurlyOpen:             C.Rune('{'),
	Pipe:                  C.Rune('|'),
	CurlyClose:            C.Rune('}'),
	Tilde:                 C.Rune('~'),
	// Newline was declared in the struct type but never initialized, which
	// left it a nil Matcher that panics when invoked. It matches a single
	// line ending: either "\r\n" or "\n".
	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that checks if the end of the input data
// has been reached. This Matcher will never produce output. It only reports
// a successful or a failing match through its boolean return value.
@ -442,6 +413,39 @@ func MatchAnyRune() Matcher {
}
}
// M provides convenient access to a range of modifiers that can be
// used when creating Matcher functions.
//
// In parsekit, a modifier is defined as a Matcher function that modifies the
// resulting output of another Matcher in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
	Drop       func(Matcher) Matcher
	Trim       func(Matcher, string) Matcher
	TrimLeft   func(Matcher, string) Matcher
	TrimRight  func(Matcher, string) Matcher
	ToLower    func(Matcher) Matcher
	ToUpper    func(Matcher) Matcher
	Replace    func(Matcher, string) Matcher
	ByCallback func(Matcher, func(string) string) Matcher

	// ModifyByCallback is an alias of ByCallback. It is kept for existing
	// callers; ByCallback follows the naming scheme of the other fields,
	// which all drop the "Modify" prefix.
	ModifyByCallback func(Matcher, func(string) string) Matcher
}{
	Drop:             ModifyDrop,
	Trim:             ModifyTrim,
	TrimLeft:         ModifyTrimLeft,
	TrimRight:        ModifyTrimRight,
	ToLower:          ModifyToLower,
	ToUpper:          ModifyToUpper,
	Replace:          ModifyReplace,
	ByCallback:       ModifyByCallback,
	ModifyByCallback: ModifyByCallback,
}
// ModifyDrop creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is discarded completely.
//
@ -459,7 +463,7 @@ func MatchAnyRune() Matcher {
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(matcher Matcher) Matcher {
return modifyStrCallback(matcher, func(s string) string {
return ModifyByCallback(matcher, func(s string) string {
return ""
})
}
@ -495,24 +499,37 @@ func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) M
}
return s
}
return modifyStrCallback(matcher, modfunc)
return ModifyByCallback(matcher, modfunc)
}
// ModifyToUpper creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and converted into upper case
// (using strings.ToUpper).
func ModifyToUpper(matcher Matcher) Matcher {
	return ModifyByCallback(matcher, strings.ToUpper)
}
// ModifyToLower creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and converted into lower case
// (using strings.ToLower).
func ModifyToLower(matcher Matcher) Matcher {
	return ModifyByCallback(matcher, strings.ToLower)
}
func modifyStrCallback(matcher Matcher, modfunc func(string) string) Matcher {
// ModifyReplace wraps the provided Matcher. When the wrapped Matcher applies,
// whatever output it produced is thrown away and the fixed string s is used
// as the output instead.
func ModifyReplace(matcher Matcher, s string) Matcher {
	// The original output is irrelevant, so the callback ignores its argument.
	substitute := func(string) string { return s }
	return ModifyByCallback(matcher, substitute)
}
// ModifyByCallback creates a Matcher that checks if the provided matcher applies.
// If it does, then its output is taken and it is fed to the provided modfunc.
// This is a simple function that takes a string on input and returns a possibly
// modified string on output. The return value of the modfunc will replace the
// resulting output.
func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
return func(m *MatchDialog) bool {
child := m.Fork()
if matcher(child) {

View File

@ -7,21 +7,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func ExampleMatchAnyRune() {
	// State handler: expect one rune of any kind and, when a.AnyRune is
	// accepted, emit it through EmitLiteral as a TestItem.
	anyRune := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if !p.On(a.AnyRune).Accept().End() {
			return
		}
		p.EmitLiteral(TestItem)
	}

	run := parsekit.New(anyRune).Parse("¡Any / valid / character will dö!")

	// Only print when the parser actually produced an item.
	if match, _, ok := run.Next(); ok {
		fmt.Printf("Match = %q\n", match)
	}
}
func TestCombinators(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
{"xxx", c.Rune('x'), true, "x"},
@ -93,17 +78,6 @@ func TestCombinators(t *testing.T) {
})
}
func TestModifiers(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
})
}
func TestAtoms(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
{"", a.EndOfFile, true, ""},
@ -183,6 +157,51 @@ func TestAtoms(t *testing.T) {
})
}
func TestModifiers(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
{"abcdefghijk", m.ModifyByCallback(c.Str("abc"), func(s string) string { return "X" }), true, "X"},
{"NoTaLlUpPeR", m.ToUpper(c.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
{"NoTaLlLoWeR", m.ToLower(c.StrNoCase("NOTALLlower")), true, "notalllower"},
})
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
demonic := c.Seq(
c.Opt(a.SquareOpen),
m.Trim(
c.Seq(
c.Opt(a.Whitespace),
c.Rep(3, a.AngleClose),
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s))
}),
m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
m.ToUpper(c.Min(1, a.ASCIILower)),
m.Drop(a.Excl),
c.Rep(3, a.AngleOpen),
c.Opt(a.Whitespace),
),
" \t",
),
c.Opt(a.SquareClose),
)
RunMatcherTests(t, []MatcherTest{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
})
}
func TestSequenceOfRunes(t *testing.T) {
sequence := c.Seq(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
@ -206,3 +225,46 @@ func TestSequenceOfRunes(t *testing.T) {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
}
}
func ExampleMatchAnyRune() {
	// State handler: expect one rune of any kind and, when a.AnyRune is
	// accepted, emit it through EmitLiteral as a TestItem.
	acceptAnyRune := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if !p.On(a.AnyRune).Accept().End() {
			return
		}
		p.EmitLiteral(TestItem)
	}

	run := parsekit.New(acceptAnyRune).Parse("¡Any / valid / character will dö!")

	// Only the leading '¡' is reported, since a.AnyRune matches exactly 1 rune.
	if match, _, ok := run.Next(); ok {
		fmt.Printf("Match = %q\n", match)
	}
}
func ExampleModifyToUpper() {
	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
	// The numbers never start with a zero.
	digitNotZero := c.RuneRange('1', '9')
	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))

	// It is good form to write the letters in upper case.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// It is good form to use a single space between letters and numbers,
	// but it is not mandatory.
	space := m.Replace(c.Opt(a.Whitespace), " ")

	// With all the building blocks, we can now build the postcode parser.
	postcode := c.Seq(numbers, space, letters)

	// Create a parser and let it parse some postcode inputs.
	// The built-in normalization is meant to turn every input into "1234 AB".
	p := parsekit.New(postcode)
	for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
		// Parse the loop's input. Parsing a fixed literal here would make
		// every iteration trivially print the same result.
		r, err, ok := p.Parse(input).Next()
		if !ok {
			// Report the failure instead of using a result that may be unset.
			fmt.Printf("Input: %q, error: %v\n", input, err)
			continue
		}
		fmt.Printf("Input: %q, output: %q\n", input, r.Value)
	}
}

View File

@ -57,7 +57,7 @@ func makeParserForStateHandler(handler StateHandler) *Parser {
func makeParserForMatcher(matcher Matcher) *Parser {
return New(StateHandler(func(p *P) {
p.Expects("match")
if p.On(matcher).Accept().RouteRep().End() {
if p.On(matcher).Accept().RouteRepeat().End() {
p.EmitLiteral(MatchedItem)
}
}))

View File

@ -93,9 +93,9 @@ func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
return &routeFollowupAction{chainAction: chainAction{p, true}}
}
// RouteRep indicates that on the next parsing cycle, the current
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked.
func (p *P) RouteRep() *chainAction {
func (p *P) RouteRepeat() *chainAction {
p.RouteTo(p.state)
return &chainAction{nil, true}
}

View File

@ -134,11 +134,11 @@ type routeAction struct {
chainAction
}
// RouteRep indicates that on the next parsing cycle,
// RouteRepeat indicates that on the next parsing cycle,
// the current StateHandler must be reinvoked.
func (a *routeAction) RouteRep() *chainAction {
func (a *routeAction) RouteRepeat() *chainAction {
if a.ok {
return a.p.RouteRep()
return a.p.RouteRepeat()
}
return &chainAction{nil, false}
}