Added some more modifiers (ModifyReplace and ModifyByCallback).

2019-05-24 15:57:54 +00:00 · 2019-05-24 15:57:54 +00:00 · 6fe3c16a6d
parent c164f320cb
commit 6fe3c16a6d
5 changed files with 255 additions and 176 deletions
--- a/matcher_builtin.go
+++ b/matcher_builtin.go
@ -51,145 +51,6 @@ var C = struct {
 	Separated:  MatchSeparated,
 }
 // A provides convenient access to a range of atoms that can be used to
 // build combinators or parsing rules.
 //
 // In parsekit, an atom is defined as a ready to go Matcher function.
 //
 // When using A in your own parser, then it is advised to create
 // a variable in your own package to reference it:
 //
 //     var a = parsekit.A
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var A = struct {
 	EndOfFile             Matcher
 	AnyRune               Matcher
 	Space                 Matcher
 	Tab                   Matcher
 	CR                    Matcher
 	LF                    Matcher
 	CRLF                  Matcher
 	Excl                  Matcher
 	DoubleQuote           Matcher
 	Hash                  Matcher
 	Dollar                Matcher
 	Percent               Matcher
 	Amp                   Matcher
 	SingleQuote           Matcher
 	RoundOpen             Matcher
 	RoundClose            Matcher
 	Asterisk              Matcher
 	Plus                  Matcher
 	Comma                 Matcher
 	Minus                 Matcher
 	Dot                   Matcher
 	Slash                 Matcher
 	Colon                 Matcher
 	Semicolon             Matcher
 	AngleOpen             Matcher
 	Equal                 Matcher
 	AngleClose            Matcher
 	Question              Matcher
 	At                    Matcher
 	SquareOpen            Matcher
 	Backslash             Matcher
 	SquareClose           Matcher
 	Caret                 Matcher
 	Underscore            Matcher
 	Backquote             Matcher
 	CurlyOpen             Matcher
 	Pipe                  Matcher
 	CurlyClose            Matcher
 	Tilde                 Matcher
 	Newline               Matcher
 	Whitespace            Matcher
 	WhitespaceAndNewlines Matcher
 	EndOfLine             Matcher
 	Digit                 Matcher
 	ASCII                 Matcher
 	ASCIILower            Matcher
 	ASCIIUpper            Matcher
 	HexDigit              Matcher
 }{
 	EndOfFile:             MatchEndOfFile(),
 	AnyRune:               MatchAnyRune(),
 	Space:                 C.Rune(' '),
 	Tab:                   C.Rune('\t'),
 	CR:                    C.Rune('\r'),
 	LF:                    C.Rune('\n'),
 	CRLF:                  C.Str("\r\n"),
 	Excl:                  C.Rune('!'),
 	DoubleQuote:           C.Rune('"'),
 	Hash:                  C.Rune('#'),
 	Dollar:                C.Rune('$'),
 	Percent:               C.Rune('%'),
 	Amp:                   C.Rune('&'),
 	SingleQuote:           C.Rune('\''),
 	RoundOpen:             C.Rune('('),
 	RoundClose:            C.Rune(')'),
 	Asterisk:              C.Rune('*'),
 	Plus:                  C.Rune('+'),
 	Comma:                 C.Rune(','),
 	Minus:                 C.Rune('-'),
 	Dot:                   C.Rune('.'),
 	Slash:                 C.Rune('/'),
 	Colon:                 C.Rune(':'),
 	Semicolon:             C.Rune(';'),
 	AngleOpen:             C.Rune('<'),
 	Equal:                 C.Rune('='),
 	AngleClose:            C.Rune('>'),
 	Question:              C.Rune('?'),
 	At:                    C.Rune('@'),
 	SquareOpen:            C.Rune('['),
 	Backslash:             C.Rune('\\'),
 	SquareClose:           C.Rune(']'),
 	Caret:                 C.Rune('^'),
 	Underscore:            C.Rune('_'),
 	Backquote:             C.Rune('`'),
 	CurlyOpen:             C.Rune('{'),
 	Pipe:                  C.Rune('|'),
 	CurlyClose:            C.Rune('}'),
 	Tilde:                 C.Rune('~'),
 	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
 	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
 	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
 	Digit:                 C.RuneRange('0', '9'),
 	ASCII:                 C.RuneRange('\x00', '\x7F'),
 	ASCIILower:            C.RuneRange('a', 'z'),
 	ASCIIUpper:            C.RuneRange('A', 'Z'),
 	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
 }
 // M provides convenient access to a range of modifiers that can be
 // used when creating Matcher functions.
 //
 // In parsekit, a modifier is defined as a Matcher function that modifies the
 // resulting output of another Matcher in some way. It does not do any matching
 // against input of its own.
 //
 // When using M in your own parser, then it is advised to create
 // a variable in your own package to reference it:
 //
 //     var m = parsekit.M
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var M = struct {
 	Drop      func(Matcher) Matcher
 	Trim      func(Matcher, string) Matcher
 	TrimLeft  func(Matcher, string) Matcher
 	TrimRight func(Matcher, string) Matcher
 	ToLower   func(Matcher) Matcher
 	ToUpper   func(Matcher) Matcher
 }{
 	Drop:      ModifyDrop,
 	Trim:      ModifyTrim,
 	TrimLeft:  ModifyTrimLeft,
 	TrimRight: ModifyTrimRight,
 	ToLower:   ModifyToLower,
 	ToUpper:   ModifyToUpper,
 }
 // MatchRune creates a Matcher function that checks if the next rune from
 // the input matches the provided rune.
 func MatchRune(expected rune) Matcher {
@ -417,6 +278,116 @@ func MatchSeparated(separated Matcher, separator Matcher) Matcher {
 	return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
 }
 // A provides convenient access to a range of atoms that can be used to
 // build combinators or parsing rules.
 //
 // In parsekit, an atom is defined as a ready to go Matcher function.
 //
 // When using A in your own parser, then it is advised to create
 // a variable in your own package to reference it:
 //
 //     var a = parsekit.A
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 //
 // Every field declared in the struct must have an initializer in the literal
 // below. A field that is left out keeps its zero value, which for a Matcher
 // is nil, and invoking a nil Matcher panics.
 var A = struct {
 	EndOfFile             Matcher
 	AnyRune               Matcher
 	Space                 Matcher
 	Tab                   Matcher
 	CR                    Matcher
 	LF                    Matcher
 	CRLF                  Matcher
 	Excl                  Matcher
 	DoubleQuote           Matcher
 	Hash                  Matcher
 	Dollar                Matcher
 	Percent               Matcher
 	Amp                   Matcher
 	SingleQuote           Matcher
 	RoundOpen             Matcher
 	RoundClose            Matcher
 	Asterisk              Matcher
 	Plus                  Matcher
 	Comma                 Matcher
 	Minus                 Matcher
 	Dot                   Matcher
 	Slash                 Matcher
 	Colon                 Matcher
 	Semicolon             Matcher
 	AngleOpen             Matcher
 	Equal                 Matcher
 	AngleClose            Matcher
 	Question              Matcher
 	At                    Matcher
 	SquareOpen            Matcher
 	Backslash             Matcher
 	SquareClose           Matcher
 	Caret                 Matcher
 	Underscore            Matcher
 	Backquote             Matcher
 	CurlyOpen             Matcher
 	Pipe                  Matcher
 	CurlyClose            Matcher
 	Tilde                 Matcher
 	Newline               Matcher
 	Whitespace            Matcher
 	WhitespaceAndNewlines Matcher
 	EndOfLine             Matcher
 	Digit                 Matcher
 	ASCII                 Matcher
 	ASCIILower            Matcher
 	ASCIIUpper            Matcher
 	HexDigit              Matcher
 }{
 	EndOfFile:             MatchEndOfFile(),
 	AnyRune:               MatchAnyRune(),
 	Space:                 C.Rune(' '),
 	Tab:                   C.Rune('\t'),
 	CR:                    C.Rune('\r'),
 	LF:                    C.Rune('\n'),
 	CRLF:                  C.Str("\r\n"),
 	Excl:                  C.Rune('!'),
 	DoubleQuote:           C.Rune('"'),
 	Hash:                  C.Rune('#'),
 	Dollar:                C.Rune('$'),
 	Percent:               C.Rune('%'),
 	Amp:                   C.Rune('&'),
 	SingleQuote:           C.Rune('\''),
 	RoundOpen:             C.Rune('('),
 	RoundClose:            C.Rune(')'),
 	Asterisk:              C.Rune('*'),
 	Plus:                  C.Rune('+'),
 	Comma:                 C.Rune(','),
 	Minus:                 C.Rune('-'),
 	Dot:                   C.Rune('.'),
 	Slash:                 C.Rune('/'),
 	Colon:                 C.Rune(':'),
 	Semicolon:             C.Rune(';'),
 	AngleOpen:             C.Rune('<'),
 	Equal:                 C.Rune('='),
 	AngleClose:            C.Rune('>'),
 	Question:              C.Rune('?'),
 	At:                    C.Rune('@'),
 	SquareOpen:            C.Rune('['),
 	Backslash:             C.Rune('\\'),
 	SquareClose:           C.Rune(']'),
 	Caret:                 C.Rune('^'),
 	Underscore:            C.Rune('_'),
 	Backquote:             C.Rune('`'),
 	CurlyOpen:             C.Rune('{'),
 	Pipe:                  C.Rune('|'),
 	CurlyClose:            C.Rune('}'),
 	Tilde:                 C.Rune('~'),
 	// A newline is either CRLF or a bare LF, the same two forms that
 	// WhitespaceAndNewlines and EndOfLine accept. CRLF is tried first so
 	// that the "\r" is consumed together with its "\n".
 	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
 	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
 	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
 	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
 	Digit:                 C.RuneRange('0', '9'),
 	ASCII:                 C.RuneRange('\x00', '\x7F'),
 	ASCIILower:            C.RuneRange('a', 'z'),
 	ASCIIUpper:            C.RuneRange('A', 'Z'),
 	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
 }
 // MatchEndOfFile creates a Matcher that checks if the end of the input data
 // has been reached. This Matcher will never produce output. It only reports
 // a successful or a failing match through its boolean return value.
@ -442,6 +413,39 @@ func MatchAnyRune() Matcher {
 	}
 }
 // M provides convenient access to a range of modifiers that can be
 // used when creating Matcher functions.
 //
 // In parsekit, a modifier is defined as a Matcher function that modifies the
 // resulting output of another Matcher in some way. It does not do any matching
 // against input of its own.
 //
 // When using M in your own parser, then it is advised to create
 // a variable in your own package to reference it:
 //
 //     var m = parsekit.M
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 //
 // Each field refers to one of the package-level Modify* functions:
 //
 //     Drop             ModifyDrop: discards the output of the wrapped Matcher
 //     Trim             ModifyTrim: takes a Matcher and a cutset string
 //     TrimLeft         ModifyTrimLeft: takes a Matcher and a cutset string
 //     TrimRight        ModifyTrimRight: takes a Matcher and a cutset string
 //     ToLower          ModifyToLower: passes the output through strings.ToLower
 //     ToUpper          ModifyToUpper: passes the output through strings.ToUpper
 //     Replace          ModifyReplace: replaces the output by a fixed string
 //     ModifyByCallback ModifyByCallback: replaces the output by the return
 //                      value of a caller-provided func(string) string
 //
 // Note that ModifyByCallback is the only field that keeps the "Modify"
 // prefix in its name.
 var M = struct {
 	Drop             func(Matcher) Matcher
 	Trim             func(Matcher, string) Matcher
 	TrimLeft         func(Matcher, string) Matcher
 	TrimRight        func(Matcher, string) Matcher
 	ToLower          func(Matcher) Matcher
 	ToUpper          func(Matcher) Matcher
 	Replace          func(Matcher, string) Matcher
 	ModifyByCallback func(Matcher, func(string) string) Matcher
 }{
 	Drop:             ModifyDrop,
 	Trim:             ModifyTrim,
 	TrimLeft:         ModifyTrimLeft,
 	TrimRight:        ModifyTrimRight,
 	ToLower:          ModifyToLower,
 	ToUpper:          ModifyToUpper,
 	Replace:          ModifyReplace,
 	ModifyByCallback: ModifyByCallback,
 }
 // ModifyDrop creates a Matcher that checks if the provided Matcher applies.
 // If it does, then its output is discarded completely.
 //
@ -459,7 +463,7 @@ func MatchAnyRune() Matcher {
 // string "bork" would not match against the second form, but " bork" would.
 // In both cases, it would match the first form.
 func ModifyDrop(matcher Matcher) Matcher {
-	return modifyStrCallback(matcher, func(s string) string {
+	return ModifyByCallback(matcher, func(s string) string {
 		return ""
 	})
 }
@ -495,24 +499,37 @@ func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) M
 		}
 		return s
 	}
-	return modifyStrCallback(matcher, modfunc)
+	return ModifyByCallback(matcher, modfunc)
 }
 // ModifyToUpper creates a Matcher that checks if the provided Matcher applies.
 // If it does, then its output is taken and all of its characters are
 // converted into upper case.
 func ModifyToUpper(matcher Matcher) Matcher {
-	return modifyStrCallback(matcher, strings.ToUpper)
+	return ModifyByCallback(matcher, strings.ToUpper)
 }
 // ModifyToLower creates a Matcher that checks if the provided Matcher applies.
 // If it does, then its output is taken and all of its characters are
 // converted into lower case.
 func ModifyToLower(matcher Matcher) Matcher {
-	return modifyStrCallback(matcher, strings.ToLower)
+	return ModifyByCallback(matcher, strings.ToLower)
 }
-func modifyStrCallback(matcher Matcher, modfunc func(string) string) Matcher {
+// ModifyReplace creates a Matcher that checks if the provided Matcher applies.
 // If it does, then its output is replaced by the provided string.
 func ModifyReplace(matcher Matcher, s string) Matcher {
 	// The callback ignores whatever the wrapped matcher produced and
 	// always hands back the fixed replacement string.
 	fixed := func(string) string { return s }
 	return ModifyByCallback(matcher, fixed)
 }
 // ModifyByCallback creates a Matcher that checks if the provided matcher applies.
 // If it does, then its output is taken and it is fed to the provided modfunc.
 // This is a simple function that takes a string on input and returns a possibly
 // modified string on output. The return value of the modfunc will replace the
 // resulting output.
 func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
 	return func(m *MatchDialog) bool {
 		child := m.Fork()
 		if matcher(child) {
--- a/matcher_builtin_test.go
+++ b/matcher_builtin_test.go
@ -7,21 +7,6 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit"
 )
 func ExampleMatchAnyRune() {
 	parser := parsekit.New(
 		func(p *parsekit.P) {
 			p.Expects("Any valid rune")
 			if p.On(a.AnyRune).Accept().End() {
 				p.EmitLiteral(TestItem)
 			}
 		})
 	run := parser.Parse("¡Any / valid / character will dö!")
 	match, _, ok := run.Next()
 	if ok {
 		fmt.Printf("Match = %q\n", match)
 	}
 }
 func TestCombinators(t *testing.T) {
 	RunMatcherTests(t, []MatcherTest{
 		{"xxx", c.Rune('x'), true, "x"},
@ -93,17 +78,6 @@ func TestCombinators(t *testing.T) {
 	})
 }
 func TestModifiers(t *testing.T) {
 	RunMatcherTests(t, []MatcherTest{
 		{"  trim  ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
 		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
 		{"  trim  ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim  "},
 		{"  trim  ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "  trim"},
 		{" \t  trim  \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t  trim"},
 		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
 	})
 }
 func TestAtoms(t *testing.T) {
 	RunMatcherTests(t, []MatcherTest{
 		{"", a.EndOfFile, true, ""},
@ -183,6 +157,51 @@ func TestAtoms(t *testing.T) {
 	})
 }
 func TestModifiers(t *testing.T) {
 	RunMatcherTests(t, []MatcherTest{
 		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
 		{"  trim  ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
 		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
 		{"  trim  ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim  "},
 		{"  trim  ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "  trim"},
 		{" \t  trim  \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t  trim"},
 		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
 		{"abcdefghijk", m.ModifyByCallback(c.Str("abc"), func(s string) string { return "X" }), true, "X"},
 		{"NoTaLlUpPeR", m.ToUpper(c.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
 		{"NoTaLlLoWeR", m.ToLower(c.StrNoCase("NOTALLlower")), true, "notalllower"},
 	})
 }
 // I know, this is hell, but that's the whole point for this test :->
 func TestCombination(t *testing.T) {
 	// The sub-matchers are built in the same order in which they appear in
 	// the final sequence, each under its own name.
 	opening := c.Opt(a.SquareOpen)
 	leadingSpace := c.Opt(a.Whitespace)
 	arrowsIn := c.Rep(3, a.AngleClose)
 	// One or more case-insensitive "hello"s, replaced by their total length.
 	helloLength := m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
 		return fmt.Sprintf("%d", len(s))
 	})
 	// Any comma-separated run of optional whitespace collapses into ", ".
 	separator := m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", ")
 	word := m.ToUpper(c.Min(1, a.ASCIILower))
 	exclamation := m.Drop(a.Excl)
 	arrowsOut := c.Rep(3, a.AngleOpen)
 	trailingSpace := c.Opt(a.Whitespace)
 	inner := c.Seq(
 		leadingSpace, arrowsIn, helloLength, separator,
 		word, exclamation, arrowsOut, trailingSpace,
 	)
 	trimmed := m.Trim(inner, " \t")
 	closing := c.Opt(a.SquareClose)
 	demonic := c.Seq(opening, trimmed, closing)
 	cases := []MatcherTest{
 		{"[ \t >>>Hello, world!<<<   ]", demonic, true, "[>>>5, WORLD<<<]"},
 		{"[ \t >>>Hello, world!<<<   ", demonic, true, "[>>>5, WORLD<<<"},
 		{">>>HellohellO, world!<<<   ]", demonic, true, ">>>10, WORLD<<<]"},
 		{"[ \t >>>HellohellO , , , world!<<<   ", demonic, true, "[>>>10, WORLD<<<"},
 	}
 	RunMatcherTests(t, cases)
 }
 func TestSequenceOfRunes(t *testing.T) {
 	sequence := c.Seq(
 		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
@ -206,3 +225,46 @@ func TestSequenceOfRunes(t *testing.T) {
 		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
 	}
 }
 func ExampleMatchAnyRune() {
 	// The state handler accepts a single rune and emits it as a literal item.
 	parser := parsekit.New(func(p *parsekit.P) {
 		p.Expects("Any valid rune")
 		if accepted := p.On(a.AnyRune).Accept().End(); accepted {
 			p.EmitLiteral(TestItem)
 		}
 	})
 	match, _, ok := parser.Parse("¡Any / valid / character will dö!").Next()
 	if !ok {
 		return
 	}
 	// a.AnyRune matches exactly 1 rune, so the match is '¡'.
 	fmt.Printf("Match = %q\n", match)
 }
 // ExampleModifyToUpper shows how modifiers can be used to normalize the
 // output of a parser, using a Dutch postcode as the input format.
 func ExampleModifyToUpper() {
 	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
 	// The numbers never start with a zero.
 	digitNotZero := c.RuneRange('1', '9')
 	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
 	// It is good form to write the letters in upper case.
 	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
 	letters := m.ToUpper(c.Seq(letter, letter))
 	// It is good form to use a single space between letters and numbers,
 	// but it is not mandatory.
 	space := m.Replace(c.Opt(a.Whitespace), " ")
 	// With all the building blocks, we can now build the postcode parser.
 	postcode := c.Seq(numbers, space, letters)
 	// Create a parser and let it parse some postcode inputs.
 	// This is expected to print "1234 AB" for every input, because of the
 	// built-in normalization.
 	p := parsekit.New(postcode)
 	for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
 		// Parse the loop variable, not a fixed literal, so that every
 		// input form actually goes through the parser.
 		r, _, ok := p.Parse(input).Next()
 		if !ok {
 			fmt.Printf("Input: %q, no match\n", input)
 			continue
 		}
 		// One result per line.
 		fmt.Printf("Input: %q, output: %q\n", input, r.Value)
 	}
 }
--- a/parsekit.go
+++ b/parsekit.go
@ -57,7 +57,7 @@ func makeParserForStateHandler(handler StateHandler) *Parser {
 func makeParserForMatcher(matcher Matcher) *Parser {
 	return New(StateHandler(func(p *P) {
 		p.Expects("match")
-		if p.On(matcher).Accept().RouteRep().End() {
+		if p.On(matcher).Accept().RouteRepeat().End() {
 			p.EmitLiteral(MatchedItem)
 		}
 	}))
--- a/statehandler.go
+++ b/statehandler.go
@ -93,9 +93,9 @@ func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
 	return &routeFollowupAction{chainAction: chainAction{p, true}}
 }
-// RouteRep indicates that on the next parsing cycle, the current
+// RouteRepeat indicates that on the next parsing cycle, the current
 // StateHandler must be reinvoked.
-func (p *P) RouteRep() *chainAction {
+func (p *P) RouteRepeat() *chainAction {
 	p.RouteTo(p.state)
 	return &chainAction{nil, true}
 }
--- a/statehandler_on.go
+++ b/statehandler_on.go
@ -134,11 +134,11 @@ type routeAction struct {
 	chainAction
 }
-// RouteRep indicates that on the next parsing cycle,
+// RouteRepeat indicates that on the next parsing cycle,
 // the current StateHandler must be reinvoked.
-func (a *routeAction) RouteRep() *chainAction {
+func (a *routeAction) RouteRepeat() *chainAction {
 	if a.ok {
-		return a.p.RouteRep()
+		return a.p.RouteRepeat()
 	}
 	return &chainAction{nil, false}
 }