Banged some sense into the constructors. Instead of one convoluted parsekit.New(), we now have parsekit.NewParser() and parsekit.NewMatcherWrapper(). Also playing with adding examples to the documentation.

2019-05-24 20:50:31 +00:00 · 2019-05-24 20:50:31 +00:00 · 3e87e010fb
parent 6fe3c16a6d
commit 3e87e010fb
7 changed files with 170 additions and 113 deletions
--- a/examples_test.go
+++ b/examples_test.go
@ -0,0 +1,91 @@
+package parsekit_test
+
+import (
+	"fmt"
+
+	"git.makaay.nl/mauricem/go-parsekit"
+)
+
+func ExampleItemType() {
+	// Negative ItemType values are reserved for parsekit itself, so use values
+	// of zero and up for your own types. With iota these are numbered for you:
+	const (
+		ItemWord parsekit.ItemType = iota // 0
+		ItemNumber                        // 1
+		ItemBlob                          // 2
+		// ...
+	)
+}
+
+func ExampleError() {
+	// The variable is named err, not error, so that the predeclared error
+	// type is not shadowed inside this function.
+	err := parsekit.Error{Message: "it broke down", Line: 10, Column: 42}
+
+	// Error() yields the bare message; ErrorFull() appends the position.
+	fmt.Println(err.Error())
+	fmt.Println(err.ErrorFull())
+	// Output:
+	// it broke down
+	// it broke down after line 10, column 42
+}
+
+func ExampleMatchAnyRune() {
+	// Shorthand for the parsekit atom definitions.
+	var a = parsekit.A
+
+	matchRune := func(p *parsekit.P) {
+		p.Expects("Any valid rune")
+		if accepted := p.On(a.AnyRune).Accept().End(); accepted {
+			p.EmitLiteral(TestItem)
+			p.RouteRepeat()
+		}
+	}
+	run := parsekit.NewParser(matchRune).Parse("¡Any / valid / character will dö!")
+
+	// Every item holds exactly one rune; print the first five of them.
+	for remaining := 5; remaining > 0; remaining-- {
+		item, _, _ := run.Next()
+		fmt.Printf("Match = %q\n", item.Value)
+	}
+	// Output:
+	// Match = "¡"
+	// Match = "A"
+	// Match = "n"
+	// Match = "y"
+	// Match = " "
+}
+
+func ExampleModifyToUpper() {
+	// Easy access to the parsekit definitions.
+	var c, a, m = parsekit.C, parsekit.A, parsekit.M
+
+	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
+	// The numbers never start with a zero.
+	digitNotZero := c.Except(c.Rune('0'), a.Digit)
+	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
+
+	// It is good form to write the letters in upper case.
+	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
+	letters := m.ToUpper(c.Seq(letter, letter))
+
+	// It is good form to use a single space between letters and numbers,
+	// but it is not mandatory.
+	space := m.Replace(c.Opt(a.Whitespace), " ")
+
+	// With all the building blocks, we can now build the postcode parser.
+	postcode := c.Seq(numbers, space, letters)
+
+	// Create a parser and let it parse some postcode inputs.
+	// This will print "1234 AB" for every input, because of the built-in normalization.
+	p := parsekit.NewMatcherWrapper(postcode)
+	for _, input := range []string{"1234  AB", "1234Ab", "1234 ab", "1234ab"} {
+		output, _, _ := p.Match(input)
+		fmt.Printf("Input: %q, output: %q\n", input, output)
+	}
+	// Output:
+	// Input: "1234  AB", output: "1234 AB"
+	// Input: "1234Ab", output: "1234 AB"
+	// Input: "1234 ab", output: "1234 AB"
+	// Input: "1234ab", output: "1234 AB"
+}
--- a/matcher_builtin.go
+++ b/matcher_builtin.go
@ -31,7 +31,8 @@ var C = struct {
 	ZeroOrMore func(Matcher) Matcher
 	OneOrMore  func(Matcher) Matcher
 	MinMax     func(int, int, Matcher) Matcher
-	Separated  func(Matcher, Matcher) Matcher
+	Separated  func(separated Matcher, separator Matcher) Matcher
+	Except     func(except Matcher, matcher Matcher) Matcher
 }{
 	Rune:       MatchRune,
 	Runes:      MatchRunes,
@ -49,6 +50,7 @@ var C = struct {
 	OneOrMore:  MatchOneOrMore,
 	MinMax:     MatchMinMax,
 	Separated:  MatchSeparated,
+	Except:     MatchExcept,
 }

 // MatchRune creates a Matcher function that checks if the next rune from
@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
 // Matchers of one type (the separated), separated by Matches of another type
 // (the separator). All matches (separated + separator) are included in the
 // output.
-func MatchSeparated(separated Matcher, separator Matcher) Matcher {
+func MatchSeparated(separator Matcher, separated Matcher) Matcher {
 	return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
 }

+// MatchExcept creates a Matcher that applies the provided matcher to the
+// upcoming input, unless the except Matcher applies to that same input.
+// The except Matcher is tried first, on a fork of the MatchDialog. When it
+// applies, the match as a whole is treated as a mismatch and the provided
+// matcher is not invoked at all.
+func MatchExcept(except Matcher, matcher Matcher) Matcher {
+	return func(m *MatchDialog) bool {
+		excluded := except(m.Fork())
+		// Short-circuits: matcher only runs when except did not apply.
+		return !excluded && matcher(m)
+	}
+}
+
 // A provides convenient access to a range of atoms that can be used to
 // build combinators or parsing rules.
 //
--- a/matcher_builtin_test.go
+++ b/matcher_builtin_test.go
@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
 		{"ghijkl", c.Opt(c.Rune('h')), true, ""},
 		{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
 		{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
-		{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
+		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
 		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
 		{"  ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
 		{"  ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
 				m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
 					return fmt.Sprintf("%d", len(s))
 				}),
-				m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
+				m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
 				m.ToUpper(c.Min(1, a.ASCIILower)),
 				m.Drop(a.Excl),
 				c.Rep(3, a.AngleOpen),
@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
 		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
 	)
 	input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-	parser := parsekit.New(func(p *parsekit.P) {
+	parser := parsekit.NewParser(func(p *parsekit.P) {
 		p.Expects("Sequence of runes")
 		if p.On(sequence).Accept().End() {
 			p.EmitLiteral(TestItem)
@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
 		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
 	}
 }
-
-func ExampleMatchAnyRune() {
-	handler := func(p *parsekit.P) {
-		p.Expects("Any valid rune")
-		if p.On(a.AnyRune).Accept().End() {
-			p.EmitLiteral(TestItem)
-		}
-	}
-	parser := parsekit.New(handler)
-	run := parser.Parse("¡Any / valid / character will dö!")
-	match, _, ok := run.Next()
-
-	// This will output '¡', since a.AnyRune matches exactly 1 rune.
-	if ok {
-		fmt.Printf("Match = %q\n", match)
-	}
-}
-
-func ExampleModifyToUpper() {
-	// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
-	// The numbers never start with a zero.
-	digitNotZero := c.RuneRange('1', '9')
-	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
-
-	// It is good form to write the letters in upper case.
-	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
-	letters := m.ToUpper(c.Seq(letter, letter))
-
-	// It is good form to use a single space between letters and numbers,
-	// but it is not mandatory.
-	space := m.Replace(c.Opt(a.Whitespace), " ")
-
-	// With all the building blocks, we can now build the postcode parser.
-	postcode := c.Seq(numbers, space, letters)
-
-	// Create a parser and let is parse some postcode inputs.
-	// This will print "1234 AB" for every input, because of the built-in normalization.
-	p := parsekit.New(postcode)
-	for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
-		r, _, _ := p.Parse("1234 AB").Next()
-		fmt.Printf("Input: %q, output: %q", input, r.Value)
-	}
-}
--- a/parsekit.go
+++ b/parsekit.go
@ -7,63 +7,25 @@ import (
 )

 // Parser is the top-level struct that holds the configuration for a parser.
-// The Parser can be instantiated using the parsekit.New() method.
-//
-// To start parsing input data, use the method Parser.Parse().
+// The Parser can be instantiated using the parsekit.NewParser() method.
 type Parser struct {
 	startState StateHandler // the function that handles the very first state
 }

-// New instantiates a new Parser.
-// The logic parameter provides the parsing logic to apply. This can be:
+// NewParser instantiates a new Parser.
 //
-// 1) A StateHandler function: in this case, a state machine-style
-// recursive descent parser is created, in which StateHandler functions
-// are used to move the state machine forward during parsing.
-// This type of parser offers a lot of flexibility and it is possible to
-// emit multiple items from the parse flow.
-//
-// This style of parser is typically used for parsing languages and
+// The Parser is a state machine-style recursive descent parser, in which
+// StateHandler functions are used to move the state machine forward during
+// parsing. This style of parser is typically used for parsing languages and
 // structured data formats (like json, toml, etc.)
 //
-// 2) A Matcher function: in this case, a parser/combinator-style parser
-// is created, which can be used to match against the provided logic.
-// The parser can only check input against the Matcher function, and
-// reports back a successful match or a failure.
-//
-// This style of parser can typically be used for validation and normalization
-// of input data. However, when you are about to use parsekit for that
-// task, consider using regular expressions instead. They might serve
-// you better.
-func New(logic interface{}) *Parser {
-	switch logic := logic.(type) {
-	case func(*P):
-		return makeParserForStateHandler(logic)
-	case StateHandler:
-		return makeParserForStateHandler(logic)
-	case func(m *MatchDialog) bool:
-		return makeParserForMatcher(logic)
-	case Matcher:
-		return makeParserForMatcher(logic)
-	default:
-		panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
-	}
-}
-
-func makeParserForStateHandler(handler StateHandler) *Parser {
-	return &Parser{startState: handler}
-}
-
-func makeParserForMatcher(matcher Matcher) *Parser {
-	return New(StateHandler(func(p *P) {
-		p.Expects("match")
-		if p.On(matcher).Accept().RouteRepeat().End() {
-			p.EmitLiteral(MatchedItem)
-		}
-	}))
+// To start parsing input data, use the method Parser.Parse().
+func NewParser(startState StateHandler) *Parser {
+	return &Parser{startState: startState}
 }

 // Run represents a single parse run for a Parser.
+// TODO rename to ParseRun
 type Run struct {
 	p *P // a struct holding the internal state of a parse run
 }
@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {

 func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
 	switch {
-	case i.Type == ItemEOF:
+	case i.Type == itemEOF:
 		return i, nil, false
-	case i.Type == ItemError:
+	case i.Type == itemError:
 		run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
 		return i, run.p.err, false
 	default:
@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
 	run.p.expecting = ""
 	run.p.state(run.p)
 }
+
+// MatcherWrapper is the top-level struct that holds the configuration for
+// a parser that is based solely on a Matcher function.
+// The MatcherWrapper can be instantiated using the
+// parsekit.NewMatcherWrapper() method.
+//
+// To match input data against the wrapped Matcher function, use the method
+// MatcherWrapper.Match().
+type MatcherWrapper struct {
+	parser *Parser
+}
+
+// NewMatcherWrapper creates a MatcherWrapper for the provided Matcher.
+//
+// The Matcher is wrapped in a Parser that has a single state handler. When
+// the Matcher applies to the input, the handler accepts the match and emits
+// it as one literal item. Use MatcherWrapper.Match() to run an input string
+// against the wrapped Matcher.
+func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
+	return &MatcherWrapper{parser: NewParser(func(p *P) {
+		p.Expects("match")
+		if p.On(matcher).Accept().End() {
+			p.EmitLiteral(0) // ItemType is irrelevant
+		}
+	})}
+}
+
+// Match parses input with the wrapped Matcher; the bool reports a match.
+func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
+	item, err, matched := w.parser.Parse(input).Next()
+	if matched {
+		return item.Value, nil, true
+	}
+	return "", err, false
+}
--- a/parsekit_test.go
+++ b/parsekit_test.go
@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
 }

 func RunMatcherTest(t *testing.T, test MatcherTest) {
-	parser := parsekit.New(test.matcher).Parse(test.input)
-	item, err, ok := parser.Next()
+	output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)

 	if test.mustMatch {
 		if !ok {
 			t.Errorf("Test %q failed with error: %s", test.input, err)
-		} else if item.Type != parsekit.MatchedItem {
-			t.Errorf("Test %q failed: should match, but it didn't", test.input)
-		} else if item.Value != test.expected {
-			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
+		} else if output != test.expected {
+			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
 		}
 	} else {
 		if ok {
--- a/statehandler.go
+++ b/statehandler.go
@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
 	p.RouteTo(func(p *P) {
 		p.Expects("end of file")
 		if p.On(A.EndOfFile).Stay().End() {
-			p.Emit(ItemEOF, "EOF")
+			p.Emit(itemEOF, "EOF")
 		}
 	})
 }
--- a/statehandler_emit.go
+++ b/statehandler_emit.go
@ -5,19 +5,19 @@ import (
 )

 // ItemType represents the type of a parser Item.
+//
+// When creating your own ItemType values, then make use of positive integer
+// values. Negative values are possible, but they are reserved for internal
+// use by parsekit.
 type ItemType int

-// ItemEOF is a built-in parser item type that is used for flagging that the
+// itemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
-const ItemEOF ItemType = -1
+const itemEOF ItemType = -1

-// ItemError is a built-in parser item type that is used for flagging that
+// itemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
-const ItemError ItemType = -2
-
-// MatchedItem is a built-in parser item type that is used for indicating a
-// successful match when using a parser that is based on a Matcher.
-const MatchedItem ItemType = -3
+const itemError ItemType = -2

 // Item represents an item that can be emitted from the parser.
 type Item struct {
@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
 // EmitError emits a Parser error item to the client.
 func (p *P) EmitError(format string, args ...interface{}) {
 	message := fmt.Sprintf(format, args...)
-	p.Emit(ItemError, message)
+	p.Emit(itemError, message)
 }

 // UnexpectedInput is used by a StateHandler function to emit an error item