Banged some sense into the constructors. Instead of one convoluted parsekit.New(), we now have parsekit.NewParser() and parsekit.NewMatcherWrapper(). Also playing with adding examples to the documentation.

2019-05-24 20:50:31 +00:00 · 2019-05-24 20:50:31 +00:00 · 3e87e010fb
parent 6fe3c16a6d
commit 3e87e010fb
7 changed files with 170 additions and 113 deletions
--- a/examples_test.go
+++ b/examples_test.go
@ -0,0 +1,91 @@
 package parsekit_test
 import (
 	"fmt"
 	"git.makaay.nl/mauricem/go-parsekit"
 )
 func ExampleItemType() {
 	// Define your own ItemTypes using non-negative values; negative values
 	// are reserved for internal use by parsekit. Ideally, use iota (which
 	// starts counting at 0) for easy automatic value management like this:
 	const (
 		ItemWord parsekit.ItemType = iota
 		ItemNumber
 		ItemBlob
 		// ...
 	)
 }
 func ExampleError() {
 	// Named err rather than error, so the predeclared error type is not
 	// shadowed inside this function.
 	err := parsekit.Error{
 		Message: "it broke down",
 		Line:    10,
 		Column:  42}
 	// Error() yields only the message, ErrorFull() adds the position.
 	fmt.Println(err.Error())
 	fmt.Println(err.ErrorFull())
 	// Output:
 	// it broke down
 	// it broke down after line 10, column 42
 }
 func ExampleMatchAnyRune() {
 	// Shorthand for the parsekit atom definitions.
 	a := parsekit.A
 	// The state handler emits every accepted rune as its own item and then
 	// routes back to itself to handle the next rune.
 	parser := parsekit.NewParser(func(p *parsekit.P) {
 		p.Expects("Any valid rune")
 		if !p.On(a.AnyRune).Accept().End() {
 			return
 		}
 		p.EmitLiteral(TestItem)
 		p.RouteRepeat()
 	})
 	run := parser.Parse("¡Any / valid / character will dö!")
 	// Only the first five items are retrieved for this example.
 	for n := 0; n < 5; n++ {
 		item, _, _ := run.Next()
 		fmt.Printf("Match = %q\n", item.Value)
 	}
 	// Output:
 	// Match = "¡"
 	// Match = "A"
 	// Match = "n"
 	// Match = "y"
 	// Match = " "
 }
 func ExampleModifyToUpper() {
 	// Easy access to the parsekit definitions.
 	var c, a, m = parsekit.C, parsekit.A, parsekit.M
 	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
 	// The numbers never start with a zero.
 	digitNotZero := c.Except(c.Rune('0'), a.Digit)
 	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
 	// It is good form to write the letters in upper case.
 	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
 	letters := m.ToUpper(c.Seq(letter, letter))
 	// It is good form to use a single space between letters and numbers,
 	// but it is not mandatory.
 	// NOTE(review): the first input below contains two spaces; this assumes
 	// a.Whitespace matches a run of whitespace - confirm.
 	space := m.Replace(c.Opt(a.Whitespace), " ")
 	// With all the building blocks, we can now build the postcode parser.
 	postcode := c.Seq(numbers, space, letters)
 	// Create a parser and let it parse some postcode inputs.
 	// This will print "1234 AB" for every input, because of the built-in normalization.
 	p := parsekit.NewMatcherWrapper(postcode)
 	for _, input := range []string{"1234  AB", "1234Ab", "1234 ab", "1234ab"} {
 		// Match the input of this iteration (not a fixed string), so that
 		// the normalization is actually exercised for every variant.
 		output, _, _ := p.Match(input)
 		fmt.Printf("Input: %q, output: %q\n", input, output)
 	}
 	// Output:
 	// Input: "1234  AB", output: "1234 AB"
 	// Input: "1234Ab", output: "1234 AB"
 	// Input: "1234 ab", output: "1234 AB"
 	// Input: "1234ab", output: "1234 AB"
 }
--- a/matcher_builtin.go
+++ b/matcher_builtin.go
@ -31,7 +31,8 @@ var C = struct {
 	ZeroOrMore func(Matcher) Matcher
 	OneOrMore  func(Matcher) Matcher
 	MinMax     func(int, int, Matcher) Matcher
-	Separated  func(Matcher, Matcher) Matcher
+	Separated  func(separated Matcher, separator Matcher) Matcher
 	Except     func(except Matcher, matcher Matcher) Matcher
 }{
 	Rune:       MatchRune,
 	Runes:      MatchRunes,
@ -49,6 +50,7 @@ var C = struct {
 	OneOrMore:  MatchOneOrMore,
 	MinMax:     MatchMinMax,
 	Separated:  MatchSeparated,
 	Except:     MatchExcept,
 }
 // MatchRune creates a Matcher function that checks if the next rune from
@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
 // Matchers of one type (the separated), separated by Matches of another type
 // (the separator). All matches (separated + separator) are included in the
 // output.
-func MatchSeparated(separated Matcher, separator Matcher) Matcher {
+func MatchSeparated(separator Matcher, separated Matcher) Matcher {
 	return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
 }
 // MatchExcept creates a Matcher that applies the provided matcher, unless
 // the except Matcher applies to the upcoming input as well. The except
 // Matcher is checked first, against a fork of the MatchDialog. When it
 // applies, the match as a whole is treated as a mismatch and the provided
 // matcher is not invoked at all.
 func MatchExcept(except Matcher, matcher Matcher) Matcher {
 	return func(m *MatchDialog) bool {
 		excluded := except(m.Fork())
 		return !excluded && matcher(m)
 	}
 }
 // A provides convenient access to a range of atoms that can be used to
 // build combinators or parsing rules.
 //
--- a/matcher_builtin_test.go
+++ b/matcher_builtin_test.go
@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
 		{"ghijkl", c.Opt(c.Rune('h')), true, ""},
 		{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
 		{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
-		{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
+		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
 		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
 		{"  ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
 		{"  ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
 				m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
 					return fmt.Sprintf("%d", len(s))
 				}),
-				m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
+				m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
 				m.ToUpper(c.Min(1, a.ASCIILower)),
 				m.Drop(a.Excl),
 				c.Rep(3, a.AngleOpen),
@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
 		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
 	)
 	input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-	parser := parsekit.New(func(p *parsekit.P) {
+	parser := parsekit.NewParser(func(p *parsekit.P) {
 		p.Expects("Sequence of runes")
 		if p.On(sequence).Accept().End() {
 			p.EmitLiteral(TestItem)
@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
 		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
 	}
 }
 func ExampleMatchAnyRune() {
 	handler := func(p *parsekit.P) {
 		p.Expects("Any valid rune")
 		if p.On(a.AnyRune).Accept().End() {
 			p.EmitLiteral(TestItem)
 		}
 	}
 	parser := parsekit.New(handler)
 	run := parser.Parse("¡Any / valid / character will dö!")
 	match, _, ok := run.Next()
 	// This will output '¡', since a.AnyRune matches exactly 1 rune.
 	if ok {
 		fmt.Printf("Match = %q\n", match)
 	}
 }
 func ExampleModifyToUpper() {
 	// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
 	// The numbers never start with a zero.
 	digitNotZero := c.RuneRange('1', '9')
 	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
 	// It is good form to write the letters in upper case.
 	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
 	letters := m.ToUpper(c.Seq(letter, letter))
 	// It is good form to use a single space between letters and numbers,
 	// but it is not mandatory.
 	space := m.Replace(c.Opt(a.Whitespace), " ")
 	// With all the building blocks, we can now build the postcode parser.
 	postcode := c.Seq(numbers, space, letters)
 	// Create a parser and let is parse some postcode inputs.
 	// This will print "1234 AB" for every input, because of the built-in normalization.
 	p := parsekit.New(postcode)
 	for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
 		r, _, _ := p.Parse("1234 AB").Next()
 		fmt.Printf("Input: %q, output: %q", input, r.Value)
 	}
 }
--- a/parsekit.go
+++ b/parsekit.go
@ -7,63 +7,25 @@ import (
 )
 // Parser is the top-level struct that holds the configuration for a parser.
-// The Parser can be instantiated using the parsekit.New() method.
+// The Parser can be instantiated using the parsekit.NewParser() method.
 //
 // To start parsing input data, use the method Parser.Parse().
 type Parser struct {
 	startState StateHandler // the function that handles the very first state
 }
-// New instantiates a new Parser.
+// NewParser instantiates a new Parser.
 // The logic parameter provides the parsing logic to apply. This can be:
 //
-// 1) A StateHandler function: in this case, a state machine-style
+// The Parser is a state machine-style recursive descent parser, in which
-// recursive descent parser is created, in which StateHandler functions
+// StateHandler functions are used to move the state machine forward during
-// are used to move the state machine forward during parsing.
+// parsing. This style of parser is typically used for parsing languages and
 // This type of parser offers a lot of flexibility and it is possible to
 // emit multiple items from the parse flow.
 //
 // This style of parser is typically used for parsing languages and
 // structured data formats (like json, toml, etc.)
 //
-// 2) A Matcher function: in this case, a parser/combinator-style parser
+// To start parsing input data, use the method Parser.Parse().
-// is created, which can be used to match against the provided logic.
+func NewParser(startState StateHandler) *Parser {
-// The parser can only check input against the Matcher function, and
+	return &Parser{startState: startState}
 // reports back a successful match or a failure.
 //
 // This style of parser can typically be used for validation and normalization
 // of input data. However, when you are about to use parsekit for that
 // task, consider using regular expressions instead. They might serve
 // you better.
 func New(logic interface{}) *Parser {
 	switch logic := logic.(type) {
 	case func(*P):
 		return makeParserForStateHandler(logic)
 	case StateHandler:
 		return makeParserForStateHandler(logic)
 	case func(m *MatchDialog) bool:
 		return makeParserForMatcher(logic)
 	case Matcher:
 		return makeParserForMatcher(logic)
 	default:
 		panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
 	}
 }
 func makeParserForStateHandler(handler StateHandler) *Parser {
 	return &Parser{startState: handler}
 }
 func makeParserForMatcher(matcher Matcher) *Parser {
 	return New(StateHandler(func(p *P) {
 		p.Expects("match")
 		if p.On(matcher).Accept().RouteRepeat().End() {
 			p.EmitLiteral(MatchedItem)
 		}
 	}))
 }
 // Run represents a single parse run for a Parser.
 // TODO rename to ParseRun
 type Run struct {
 	p *P // a struct holding the internal state of a parse run
 }
@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
 func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
 	switch {
-	case i.Type == ItemEOF:
+	case i.Type == itemEOF:
 		return i, nil, false
-	case i.Type == ItemError:
+	case i.Type == itemError:
 		run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
 		return i, run.p.err, false
 	default:
@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
 	run.p.expecting = ""
 	run.p.state(run.p)
 }
 // MatcherWrapper is the top-level struct that holds the configuration for
 // a parser that is based solely on a Matcher function.
 // The MatcherWrapper can be instantiated using the
 // parsekit.NewMatcherWrapper() function.
 //
 // To match input data against the wrapped Matcher function, use the method
 // MatcherWrapper.Match().
 type MatcherWrapper struct {
 	parser *Parser
 }
 // NewMatcherWrapper instantiates a new MatcherWrapper.
 //
 // The wrapper holds a Parser with a single state handler. That handler
 // expects a "match" and, when the provided Matcher accepts the input,
 // emits the matched input as a literal item.
 func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
 	return &MatcherWrapper{
 		parser: NewParser(func(p *P) {
 			p.Expects("match")
 			if !p.On(matcher).Accept().End() {
 				return
 			}
 			p.EmitLiteral(0) // ItemType is irrelevant
 		}),
 	}
 }
 // Match runs the wrapped Matcher function against the provided input data.
 //
 // It starts a parse run on the input and retrieves its first item. When
 // that succeeds, the item value is returned with a nil error and true.
 // Otherwise, an empty string is returned together with the error reported
 // by the parse run (if any) and false.
 func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
 	item, err, matched := w.parser.Parse(input).Next()
 	if matched {
 		return item.Value, nil, true
 	}
 	return "", err, false
 }
--- a/parsekit_test.go
+++ b/parsekit_test.go
@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
 }
 func RunMatcherTest(t *testing.T, test MatcherTest) {
-	parser := parsekit.New(test.matcher).Parse(test.input)
+	output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
 	item, err, ok := parser.Next()
 	if test.mustMatch {
 		if !ok {
 			t.Errorf("Test %q failed with error: %s", test.input, err)
-		} else if item.Type != parsekit.MatchedItem {
+		} else if output != test.expected {
-			t.Errorf("Test %q failed: should match, but it didn't", test.input)
+			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
 		} else if item.Value != test.expected {
 			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
 		}
 	} else {
 		if ok {
--- a/statehandler.go
+++ b/statehandler.go
@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
 	p.RouteTo(func(p *P) {
 		p.Expects("end of file")
 		if p.On(A.EndOfFile).Stay().End() {
-			p.Emit(ItemEOF, "EOF")
+			p.Emit(itemEOF, "EOF")
 		}
 	})
 }
--- a/statehandler_emit.go
+++ b/statehandler_emit.go
@ -5,19 +5,19 @@ import (
 )
 // ItemType represents the type of a parser Item.
 //
 // When creating your own ItemType values, then make use of positive integer
 // values. Negative values are possible, but they are reserved for internal
 // use by parsekit.
 type ItemType int
-// ItemEOF is a built-in parser item type that is used for flagging that the
+// itemEOF is a built-in parser item type that is used for flagging that the
 // end of the input was reached.
-const ItemEOF ItemType = -1
+const itemEOF ItemType = -1
-// ItemError is a built-in parser item type that is used for flagging that
+// itemError is a built-in parser item type that is used for flagging that
 // an error has occurred during parsing.
-const ItemError ItemType = -2
+const itemError ItemType = -2
 // MatchedItem is a built-in parser item type that is used for indicating a
 // successful match when using a parser that is based on a Matcher.
 const MatchedItem ItemType = -3
 // Item represents an item that can be emitted from the parser.
 type Item struct {
@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
 // EmitError emits a Parser error item to the client.
 func (p *P) EmitError(format string, args ...interface{}) {
 	message := fmt.Sprintf(format, args...)
-	p.Emit(ItemError, message)
+	p.Emit(itemError, message)
 }
 // UnexpectedInput is used by a StateHandler function to emit an error item