Ah, found a good way to document larger examples which need function definitions and such. Let's see if this works in godoc nicely.

2019-05-25 14:37:38 +00:00 · 2019-05-25 14:37:38 +00:00 · bb1e462892
parent 8a6815332e
commit bb1e462892
5 changed files with 156 additions and 131 deletions
--- a/example_dutchpostcode_test.go
+++ b/example_dutchpostcode_test.go
@@ -0,0 +1,55 @@
+// In this example, a parser is created which can parse and normalize Dutch postcodes.
+package parsekit_test
+
+import (
+	"fmt"
+
+	"git.makaay.nl/mauricem/go-parsekit"
+)
+
+func createPostcodeMatcher() *parsekit.MatcherWrapper {
+	// Easy access to the parsekit definitions.
+	var c, a, m = parsekit.C, parsekit.A, parsekit.M
+
+	// Matcher functions are created and combined to satisfy these rules:
+	// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
+	// - The first digit is never a zero.
+	// - A space between letters and digits is optional.
+	// - It is good form to write the letters in upper case.
+	// - It is good form to use a single space between digits and letters.
+	var digitNotZero = c.Except(c.Rune('0'), a.Digit)
+	var pcDigits = c.Seq(digitNotZero, c.Rep(3, a.Digit))
+	var pcLetter = c.Any(a.ASCIILower, a.ASCIIUpper)
+	var pcLetters = m.ToUpper(c.Seq(pcLetter, pcLetter))
+	var space = m.Replace(c.Opt(a.Whitespace), " ")
+	var postcode = c.Seq(pcDigits, space, pcLetters)
+
+	return parsekit.NewMatcher(postcode, "a Dutch postcode")
+}
+
+func Example_dutchPostcodeUsingMatcher() {
+	pcParser := createPostcodeMatcher()
+
+	for i, input := range []string{
+		"1234  AB",
+		"2233Ab",
+		"1001\t\tab",
+		"1818ab",
+		"1234",
+		"huh",
+	} {
+		output, err, ok := pcParser.Parse(input)
+		if !ok {
+			fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
+		} else {
+			fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
+		}
+	}
+	// Output:
+	// [0] Input: "1234  AB" Output: 1234 AB
+	// [1] Input: "2233Ab" Output: 2233 AB
+	// [2] Input: "1001\t\tab" Output: 1001 AB
+	// [3] Input: "1818ab" Output: 1818 AB
+	// [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode)
+	// [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
+}
--- a/examples_test.go
+++ b/examples_test.go
@@ -7,32 +7,84 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit"
 )

-func Example_minimalAnnotated() {
+func Example_helloWorldUsingParser() {
+}
+
+func Example_helloWorldUsingMatcher() {
+	// In this example, a parser is created that is able to parse input that looks
+	// like "Hello, <name>!", and that extracts the name from it.
+	// The implementation uses only a Matcher function and does not implement a
+	// full-fledged state-based Parser for it.
+
+	// Easy access to parsekit parser/combinators, atoms and modifiers.
+	var c, a, m = parsekit.C, parsekit.A, parsekit.M
+
+	// Using the parser/combinator support of parsekit, we create a Matcher function
+	// that does all the work. The 'greeting' Matcher matches the whole input and
+	// drops all but the name from it.
+	var hello = c.StrNoCase("hello")
+	var comma = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
+	var separator = c.Any(comma, a.Whitespace)
+	var name = c.OneOrMore(c.Not(a.Excl))
+	var greeting = c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
+
+	// Now we can already do some parsing, by using a Matcher.
+	var parser = parsekit.NewMatcher(greeting, "a friendly greeting")
+	for i, input := range []string{
+		"Hello, world!",
+		"HELLO ,Johnny!",
+		"hello , Bob123!",
+		"hello Pizza!",
+		"Oh no!",
+		"Hello, world",
+	} {
+		output, err, ok := parser.Parse(input)
+		if !ok {
+			fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
+		} else {
+			fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
+		}
+	}
+	// Output:
+	// [0] Input: "Hello, world!" Output: world
+	// [1] Input: "HELLO ,Johnny!" Output: Johnny
+	// [2] Input: "hello , Bob123!" Output: Bob123
+	// [3] Input: "hello Pizza!" Output: Pizza
+	// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
+	// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
+}
+
+func Example_basicCalculator() {
 	// Let's write a small example for parsing a really basic calculator.
 	// The calculator understands input that looks like:
 	//
 	//     10 + 20 - 8+4
 	//
-	// So positive numbers that can be either added or substracted, with
-	// optional whitespace around the operators.
+	// So positive numbers that can be either added or subtracted, and whitespace
+	// is ignored.

-	// Easy access to parser/combinators, atoms and modifiers.
+	// Easy access to parsekit parser/combinators, atoms and modifiers.
 	var c, a, m = parsekit.C, parsekit.A, parsekit.M

 	// When writing a parser, it's a good start to use the parser/combinator
 	// functionality of parsekit to create some Matcher functions. These
 	// functions can later be used in the parser state machine to find the
 	// matching tokens on the input data.
-	var number = c.OneOrMore(a.Digit)
+	//
+	// In our case, we only need a definition of "number, surrounded by
+	// optional whitespace". Skipping whitespace could be a part of the
+	// StateHandler functions below too, but including it in a Matcher makes
+	// things really practical here.
 	var whitespace = m.Drop(c.Opt(a.Whitespace))
-	var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
+	var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)

 	// We also must define the types of items that the parser will emit.
-	// We only need two: for numbers and for operators.
+	// We only need three of them here, for numbers, plus and minus.
 	// The recommended way to define these, is using 'iota' for auto numbering.
 	const (
 		numberType parsekit.ItemType = iota
-		operatorType
+		addType
+		subtractType
 	)

 	// Now it is time to define the state machine for parsing the input.
@@ -41,12 +93,18 @@ func Example_minimalAnnotated() {
 	// The P struct holds the internal state for the parser and it provides
 	// some methods that form the API for your StateHandler implementation.
 	//
-	// Note that normally you'd write normal functions and not anonymous
+	// (note that normally you'd write normal functions and not anonymous
 	// functions like I did here. I had to use these to be able to write the
-	// example code.
+	// example code)

 	var operatorHandler parsekit.StateHandler

+	// In this state, we expect a number. When a number is found on the input,
+	// it is accepted in the output buffer, after which the output buffer is
+	// emitted as a numberType item. Then we tell the state machine to continue
+	// with the operatorHandler state.
+	// When no number is found, the parser will emit an error, explaining that
+	// "a number" was expected.
 	numberHandler := func(p *parsekit.P) {
 		p.Expects("a number")
 		if p.On(number).Accept().End() {
@@ -55,11 +113,21 @@ func Example_minimalAnnotated() {
 		}
 	}

+	// In this state, we expect a plus or minus operator. When one of those
+	// is found, the appropriate Item is emitted and the parser is sent back
+	// to the numberHandler to find the next number on the input.
+	// When no operator is found, then the parser is told to expect the end of
+	// the input. When more input data is available (which is obviously wrong
+	// data since it does not match our syntax), the parser will emit an error.
 	operatorHandler = func(p *parsekit.P) {
-		if p.On(operator).Accept().End() {
-			p.EmitLiteral(operatorType)
+		switch {
+		case p.On(a.Plus).Accept().End():
+			p.EmitLiteral(addType)
 			p.RouteTo(numberHandler)
-		} else {
+		case p.On(a.Minus).Accept().End():
+			p.EmitLiteral(subtractType)
+			p.RouteTo(numberHandler)
+		default:
 			p.ExpectEndOfFile()
 		}
 	}
@@ -70,7 +138,7 @@ func Example_minimalAnnotated() {
 	parser := parsekit.NewParser(numberHandler)

 	// Let's feed the parser some input to work with.
-	run := parser.Parse("153+ 22+31 - 4- 6+42")
+	run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ")

 	// We can now step through the results of the parsing process by repeated
 	// calls to run.Next(). Next() returns either the next parse item, a parse
@@ -91,12 +159,16 @@ func Example_minimalAnnotated() {
 		default:
 			fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
 			switch {
-			case item.Type == operatorType && item.Value == "+":
+			case item.Type == addType:
 				op = +1
-			case item.Type == operatorType && item.Value == "-":
+			case item.Type == subtractType:
 				op = -1
-			default:
-				nr, _ := strconv.Atoi(item.Value)
+			case item.Type == numberType:
+				nr, err := strconv.Atoi(item.Value)
+				if err != nil {
+					fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
+					return
+				}
 				sum += op * nr
 			}
 		}
@@ -108,9 +180,9 @@ func Example_minimalAnnotated() {
 	// Type: 0, Value: "22"
 	// Type: 1, Value: "+"
 	// Type: 0, Value: "31"
-	// Type: 1, Value: "-"
+	// Type: 2, Value: "-"
 	// Type: 0, Value: "4"
-	// Type: 1, Value: "-"
+	// Type: 2, Value: "-"
 	// Type: 0, Value: "6"
 	// Type: 1, Value: "+"
 	// Type: 0, Value: "42"
@@ -118,76 +190,6 @@ func Example_minimalAnnotated() {
 	// Outcome of computation: 238
 }

-func Example_minimal() {
-	// Let's write a small example for parsing a really basic calculator.
-	// The calculator understands input that looks like:
-	//
-	//     10 + 20 - 8+4
-	//
-	// So positive numbers that can be either added or substracted, with
-	// optional whitespace around the operators.
-
-	var c, a, m = parsekit.C, parsekit.A, parsekit.M
-
-	var number = c.OneOrMore(a.Digit)
-	var whitespace = m.Drop(c.Opt(a.Whitespace))
-	var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
-
-	const (
-		numberType parsekit.ItemType = iota
-		operatorType
-	)
-
-	var operatorHandler parsekit.StateHandler
-
-	numberHandler := func(p *parsekit.P) {
-		p.Expects("a number")
-		if p.On(number).Accept().End() {
-			p.EmitLiteral(numberType)
-			p.RouteTo(operatorHandler)
-		}
-	}
-
-	operatorHandler = func(p *parsekit.P) {
-		if p.On(operator).Accept().End() {
-			p.EmitLiteral(operatorType)
-			p.RouteTo(numberHandler)
-		} else {
-			p.ExpectEndOfFile()
-		}
-	}
-
-	parser := parsekit.NewParser(numberHandler)
-	run := parser.Parse("153+ 22+31 - 4- 6+42")
-
-	sum := 0
-	op := +1
-	for {
-		item, err, ok := run.Next()
-		switch {
-		case !ok && err == nil:
-			fmt.Println("Outcome of computation:", sum)
-			return
-		case !ok:
-			fmt.Printf("Error: %s\n", err)
-			return
-		default:
-			switch {
-			case item.Type == operatorType && item.Value == "+":
-				op = +1
-			case item.Type == operatorType && item.Value == "-":
-				op = -1
-			default:
-				nr, _ := strconv.Atoi(item.Value)
-				sum += op * nr
-			}
-		}
-	}
-
-	// Output:
-	// Outcome of computation: 238
-}
-
 func ExampleItemType() {
 	// Make use of positive values. Ideally, define your ItemTypes using
 	// iota for easy automatic value management like this:
@@ -301,37 +303,3 @@ func ExampleMatchAnyRune() {
 	// Match = "y"
 	// Match = " "
 }
-
-func ExampleModifyToUpper() {
-	// Easy access to the parsekit definitions.
-	var c, a, m = parsekit.C, parsekit.A, parsekit.M
-
-	// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
-	// The first digit is never a zero.
-	digitNotZero := c.Except(c.Rune('0'), a.Digit)
-	pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
-
-	// It is good form to write the letters in upper case.
-	pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
-	pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
-
-	// It is good form to use a single space between letters and numbers,
-	// but it is not mandatory.
-	space := m.Replace(c.Opt(a.Whitespace), " ")
-
-	// With all the building blocks, we can now build the postcode parser.
-	postcode := c.Seq(pcDigits, space, pcLetters)
-
-	// Create a parser and let is parse some postcode inputs.
-	// This will print "1234 AB" for every input, because of the built-in normalization.
-	p := parsekit.NewMatcherWrapper(postcode)
-	for _, input := range []string{"1234  AB", "1234Ab", "1234\t\tab", "1234ab"} {
-		output, _, _ := p.Match(input)
-		fmt.Printf("Input: %q, output: %q\n", input, output)
-	}
-	// Output:
-	// Input: "1234  AB", output: "1234 AB"
-	// Input: "1234Ab", output: "1234 AB"
-	// Input: "1234\t\tab", output: "1234 AB"
-	// Input: "1234ab", output: "1234 AB"
-}
--- a/matcher_builtin.go
+++ b/matcher_builtin.go
@@ -486,6 +486,7 @@ func ModifyDrop(matcher Matcher) Matcher {
 // ModifyTrim creates a Matcher that checks if the provided Matcher applies.
 // If it does, then its output is taken and characters from the provided
 // cutset are trimmed from both the left and the right of the output.
+// TODO move cutset to the left arg
 func ModifyTrim(matcher Matcher, cutset string) Matcher {
 	return modifyTrim(matcher, cutset, true, true)
 }
--- a/parsekit.go
+++ b/parsekit.go
@@ -145,19 +145,19 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
 // method.
 //
 // To match input data against the wrapped Matcher function, use the method
-// MatcherWrapper.Match().
+// MatcherWrapper.Parse().
 type MatcherWrapper struct {
 	parser *Parser
 }

-// NewMatcherWrapper instantiates a new MatcherWrapper.
+// NewMatcher instantiates a new MatcherWrapper.
 //
 // This is a simple wrapper around a Matcher function. It can be used to
 // match an input string against that Matcher function and retrieve the
 // results in a straight forward way.
-func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
+func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
 	handler := func(p *P) {
-		p.Expects("match")
+		p.Expects(expects)
 		if p.On(matcher).Accept().End() {
 			p.EmitLiteral(0) // ItemType is irrelevant
 		}
@@ -165,8 +165,8 @@ func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
 	return &MatcherWrapper{parser: NewParser(handler)}
 }

-// Match runs the wrapped Matcher function against the provided input data.
-func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
+// Parse runs the wrapped Matcher function against the provided input data.
+func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
 	item, err, ok := w.parser.Parse(input).Next()
 	if !ok {
 		return "", err, false
--- a/parsekit_test.go
+++ b/parsekit_test.go
@@ -11,6 +11,7 @@ import (

 const TestItem parsekit.ItemType = 1

+// Easy access to the parsekit definitions.
 var c, a, m = parsekit.C, parsekit.A, parsekit.M

 type MatcherTest struct {
@@ -27,7 +28,7 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
 }

 func RunMatcherTest(t *testing.T, test MatcherTest) {
-	output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
+	output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)

 	if test.mustMatch {
 		if !ok {