Ah, found a good way to document larger examples which need function definitions and such. Let's see if this works in godoc nicely.

This commit is contained in:
Maurice Makaay 2019-05-25 14:37:38 +00:00
parent 8a6815332e
commit bb1e462892
5 changed files with 156 additions and 131 deletions

View File

@ -0,0 +1,55 @@
// In this example, a parser is created which can parse and normalize Dutch postcodes.
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// createPostcodeMatcher builds a MatcherWrapper that recognizes Dutch
// postcodes and normalizes them.
//
// The combined Matcher functions implement these rules:
//   - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
//   - The first digit is never a zero.
//   - Whitespace between the digits and the letters is optional.
//   - The letters are rewritten to upper case.
//   - The (optional) whitespace is rewritten to a single space.
func createPostcodeMatcher() *parsekit.MatcherWrapper {
	// Short aliases for the parsekit combinators, atoms and modifiers.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// Digits part: a non-zero digit, followed by three more digits.
	firstDigit := c.Except(c.Rune('0'), a.Digit)
	digits := c.Seq(firstDigit, c.Rep(3, a.Digit))

	// Letters part: two ASCII letters in either case, upper-cased on output.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// Separator: optional whitespace, replaced by " " in the output.
	separator := m.Replace(c.Opt(a.Whitespace), " ")

	return parsekit.NewMatcher(c.Seq(digits, separator, letters), "a Dutch postcode")
}
// Example_dutchPostcodeUsingMatcher runs a list of inputs through the postcode
// matcher and prints, per input, either the normalized postcode or the error
// that the matcher reported.
func Example_dutchPostcodeUsingMatcher() {
	matcher := createPostcodeMatcher()

	// A mix of inputs that should match and inputs that should fail.
	inputs := []string{
		"1234 AB",
		"2233Ab",
		"1001\t\tab",
		"1818ab",
		"1234",
		"huh",
	}

	for nr, in := range inputs {
		normalized, err, ok := matcher.Parse(in)
		if ok {
			fmt.Printf("[%d] Input: %q Output: %s\n", nr, in, normalized)
			continue
		}
		fmt.Printf("[%d] Input: %q Error: %s\n", nr, in, err)
	}
	// Output:
	// [0] Input: "1234 AB" Output: 1234 AB
	// [1] Input: "2233Ab" Output: 2233 AB
	// [2] Input: "1001\t\tab" Output: 1001 AB
	// [3] Input: "1818ab" Output: 1818 AB
	// [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode)
	// [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
}

View File

@ -7,32 +7,84 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func Example_minimalAnnotated() {
// Example_helloWorldUsingParser is an empty stub in this revision: it has no
// body and no "Output:" comment.
func Example_helloWorldUsingParser() {
}
// Example_helloWorldUsingMatcher parses input that looks like "Hello, <name>!"
// and extracts the name from it. Only a Matcher function is used for this;
// no full-fledged state-based Parser is implemented.
func Example_helloWorldUsingMatcher() {
	// Short aliases for the parsekit parser/combinators, atoms and modifiers.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// The pieces that make up a greeting. The separator between "hello" and
	// the name is either a comma with optional whitespace around it, or
	// plain whitespace. The name is everything up to the exclamation mark.
	word := c.StrNoCase("hello")
	spacedComma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
	delimiter := c.Any(spacedComma, a.Whitespace)
	who := c.OneOrMore(c.Not(a.Excl))

	// The full pattern matches the whole input, but drops everything except
	// the name from the output.
	pattern := c.Seq(m.Drop(word), m.Drop(delimiter), who, m.Drop(a.Excl))

	// Wrapping the Matcher is enough to start parsing input with it.
	matcher := parsekit.NewMatcher(pattern, "a friendly greeting")

	inputs := []string{
		"Hello, world!",
		"HELLO ,Johnny!",
		"hello , Bob123!",
		"hello Pizza!",
		"Oh no!",
		"Hello, world",
	}
	for nr, in := range inputs {
		found, err, ok := matcher.Parse(in)
		if ok {
			fmt.Printf("[%d] Input: %q Output: %s\n", nr, in, found)
			continue
		}
		fmt.Printf("[%d] Input: %q Error: %s\n", nr, in, err)
	}
	// Output:
	// [0] Input: "Hello, world!" Output: world
	// [1] Input: "HELLO ,Johnny!" Output: Johnny
	// [2] Input: "hello , Bob123!" Output: Bob123
	// [3] Input: "hello Pizza!" Output: Pizza
	// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
	// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
}
func Example_basicCalculator() {
// Let's write a small example for parsing a really basic calculator.
// The calculator understands input that looks like:
//
// 10 + 20 - 8+4
//
// So positive numbers that can be either added or substracted, with
// optional whitespace around the operators.
// So positive numbers that can be either added or subtracted, and whitespace
// is ignored.
// Easy access to parser/combinators, atoms and modifiers.
// Easy access to parsekit parser/combinators, atoms and modifiers.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
// When writing a parser, it's a good start to use the parser/combinator
// functionality of parsekit to create some Matcher functions. These
// functions can later be used in the parser state machine to find the
// matching tokens on the input data.
var number = c.OneOrMore(a.Digit)
//
// In our case, we only need a definition of "number, surrounded by
// optional whitespace". Skipping whitespace could be a part of the
// StateHandler functions below too, but including it in a Matcher makes
// things really practical here.
var whitespace = m.Drop(c.Opt(a.Whitespace))
var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
// We also must define the types of items that the parser will emit.
// We only need two: for numbers and for operators.
// We only need three of them here, for numbers, plus and minus.
// The recommended way to define these, is using 'iota' for auto numbering.
const (
numberType parsekit.ItemType = iota
operatorType
addType
subtractType
)
// Now it is time to define the state machine for parsing the input.
@ -41,12 +93,18 @@ func Example_minimalAnnotated() {
// The P struct holds the internal state for the parser and it provides
// some methods that form the API for your StateHandler implementation.
//
// Note that normally you'd write normal functions and not anonymous
// (note that normally you'd write normal functions and not anonymous
// functions like I did here. I had to use these to be able to write the
// example code.
// example code)
var operatorHandler parsekit.StateHandler
// In this state, we expect a number. When a number is found on the input,
// it is accepted in the output buffer, after which the output buffer is
// emitted as a numberType item. Then we tell the state machine to continue
// with the operatorHandler state.
// When no number is found, the parser will emit an error, explaining that
// "a number" was expected.
numberHandler := func(p *parsekit.P) {
p.Expects("a number")
if p.On(number).Accept().End() {
@ -55,11 +113,21 @@ func Example_minimalAnnotated() {
}
}
// In this state, we expect a plus or minus operator. When one of those
// is found, the appropriate Item is emitted and the parser is sent back
// to the numberHandler to find the next number on the input.
// When no operator is found, then the parser is told to expect the end of
// the input. When more input data is available (which is obviously wrong
// data since it does not match our syntax), the parser will emit an error.
operatorHandler = func(p *parsekit.P) {
if p.On(operator).Accept().End() {
p.EmitLiteral(operatorType)
switch {
case p.On(a.Plus).Accept().End():
p.EmitLiteral(addType)
p.RouteTo(numberHandler)
} else {
case p.On(a.Minus).Accept().End():
p.EmitLiteral(subtractType)
p.RouteTo(numberHandler)
default:
p.ExpectEndOfFile()
}
}
@ -70,7 +138,7 @@ func Example_minimalAnnotated() {
parser := parsekit.NewParser(numberHandler)
// Let's feed the parser some input to work with.
run := parser.Parse("153+ 22+31 - 4- 6+42")
run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ")
// We can now step through the results of the parsing process by repeated
// calls to run.Next(). Next() returns either the next parse item, a parse
@ -91,12 +159,16 @@ func Example_minimalAnnotated() {
default:
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
switch {
case item.Type == operatorType && item.Value == "+":
case item.Type == addType:
op = +1
case item.Type == operatorType && item.Value == "-":
case item.Type == subtractType:
op = -1
default:
nr, _ := strconv.Atoi(item.Value)
case item.Type == numberType:
nr, err := strconv.Atoi(item.Value)
if err != nil {
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
return
}
sum += op * nr
}
}
@ -108,9 +180,9 @@ func Example_minimalAnnotated() {
// Type: 0, Value: "22"
// Type: 1, Value: "+"
// Type: 0, Value: "31"
// Type: 1, Value: "-"
// Type: 2, Value: "-"
// Type: 0, Value: "4"
// Type: 1, Value: "-"
// Type: 2, Value: "-"
// Type: 0, Value: "6"
// Type: 1, Value: "+"
// Type: 0, Value: "42"
@ -118,76 +190,6 @@ func Example_minimalAnnotated() {
// Outcome of computation: 238
}
// Example_minimal is a compact calculator example: it parses a chain of
// additions and subtractions and prints the outcome of the computation.
func Example_minimal() {
	// Let's write a small example for parsing a really basic calculator.
	// The calculator understands input that looks like:
	//
	//   10 + 20 - 8+4
	//
	// So positive numbers that can be either added or subtracted, with
	// optional whitespace around the operators.
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// Matchers for the two kinds of tokens. The whitespace around an
	// operator is matched, but dropped from the output.
	var number = c.OneOrMore(a.Digit)
	var whitespace = m.Drop(c.Opt(a.Whitespace))
	var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)

	// The types of items that the parser emits.
	const (
		numberType parsekit.ItemType = iota
		operatorType
	)

	// Declared before its definition, because numberHandler and
	// operatorHandler route to each other.
	var operatorHandler parsekit.StateHandler

	// Expect a number; when found, emit it and continue with an operator.
	numberHandler := func(p *parsekit.P) {
		p.Expects("a number")
		if p.On(number).Accept().End() {
			p.EmitLiteral(numberType)
			p.RouteTo(operatorHandler)
		}
	}

	// Expect an operator; when found, emit it and continue with a number.
	// Without an operator, the only thing that may follow is end of file.
	operatorHandler = func(p *parsekit.P) {
		if p.On(operator).Accept().End() {
			p.EmitLiteral(operatorType)
			p.RouteTo(numberHandler)
		} else {
			p.ExpectEndOfFile()
		}
	}

	parser := parsekit.NewParser(numberHandler)
	run := parser.Parse("153+ 22+31 - 4- 6+42")

	sum := 0
	op := +1 // sign to apply to the next number: +1 after "+", -1 after "-"
	for {
		item, err, ok := run.Next()
		switch {
		case !ok && err == nil:
			// No item and no error: parsing is done.
			fmt.Println("Outcome of computation:", sum)
			return
		case !ok:
			fmt.Printf("Error: %s\n", err)
			return
		default:
			switch {
			case item.Type == operatorType && item.Value == "+":
				op = +1
			case item.Type == operatorType && item.Value == "-":
				op = -1
			default:
				// The number matcher only accepts digits, but the value can
				// still be out of range for an int, so the conversion error
				// must not be ignored.
				nr, convErr := strconv.Atoi(item.Value)
				if convErr != nil {
					fmt.Printf("Error: invalid number %s: %s\n", item.Value, convErr)
					return
				}
				sum += op * nr
			}
		}
	}
	// Output:
	// Outcome of computation: 238
}
func ExampleItemType() {
// Make use of positive values. Ideally, define your ItemTypes using
// iota for easy automatic value management like this:
@ -301,37 +303,3 @@ func ExampleMatchAnyRune() {
// Match = "y"
// Match = " "
}
// ExampleModifyToUpper demonstrates the ToUpper modifier by normalizing
// Dutch postcodes: the letters are upper-cased and the optional whitespace
// between digits and letters is replaced by a single space.
func ExampleModifyToUpper() {
	// Easy access to the parsekit definitions.
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
	// The first digit is never a zero.
	digitNotZero := c.Except(c.Rune('0'), a.Digit)
	pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))

	// It is good form to write the letters in upper case.
	pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
	pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))

	// It is good form to use a single space between letters and numbers,
	// but it is not mandatory.
	space := m.Replace(c.Opt(a.Whitespace), " ")

	// With all the building blocks, we can now build the postcode parser.
	postcode := c.Seq(pcDigits, space, pcLetters)

	// Create a parser and let it parse some postcode inputs.
	// This will print "1234 AB" for every input, because of the built-in normalization.
	p := parsekit.NewMatcherWrapper(postcode)
	for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} {
		output, err, ok := p.Match(input)
		if !ok {
			// Report a failed match, instead of printing an empty output as
			// if the input had matched.
			fmt.Printf("Input: %q, error: %s\n", input, err)
			continue
		}
		fmt.Printf("Input: %q, output: %q\n", input, output)
	}
	// Output:
	// Input: "1234 AB", output: "1234 AB"
	// Input: "1234Ab", output: "1234 AB"
	// Input: "1234\t\tab", output: "1234 AB"
	// Input: "1234ab", output: "1234 AB"
}

View File

@ -486,6 +486,7 @@ func ModifyDrop(matcher Matcher) Matcher {
// ModifyTrim wraps the provided Matcher in a new Matcher. When the wrapped
// Matcher applies, its output is taken and the characters from the provided
// cutset are trimmed from both the left and the right of that output.
// TODO move cutset to the left arg
func ModifyTrim(matcher Matcher, cutset string) Matcher {
	// Both trim flags of modifyTrim are enabled here.
	const enabled = true
	return modifyTrim(matcher, cutset, enabled, enabled)
}

View File

@ -145,19 +145,19 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
// method.
//
// To match input data against the wrapped Matcher function, use the method
// MatcherWrapper.Match().
// MatcherWrapper.Parse().
type MatcherWrapper struct {
	// parser is the Parser on which the input is run; the constructor builds
	// it with NewParser from a handler around the wrapped Matcher.
	parser *Parser
}
// NewMatcherWrapper instantiates a new MatcherWrapper.
// NewMatcher instantiates a new MatcherWrapper.
//
// This is a simple wrapper around a Matcher function. It can be used to
// match an input string against that Matcher function and retrieve the
// results in a straightforward way.
func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
handler := func(p *P) {
p.Expects("match")
p.Expects(expects)
if p.On(matcher).Accept().End() {
p.EmitLiteral(0) // ItemType is irrelevant
}
@ -165,8 +165,8 @@ func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
return &MatcherWrapper{parser: NewParser(handler)}
}
// Match runs the wrapped Matcher function against the provided input data.
func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
// Parse runs the wrapped Matcher function against the provided input data.
func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
item, err, ok := w.parser.Parse(input).Next()
if !ok {
return "", err, false

View File

@ -11,6 +11,7 @@ import (
const TestItem parsekit.ItemType = 1
// Easy access to the parsekit definitions.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
type MatcherTest struct {
@ -27,7 +28,7 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
}
func RunMatcherTest(t *testing.T, test MatcherTest) {
output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)
if test.mustMatch {
if !ok {