Ah, found a good way to document larger examples which need function definitions and such. Let's see if this works in godoc nicely.
This commit is contained in:
parent
8a6815332e
commit
bb1e462892
|
@ -0,0 +1,55 @@
|
|||
// In this example, a parser is created which can parse and normalize Dutch postcodes.
|
||||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// Matcher functions are created and combined to satisfy these rules:
|
||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||
// - The first digit is never a zero.
|
||||
// - A space between letters and digits is optional.
|
||||
// - It is good form to write the letters in upper case.
|
||||
// - It is good form to use a single space between digits and letters.
|
||||
var digitNotZero = c.Except(c.Rune('0'), a.Digit)
|
||||
var pcDigits = c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
var pcLetter = c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
var pcLetters = m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||
var space = m.Replace(c.Opt(a.Whitespace), " ")
|
||||
var postcode = c.Seq(pcDigits, space, pcLetters)
|
||||
|
||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||
}
|
||||
|
||||
func Example_dutchPostcodeUsingMatcher() {
|
||||
pcParser := createPostcodeMatcher()
|
||||
|
||||
for i, input := range []string{
|
||||
"1234 AB",
|
||||
"2233Ab",
|
||||
"1001\t\tab",
|
||||
"1818ab",
|
||||
"1234",
|
||||
"huh",
|
||||
} {
|
||||
output, err, ok := pcParser.Parse(input)
|
||||
if !ok {
|
||||
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
||||
} else {
|
||||
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// [0] Input: "1234 AB" Output: 1234 AB
|
||||
// [1] Input: "2233Ab" Output: 2233 AB
|
||||
// [2] Input: "1001\t\tab" Output: 1001 AB
|
||||
// [3] Input: "1818ab" Output: 1818 AB
|
||||
// [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode)
|
||||
// [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
|
||||
}
|
216
examples_test.go
216
examples_test.go
|
@ -7,32 +7,84 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func Example_minimalAnnotated() {
|
||||
func Example_helloWorldUsingParser() {
	// NOTE(review): the body is empty in this revision; presumably a
	// placeholder for a Parser-based counterpart of
	// Example_helloWorldUsingMatcher. Confirm with the author.
}
|
||||
|
||||
func Example_helloWorldUsingMatcher() {
|
||||
// In this example, a parser is created that is able to parse input that looks
|
||||
// like "Hello, <name>!", and that extracts the name from it.
|
||||
// The implementation uses only a Matcher function and does not implement a
|
||||
// full-fledged state-based Parser for it.
|
||||
|
||||
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// Using the parser/combinator support of parsekit, we create a Matcher function
|
||||
// that does all the work. The 'greeting' Matcher matches the whole input and
|
||||
// drops all but the name from it.
|
||||
var hello = c.StrNoCase("hello")
|
||||
var comma = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||
var separator = c.Any(comma, a.Whitespace)
|
||||
var name = c.OneOrMore(c.Not(a.Excl))
|
||||
var greeting = c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
||||
|
||||
// Now we can already do some parsing, by using a Matcher.
|
||||
var parser = parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||
for i, input := range []string{
|
||||
"Hello, world!",
|
||||
"HELLO ,Johnny!",
|
||||
"hello , Bob123!",
|
||||
"hello Pizza!",
|
||||
"Oh no!",
|
||||
"Hello, world",
|
||||
} {
|
||||
output, err, ok := parser.Parse(input)
|
||||
if !ok {
|
||||
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
||||
} else {
|
||||
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// [0] Input: "Hello, world!" Output: world
|
||||
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||
// [3] Input: "hello Pizza!" Output: Pizza
|
||||
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
|
||||
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
|
||||
}
|
||||
|
||||
func Example_basicCalculator() {
|
||||
// Let's write a small example for parsing a really basic calculator.
|
||||
// The calculator understands input that looks like:
|
||||
//
|
||||
// 10 + 20 - 8+4
|
||||
//
|
||||
// So positive numbers that can be either added or subtracted, with
|
||||
// optional whitespace around the operators.
|
||||
// So positive numbers that can be either added or subtracted, and whitespace
|
||||
// is ignored.
|
||||
|
||||
// Easy access to parser/combinators, atoms and modifiers.
|
||||
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// When writing a parser, it's a good start to use the parser/combinator
|
||||
// functionality of parsekit to create some Matcher functions. These
|
||||
// functions can later be used in the parser state machine to find the
|
||||
// matching tokens on the input data.
|
||||
var number = c.OneOrMore(a.Digit)
|
||||
//
|
||||
// In our case, we only need a definition of "number, surrounded by
|
||||
// optional whitespace". Skipping whitespace could be a part of the
|
||||
// StateHandler functions below too, but including it in a Matcher makes
|
||||
// things really practical here.
|
||||
var whitespace = m.Drop(c.Opt(a.Whitespace))
|
||||
var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
|
||||
var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
|
||||
|
||||
// We also must define the types of items that the parser will emit.
|
||||
// We only need two: for numbers and for operators.
|
||||
// We only need three of them here, for numbers, plus and minus.
|
||||
// The recommended way to define these, is using 'iota' for auto numbering.
|
||||
const (
|
||||
numberType parsekit.ItemType = iota
|
||||
operatorType
|
||||
addType
|
||||
subtractType
|
||||
)
|
||||
|
||||
// Now it is time to define the state machine for parsing the input.
|
||||
|
@ -41,12 +93,18 @@ func Example_minimalAnnotated() {
|
|||
// The P struct holds the internal state for the parser and it provides
|
||||
// some methods that form the API for your StateHandler implementation.
|
||||
//
|
||||
// Note that normally you'd write normal functions and not anonymous
|
||||
// (note that normally you'd write normal functions and not anonymous
|
||||
// functions like I did here. I had to use these to be able to write the
|
||||
// example code.
|
||||
// example code)
|
||||
|
||||
var operatorHandler parsekit.StateHandler
|
||||
|
||||
// In this state, we expect a number. When a number is found on the input,
|
||||
// it is accepted in the output buffer, after which the output buffer is
|
||||
// emitted as a numberType item. Then we tell the state machine to continue
|
||||
// with the operatorHandler state.
|
||||
// When no number is found, the parser will emit an error, explaining that
|
||||
// "a number" was expected.
|
||||
numberHandler := func(p *parsekit.P) {
|
||||
p.Expects("a number")
|
||||
if p.On(number).Accept().End() {
|
||||
|
@ -55,11 +113,21 @@ func Example_minimalAnnotated() {
|
|||
}
|
||||
}
|
||||
|
||||
// In this state, we expect a plus or minus operator. When one of those
|
||||
// is found, the appropriate Item is emitted and the parser is sent back
|
||||
// to the numberHandler to find the next number on the input.
|
||||
// When no operator is found, then the parser is told to expect the end of
|
||||
// the input. When more input data is available (which is obviously wrong
|
||||
// data since it does not match our syntax), the parser will emit an error.
|
||||
operatorHandler = func(p *parsekit.P) {
|
||||
if p.On(operator).Accept().End() {
|
||||
p.EmitLiteral(operatorType)
|
||||
switch {
|
||||
case p.On(a.Plus).Accept().End():
|
||||
p.EmitLiteral(addType)
|
||||
p.RouteTo(numberHandler)
|
||||
} else {
|
||||
case p.On(a.Minus).Accept().End():
|
||||
p.EmitLiteral(subtractType)
|
||||
p.RouteTo(numberHandler)
|
||||
default:
|
||||
p.ExpectEndOfFile()
|
||||
}
|
||||
}
|
||||
|
@ -70,7 +138,7 @@ func Example_minimalAnnotated() {
|
|||
parser := parsekit.NewParser(numberHandler)
|
||||
|
||||
// Let's feed the parser some input to work with.
|
||||
run := parser.Parse("153+ 22+31 - 4- 6+42")
|
||||
run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
||||
|
||||
// We can now step through the results of the parsing process by repeated
|
||||
// calls to run.Next(). Next() returns either the next parse item, a parse
|
||||
|
@ -91,12 +159,16 @@ func Example_minimalAnnotated() {
|
|||
default:
|
||||
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
||||
switch {
|
||||
case item.Type == operatorType && item.Value == "+":
|
||||
case item.Type == addType:
|
||||
op = +1
|
||||
case item.Type == operatorType && item.Value == "-":
|
||||
case item.Type == subtractType:
|
||||
op = -1
|
||||
default:
|
||||
nr, _ := strconv.Atoi(item.Value)
|
||||
case item.Type == numberType:
|
||||
nr, err := strconv.Atoi(item.Value)
|
||||
if err != nil {
|
||||
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
|
||||
return
|
||||
}
|
||||
sum += op * nr
|
||||
}
|
||||
}
|
||||
|
@ -108,9 +180,9 @@ func Example_minimalAnnotated() {
|
|||
// Type: 0, Value: "22"
|
||||
// Type: 1, Value: "+"
|
||||
// Type: 0, Value: "31"
|
||||
// Type: 1, Value: "-"
|
||||
// Type: 2, Value: "-"
|
||||
// Type: 0, Value: "4"
|
||||
// Type: 1, Value: "-"
|
||||
// Type: 2, Value: "-"
|
||||
// Type: 0, Value: "6"
|
||||
// Type: 1, Value: "+"
|
||||
// Type: 0, Value: "42"
|
||||
|
@ -118,76 +190,6 @@ func Example_minimalAnnotated() {
|
|||
// Outcome of computation: 238
|
||||
}
|
||||
|
||||
func Example_minimal() {
|
||||
// Let's write a small example for parsing a really basic calculator.
|
||||
// The calculator understands input that looks like:
|
||||
//
|
||||
// 10 + 20 - 8+4
|
||||
//
|
||||
// So positive numbers that can be either added or substracted, with
|
||||
// optional whitespace around the operators.
|
||||
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
var number = c.OneOrMore(a.Digit)
|
||||
var whitespace = m.Drop(c.Opt(a.Whitespace))
|
||||
var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
|
||||
|
||||
const (
|
||||
numberType parsekit.ItemType = iota
|
||||
operatorType
|
||||
)
|
||||
|
||||
var operatorHandler parsekit.StateHandler
|
||||
|
||||
numberHandler := func(p *parsekit.P) {
|
||||
p.Expects("a number")
|
||||
if p.On(number).Accept().End() {
|
||||
p.EmitLiteral(numberType)
|
||||
p.RouteTo(operatorHandler)
|
||||
}
|
||||
}
|
||||
|
||||
operatorHandler = func(p *parsekit.P) {
|
||||
if p.On(operator).Accept().End() {
|
||||
p.EmitLiteral(operatorType)
|
||||
p.RouteTo(numberHandler)
|
||||
} else {
|
||||
p.ExpectEndOfFile()
|
||||
}
|
||||
}
|
||||
|
||||
parser := parsekit.NewParser(numberHandler)
|
||||
run := parser.Parse("153+ 22+31 - 4- 6+42")
|
||||
|
||||
sum := 0
|
||||
op := +1
|
||||
for {
|
||||
item, err, ok := run.Next()
|
||||
switch {
|
||||
case !ok && err == nil:
|
||||
fmt.Println("Outcome of computation:", sum)
|
||||
return
|
||||
case !ok:
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
return
|
||||
default:
|
||||
switch {
|
||||
case item.Type == operatorType && item.Value == "+":
|
||||
op = +1
|
||||
case item.Type == operatorType && item.Value == "-":
|
||||
op = -1
|
||||
default:
|
||||
nr, _ := strconv.Atoi(item.Value)
|
||||
sum += op * nr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Output:
|
||||
// Outcome of computation: 238
|
||||
}
|
||||
|
||||
func ExampleItemType() {
|
||||
// Make use of positive values. Ideally, define your ItemTypes using
|
||||
// iota for easy automatic value management like this:
|
||||
|
@ -301,37 +303,3 @@ func ExampleMatchAnyRune() {
|
|||
// Match = "y"
|
||||
// Match = " "
|
||||
}
|
||||
|
||||
func ExampleModifyToUpper() {
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||
// The first digit is never a zero.
|
||||
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
||||
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
|
||||
// It is good form to write the letters in upper case.
|
||||
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||
|
||||
// It is good form to use a single space between letters and numbers,
|
||||
// but it is not mandatory.
|
||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||
|
||||
// With all the building blocks, we can now build the postcode parser.
|
||||
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||
|
||||
// Create a parser and let is parse some postcode inputs.
|
||||
// This will print "1234 AB" for every input, because of the built-in normalization.
|
||||
p := parsekit.NewMatcherWrapper(postcode)
|
||||
for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} {
|
||||
output, _, _ := p.Match(input)
|
||||
fmt.Printf("Input: %q, output: %q\n", input, output)
|
||||
}
|
||||
// Output:
|
||||
// Input: "1234 AB", output: "1234 AB"
|
||||
// Input: "1234Ab", output: "1234 AB"
|
||||
// Input: "1234\t\tab", output: "1234 AB"
|
||||
// Input: "1234ab", output: "1234 AB"
|
||||
}
|
||||
|
|
|
@ -486,6 +486,7 @@ func ModifyDrop(matcher Matcher) Matcher {
|
|||
// ModifyTrim creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from both the left and the right of the output.
//
// The work is delegated to modifyTrim, with both of its boolean arguments
// set to true. NOTE(review): presumably these flags select trimming on the
// left and on the right respectively; confirm against modifyTrim.
//
// TODO move cutset to the left arg
func ModifyTrim(matcher Matcher, cutset string) Matcher {
	return modifyTrim(matcher, cutset, true, true)
}
|
||||
|
|
12
parsekit.go
12
parsekit.go
|
@ -145,19 +145,19 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
|
|||
// method.
|
||||
//
|
||||
// To match input data against the wrapped Matcher function, use the method
|
||||
// MatcherWrapper.Match().
|
||||
// MatcherWrapper.Parse().
|
||||
type MatcherWrapper struct {
	// parser is the Parser through which the input is run; the constructor
	// fills it with the result of NewParser.
	parser *Parser
}
|
||||
|
||||
// NewMatcherWrapper instantiates a new MatcherWrapper.
|
||||
// NewMatcher instantiates a new MatcherWrapper.
|
||||
//
|
||||
// This is a simple wrapper around a Matcher function. It can be used to
|
||||
// match an input string against that Matcher function and retrieve the
|
||||
// results in a straightforward way.
|
||||
func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
|
||||
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
|
||||
handler := func(p *P) {
|
||||
p.Expects("match")
|
||||
p.Expects(expects)
|
||||
if p.On(matcher).Accept().End() {
|
||||
p.EmitLiteral(0) // ItemType is irrelevant
|
||||
}
|
||||
|
@ -165,8 +165,8 @@ func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
|
|||
return &MatcherWrapper{parser: NewParser(handler)}
|
||||
}
|
||||
|
||||
// Match runs the wrapped Matcher function against the provided input data.
|
||||
func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
|
||||
// Parse runs the wrapped Matcher function against the provided input data.
|
||||
func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
|
||||
item, err, ok := w.parser.Parse(input).Next()
|
||||
if !ok {
|
||||
return "", err, false
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
|
||||
const TestItem parsekit.ItemType = 1
|
||||
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
type MatcherTest struct {
|
||||
|
@ -27,7 +28,7 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
|
|||
}
|
||||
|
||||
func RunMatcherTest(t *testing.T, test MatcherTest) {
|
||||
output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
|
||||
output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)
|
||||
|
||||
if test.mustMatch {
|
||||
if !ok {
|
||||
|
|
Loading…
Reference in New Issue