Added complete example for a parser.
This commit is contained in:
parent
070a215ac3
commit
723e2a0c38
165
examples_test.go
165
examples_test.go
|
@ -2,10 +2,122 @@ package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func Example_minimal() {
|
||||||
|
// Let's write a small example for parsing a really basic calculator.
|
||||||
|
// The calculator understands input that looks like:
|
||||||
|
//
|
||||||
|
// 10 + 20 - 8+4
|
||||||
|
//
|
||||||
|
// So positive numbers that can be either added or subtracted, with
|
||||||
|
// optional whitespace around the operators.
|
||||||
|
|
||||||
|
// Easy access to parser/combinators, atoms and modifiers.
|
||||||
|
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
|
// When writing a parser, it's a good start to use the parser/combinator
|
||||||
|
// functionality of parsekit to create some Matcher functions. These
|
||||||
|
// functions can later be used in the parser state machine to find the
|
||||||
|
// matching tokens on the input data.
|
||||||
|
var number = c.OneOrMore(a.Digit)
|
||||||
|
var whitespace = m.Drop(c.Opt(a.Whitespace))
|
||||||
|
var operator = c.Seq(whitespace, c.Any(a.Plus, a.Minus), whitespace)
|
||||||
|
|
||||||
|
// We also must define the types of items that the parser will emit.
|
||||||
|
// We only need two: for numbers and for operators.
|
||||||
|
// The recommended way to define these is to use 'iota' for auto numbering.
|
||||||
|
const (
|
||||||
|
numberType parsekit.ItemType = iota
|
||||||
|
operatorType
|
||||||
|
)
|
||||||
|
|
||||||
|
// Now it is time to define the state machine for parsing the input.
|
||||||
|
// The state machine is built up from functions that match the StateHandler
|
||||||
|
// signature: func(*parsekit.P)
|
||||||
|
// The P struct holds the internal state for the parser and it provides
|
||||||
|
// some methods that form the API for your StateHandler implementation.
|
||||||
|
//
|
||||||
|
// Note that normally you'd write normal functions and not anonymous
|
||||||
|
// functions like I did here. I had to use these to be able to write the
|
||||||
|
// example code.
|
||||||
|
|
||||||
|
var operatorHandler parsekit.StateHandler
|
||||||
|
|
||||||
|
numberHandler := func(p *parsekit.P) {
|
||||||
|
p.Expects("a number")
|
||||||
|
if p.On(number).Accept().End() {
|
||||||
|
p.EmitLiteral(numberType)
|
||||||
|
p.RouteTo(operatorHandler)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
operatorHandler = func(p *parsekit.P) {
|
||||||
|
if p.On(operator).Accept().End() {
|
||||||
|
p.EmitLiteral(operatorType)
|
||||||
|
p.RouteTo(numberHandler)
|
||||||
|
} else {
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// All is ready for our parser. We now can create a new Parser struct.
|
||||||
|
// We need to tell it what the start state is. In our case, that is
|
||||||
|
// of course the number state.
|
||||||
|
parser := parsekit.NewParser(numberHandler)
|
||||||
|
|
||||||
|
// Let's feed the parser some input to work with.
|
||||||
|
run := parser.Parse("153+ 22+31 - 4- 6+42")
|
||||||
|
|
||||||
|
// We can step through the results of the parsing process by repeated
|
||||||
|
// calls to run.Next(). Next() returns the next parse item, a parse
|
||||||
|
// error or an end of file. Let's dump the parse results and handle the
|
||||||
|
// computation while we're at it.
|
||||||
|
sum := 0
|
||||||
|
op := +1
|
||||||
|
for {
|
||||||
|
item, err, ok := run.Next()
|
||||||
|
switch {
|
||||||
|
case !ok && err == nil:
|
||||||
|
fmt.Println("End of file reached")
|
||||||
|
fmt.Println("Outcome of computation:", sum)
|
||||||
|
return
|
||||||
|
case !ok:
|
||||||
|
fmt.Printf("Error: %s\n", err)
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
||||||
|
switch {
|
||||||
|
case item.Type == operatorType && item.Value == "+":
|
||||||
|
op = +1
|
||||||
|
case item.Type == operatorType && item.Value == "-":
|
||||||
|
op = -1
|
||||||
|
default:
|
||||||
|
nr, _ := strconv.Atoi(item.Value)
|
||||||
|
sum += op * nr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// Type: 0, Value: "153"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "22"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "31"
|
||||||
|
// Type: 1, Value: "-"
|
||||||
|
// Type: 0, Value: "4"
|
||||||
|
// Type: 1, Value: "-"
|
||||||
|
// Type: 0, Value: "6"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "42"
|
||||||
|
// End of file reached
|
||||||
|
// Outcome of computation: 238
|
||||||
|
}
|
||||||
|
|
||||||
func ExampleItemType() {
|
func ExampleItemType() {
|
||||||
// Make use of positive values. Ideally, define your ItemTypes using
|
// Make use of positive values. Ideally, define your ItemTypes using
|
||||||
// iota for easy automatic value management like this:
|
// iota for easy automatic value management like this:
|
||||||
|
@ -17,6 +129,41 @@ func ExampleItemType() {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExampleItem() {
|
||||||
|
var c = parsekit.C
|
||||||
|
|
||||||
|
// You define your own item types for your specific parser.
|
||||||
|
var QuestionItem parsekit.ItemType = 42
|
||||||
|
|
||||||
|
// A StateHandler function can use the defined item type by means of
|
||||||
|
// the p.Emit* methods on parsekit.P.
|
||||||
|
// When errors occur, or the end of the file is reached, then the built-in
|
||||||
|
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
||||||
|
stateHandler := func(p *parsekit.P) {
|
||||||
|
if p.On(c.Str("question")).Accept().End() {
|
||||||
|
p.EmitLiteral(QuestionItem)
|
||||||
|
}
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Successful match
|
||||||
|
item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next()
|
||||||
|
fmt.Println(ok, item.Type == QuestionItem, item.Value)
|
||||||
|
|
||||||
|
// End of file reached
|
||||||
|
item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next()
|
||||||
|
fmt.Println(ok, item.Type == parsekit.ItemEOF)
|
||||||
|
|
||||||
|
// An error occurred
|
||||||
|
item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next()
|
||||||
|
fmt.Println(ok, item.Type == parsekit.ItemError, err)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// true true question
|
||||||
|
// false true
|
||||||
|
// false true unexpected character 'a' (expected end of file)
|
||||||
|
}
|
||||||
|
|
||||||
func ExampleError() {
|
func ExampleError() {
|
||||||
err := &parsekit.Error{
|
err := &parsekit.Error{
|
||||||
Message: "it broke down",
|
Message: "it broke down",
|
||||||
|
@ -89,32 +236,32 @@ func ExampleModifyToUpper() {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
|
// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||||
// The numbers never start with a zero.
|
// The first digit is never a zero.
|
||||||
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
||||||
numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||||
|
|
||||||
// It is good form to write the letters in upper case.
|
// It is good form to write the letters in upper case.
|
||||||
letter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||||
letters := m.ToUpper(c.Seq(letter, letter))
|
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||||
|
|
||||||
// It is good form to use a single space between letters and numbers,
|
// It is good form to use a single space between letters and numbers,
|
||||||
// but it is not mandatory.
|
// but it is not mandatory.
|
||||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||||
|
|
||||||
// With all the building blocks, we can now build the postcode parser.
|
// With all the building blocks, we can now build the postcode parser.
|
||||||
postcode := c.Seq(numbers, space, letters)
|
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||||
|
|
||||||
// Create a parser and let it parse some postcode inputs.
|
// Create a parser and let it parse some postcode inputs.
|
||||||
// This will print "1234 AB" for every input, because of the built-in normalization.
|
// This will print "1234 AB" for every input, because of the built-in normalization.
|
||||||
p := parsekit.NewMatcherWrapper(postcode)
|
p := parsekit.NewMatcherWrapper(postcode)
|
||||||
for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} {
|
for _, input := range []string{"1234 AB", "1234Ab", "1234\t\tab", "1234ab"} {
|
||||||
output, _, _ := p.Match("1234 AB")
|
output, _, _ := p.Match(input)
|
||||||
fmt.Printf("Input: %q, output: %q\n", input, output)
|
fmt.Printf("Input: %q, output: %q\n", input, output)
|
||||||
}
|
}
|
||||||
// Output:
|
// Output:
|
||||||
// Input: "1234 AB", output: "1234 AB"
|
// Input: "1234 AB", output: "1234 AB"
|
||||||
// Input: "1234Ab", output: "1234 AB"
|
// Input: "1234Ab", output: "1234 AB"
|
||||||
// Input: "1234 ab", output: "1234 AB"
|
// Input: "1234\t\tab", output: "1234 AB"
|
||||||
// Input: "1234ab", output: "1234 AB"
|
// Input: "1234ab", output: "1234 AB"
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
|
||||||
|
|
||||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
switch {
|
switch {
|
||||||
case i.Type == itemEOF:
|
case i.Type == ItemEOF:
|
||||||
return i, nil, false
|
return i, nil, false
|
||||||
case i.Type == itemError:
|
case i.Type == ItemError:
|
||||||
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
||||||
return i, run.p.err, false
|
return i, run.p.err, false
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
|
||||||
p.RouteTo(func(p *P) {
|
p.RouteTo(func(p *P) {
|
||||||
p.Expects("end of file")
|
p.Expects("end of file")
|
||||||
if p.On(A.EndOfFile).Stay().End() {
|
if p.On(A.EndOfFile).Stay().End() {
|
||||||
p.Emit(itemEOF, "EOF")
|
p.Emit(ItemEOF, "EOF")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,13 +11,13 @@ import (
|
||||||
// use by parsekit.
|
// use by parsekit.
|
||||||
type ItemType int
|
type ItemType int
|
||||||
|
|
||||||
// itemEOF is a built-in parser item type that is used for flagging that the
|
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||||
// end of the input was reached.
|
// end of the input was reached.
|
||||||
const itemEOF ItemType = -1
|
const ItemEOF ItemType = -1
|
||||||
|
|
||||||
// itemError is a built-in parser item type that is used for flagging that
|
// ItemError is a built-in parser item type that is used for flagging that
|
||||||
// an error has occurred during parsing.
|
// an error has occurred during parsing.
|
||||||
const itemError ItemType = -2
|
const ItemError ItemType = -2
|
||||||
|
|
||||||
// Item represents an item that can be emitted from the parser.
|
// Item represents an item that can be emitted from the parser.
|
||||||
type Item struct {
|
type Item struct {
|
||||||
|
@ -80,7 +80,7 @@ func (err *Error) ErrorFull() string {
|
||||||
// EmitError emits a Parser error item to the client.
|
// EmitError emits a Parser error item to the client.
|
||||||
func (p *P) EmitError(format string, args ...interface{}) {
|
func (p *P) EmitError(format string, args ...interface{}) {
|
||||||
message := fmt.Sprintf(format, args...)
|
message := fmt.Sprintf(format, args...)
|
||||||
p.Emit(itemError, message)
|
p.Emit(ItemError, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnexpectedInput is used by a StateHandler function to emit an error item
|
// UnexpectedInput is used by a StateHandler function to emit an error item
|
||||||
|
|
Loading…
Reference in New Issue