306 lines
8.9 KiB
Go
306 lines
8.9 KiB
Go
package parsekit_test
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
|
|
"git.makaay.nl/mauricem/go-parsekit"
|
|
)
|
|
|
|
// Example_helloWorldUsingParser is currently a placeholder: its body is empty
// and it carries no "// Output:" comment, so there is nothing for the test
// runner to verify beyond the fact that it compiles.
func Example_helloWorldUsingParser() {}
|
|
|
|
func Example_helloWorldUsingMatcher() {
|
|
// In this example, a parser is created that is able to parse input that looks
|
|
// like "Hello, <name>!", and that extracts the name from it.
|
|
// The implementation uses only a Matcher function and does not implement a
|
|
// full-fledged state-based Parser for it.
|
|
|
|
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
|
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
|
|
|
// Using the parser/combinator support of parsekit, we create a Matcher function
|
|
// that does all the work. The 'greeting' Matcher matches the whole input and
|
|
// drops all but the name from it.
|
|
var hello = c.StrNoCase("hello")
|
|
var comma = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
|
var separator = c.Any(comma, a.Whitespace)
|
|
var name = c.OneOrMore(c.Not(a.Excl))
|
|
var greeting = c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
|
|
|
// Now we can already do some parsing, by using a Matcher.
|
|
var parser = parsekit.NewMatcher(greeting, "a friendly greeting")
|
|
for i, input := range []string{
|
|
"Hello, world!",
|
|
"HELLO ,Johnny!",
|
|
"hello , Bob123!",
|
|
"hello Pizza!",
|
|
"Oh no!",
|
|
"Hello, world",
|
|
} {
|
|
output, err, ok := parser.Parse(input)
|
|
if !ok {
|
|
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
|
} else {
|
|
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
|
}
|
|
}
|
|
// Output:
|
|
// [0] Input: "Hello, world!" Output: world
|
|
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
|
// [2] Input: "hello , Bob123!" Output: Bob123
|
|
// [3] Input: "hello Pizza!" Output: Pizza
|
|
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
|
|
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
|
|
}
|
|
|
|
func Example_basicCalculator() {
|
|
// Let's write a small example for parsing a really basic calculator.
|
|
// The calculator understands input that looks like:
|
|
//
|
|
// 10 + 20 - 8+4
|
|
//
|
|
// So positive numbers that can be either added or substracted, and whitespace
|
|
// is ignored.
|
|
|
|
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
|
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
|
|
|
// When writing a parser, it's a good start to use the parser/combinator
|
|
// functionality of parsekit to create some Matcher functions. These
|
|
// functions can later be used in the parser state machine to find the
|
|
// matching tokens on the input data.
|
|
//
|
|
// In our case, we only need a definition of "number, surrounded by
|
|
// optional whitespace". Skipping whitespace could be a part of the
|
|
// StateHandler functions below too, but including it in a Matcher makes
|
|
// things really practical here.
|
|
var whitespace = m.Drop(c.Opt(a.Whitespace))
|
|
var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
|
|
|
|
// We also must define the types of items that the parser will emit.
|
|
// We only need three of them here, for numbers, plus and minus.
|
|
// The recommended way to define these, is using 'iota' for auto numbering.
|
|
const (
|
|
numberType parsekit.ItemType = iota
|
|
addType
|
|
subtractType
|
|
)
|
|
|
|
// Now it is time to define the state machine for parsing the input.
|
|
// The state machine is built up from functions that match the StateHandler
|
|
// signature: func(*parsekit.P)
|
|
// The P struct holds the internal state for the parser and it provides
|
|
// some methods that form the API for your StateHandler implementation.
|
|
//
|
|
// (note that normally you'd write normal functions and not anonymous
|
|
// functions like I did here. I had to use these to be able to write the
|
|
// example code)
|
|
|
|
var operatorHandler parsekit.StateHandler
|
|
|
|
// In this state, we expect a number. When a number is found on the input,
|
|
// it is accepted in the output buffer, after which the output buffer is
|
|
// emitted as a numberType item. Then we tell the state machine to continue
|
|
// with the operatorHandler state.
|
|
// When no number is found, the parser will emit an error, explaining that
|
|
// "a number" was expected.
|
|
numberHandler := func(p *parsekit.P) {
|
|
p.Expects("a number")
|
|
if p.On(number).Accept().End() {
|
|
p.EmitLiteral(numberType)
|
|
p.RouteTo(operatorHandler)
|
|
}
|
|
}
|
|
|
|
// In this state, we expect a plus or minus operator. When one of those
|
|
// is found, the appropriate Item is emitted and the parser is sent back
|
|
// to the numberHandler to find the next number on the input.
|
|
// When no operator is found, then the parser is told to expect the end of
|
|
// the input. When more input data is available (which is obviously wrong
|
|
// data since it does not match our syntax), the parser will emit an error.
|
|
operatorHandler = func(p *parsekit.P) {
|
|
switch {
|
|
case p.On(a.Plus).Accept().End():
|
|
p.EmitLiteral(addType)
|
|
p.RouteTo(numberHandler)
|
|
case p.On(a.Minus).Accept().End():
|
|
p.EmitLiteral(subtractType)
|
|
p.RouteTo(numberHandler)
|
|
default:
|
|
p.ExpectEndOfFile()
|
|
}
|
|
}
|
|
|
|
// All is ready for our parser. We now can create a new Parser struct.
|
|
// We need to tell it what the start state is. In our case, it is
|
|
// the number state, since the calculation must start with a number.
|
|
parser := parsekit.NewParser(numberHandler)
|
|
|
|
// Let's feed the parser some input to work with.
|
|
run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
|
|
|
// We can now step through the results of the parsing process by repeated
|
|
// calls to run.Next(). Next() returns either the next parse item, a parse
|
|
// error or an end of file. Let's dump the parse results and handle the
|
|
// computation while we're at it.
|
|
sum := 0
|
|
op := +1
|
|
for {
|
|
item, err, ok := run.Next()
|
|
switch {
|
|
case !ok && err == nil:
|
|
fmt.Println("End of file reached")
|
|
fmt.Println("Outcome of computation:", sum)
|
|
return
|
|
case !ok:
|
|
fmt.Printf("Error: %s\n", err)
|
|
return
|
|
default:
|
|
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
|
switch {
|
|
case item.Type == addType:
|
|
op = +1
|
|
case item.Type == subtractType:
|
|
op = -1
|
|
case item.Type == numberType:
|
|
nr, err := strconv.Atoi(item.Value)
|
|
if err != nil {
|
|
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
|
|
return
|
|
}
|
|
sum += op * nr
|
|
}
|
|
}
|
|
}
|
|
|
|
// Output:
|
|
// Type: 0, Value: "153"
|
|
// Type: 1, Value: "+"
|
|
// Type: 0, Value: "22"
|
|
// Type: 1, Value: "+"
|
|
// Type: 0, Value: "31"
|
|
// Type: 2, Value: "-"
|
|
// Type: 0, Value: "4"
|
|
// Type: 2, Value: "-"
|
|
// Type: 0, Value: "6"
|
|
// Type: 1, Value: "+"
|
|
// Type: 0, Value: "42"
|
|
// End of file reached
|
|
// Outcome of computation: 238
|
|
}
|
|
|
|
func ExampleItemType() {
|
|
// Make use of positive values. Ideally, define your ItemTypes using
|
|
// iota for easy automatic value management like this:
|
|
const (
|
|
ItemWord parsekit.ItemType = iota
|
|
ItemNumber
|
|
ItemBlob
|
|
// ...
|
|
)
|
|
}
|
|
|
|
func ExampleItem() {
|
|
var c = parsekit.C
|
|
|
|
// You define your own item types for your specific parser.
|
|
var QuestionItem parsekit.ItemType = 42
|
|
|
|
// A StateHandler function can use the defined item type by means of
|
|
// the p.Emit* methods on parsekit.P.
|
|
// When errors occur, or the end of the file is reached, then the built-in
|
|
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
|
stateHandler := func(p *parsekit.P) {
|
|
if p.On(c.Str("question")).Accept().End() {
|
|
p.EmitLiteral(QuestionItem)
|
|
}
|
|
p.ExpectEndOfFile()
|
|
}
|
|
|
|
// Successful match
|
|
item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next()
|
|
fmt.Println(ok, item.Type == QuestionItem, item.Value)
|
|
|
|
// End of file reached
|
|
item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next()
|
|
fmt.Println(ok, item.Type == parsekit.ItemEOF)
|
|
|
|
// An error occurred
|
|
item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next()
|
|
fmt.Println(ok, item.Type == parsekit.ItemError, err)
|
|
|
|
// Output:
|
|
// true true question
|
|
// false true
|
|
// false true unexpected character 'a' (expected end of file)
|
|
}
|
|
|
|
func ExampleError() {
|
|
err := &parsekit.Error{
|
|
Message: "it broke down",
|
|
Line: 10,
|
|
Column: 42,
|
|
}
|
|
|
|
fmt.Println(err.Error())
|
|
fmt.Printf("%s\n", err)
|
|
fmt.Println(err.ErrorFull())
|
|
// Output:
|
|
// it broke down
|
|
// it broke down
|
|
// it broke down after line 10, column 42
|
|
}
|
|
|
|
func ExampleError_Error() {
|
|
err := &parsekit.Error{
|
|
Message: "it broke down",
|
|
Line: 10,
|
|
Column: 42,
|
|
}
|
|
|
|
fmt.Println(err.Error())
|
|
fmt.Printf("%s\n", err)
|
|
// Output:
|
|
// it broke down
|
|
// it broke down
|
|
}
|
|
|
|
func ExampleError_ErrorFull() {
|
|
err := &parsekit.Error{
|
|
Message: "it broke down",
|
|
Line: 10,
|
|
Column: 42,
|
|
}
|
|
|
|
fmt.Println(err.ErrorFull())
|
|
// Output:
|
|
// it broke down after line 10, column 42
|
|
}
|
|
|
|
func ExampleMatchAnyRune() {
|
|
// Easy access to the parsekit definitions.
|
|
var a = parsekit.A
|
|
|
|
handler := func(p *parsekit.P) {
|
|
p.Expects("Any valid rune")
|
|
if p.On(a.AnyRune).Accept().End() {
|
|
p.EmitLiteral(TestItem)
|
|
p.RouteRepeat()
|
|
}
|
|
}
|
|
parser := parsekit.NewParser(handler)
|
|
run := parser.Parse("¡Any / valid / character will dö!")
|
|
|
|
for i := 0; i < 5; i++ {
|
|
match, _, _ := run.Next()
|
|
fmt.Printf("Match = %q\n", match.Value)
|
|
}
|
|
// Output:
|
|
// Match = "¡"
|
|
// Match = "A"
|
|
// Match = "n"
|
|
// Match = "y"
|
|
// Match = " "
|
|
}
|