Pfff, okay, so today I found a rather nice way of writing a parser that can hold some internal state, while still holding on to the basic principle of having a function signature that applies to every parse handler in the system. By using methods instead of stand-alone functions, it is possible to let the accompanying struct hold on to the required state. Implemented this principle for the two calculator example projects. The rest is quite broken now, sorry :-p
This commit is contained in:
parent
f7d1e28fa1
commit
41f8733e99
|
|
@ -0,0 +1,109 @@
|
||||||
|
// Let's write a parser for a really basic calculator.
|
||||||
|
// The calculator understands input that looks like:
|
||||||
|
//
|
||||||
|
// 10 + 20 - 8+4
|
||||||
|
//
|
||||||
|
// So positive numbers that can be either added or subtracted, and whitespace
|
||||||
|
// is ignored.
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Example_basicCalculator1() {
|
||||||
|
for _, c := range []struct {
|
||||||
|
input string
|
||||||
|
expected int64
|
||||||
|
}{
|
||||||
|
{"0", 0},
|
||||||
|
{"1", 1},
|
||||||
|
{"1+2+3", 6},
|
||||||
|
{" 10 + \t20 - 3 + 7 -10 ", 24},
|
||||||
|
{"", 0},
|
||||||
|
{" \t ", 0},
|
||||||
|
{"+", 0},
|
||||||
|
{"10.8 + 12", 0},
|
||||||
|
{"42+ ", 0},
|
||||||
|
} {
|
||||||
|
output, err := ComputeSimple(c.input)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Input: %q, got error: %s\n", c.input, err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("Input: %q, got outcome: %d, correct = %t\n", c.input, output, output == c.expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// Input: "0", got outcome: 0, correct = true
|
||||||
|
// Input: "1", got outcome: 1, correct = true
|
||||||
|
// Input: "1+2+3", got outcome: 6, correct = true
|
||||||
|
// Input: " 10 + \t20 - 3 + 7 -10 ", got outcome: 24, correct = true
|
||||||
|
// Input: "", got error: unexpected end of file (expected integer number)
|
||||||
|
// Input: " \t ", got error: unexpected character ' ' (expected integer number)
|
||||||
|
// Input: "+", got error: unexpected character '+' (expected integer number)
|
||||||
|
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
|
||||||
|
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Implementation of the calculator
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// CalculateSimple interprets a simple calculation, consisting of only integers
|
||||||
|
// and add or subtract operators. It returns the result of the calculation.
|
||||||
|
// An error is returned in case the calculation failed.
|
||||||
|
func ComputeSimple(calculation string) (int64, *parsekit.Error) {
|
||||||
|
calculator := &simpleCalculator{op: +1}
|
||||||
|
parser := parsekit.NewParser(calculator.number)
|
||||||
|
_, err, _ := parser.Parse(calculation).Next()
|
||||||
|
return calculator.Result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// simpleCalculator is the parsing state machine for the simple calculator.
// Its parse handlers are implemented as methods, which lets them share the
// state that is stored in this struct while the input is being parsed.
type simpleCalculator struct {
	// Result holds the resulting outcome of the computation.
	Result int64

	// op represents the operation for the next term
	// (+1 = add, -1 = subtract).
	op int64
}
|
||||||
|
|
||||||
|
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
||||||
|
// A definition of integer, which conveniently drops surrounding whitespace.
|
||||||
|
pc, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
whitespace := m.Drop(pc.Opt(a.Whitespace))
|
||||||
|
integer := pc.Seq(whitespace, a.Integer, whitespace)
|
||||||
|
|
||||||
|
if p.On(integer).Accept() {
|
||||||
|
value, err := strconv.ParseInt(p.BufLiteral(), 10, 64)
|
||||||
|
p.BufClear()
|
||||||
|
if err != nil {
|
||||||
|
p.EmitError("invalid value %q: %s", value, err)
|
||||||
|
} else {
|
||||||
|
c.Result += c.op * value
|
||||||
|
p.Handle(c.operatorOrEndOfFile)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p.Expects("integer number")
|
||||||
|
p.UnexpectedInput()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
|
||||||
|
var a = parsekit.A
|
||||||
|
switch {
|
||||||
|
case p.On(a.Add).Skip():
|
||||||
|
c.op = +1
|
||||||
|
p.Handle(c.number)
|
||||||
|
case p.On(a.Subtract).Skip():
|
||||||
|
c.op = -1
|
||||||
|
p.Handle(c.number)
|
||||||
|
case !p.On(a.EndOfFile).Stay():
|
||||||
|
p.Expects("operator, '+' or '-'")
|
||||||
|
p.UnexpectedInput()
|
||||||
|
default:
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,213 @@
|
||||||
|
// Let's write the hello world of parsers: a calculator that can interpret
|
||||||
|
// calculations that look like:
|
||||||
|
//
|
||||||
|
// " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 "
|
||||||
|
//
|
||||||
|
// More formally, a calculation is defined as:
|
||||||
|
//
|
||||||
|
// calculation : expr EOF
|
||||||
|
// expr : term ((ADD|SUB) term)*
|
||||||
|
// term : factor ((MUL|DIV) factor)*
|
||||||
|
// space : (SPACE|TAB)*
|
||||||
|
// factor : space (FLOAT | LPAREN expr RPAREN) space
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Example_basicCalculator2() {
|
||||||
|
for _, c := range []struct {
|
||||||
|
input string
|
||||||
|
expected float64
|
||||||
|
}{
|
||||||
|
{"1", 1},
|
||||||
|
{"(123.10)", 123.10},
|
||||||
|
{"1 + 2 + 3 + 4 + 5", 15},
|
||||||
|
{"1 * 2 * 3 * 4 * 5 * 0.6", 72},
|
||||||
|
{"(3.05+2)*(4.3+5.12)", 47.571},
|
||||||
|
{"8.10 + 999/233", 12.387554},
|
||||||
|
{" -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", -20.168},
|
||||||
|
{"", 0},
|
||||||
|
{"(", 0},
|
||||||
|
{"10+20-", 0},
|
||||||
|
{"10+20-(4*10))", 0},
|
||||||
|
{"10+20-((4*10) + 17", 0},
|
||||||
|
} {
|
||||||
|
output, err := Compute(c.input)
|
||||||
|
output = math.Round(output*1000000) / 1000000 // to make the expectation comparisons usable
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Input: %q, got error: %s\n", c.input, err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", c.input, output, output == c.expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// Input: "1", got outcome: 1.000000, correct = true
|
||||||
|
// Input: "(123.10)", got outcome: 123.100000, correct = true
|
||||||
|
// Input: "1 + 2 + 3 + 4 + 5", got outcome: 15.000000, correct = true
|
||||||
|
// Input: "1 * 2 * 3 * 4 * 5 * 0.6", got outcome: 72.000000, correct = true
|
||||||
|
// Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true
|
||||||
|
// Input: "8.10 + 999/233", got outcome: 12.387554, correct = true
|
||||||
|
// Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", got outcome: -20.168000, correct = true
|
||||||
|
// Input: "", got error: unexpected end of file
|
||||||
|
// Input: "(", got error: unexpected end of file
|
||||||
|
// Input: "10+20-", got error: unexpected end of file
|
||||||
|
// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file)
|
||||||
|
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')')
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Implementation of the calculator
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// calculator implements a recursive descent parser that is responsible for parsing
|
||||||
|
// the input computation string according to the grammar.
|
||||||
|
// It offloads the actual computation to a separate interpreter.
|
||||||
|
type calculator struct {
|
||||||
|
interpreter interpreter
|
||||||
|
result float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute takes a calculation string as input and returns the interpreted result
|
||||||
|
// value for the calculation. An error can be returned as well, in case the
|
||||||
|
// computation fails for some reason.
|
||||||
|
func Compute(input string) (float64, *parsekit.Error) {
|
||||||
|
c := &calculator{}
|
||||||
|
parser := parsekit.NewParser(c.computation)
|
||||||
|
_, err, _ := parser.Parse(input).Next()
|
||||||
|
return c.result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *calculator) computation(p *parsekit.ParseAPI) {
|
||||||
|
c.interpreter.push()
|
||||||
|
|
||||||
|
p.Handle(c.expr)
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
p.Handle(c.factor)
|
||||||
|
|
||||||
|
c.result = c.interpreter.pop()
|
||||||
|
}
|
||||||
|
|
||||||
|
// expr : term ((ADD|SUB) term)*
|
||||||
|
func (c *calculator) expr(p *parsekit.ParseAPI) {
|
||||||
|
c.interpreter.push()
|
||||||
|
|
||||||
|
var pc, a = parsekit.C, parsekit.A
|
||||||
|
p.Handle(c.term)
|
||||||
|
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
|
||||||
|
c.interpreter.pushOperator(p.LastMatch)
|
||||||
|
p.Handle(c.term)
|
||||||
|
c.interpreter.eval()
|
||||||
|
}
|
||||||
|
|
||||||
|
c.interpreter.pop()
|
||||||
|
}
|
||||||
|
|
||||||
|
// term : factor ((MUL|DIV) factor)*
|
||||||
|
func (c *calculator) term(p *parsekit.ParseAPI) {
|
||||||
|
c.interpreter.push()
|
||||||
|
|
||||||
|
var pc, a = parsekit.C, parsekit.A
|
||||||
|
p.Handle(c.factor)
|
||||||
|
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
|
||||||
|
c.interpreter.pushOperator(p.LastMatch)
|
||||||
|
p.Handle(c.factor)
|
||||||
|
c.interpreter.eval()
|
||||||
|
}
|
||||||
|
|
||||||
|
c.interpreter.pop()
|
||||||
|
}
|
||||||
|
|
||||||
|
// factor : space (FLOAT | LPAREN expr RPAREN) space
|
||||||
|
func (c *calculator) factor(p *parsekit.ParseAPI) {
|
||||||
|
var pc, a = parsekit.C, parsekit.A
|
||||||
|
p.On(a.Whitespace).Skip()
|
||||||
|
switch {
|
||||||
|
case p.On(pc.Signed(a.Float)).Accept():
|
||||||
|
floatStr := p.BufLiteral()
|
||||||
|
p.BufClear()
|
||||||
|
value, err := strconv.ParseFloat(floatStr, 64)
|
||||||
|
if err != nil {
|
||||||
|
p.EmitError("invalid number %s: %s", floatStr, err)
|
||||||
|
} else {
|
||||||
|
c.interpreter.pushValue(value)
|
||||||
|
}
|
||||||
|
case p.On(a.LeftParen).Skip():
|
||||||
|
p.Handle(c.expr)
|
||||||
|
if !p.On(a.RightParen).Skip() {
|
||||||
|
p.Expects("')'")
|
||||||
|
p.UnexpectedInput()
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
p.UnexpectedInput()
|
||||||
|
}
|
||||||
|
p.On(a.Whitespace).Skip()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// The computational interpreter, used by the calculator.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// stackFrame holds the state for evaluating a single binary operation: the
// two most recently pushed values (a is the older one, b the newer one) and
// the operation to apply to them.
type stackFrame struct {
	a  float64
	b  float64
	op func(a, b float64) float64
}

// interpreter performs the computations for the calculator. It keeps a stack
// of frames and all operations work on the frame at the top of that stack.
type interpreter struct {
	stack []*stackFrame
	top   *stackFrame
}

// interpreterOps maps the operator names that pushOperator understands to
// their implementations. "VAL" is the operation that every frame starts out
// with and returns to after an evaluation: it yields the newest value.
var interpreterOps = map[string]func(a, b float64) float64{
	"VAL": func(a, b float64) float64 { return b },
	"+":   func(a, b float64) float64 { return a + b },
	"-":   func(a, b float64) float64 { return a - b },
	"*":   func(a, b float64) float64 { return a * b },
	"/":   func(a, b float64) float64 { return a / b },
}

// push puts a new frame, initialized with the "VAL" operation, on top of the
// stack and returns that frame.
func (i *interpreter) push() *stackFrame {
	i.top = &stackFrame{}
	i.stack = append(i.stack, i.top)
	i.pushOperator("VAL")
	return i.top
}

// pop evaluates the top frame and removes it from the stack. When a frame
// remains below it, that frame becomes the top frame and receives the
// evaluated value as its newest value. The evaluated value is returned.
func (i *interpreter) pop() float64 {
	result := i.eval()

	remaining := len(i.stack) - 1
	i.stack = i.stack[:remaining]
	if remaining == 0 {
		i.top = nil
		return result
	}

	i.top = i.stack[remaining-1]
	i.pushValue(result)
	return result
}

// pushValue stores a new value in the top frame: the previous newest value
// shifts from b to a, and the provided value becomes b.
func (i *interpreter) pushValue(value float64) {
	frame := i.top
	frame.a = frame.b
	frame.b = value
}

// pushOperator sets the operation for the top frame. The supported names are
// "VAL", "+", "-", "*" and "/". Any other name results in a panic.
func (i *interpreter) pushOperator(op string) {
	operation, known := interpreterOps[op]
	if !known {
		panic(fmt.Sprintf("Unhandled op name: %s", op))
	}
	i.top.op = operation
}

// eval applies the operation of the top frame to its two values, pushes the
// outcome back into the frame as its newest value and resets the operation
// to "VAL". The outcome is returned.
func (i *interpreter) eval() float64 {
	frame := i.top
	result := frame.op(frame.a, frame.b)
	i.pushValue(result)
	i.pushOperator("VAL")
	return result
}
|
||||||
|
|
@ -1,142 +0,0 @@
|
||||||
// Let's write a small example for parsing a really basic calculator.
|
|
||||||
// The calculator understands input that looks like:
|
|
||||||
//
|
|
||||||
// 10 + 20 - 8+4
|
|
||||||
//
|
|
||||||
// So positive numbers that can be either added or substracted, and whitespace
|
|
||||||
// is ignored.
|
|
||||||
package parsekit_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
|
||||||
)
|
|
||||||
|
|
||||||
// When writing a parser, it's a good start to use the parser/combinator
|
|
||||||
// functionality of parsekit to create some TokenHandler functions. These functions
|
|
||||||
// can later be used in the parser state machine to check for matching strings
|
|
||||||
// on the input data.
|
|
||||||
//
|
|
||||||
// For the calculator, we only need a definition of "number, surrounded by
|
|
||||||
// optional whitespace". Skipping whitespace could be a part of the StateHandler
|
|
||||||
// functions below too, but including it in a TokenHandler makes things really
|
|
||||||
// practical.
|
|
||||||
func createNumberMatcher() parsekit.TokenHandler {
|
|
||||||
// Easy access to parsekit definition.
|
|
||||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
|
||||||
|
|
||||||
whitespace := m.Drop(c.Opt(a.Whitespace))
|
|
||||||
return c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
|
|
||||||
}
|
|
||||||
|
|
||||||
var calcNumber = createNumberMatcher()
|
|
||||||
|
|
||||||
// We need to define the ItemTypes that we will use for emitting Items
|
|
||||||
// during the parsing process.
|
|
||||||
const (
|
|
||||||
numberType parsekit.ItemType = iota
|
|
||||||
addType
|
|
||||||
subtractType
|
|
||||||
)
|
|
||||||
|
|
||||||
// We also need to define the state machine for parsing the input.
|
|
||||||
// The state machine is built up from functions that match the StateHandler
|
|
||||||
// signature: func(*parsekit.ParseAPI)
|
|
||||||
// The ParseAPI struct holds the internal state for the parser and it provides
|
|
||||||
// some methods that form the API for your StateHandler implementation.
|
|
||||||
|
|
||||||
// State: expect a number. When a number is found on the input,
|
|
||||||
// it is accepted in the parser's string buffer, after which that buffer is
|
|
||||||
// emitted as a numberType item. Then we tell the state machine to continue
|
|
||||||
// with the calcWaitForOperatorOrEndOfInput state.
|
|
||||||
// When no number is found, the parser will emit an error, explaining that
|
|
||||||
// "a number" was expected.
|
|
||||||
func calcWaitForNumber(p *parsekit.ParseAPI) {
|
|
||||||
p.Expects("a number")
|
|
||||||
if p.On(calcNumber).Accept() {
|
|
||||||
p.EmitLiteral(numberType)
|
|
||||||
p.RouteTo(calcWaitForOperatorOrEndOfInput)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// State: expect a plus or minus operator. When one of those is found, the
|
|
||||||
// appropriate Item is emitted and the parser is sent back to the
|
|
||||||
// numberHandler to find the next number on the input. When no operator is
|
|
||||||
// found, then the parser is told to expect the end of the input. When more
|
|
||||||
// input data are available (which are obviously wrong data since they do
|
|
||||||
// not match our syntax), the parser will emit an error.
|
|
||||||
func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) {
|
|
||||||
switch {
|
|
||||||
case p.On(a.Plus).Accept():
|
|
||||||
p.EmitLiteral(addType)
|
|
||||||
p.RouteTo(calcWaitForNumber)
|
|
||||||
case p.On(a.Minus).Accept():
|
|
||||||
p.EmitLiteral(subtractType)
|
|
||||||
p.RouteTo(calcWaitForNumber)
|
|
||||||
default:
|
|
||||||
p.ExpectEndOfFile()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All is ready for our parser. We now can create a new Parser struct.
|
|
||||||
// We need to tell it what StateHandler to start with. In our case, it is the
|
|
||||||
// calcWaitForNumber state, since the calculation must start with a number.
|
|
||||||
var calcParser = parsekit.NewParser(calcWaitForNumber)
|
|
||||||
|
|
||||||
func Example_basicCalculator() {
|
|
||||||
// Let's feed the parser some input to work with. This provides us with
|
|
||||||
// a parse run for that input.
|
|
||||||
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
|
||||||
|
|
||||||
// We can now step through the results of the parsing process by repeated
|
|
||||||
// calls to run.Next(). Next() returns either the next parse item, a parse
|
|
||||||
// error or an end of file. Let's dump the parse results and handle the
|
|
||||||
// computation while we're at it.
|
|
||||||
// TODO this in convoluted for people using the parser code I think. Maybe use three output data types instead?
|
|
||||||
sum := 0
|
|
||||||
op := +1
|
|
||||||
for {
|
|
||||||
item, err, ok := run.Next()
|
|
||||||
switch {
|
|
||||||
case !ok && err == nil:
|
|
||||||
fmt.Println("End of file reached")
|
|
||||||
fmt.Println("Outcome of computation:", sum)
|
|
||||||
return
|
|
||||||
case !ok:
|
|
||||||
fmt.Printf("Error: %s\n", err)
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
|
||||||
switch {
|
|
||||||
case item.Type == addType:
|
|
||||||
op = +1
|
|
||||||
case item.Type == subtractType:
|
|
||||||
op = -1
|
|
||||||
case item.Type == numberType:
|
|
||||||
nr, err := strconv.Atoi(item.Value)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sum += op * nr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// Type: 0, Value: "153"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "22"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "31"
|
|
||||||
// Type: 2, Value: "-"
|
|
||||||
// Type: 0, Value: "4"
|
|
||||||
// Type: 2, Value: "-"
|
|
||||||
// Type: 0, Value: "6"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "42"
|
|
||||||
// End of file reached
|
|
||||||
// Outcome of computation: 238
|
|
||||||
}
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
// In this example, a Paparserrser is created which can parse and normalize Dutch postcodes
|
// In this example, a Parser is created that can parse and normalize Dutch postcodes
|
||||||
// The implementation uses only TokenHandler functions and does not implement a
|
// The implementation uses only TokenHandler functions and does not implement a
|
||||||
// full-fledged state-based Parser for it.
|
// full-fledged state-based Parser for it.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
@ -26,7 +26,7 @@ func createPostcodeMatcher() *parsekit.Matcher {
|
||||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||||
postcode := c.Seq(pcDigits, space, pcLetters)
|
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||||
|
|
||||||
// Create a Matcher, which wraps the 'postcode' TokenHandler and allows
|
// Create a Matcher that wraps the 'postcode' TokenHandler and allows
|
||||||
// us to match some input against that handler.
|
// us to match some input against that handler.
|
||||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ func createHelloMatcher() *parsekit.Matcher {
|
||||||
name := c.OneOrMore(c.Not(a.Excl))
|
name := c.OneOrMore(c.Not(a.Excl))
|
||||||
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
||||||
|
|
||||||
// Create a Matcher, which wraps the 'greeting' TokenHandler and allows
|
// Create a Matcher that wraps the 'greeting' TokenHandler and allows
|
||||||
// us to match some input against that handler.
|
// us to match some input against that handler.
|
||||||
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ func ExampleItem() {
|
||||||
// You define your own item types for your specific parser.
|
// You define your own item types for your specific parser.
|
||||||
const QuestionItem = parsekit.ItemType(42)
|
const QuestionItem = parsekit.ItemType(42)
|
||||||
|
|
||||||
// A StateHandler function can use the defined item type by means of
|
// A ParseHandler function can use the defined item type by means of
|
||||||
// the p.Emit* methods on parsekit.P.
|
// the p.Emit* methods on parsekit.P.
|
||||||
// When errors occur, or the end of the file is reached, then the built-in
|
// When errors occur, or the end of the file is reached, then the built-in
|
||||||
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
||||||
|
|
|
||||||
|
|
@ -2,20 +2,20 @@ package parsekit
|
||||||
|
|
||||||
import "unicode/utf8"
|
import "unicode/utf8"
|
||||||
|
|
||||||
// StateHandler defines the type of function that must be implemented to handle
|
// ParseHandler defines the type of function that must be implemented to handle
|
||||||
// a parsing state in a Parser state machine.
|
// a parsing state in a Parser state machine.
|
||||||
//
|
//
|
||||||
// A StateHandler function gets a ParseAPI struct as its input. This struct holds
|
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
|
||||||
// all the internal state for the parsing state machine and provides the
|
// all the internal state for the parsing state machine and provides the
|
||||||
// interface that the StateHandler uses to interact with the parser.
|
// interface that the ParseHandler uses to interact with the parser.
|
||||||
type StateHandler func(*ParseAPI)
|
type ParseHandler func(*ParseAPI)
|
||||||
|
|
||||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||||
// StateHandler methods to communicate with the parser.
|
// ParseHandler methods to communicate with the parser.
|
||||||
type ParseAPI struct {
|
type ParseAPI struct {
|
||||||
state StateHandler // the function that handles the current state
|
state ParseHandler // the function that handles the current state
|
||||||
nextState StateHandler // the function that will handle the next state
|
nextState ParseHandler // the function that will handle the next state
|
||||||
routeStack []StateHandler // route stack, for handling nested parsing
|
routeStack []ParseHandler // route stack, for handling nested parsing
|
||||||
input string // the input that is being scanned by the parser
|
input string // the input that is being scanned by the parser
|
||||||
inputPos int // current byte cursor position in the input
|
inputPos int // current byte cursor position in the input
|
||||||
cursorLine int // current rune cursor row number in the input
|
cursorLine int // current rune cursor row number in the input
|
||||||
|
|
@ -26,7 +26,7 @@ type ParseAPI struct {
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||||
items []Item // a slice of resulting Parser items (see P.Emit())
|
items []Item // a slice of resulting Parser items (see P.Emit())
|
||||||
item Item // the current item as reached by Next() and retrieved by Get()
|
item Item // the current item as reached by Next() and retrieved by Get()
|
||||||
err *Error // an error when lexing failed, retrieved by Error()
|
err *Error // an error when parsing failed, can be retrieved by Error()
|
||||||
|
|
||||||
LastMatch string // a string representation of the last matched input data
|
LastMatch string // a string representation of the last matched input data
|
||||||
}
|
}
|
||||||
|
|
@ -42,11 +42,11 @@ func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
||||||
return handleRuneError(r, w)
|
return handleRuneError(r, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
// eofRune is a special rune, which is used to indicate an end of file when
|
// eofRune is a special rune that is used to indicate an end of file when
|
||||||
// reading a character from the input.
|
// reading a character from the input.
|
||||||
const eofRune rune = -1
|
const eofRune rune = -1
|
||||||
|
|
||||||
// invalidRune is a special rune, which is used to indicate an invalid UTF8
|
// invalidRune is a special rune that is used to indicate an invalid UTF8
|
||||||
// rune on the input.
|
// rune on the input.
|
||||||
const invalidRune rune = utf8.RuneError
|
const invalidRune rune = utf8.RuneError
|
||||||
|
|
||||||
|
|
@ -4,7 +4,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Item represents an item that can be emitted from the parser.
|
// Item represents an item that can be emitted from a ParseHandler function.
|
||||||
type Item struct {
|
type Item struct {
|
||||||
Type ItemType
|
Type ItemType
|
||||||
Value string
|
Value string
|
||||||
|
|
@ -50,6 +50,11 @@ func (p *ParseAPI) EmitLiteral(t ItemType) {
|
||||||
p.Emit(t, p.BufLiteral())
|
p.Emit(t, p.BufLiteral())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BufClear clears the contents of the parser string buffer.
|
||||||
|
func (p *ParseAPI) BufClear() {
|
||||||
|
p.buffer.reset()
|
||||||
|
}
|
||||||
|
|
||||||
// BufInterpreted retrieves the contents of the parser's string buffer (all
|
// BufInterpreted retrieves the contents of the parser's string buffer (all
|
||||||
// the runes that were added to it using ParseAPI.Accept()) as an
|
// the runes that were added to it using ParseAPI.Accept()) as an
|
||||||
// interpreted string.
|
// interpreted string.
|
||||||
|
|
@ -118,9 +123,18 @@ func (p *ParseAPI) EmitError(format string, args ...interface{}) {
|
||||||
p.Emit(ItemError, message)
|
p.Emit(ItemError, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnexpectedInput is used by a StateHandler function to emit an error item
|
// EmitEOF emits an EOF to the client. In effect, this will stop the parsing process.
|
||||||
|
func (p *ParseAPI) EmitEOF() {
|
||||||
|
p.Emit(ItemEOF, "EOF")
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnexpectedInput is used by a ParseHandler function to emit an error item
|
||||||
// that tells the client that an unexpected rune was encountered in the input.
|
// that tells the client that an unexpected rune was encountered in the input.
|
||||||
func (p *ParseAPI) UnexpectedInput() {
|
func (p *ParseAPI) UnexpectedInput() {
|
||||||
|
// When some previous parsing step yielded an error, skip this operation.
|
||||||
|
if p.err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
r, _, ok := p.peek(0)
|
r, _, ok := p.peek(0)
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case ok:
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// Expects is used to let a StateHandler function describe what input it is expecting.
|
// Expects is used to let a ParseHandler function describe what input it is expecting.
|
||||||
// This expectation is used in error messages to make them more descriptive.
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
//
|
//
|
||||||
// When defining an expectation inside a StateHandler, you do not need to
|
// When defining an expectation inside a ParseHandler, you do not need to
|
||||||
// handle unexpected input yourself. When the end of the function is reached
|
// handle unexpected input yourself. When the end of the function is reached
|
||||||
// without setting the next state, an automatic error will be emitted.
|
// without setting the next state, an automatic error will be emitted.
|
||||||
// This error can differentiate between the following issues:
|
// This error can differentiate between the following issues:
|
||||||
|
|
@ -14,5 +14,7 @@ package parsekit
|
||||||
//
|
//
|
||||||
// 3) the end of the file was reached.
|
// 3) the end of the file was reached.
|
||||||
func (p *ParseAPI) Expects(description string) {
|
func (p *ParseAPI) Expects(description string) {
|
||||||
|
// TODO make this into some debugging tool?
|
||||||
|
// fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn)
|
||||||
p.expecting = description
|
p.expecting = description
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// On checks if the input at the current cursor position matches the provided
|
// On checks if the input at the current cursor position matches the provided
|
||||||
// TokenHandler. On must be chained with another method, which tells the parser
|
// TokenHandler. On must be chained with another method that tells the parser
|
||||||
// what action to perform when a match was found:
|
// what action to perform when a match was found:
|
||||||
//
|
//
|
||||||
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||||
|
|
@ -14,7 +14,7 @@ package parsekit
|
||||||
//
|
//
|
||||||
// p.On(parsekit.A.Whitespace).Skip()
|
// p.On(parsekit.A.Whitespace).Skip()
|
||||||
//
|
//
|
||||||
// The chain as a whole returns a boolean, which indicates whether or not at match
|
// The chain as a whole returns a boolean that indicates whether or not a match
|
||||||
// was found. When no match was found, false is returned and Skip() and Accept()
|
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||||
// will have no effect. Because of this, typical use of an On() chain is as
|
// will have no effect. Because of this, typical use of an On() chain is as
|
||||||
// expression for a conditional expression (if, switch/case, for). E.g.:
|
// expression for a conditional expression (if, switch/case, for). E.g.:
|
||||||
|
|
@ -37,6 +37,15 @@ package parsekit
|
||||||
// p.Emit(SomeItemType, p.BufLiteral())
|
// p.Emit(SomeItemType, p.BufLiteral())
|
||||||
// }
|
// }
|
||||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
|
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
|
||||||
|
// When some previous parsing step yielded an error, skip this operation.
|
||||||
|
if p.err != nil {
|
||||||
|
return &MatchAction{
|
||||||
|
p: p,
|
||||||
|
ok: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform the matching operation.
|
||||||
m := &TokenAPI{p: p}
|
m := &TokenAPI{p: p}
|
||||||
if tokenHandler == nil {
|
if tokenHandler == nil {
|
||||||
panic("internal parser error: tokenHandler argument for On() is nil")
|
panic("internal parser error: tokenHandler argument for On() is nil")
|
||||||
|
|
@ -0,0 +1,125 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
// Handle is used to execute other ParseHandler functions from within your
|
||||||
|
// ParseHandler function.
|
||||||
|
func (p *ParseAPI) Handle(handlers ...ParseHandler) {
|
||||||
|
for _, handler := range handlers {
|
||||||
|
// When some previous parsing step yielded an error, skip this operation.
|
||||||
|
if p.err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
handler(p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteTo tells the parser what ParseHandler function to invoke on
|
||||||
|
// the next parse cycle.
|
||||||
|
func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
|
||||||
|
p.nextState = handler
|
||||||
|
return &RouteFollowupAction{p}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteRepeat tells the parser that on the next parsing cycle, the current
|
||||||
|
// ParseHandler must be reinvoked.
|
||||||
|
func (p *ParseAPI) RouteRepeat() {
|
||||||
|
p.RouteTo(p.state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteReturn tells the parser that on the next cycle the last ParseHandler
|
||||||
|
// that was pushed on the route stack must be invoked.
|
||||||
|
//
|
||||||
|
// Using this method is optional. When implementing a ParseHandler that
|
||||||
|
// is used as a sort of subroutine (using constructions like
|
||||||
|
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||||
|
// providing an explicit routing decision from that handler. The parser will
|
||||||
|
// automatically assume a RouteReturn() in that case.
|
||||||
|
func (p *ParseAPI) RouteReturn() {
|
||||||
|
p.nextState = p.popRoute()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteFollowupAction chains parsing routes.
|
||||||
|
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||||
|
type RouteFollowupAction struct {
|
||||||
|
p *ParseAPI
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenTo schedules a ParseHandler that must be invoked after the RouteTo
|
||||||
|
// ParseHandler has been completed.
|
||||||
|
// For example:
|
||||||
|
//
|
||||||
|
// p.RouteTo(handlerA).ThenTo(handlerB)
|
||||||
|
func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
|
||||||
|
a.p.pushRoute(state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenReturnHere schedules the current ParseHandler to be invoked after
|
||||||
|
// the RouteTo ParseHandler has been completed.
|
||||||
|
// For example:
|
||||||
|
//
|
||||||
|
// p.RouteTo(handlerA).ThenReturnHere()
|
||||||
|
func (a *RouteFollowupAction) ThenReturnHere() {
|
||||||
|
a.p.pushRoute(a.p.state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pushRoute adds the ParseHandler to the route stack.
|
||||||
|
// This is used for implementing nested parsing.
|
||||||
|
func (p *ParseAPI) pushRoute(state ParseHandler) {
|
||||||
|
p.routeStack = append(p.routeStack, state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// popRoute pops the last pushed ParseHandler from the route stack.
|
||||||
|
func (p *ParseAPI) popRoute() ParseHandler {
|
||||||
|
last := len(p.routeStack) - 1
|
||||||
|
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||||
|
p.routeStack = head
|
||||||
|
return tail
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpectEndOfFile can be used to check if the input is at end of file.
|
||||||
|
// Intended use:
|
||||||
|
//
|
||||||
|
// func yourParseHandler(p *parsekit.ParseAPI) {
|
||||||
|
// ...
|
||||||
|
// p.ExpectEndOfFile()
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// This will execute the end of file test right away. If you want to
|
||||||
|
// use the end of file check as a ParseHandler instead, you can also
|
||||||
|
// make use of another form, for example:
|
||||||
|
//
|
||||||
|
// func yourParseHandler(p *parsekit.ParseAPI) {
|
||||||
|
// p.RouteTo(yourHandler).ThenTo(parsekit.ExpectEndOfFile)
|
||||||
|
// }
|
||||||
|
func (p *ParseAPI) ExpectEndOfFile() {
|
||||||
|
// When some previous parsing step yielded an error, skip this operation.
|
||||||
|
if p.err == nil {
|
||||||
|
if p.On(A.EndOfFile).Stay() {
|
||||||
|
p.EmitEOF()
|
||||||
|
} else {
|
||||||
|
p.Expects("end of file")
|
||||||
|
p.UnexpectedInput()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpectEndOfFile can be scheduled as a ParseHandler function.
|
||||||
|
// It makes sure that the input is at the end of file.
|
||||||
|
// Intended use:
|
||||||
|
//
|
||||||
|
// func yourParseHandler(p *parsekit.ParseAPI) {
|
||||||
|
// ...
|
||||||
|
// p.RouteTo(parsekit.ExpectEndOfFile)
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// It is not mandatory to use this ParseHandler. You can take care of EOF
|
||||||
|
// yourself too. Simply emit an ItemEOF when the end of the input was reached
|
||||||
|
// to stop the parser loop:
|
||||||
|
//
|
||||||
|
// p.EmitEOF()
|
||||||
|
// TODO meh, get rid of this one, once we don't use state scheduling anymore.
|
||||||
|
func ExpectEndOfFile(p *ParseAPI) {
|
||||||
|
p.Expects("end of file")
|
||||||
|
if p.On(A.EndOfFile).Stay() {
|
||||||
|
p.EmitEOF()
|
||||||
|
}
|
||||||
|
}
|
||||||
52
parsekit.go
52
parsekit.go
|
|
@ -9,24 +9,24 @@ import (
|
||||||
// Parser is the top-level struct that holds the configuration for a parser.
|
// Parser is the top-level struct that holds the configuration for a parser.
|
||||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||||
type Parser struct {
|
type Parser struct {
|
||||||
startState StateHandler // the function that handles the very first state
|
startState ParseHandler // the function that handles the very first state
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewParser instantiates a new Parser.
|
// NewParser instantiates a new Parser.
|
||||||
//
|
//
|
||||||
// The Parser is a state machine-style recursive descent parser, in which
|
// The Parser is a state machine-style recursive descent parser, in which
|
||||||
// StateHandler functions are used to move the state machine forward during
|
// ParseHandler functions are used to move the state machine forward during
|
||||||
// parsing. This style of parser is typically used for parsing languages and
|
// parsing. This style of parser is typically used for parsing programming
|
||||||
// structured data formats (like json, toml, etc.)
|
// languages and structured data formats (like json, xml, toml, etc.)
|
||||||
//
|
//
|
||||||
// To start parsing input data, use the method Parser.Parse().
|
// To start parsing input data, use the method Parser.Parse().
|
||||||
func NewParser(startState StateHandler) *Parser {
|
func NewParser(startState ParseHandler) *Parser {
|
||||||
return &Parser{startState: startState}
|
return &Parser{startState: startState}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseRun represents a single parse run for a Parser.
|
// ParseRun represents a single parse run for a Parser.
|
||||||
type ParseRun struct {
|
type ParseRun struct {
|
||||||
p *ParseAPI // holds the internal state of a parse run
|
p *ParseAPI // holds parser state and provides an API to ParseHandler functions
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse starts a parse run on the provided input data.
|
// Parse starts a parse run on the provided input data.
|
||||||
|
|
@ -60,7 +60,7 @@ func (run *ParseRun) Next() (Item, *Error, bool) {
|
||||||
return run.makeReturnValues(item)
|
return run.makeReturnValues(item)
|
||||||
}
|
}
|
||||||
// Otherwise, the next state handler is looked up and invoked.
|
// Otherwise, the next state handler is looked up and invoked.
|
||||||
run.runNextStateHandler()
|
run.runNextParseHandler()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -77,43 +77,43 @@ func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// runNextStateHandler moves the parser, which is bascially a state machine,
|
// runNextParseHandler moves the parser, which is basically a state machine,
|
||||||
// to its next status. It does so by invoking a function of the
|
// to its next status. It does so by invoking a function of the
|
||||||
// type StateHandler. This function represents the current status and
|
// type ParseHandler. This function represents the current status and
|
||||||
// is responsible for moving the parser to its next status, depending
|
// is responsible for moving the parser to its next status, depending
|
||||||
// on the parsed input data.
|
// on the parsed input data.
|
||||||
func (run *ParseRun) runNextStateHandler() {
|
func (run *ParseRun) runNextParseHandler() {
|
||||||
if state, ok := run.getNextStateHandler(); ok {
|
if state, ok := run.getNextParseHandler(); ok {
|
||||||
run.invokeNextStateHandler(state)
|
run.invokeNextParseHandler(state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getNextStateHandler determines the next StateHandler to invoke in order
|
// getNextParseHandler determines the next ParseHandler to invoke in order
|
||||||
// to move the parsing state machine one step further.
|
// to move the parsing state machine one step further.
|
||||||
//
|
//
|
||||||
// When implementing a parser, the StateHandler functions must provide
|
// When implementing a parser, the ParseHandler functions must provide
|
||||||
// a routing decision in every invocation. A routing decision is one
|
// a routing decision in every invocation. A routing decision is one
|
||||||
// of the following:
|
// of the following:
|
||||||
//
|
//
|
||||||
// * A route is specified explicitly, which means that the next StateHandler
|
// * A route is specified explicitly, which means that the next ParseHandler
|
||||||
// function to invoke is registered during the StateHandler function
|
// function to invoke is registered during the ParseHandler function
|
||||||
// invocation. For example: p.RouteTo(nextStatus)
|
// invocation. For example: p.RouteTo(nextStatus)
|
||||||
//
|
//
|
||||||
// * A route is specified implicitly, which means that a previous StateHandler
|
// * A route is specified implicitly, which means that a previous ParseHandler
|
||||||
// invocation has registered the followup route for the current state.
|
// invocation has registered the followup route for the current state.
|
||||||
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
|
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
|
||||||
// In this example, the nextStatus StateHandler will not have to specify
|
// In this example, the nextStatus ParseHandler will not have to specify
|
||||||
// a route explicitly, but otherStatus will be used implicitly after
|
// a route explicitly, but otherStatus will be used implicitly after
|
||||||
// the nextStatus function has returned.
|
// the nextStatus function has returned.
|
||||||
//
|
//
|
||||||
// * An expectation is registered by the StateHandler.
|
// * An expectation is registered by the ParseHandler.
|
||||||
// For example: p.Expects("a cool thing")
|
// For example: p.Expects("a cool thing")
|
||||||
// When the StateHandler returns without having specified a route, this
|
// When the ParseHandler returns without having specified a route, this
|
||||||
// expectation is used to generate an "unexpected input" error message.
|
// expectation is used to generate an "unexpected input" error message.
|
||||||
//
|
//
|
||||||
// When no routing decision is provided by a StateHandler, then this is
|
// When no routing decision is provided by a ParseHandler, then this is
|
||||||
// considered a bug in the state handler, and the parser will panic.
|
// considered a bug in the state handler, and the parser will panic.
|
||||||
func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
|
func (run *ParseRun) getNextParseHandler() (ParseHandler, bool) {
|
||||||
switch {
|
switch {
|
||||||
case run.p.nextState != nil:
|
case run.p.nextState != nil:
|
||||||
return run.p.nextState, true
|
return run.p.nextState, true
|
||||||
|
|
@ -124,13 +124,13 @@ func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
|
||||||
return nil, false
|
return nil, false
|
||||||
default:
|
default:
|
||||||
name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
|
name := runtime.FuncForPC(reflect.ValueOf(run.p.state).Pointer()).Name()
|
||||||
panic(fmt.Sprintf("internal parser error: StateHandler %s did not provide a routing decision", name))
|
panic(fmt.Sprintf("internal parser error: ParseHandler %s did not provide a routing decision", name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// invokeNextStateHandler moves the parser state to the provided state
|
// invokeNextParseHandler moves the parser state to the provided state
|
||||||
// and invokes the StateHandler function.
|
// and invokes the ParseHandler function.
|
||||||
func (run *ParseRun) invokeNextStateHandler(state StateHandler) {
|
func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
|
||||||
run.p.state = state
|
run.p.state = state
|
||||||
run.p.nextState = nil
|
run.p.nextState = nil
|
||||||
run.p.expecting = ""
|
run.p.expecting = ""
|
||||||
|
|
|
||||||
|
|
@ -1,76 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke on
|
|
||||||
// the next parse cycle.
|
|
||||||
func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction {
|
|
||||||
p.nextState = state
|
|
||||||
return &RouteFollowupAction{p}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteRepeat tells the parser that on the next parsing cycle, the current
|
|
||||||
// StateHandler must be reinvoked.
|
|
||||||
func (p *ParseAPI) RouteRepeat() {
|
|
||||||
p.RouteTo(p.state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the last StateHandler
|
|
||||||
// that was pushed on the route stack must be invoked.
|
|
||||||
//
|
|
||||||
// Using this method is optional. When implementating a StateHandler that
|
|
||||||
// is used as a sort of subroutine (using constructions like
|
|
||||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
|
||||||
// providing an explicit routing decision from that handler. The parser will
|
|
||||||
// automatically assume a RouteReturn() in that case.
|
|
||||||
func (p *ParseAPI) RouteReturn() {
|
|
||||||
p.nextState = p.popRoute()
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteFollowupAction chains parsing routes.
|
|
||||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
|
||||||
type RouteFollowupAction struct {
|
|
||||||
p *ParseAPI
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
|
||||||
// StateHandler has been completed.
|
|
||||||
// For example:
|
|
||||||
//
|
|
||||||
// p.RouteTo(handlerA).ThenTo(handlerB)
|
|
||||||
func (a *RouteFollowupAction) ThenTo(state StateHandler) {
|
|
||||||
a.p.pushRoute(state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenReturnHere schedules the current StateHandler to be invoked after
|
|
||||||
// the RouteTo StateHandler has been completed.
|
|
||||||
// For example:
|
|
||||||
//
|
|
||||||
// p.RouteTo(handlerA).ThenReturnHere()
|
|
||||||
func (a *RouteFollowupAction) ThenReturnHere() {
|
|
||||||
a.p.pushRoute(a.p.state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// pushRoute adds the StateHandler to the route stack.
|
|
||||||
// This is used for implementing nested parsing.
|
|
||||||
func (p *ParseAPI) pushRoute(state StateHandler) {
|
|
||||||
p.routeStack = append(p.routeStack, state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// popRoute pops the last pushed StateHandler from the route stack.
|
|
||||||
func (p *ParseAPI) popRoute() StateHandler {
|
|
||||||
last := len(p.routeStack) - 1
|
|
||||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
|
||||||
p.routeStack = head
|
|
||||||
return tail
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
|
||||||
// your parser expects to be at the end of the file. This will schedule
|
|
||||||
// a parsekit-provided StateHandler which will do the actual check for this.
|
|
||||||
func (p *ParseAPI) ExpectEndOfFile() {
|
|
||||||
p.RouteTo(func(p *ParseAPI) {
|
|
||||||
p.Expects("end of file")
|
|
||||||
if p.On(A.EndOfFile).Stay() {
|
|
||||||
p.Emit(ItemEOF, "EOF")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// stringBuffer is a string buffer implementation, which is used by the parser
|
// stringBuffer is a string buffer implementation that is used by the parser
|
||||||
// to efficiently accumulate runes from the input and eventually turn these
|
// to efficiently accumulate runes from the input and eventually turn these
|
||||||
// into a string, either literal or interpreted.
|
// into a string, either literal or interpreted.
|
||||||
type stringBuffer struct {
|
type stringBuffer struct {
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ var C = struct {
|
||||||
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
||||||
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
||||||
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
||||||
|
Signed func(TokenHandler) TokenHandler
|
||||||
}{
|
}{
|
||||||
Rune: MatchRune,
|
Rune: MatchRune,
|
||||||
Runes: MatchRunes,
|
Runes: MatchRunes,
|
||||||
|
|
@ -51,6 +52,7 @@ var C = struct {
|
||||||
MinMax: MatchMinMax,
|
MinMax: MatchMinMax,
|
||||||
Separated: MatchSeparated,
|
Separated: MatchSeparated,
|
||||||
Except: MatchExcept,
|
Except: MatchExcept,
|
||||||
|
Signed: MatchSigned,
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchRune creates a TokenHandler function that checks if the next rune from
|
// MatchRune creates a TokenHandler function that checks if the next rune from
|
||||||
|
|
@ -293,6 +295,16 @@ func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchSigned creates a TokenHandler that checks if the provided TokenHandler is
|
||||||
|
// prefixed by an optional '+' or '-' sign. This can be used to turn numeric
|
||||||
|
// atoms into a signed version, e.g.
|
||||||
|
//
|
||||||
|
// C.Signed(A.Integer)
|
||||||
|
func MatchSigned(handler TokenHandler) TokenHandler {
|
||||||
|
sign := MatchOpt(MatchAny(MatchRune('+'), MatchRune('-')))
|
||||||
|
return MatchSeq(sign, handler)
|
||||||
|
}
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms that can be used to
|
// A provides convenient access to a range of atoms that can be used to
|
||||||
// build TokenHandlers or parser rules.
|
// build TokenHandlers or parser rules.
|
||||||
//
|
//
|
||||||
|
|
@ -320,18 +332,26 @@ var A = struct {
|
||||||
Amp TokenHandler
|
Amp TokenHandler
|
||||||
SingleQuote TokenHandler
|
SingleQuote TokenHandler
|
||||||
RoundOpen TokenHandler
|
RoundOpen TokenHandler
|
||||||
|
LeftParen TokenHandler
|
||||||
RoundClose TokenHandler
|
RoundClose TokenHandler
|
||||||
|
RightParen TokenHandler
|
||||||
Asterisk TokenHandler
|
Asterisk TokenHandler
|
||||||
|
Multiply TokenHandler
|
||||||
Plus TokenHandler
|
Plus TokenHandler
|
||||||
|
Add TokenHandler
|
||||||
Comma TokenHandler
|
Comma TokenHandler
|
||||||
Minus TokenHandler
|
Minus TokenHandler
|
||||||
|
Subtract TokenHandler
|
||||||
Dot TokenHandler
|
Dot TokenHandler
|
||||||
Slash TokenHandler
|
Slash TokenHandler
|
||||||
|
Divide TokenHandler
|
||||||
Colon TokenHandler
|
Colon TokenHandler
|
||||||
Semicolon TokenHandler
|
Semicolon TokenHandler
|
||||||
AngleOpen TokenHandler
|
AngleOpen TokenHandler
|
||||||
|
LessThan TokenHandler
|
||||||
Equal TokenHandler
|
Equal TokenHandler
|
||||||
AngleClose TokenHandler
|
AngleClose TokenHandler
|
||||||
|
GreaterThan TokenHandler
|
||||||
Question TokenHandler
|
Question TokenHandler
|
||||||
At TokenHandler
|
At TokenHandler
|
||||||
SquareOpen TokenHandler
|
SquareOpen TokenHandler
|
||||||
|
|
@ -349,6 +369,10 @@ var A = struct {
|
||||||
WhitespaceAndNewlines TokenHandler
|
WhitespaceAndNewlines TokenHandler
|
||||||
EndOfLine TokenHandler
|
EndOfLine TokenHandler
|
||||||
Digit TokenHandler
|
Digit TokenHandler
|
||||||
|
DigitNotZero TokenHandler
|
||||||
|
Digits TokenHandler
|
||||||
|
Float TokenHandler
|
||||||
|
Integer TokenHandler
|
||||||
ASCII TokenHandler
|
ASCII TokenHandler
|
||||||
ASCIILower TokenHandler
|
ASCIILower TokenHandler
|
||||||
ASCIIUpper TokenHandler
|
ASCIIUpper TokenHandler
|
||||||
|
|
@ -369,18 +393,26 @@ var A = struct {
|
||||||
Amp: C.Rune('&'),
|
Amp: C.Rune('&'),
|
||||||
SingleQuote: C.Rune('\''),
|
SingleQuote: C.Rune('\''),
|
||||||
RoundOpen: C.Rune('('),
|
RoundOpen: C.Rune('('),
|
||||||
|
LeftParen: C.Rune('('),
|
||||||
RoundClose: C.Rune(')'),
|
RoundClose: C.Rune(')'),
|
||||||
|
RightParen: C.Rune(')'),
|
||||||
Asterisk: C.Rune('*'),
|
Asterisk: C.Rune('*'),
|
||||||
|
Multiply: C.Rune('*'),
|
||||||
Plus: C.Rune('+'),
|
Plus: C.Rune('+'),
|
||||||
|
Add: C.Rune('+'),
|
||||||
Comma: C.Rune(','),
|
Comma: C.Rune(','),
|
||||||
Minus: C.Rune('-'),
|
Minus: C.Rune('-'),
|
||||||
|
Subtract: C.Rune('-'),
|
||||||
Dot: C.Rune('.'),
|
Dot: C.Rune('.'),
|
||||||
Slash: C.Rune('/'),
|
Slash: C.Rune('/'),
|
||||||
|
Divide: C.Rune('/'),
|
||||||
Colon: C.Rune(':'),
|
Colon: C.Rune(':'),
|
||||||
Semicolon: C.Rune(';'),
|
Semicolon: C.Rune(';'),
|
||||||
AngleOpen: C.Rune('<'),
|
AngleOpen: C.Rune('<'),
|
||||||
|
LessThan: C.Rune('<'),
|
||||||
Equal: C.Rune('='),
|
Equal: C.Rune('='),
|
||||||
AngleClose: C.Rune('>'),
|
AngleClose: C.Rune('>'),
|
||||||
|
GreaterThan: C.Rune('>'),
|
||||||
Question: C.Rune('?'),
|
Question: C.Rune('?'),
|
||||||
At: C.Rune('@'),
|
At: C.Rune('@'),
|
||||||
SquareOpen: C.Rune('['),
|
SquareOpen: C.Rune('['),
|
||||||
|
|
@ -396,7 +428,11 @@ var A = struct {
|
||||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
||||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
||||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||||
Digit: C.RuneRange('0', '9'),
|
Digit: MatchDigit(),
|
||||||
|
DigitNotZero: MatchDigitNotZero(),
|
||||||
|
Digits: MatchDigits(),
|
||||||
|
Integer: MatchInteger(),
|
||||||
|
Float: MatchFloat(),
|
||||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||||
ASCIILower: C.RuneRange('a', 'z'),
|
ASCIILower: C.RuneRange('a', 'z'),
|
||||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||||
|
|
@ -428,6 +464,42 @@ func MatchAnyRune() TokenHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchDigit creates a TokenHandler that checks if a single digit can be read
|
||||||
|
// from the input.
|
||||||
|
func MatchDigit() TokenHandler {
|
||||||
|
return MatchRuneRange('0', '9')
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchDigits creates a TokenHandler that checks if one or more digits can be read
|
||||||
|
// from the input.
|
||||||
|
func MatchDigits() TokenHandler {
|
||||||
|
return MatchOneOrMore(MatchRuneRange('0', '9'))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal
|
||||||
|
// to zero '0' can be read from the input.
|
||||||
|
func MatchDigitNotZero() TokenHandler {
|
||||||
|
return MatchRuneRange('1', '9')
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchInteger creates a TokenHandler function that checks if a valid integer
|
||||||
|
// can be read from the input. In line with Go, an integer cannot start with
|
||||||
|
// a zero. Starting with a zero is used to indicate other bases, like octal or
|
||||||
|
// hexadecimal.
|
||||||
|
func MatchInteger() TokenHandler {
|
||||||
|
justZero := MatchRune('0')
|
||||||
|
integer := C.Seq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
||||||
|
return MatchAny(integer, justZero)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchFloat creates a TokenHandler function that checks if a valid float value
|
||||||
|
// can be read from the input. In case the fractional part is missing, this
|
||||||
|
// TokenHandler will report a match, so both "123" and "123.123" will match.
|
||||||
|
func MatchFloat() TokenHandler {
|
||||||
|
digits := MatchDigits()
|
||||||
|
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
||||||
|
}
|
||||||
|
|
||||||
// M provides convenient access to a range of modifiers (which in their nature are
|
// M provides convenient access to a range of modifiers (which in their nature are
|
||||||
// parser/combinators) that can be used when creating TokenHandler functions.
|
// parser/combinators) that can be used when creating TokenHandler functions.
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -96,19 +96,27 @@ func TestAtoms(t *testing.T) {
|
||||||
{"%", a.Percent, true, "%"},
|
{"%", a.Percent, true, "%"},
|
||||||
{"&", a.Amp, true, "&"},
|
{"&", a.Amp, true, "&"},
|
||||||
{"'", a.SingleQuote, true, "'"},
|
{"'", a.SingleQuote, true, "'"},
|
||||||
|
{"(", a.LeftParen, true, "("},
|
||||||
{"(", a.RoundOpen, true, "("},
|
{"(", a.RoundOpen, true, "("},
|
||||||
|
{")", a.RightParen, true, ")"},
|
||||||
{")", a.RoundClose, true, ")"},
|
{")", a.RoundClose, true, ")"},
|
||||||
{"*", a.Asterisk, true, "*"},
|
{"*", a.Asterisk, true, "*"},
|
||||||
|
{"*", a.Multiply, true, "*"},
|
||||||
{"+", a.Plus, true, "+"},
|
{"+", a.Plus, true, "+"},
|
||||||
|
{"+", a.Add, true, "+"},
|
||||||
{",", a.Comma, true, ","},
|
{",", a.Comma, true, ","},
|
||||||
{"-", a.Minus, true, "-"},
|
{"-", a.Minus, true, "-"},
|
||||||
|
{"-", a.Subtract, true, "-"},
|
||||||
{".", a.Dot, true, "."},
|
{".", a.Dot, true, "."},
|
||||||
{"/", a.Slash, true, "/"},
|
{"/", a.Slash, true, "/"},
|
||||||
|
{"/", a.Divide, true, "/"},
|
||||||
{":", a.Colon, true, ":"},
|
{":", a.Colon, true, ":"},
|
||||||
{";", a.Semicolon, true, ";"},
|
{";", a.Semicolon, true, ";"},
|
||||||
{"<", a.AngleOpen, true, "<"},
|
{"<", a.AngleOpen, true, "<"},
|
||||||
|
{"<", a.LessThan, true, "<"},
|
||||||
{"=", a.Equal, true, "="},
|
{"=", a.Equal, true, "="},
|
||||||
{">", a.AngleClose, true, ">"},
|
{">", a.AngleClose, true, ">"},
|
||||||
|
{">", a.GreaterThan, true, ">"},
|
||||||
{"?", a.Question, true, "?"},
|
{"?", a.Question, true, "?"},
|
||||||
{"@", a.At, true, "@"},
|
{"@", a.At, true, "@"},
|
||||||
{"[", a.SquareOpen, true, "["},
|
{"[", a.SquareOpen, true, "["},
|
||||||
|
|
@ -154,6 +162,22 @@ func TestAtoms(t *testing.T) {
|
||||||
{"F", a.HexDigit, true, "F"},
|
{"F", a.HexDigit, true, "F"},
|
||||||
{"g", a.HexDigit, false, "g"},
|
{"g", a.HexDigit, false, "g"},
|
||||||
{"G", a.HexDigit, false, "G"},
|
{"G", a.HexDigit, false, "G"},
|
||||||
|
{"0", a.Integer, true, "0"},
|
||||||
|
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
|
||||||
|
{"1", a.Integer, true, "1"},
|
||||||
|
{"-10X", a.Integer, false, ""},
|
||||||
|
{"+10X", a.Integer, false, ""},
|
||||||
|
{"-10X", c.Signed(a.Integer), true, "-10"},
|
||||||
|
{"+10X", c.Signed(a.Integer), true, "+10"},
|
||||||
|
{"+10.1X", c.Signed(a.Integer), true, "+10"},
|
||||||
|
{"0X", a.Float, true, "0"},
|
||||||
|
{"0X", a.Float, true, "0"},
|
||||||
|
{"1X", a.Float, true, "1"},
|
||||||
|
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||||
|
{"123.321X", a.Float, true, "123.321"},
|
||||||
|
{"-3.14X", a.Float, false, ""},
|
||||||
|
{"-3.14X", c.Signed(a.Float), true, "-3.14"},
|
||||||
|
{"-003.0014X", c.Signed(a.Float), true, "-003.0014"},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -174,8 +198,8 @@ func TestModifiers(t *testing.T) {
|
||||||
|
|
||||||
func TestSequenceOfRunes(t *testing.T) {
|
func TestSequenceOfRunes(t *testing.T) {
|
||||||
sequence := c.Seq(
|
sequence := c.Seq(
|
||||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
|
||||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue