Got rid of the full On()...etc chains for both code clarity and usability clarity. Working on good examples and shaving the APIs accordingly.
This commit is contained in:
parent
b0cd017b83
commit
2751c78003
|
@ -0,0 +1,140 @@
|
||||||
|
// Let's write a small example for parsing a really basic calculator.
|
||||||
|
// The calculator understands input that looks like:
|
||||||
|
//
|
||||||
|
// 10 + 20 - 8+4
|
||||||
|
//
|
||||||
|
// So positive numbers that can be either added or subtracted, and whitespace
|
||||||
|
// is ignored.
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
// When writing a parser, it's a good start to use the parser/combinator
|
||||||
|
// functionality of parsekit to create some Matcher functions. These functions
|
||||||
|
// can later be used in the parser state machine to check for matching strings
|
||||||
|
// on the input data.
|
||||||
|
//
|
||||||
|
// For the calculator, we only need a definition of "number, surrounded by
|
||||||
|
// optional whitespace". Skipping whitespace could be a part of the StateHandler
|
||||||
|
// functions below too, but including it in a Matcher makes things really
|
||||||
|
// practical.
|
||||||
|
func createNumberMatcher() parsekit.Matcher {
|
||||||
|
// Easy access to parsekit definition.
|
||||||
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
|
whitespace := m.Drop(c.Opt(a.Whitespace))
|
||||||
|
return c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
|
||||||
|
}
|
||||||
|
|
||||||
|
var calcNumber = createNumberMatcher()
|
||||||
|
|
||||||
|
// We need to define the ItemTypes that we will use for emitting Items
|
||||||
|
// during the parsing process.
|
||||||
|
const (
|
||||||
|
numberType parsekit.ItemType = iota
|
||||||
|
addType
|
||||||
|
subtractType
|
||||||
|
)
|
||||||
|
|
||||||
|
// We also need to define the state machine for parsing the input.
|
||||||
|
// The state machine is built up from functions that match the StateHandler
|
||||||
|
// signature: func(*parsekit.P)
|
||||||
|
// The P struct holds the internal state for the parser and it provides
|
||||||
|
// some methods that form the API for your StateHandler implementation.
|
||||||
|
|
||||||
|
// State: expect a number. When a number is found on the input,
|
||||||
|
// it is accepted in the output buffer, after which the output buffer is
|
||||||
|
// emitted as a numberType item. Then we tell the state machine to continue
|
||||||
|
// with the calcWaitForOperatorOrEndOfInput state.
|
||||||
|
// When no number is found, the parser will emit an error, explaining that
|
||||||
|
// "a number" was expected.
|
||||||
|
func calcWaitForNumber(p *parsekit.P) {
|
||||||
|
p.Expects("a number")
|
||||||
|
if p.On(calcNumber).Accept() {
|
||||||
|
p.EmitLiteral(numberType)
|
||||||
|
p.RouteTo(calcWaitForOperatorOrEndOfInput)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// State: expect a plus or minus operator. When one of those
|
||||||
|
// is found, the appropriate Item is emitted and the parser is sent back
|
||||||
|
// to the calcWaitForNumber state to find the next number on the input.
|
||||||
|
// When no operator is found, then the parser is told to expect the end of
|
||||||
|
// the input. When more input data is available (which is obviously wrong
|
||||||
|
// data since it does not match our syntax), the parser will emit an error.
|
||||||
|
func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
|
||||||
|
switch {
|
||||||
|
case p.On(a.Plus).Accept():
|
||||||
|
p.EmitLiteral(addType)
|
||||||
|
p.RouteTo(calcWaitForNumber)
|
||||||
|
case p.On(a.Minus).Accept():
|
||||||
|
p.EmitLiteral(subtractType)
|
||||||
|
p.RouteTo(calcWaitForNumber)
|
||||||
|
default:
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// All is ready for our parser. We now can create a new Parser struct.
|
||||||
|
// We need to tell it what the start state is. In our case, it is the
|
||||||
|
// calcWaitForNumber state, since the calculation must start with a number.
|
||||||
|
var calcParser = parsekit.NewParser(calcWaitForNumber)
|
||||||
|
|
||||||
|
func Example_basicCalculator() {
|
||||||
|
// Let's feed the parser some input to work with.
|
||||||
|
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
||||||
|
|
||||||
|
// We can now step through the results of the parsing process by repeated
|
||||||
|
// calls to run.Next(). Next() returns either the next parse item, a parse
|
||||||
|
// error or an end of file. Let's dump the parse results and handle the
|
||||||
|
// computation while we're at it.
|
||||||
|
sum := 0
|
||||||
|
op := +1
|
||||||
|
for {
|
||||||
|
item, err, ok := run.Next()
|
||||||
|
switch {
|
||||||
|
case !ok && err == nil:
|
||||||
|
fmt.Println("End of file reached")
|
||||||
|
fmt.Println("Outcome of computation:", sum)
|
||||||
|
return
|
||||||
|
case !ok:
|
||||||
|
fmt.Printf("Error: %s\n", err)
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
||||||
|
switch {
|
||||||
|
case item.Type == addType:
|
||||||
|
op = +1
|
||||||
|
case item.Type == subtractType:
|
||||||
|
op = -1
|
||||||
|
case item.Type == numberType:
|
||||||
|
nr, err := strconv.Atoi(item.Value)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
sum += op * nr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// Type: 0, Value: "153"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "22"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "31"
|
||||||
|
// Type: 2, Value: "-"
|
||||||
|
// Type: 0, Value: "4"
|
||||||
|
// Type: 2, Value: "-"
|
||||||
|
// Type: 0, Value: "6"
|
||||||
|
// Type: 1, Value: "+"
|
||||||
|
// Type: 0, Value: "42"
|
||||||
|
// End of file reached
|
||||||
|
// Outcome of computation: 238
|
||||||
|
}
|
|
@ -1,4 +1,6 @@
|
||||||
// In this example, a parser is created which can parse and normalize Dutch postcodes.
|
// In this example, a parser is created which can parse and normalize Dutch postcodes.
|
||||||
|
// The implementation uses only a Matcher function and does not implement a
|
||||||
|
// full-fledged state-based Parser for it.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -9,7 +11,7 @@ import (
|
||||||
|
|
||||||
func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
// Matcher functions are created and combined to satisfy these rules:
|
// Matcher functions are created and combined to satisfy these rules:
|
||||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||||
|
@ -17,12 +19,12 @@ func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
||||||
// - A space between letters and digits is optional.
|
// - A space between letters and digits is optional.
|
||||||
// - It is good form to write the letters in upper case.
|
// - It is good form to write the letters in upper case.
|
||||||
// - It is good form to use a single space between digits and letters.
|
// - It is good form to use a single space between digits and letters.
|
||||||
var digitNotZero = c.Except(c.Rune('0'), a.Digit)
|
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
||||||
var pcDigits = c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||||
var pcLetter = c.Any(a.ASCIILower, a.ASCIIUpper)
|
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||||
var pcLetters = m.ToUpper(c.Seq(pcLetter, pcLetter))
|
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||||
var space = m.Replace(c.Opt(a.Whitespace), " ")
|
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||||
var postcode = c.Seq(pcDigits, space, pcLetters)
|
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||||
|
|
||||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
// In this example, a parser is created that is able to parse input that looks
|
||||||
|
// like "Hello, <name>!", and that extracts the name from it.
|
||||||
|
//
|
||||||
|
// The implementation uses only a Matcher function and does not implement a
|
||||||
|
// full-fledged state-based Parser for it. If you want to see the same kind of
|
||||||
|
// functionality, implemented using a Parser, take a look at the
|
||||||
|
// HelloWorldUsingParser example.
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
func createHelloMatcher() *parsekit.MatcherWrapper {
|
||||||
|
// Easy access to parsekit definition.
|
||||||
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
|
// Using the parser/combinator support of parsekit, we create a Matcher function
|
||||||
|
// that does all the work. The 'greeting' Matcher matches the whole input and
|
||||||
|
// drops all but the name from it.
|
||||||
|
hello := c.StrNoCase("hello")
|
||||||
|
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||||
|
separator := c.Any(comma, a.Whitespace)
|
||||||
|
name := c.OneOrMore(c.Not(a.Excl))
|
||||||
|
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
||||||
|
|
||||||
|
// Using 'greeting' we can now create the Matcher-based parser.
|
||||||
|
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_helloWorldUsingMatcher() {
|
||||||
|
parser := createHelloMatcher()
|
||||||
|
|
||||||
|
for i, input := range []string{
|
||||||
|
"Hello, world!",
|
||||||
|
"HELLO ,Johnny!",
|
||||||
|
"hello , Bob123!",
|
||||||
|
"hello Pizza!",
|
||||||
|
"Oh no!",
|
||||||
|
"Hello, world",
|
||||||
|
} {
|
||||||
|
output, err, ok := parser.Parse(input)
|
||||||
|
if !ok {
|
||||||
|
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// [0] Input: "Hello, world!" Output: world
|
||||||
|
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||||
|
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||||
|
// [3] Input: "hello Pizza!" Output: Pizza
|
||||||
|
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
|
||||||
|
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
|
||||||
|
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
// In this example, a parser is created that is able to parse input that looks
|
||||||
|
// like "Hello, <name>!", and that extracts the name from it.
|
||||||
|
//
|
||||||
|
// This implementation uses a state-based Parser for it, and it does not implement
|
||||||
|
// any custom combinator/parser Matcher functions. Note that things are much easier to
|
||||||
|
// implement using custom Matchers (see the other HelloWorldUsingMatcher example
|
||||||
|
// for this). Doing this fully parser-based implementation is mainly for your
|
||||||
|
// learning pleasure.
|
||||||
|
//
|
||||||
|
// One big difference between the Matcher-based example and this one, is that the
|
||||||
|
// state-based parser reports much more fine-grained errors. This might or might
|
||||||
|
// not be useful for your specific application.
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
const greeteeItem parsekit.ItemType = 1
|
||||||
|
|
||||||
|
func stateStartOfGreeting(p *parsekit.P) {
|
||||||
|
c := parsekit.C
|
||||||
|
p.Expects("hello")
|
||||||
|
if p.On(c.StrNoCase("hello")).Skip() {
|
||||||
|
p.RouteTo(stateComma)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func stateComma(p *parsekit.P) {
|
||||||
|
a := parsekit.A
|
||||||
|
p.Expects("comma")
|
||||||
|
switch {
|
||||||
|
case p.On(a.Whitespace).Skip():
|
||||||
|
p.RouteRepeat()
|
||||||
|
case p.On(a.Comma).Skip():
|
||||||
|
p.RouteTo(stateName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func stateName(p *parsekit.P) {
|
||||||
|
a := parsekit.A
|
||||||
|
p.Expects("name")
|
||||||
|
switch {
|
||||||
|
case p.On(a.Excl).Skip():
|
||||||
|
p.RouteTo(stateEndOfGreeting)
|
||||||
|
case p.On(a.AnyRune).Accept():
|
||||||
|
p.RouteRepeat()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func stateEndOfGreeting(p *parsekit.P) {
|
||||||
|
p.Expects("end of greeting")
|
||||||
|
if p.On(a.EndOfFile).Stay() {
|
||||||
|
name := strings.TrimSpace(p.BufLiteral())
|
||||||
|
if name == "" {
|
||||||
|
p.EmitError("The name cannot be empty")
|
||||||
|
} else {
|
||||||
|
p.Emit(greeteeItem, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func createHelloParser() *parsekit.Parser {
|
||||||
|
return parsekit.NewParser(stateStartOfGreeting)
|
||||||
|
}
|
||||||
|
|
||||||
|
func Example_helloWorldUsingParser() {
|
||||||
|
parser := createHelloParser()
|
||||||
|
|
||||||
|
for i, input := range []string{
|
||||||
|
"Hello, world!",
|
||||||
|
"HELLO ,Johnny!",
|
||||||
|
"hello , Bob123!",
|
||||||
|
"hello Pizza!",
|
||||||
|
"",
|
||||||
|
" ",
|
||||||
|
"hello",
|
||||||
|
"hello,",
|
||||||
|
"hello , ",
|
||||||
|
"hello , Droopy",
|
||||||
|
"hello , Droopy!",
|
||||||
|
"hello , \t \t Droopy \t !",
|
||||||
|
"Oh no!",
|
||||||
|
"hello,!",
|
||||||
|
} {
|
||||||
|
item, err, ok := parser.Parse(input).Next()
|
||||||
|
if !ok {
|
||||||
|
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// [0] Input: "Hello, world!" Output: world
|
||||||
|
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||||
|
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||||
|
// [3] Input: "hello Pizza!" Error: unexpected character 'P' (expected comma)
|
||||||
|
// [4] Input: "" Error: unexpected end of file (expected hello)
|
||||||
|
// [5] Input: " " Error: unexpected character ' ' (expected hello)
|
||||||
|
// [6] Input: "hello" Error: unexpected end of file (expected comma)
|
||||||
|
// [7] Input: "hello," Error: unexpected end of file (expected name)
|
||||||
|
// [8] Input: "hello , " Error: unexpected end of file (expected name)
|
||||||
|
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected name)
|
||||||
|
// [10] Input: "hello , Droopy!" Output: Droopy
|
||||||
|
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
|
||||||
|
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
|
||||||
|
// [13] Input: "hello,!" Error: The name cannot be empty
|
||||||
|
}
|
195
examples_test.go
195
examples_test.go
|
@ -2,194 +2,10 @@ package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Example_helloWorldUsingParser() {
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_helloWorldUsingMatcher() {
|
|
||||||
// In this example, a parser is created that is able to parse input that looks
|
|
||||||
// like "Hello, <name>!", and that extracts the name from it.
|
|
||||||
// The implementation uses only a Matcher function and does not implement a
|
|
||||||
// full-fledged state-based Parser for it.
|
|
||||||
|
|
||||||
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
|
||||||
|
|
||||||
// Using the parser/combinator support of parsekit, we create a Matcher function
|
|
||||||
// that does all the work. The 'greeting' Matcher matches the whole input and
|
|
||||||
// drops all but the name from it.
|
|
||||||
var hello = c.StrNoCase("hello")
|
|
||||||
var comma = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
|
||||||
var separator = c.Any(comma, a.Whitespace)
|
|
||||||
var name = c.OneOrMore(c.Not(a.Excl))
|
|
||||||
var greeting = c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
|
||||||
|
|
||||||
// Now we can already do some parsing, by using a Matcher.
|
|
||||||
var parser = parsekit.NewMatcher(greeting, "a friendly greeting")
|
|
||||||
for i, input := range []string{
|
|
||||||
"Hello, world!",
|
|
||||||
"HELLO ,Johnny!",
|
|
||||||
"hello , Bob123!",
|
|
||||||
"hello Pizza!",
|
|
||||||
"Oh no!",
|
|
||||||
"Hello, world",
|
|
||||||
} {
|
|
||||||
output, err, ok := parser.Parse(input)
|
|
||||||
if !ok {
|
|
||||||
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
|
|
||||||
} else {
|
|
||||||
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// [0] Input: "Hello, world!" Output: world
|
|
||||||
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
|
||||||
// [2] Input: "hello , Bob123!" Output: Bob123
|
|
||||||
// [3] Input: "hello Pizza!" Output: Pizza
|
|
||||||
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
|
|
||||||
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
|
|
||||||
}
|
|
||||||
|
|
||||||
func Example_basicCalculator() {
|
|
||||||
// Let's write a small example for parsing a really basic calculator.
|
|
||||||
// The calculator understands input that looks like:
|
|
||||||
//
|
|
||||||
// 10 + 20 - 8+4
|
|
||||||
//
|
|
||||||
// So positive numbers that can be either added or substracted, and whitespace
|
|
||||||
// is ignored.
|
|
||||||
|
|
||||||
// Easy access to parsekit parser/combinators, atoms and modifiers.
|
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
|
||||||
|
|
||||||
// When writing a parser, it's a good start to use the parser/combinator
|
|
||||||
// functionality of parsekit to create some Matcher functions. These
|
|
||||||
// functions can later be used in the parser state machine to find the
|
|
||||||
// matching tokens on the input data.
|
|
||||||
//
|
|
||||||
// In our case, we only need a definition of "number, surrounded by
|
|
||||||
// optional whitespace". Skipping whitespace could be a part of the
|
|
||||||
// StateHandler functions below too, but including it in a Matcher makes
|
|
||||||
// things really practical here.
|
|
||||||
var whitespace = m.Drop(c.Opt(a.Whitespace))
|
|
||||||
var number = c.Seq(whitespace, c.OneOrMore(a.Digit), whitespace)
|
|
||||||
|
|
||||||
// We also must define the types of items that the parser will emit.
|
|
||||||
// We only need three of them here, for numbers, plus and minus.
|
|
||||||
// The recommended way to define these, is using 'iota' for auto numbering.
|
|
||||||
const (
|
|
||||||
numberType parsekit.ItemType = iota
|
|
||||||
addType
|
|
||||||
subtractType
|
|
||||||
)
|
|
||||||
|
|
||||||
// Now it is time to define the state machine for parsing the input.
|
|
||||||
// The state machine is built up from functions that match the StateHandler
|
|
||||||
// signature: func(*parsekit.P)
|
|
||||||
// The P struct holds the internal state for the parser and it provides
|
|
||||||
// some methods that form the API for your StateHandler implementation.
|
|
||||||
//
|
|
||||||
// (note that normally you'd write normal functions and not anonymous
|
|
||||||
// functions like I did here. I had to use these to be able to write the
|
|
||||||
// example code)
|
|
||||||
|
|
||||||
var operatorHandler parsekit.StateHandler
|
|
||||||
|
|
||||||
// In this state, we expect a number. When a number is found on the input,
|
|
||||||
// it is accepted in the output buffer, after which the output buffer is
|
|
||||||
// emitted as a numberType item. Then we tell the state machine to continue
|
|
||||||
// with the operatorHandler state.
|
|
||||||
// When no number is found, the parser will emit an error, explaining that
|
|
||||||
// "a number" was expected.
|
|
||||||
numberHandler := func(p *parsekit.P) {
|
|
||||||
p.Expects("a number")
|
|
||||||
if p.On(number).Accept().End() {
|
|
||||||
p.EmitLiteral(numberType)
|
|
||||||
p.RouteTo(operatorHandler)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// In this state, we expect a plus or minus operator. When one of those
|
|
||||||
// is found, the appropriate Item is emitted and the parser is sent back
|
|
||||||
// to the numberHandler to find the next number on the input.
|
|
||||||
// When no operator is found, then the parser is told to expect the end of
|
|
||||||
// the input. When more input data is available (which is obviously wrong
|
|
||||||
// data since it does not match our syntax), the parser will emit an error.
|
|
||||||
operatorHandler = func(p *parsekit.P) {
|
|
||||||
switch {
|
|
||||||
case p.On(a.Plus).Accept().End():
|
|
||||||
p.EmitLiteral(addType)
|
|
||||||
p.RouteTo(numberHandler)
|
|
||||||
case p.On(a.Minus).Accept().End():
|
|
||||||
p.EmitLiteral(subtractType)
|
|
||||||
p.RouteTo(numberHandler)
|
|
||||||
default:
|
|
||||||
p.ExpectEndOfFile()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// All is ready for our parser. We now can create a new Parser struct.
|
|
||||||
// We need to tell it what the start state is. In our case, it is
|
|
||||||
// the number state, since the calculation must start with a number.
|
|
||||||
parser := parsekit.NewParser(numberHandler)
|
|
||||||
|
|
||||||
// Let's feed the parser some input to work with.
|
|
||||||
run := parser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
|
||||||
|
|
||||||
// We can now step through the results of the parsing process by repeated
|
|
||||||
// calls to run.Next(). Next() returns either the next parse item, a parse
|
|
||||||
// error or an end of file. Let's dump the parse results and handle the
|
|
||||||
// computation while we're at it.
|
|
||||||
sum := 0
|
|
||||||
op := +1
|
|
||||||
for {
|
|
||||||
item, err, ok := run.Next()
|
|
||||||
switch {
|
|
||||||
case !ok && err == nil:
|
|
||||||
fmt.Println("End of file reached")
|
|
||||||
fmt.Println("Outcome of computation:", sum)
|
|
||||||
return
|
|
||||||
case !ok:
|
|
||||||
fmt.Printf("Error: %s\n", err)
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
|
|
||||||
switch {
|
|
||||||
case item.Type == addType:
|
|
||||||
op = +1
|
|
||||||
case item.Type == subtractType:
|
|
||||||
op = -1
|
|
||||||
case item.Type == numberType:
|
|
||||||
nr, err := strconv.Atoi(item.Value)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("Error: invalid number %s: %s\n", item.Value, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sum += op * nr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// Type: 0, Value: "153"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "22"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "31"
|
|
||||||
// Type: 2, Value: "-"
|
|
||||||
// Type: 0, Value: "4"
|
|
||||||
// Type: 2, Value: "-"
|
|
||||||
// Type: 0, Value: "6"
|
|
||||||
// Type: 1, Value: "+"
|
|
||||||
// Type: 0, Value: "42"
|
|
||||||
// End of file reached
|
|
||||||
// Outcome of computation: 238
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleItemType() {
|
func ExampleItemType() {
|
||||||
// Make use of positive values. Ideally, define your ItemTypes using
|
// Make use of positive values. Ideally, define your ItemTypes using
|
||||||
// iota for easy automatic value management like this:
|
// iota for easy automatic value management like this:
|
||||||
|
@ -202,17 +18,18 @@ func ExampleItemType() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleItem() {
|
func ExampleItem() {
|
||||||
var c = parsekit.C
|
// Easy access to the parsekit definitions.
|
||||||
|
c := parsekit.C
|
||||||
|
|
||||||
// You define your own item types for your specific parser.
|
// You define your own item types for your specific parser.
|
||||||
var QuestionItem parsekit.ItemType = 42
|
const QuestionItem = parsekit.ItemType(42)
|
||||||
|
|
||||||
// A StateHandler function can use the defined item type by means of
|
// A StateHandler function can use the defined item type by means of
|
||||||
// the p.Emit* methods on parsekit.P.
|
// the p.Emit* methods on parsekit.P.
|
||||||
// When errors occur, or the end of the file is reached, then the built-in
|
// When errors occur, or the end of the file is reached, then the built-in
|
||||||
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
||||||
stateHandler := func(p *parsekit.P) {
|
stateHandler := func(p *parsekit.P) {
|
||||||
if p.On(c.Str("question")).Accept().End() {
|
if p.On(c.Str("question")).Accept() {
|
||||||
p.EmitLiteral(QuestionItem)
|
p.EmitLiteral(QuestionItem)
|
||||||
}
|
}
|
||||||
p.ExpectEndOfFile()
|
p.ExpectEndOfFile()
|
||||||
|
@ -280,11 +97,11 @@ func ExampleError_ErrorFull() {
|
||||||
|
|
||||||
func ExampleMatchAnyRune() {
|
func ExampleMatchAnyRune() {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
var a = parsekit.A
|
a := parsekit.A
|
||||||
|
|
||||||
handler := func(p *parsekit.P) {
|
handler := func(p *parsekit.P) {
|
||||||
p.Expects("Any valid rune")
|
p.Expects("Any valid rune")
|
||||||
if p.On(a.AnyRune).Accept().End() {
|
if p.On(a.AnyRune).Accept() {
|
||||||
p.EmitLiteral(TestItem)
|
p.EmitLiteral(TestItem)
|
||||||
p.RouteRepeat()
|
p.RouteRepeat()
|
||||||
}
|
}
|
||||||
|
|
|
@ -410,7 +410,7 @@ func MatchEndOfFile() Matcher {
|
||||||
return func(m *MatchDialog) bool {
|
return func(m *MatchDialog) bool {
|
||||||
fork := m.Fork()
|
fork := m.Fork()
|
||||||
input, ok := fork.NextRune()
|
input, ok := fork.NextRune()
|
||||||
return !ok && input == EOF
|
return !ok && input == eofRune
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -213,7 +213,7 @@ func TestSequenceOfRunes(t *testing.T) {
|
||||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||||
parser := parsekit.NewParser(func(p *parsekit.P) {
|
parser := parsekit.NewParser(func(p *parsekit.P) {
|
||||||
p.Expects("Sequence of runes")
|
p.Expects("Sequence of runes")
|
||||||
if p.On(sequence).Accept().End() {
|
if p.On(sequence).Accept() {
|
||||||
p.EmitLiteral(TestItem)
|
p.EmitLiteral(TestItem)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
@ -158,7 +158,7 @@ type MatcherWrapper struct {
|
||||||
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
|
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
|
||||||
handler := func(p *P) {
|
handler := func(p *P) {
|
||||||
p.Expects(expects)
|
p.Expects(expects)
|
||||||
if p.On(matcher).Accept().End() {
|
if p.On(matcher).Accept() {
|
||||||
p.EmitLiteral(0) // ItemType is irrelevant
|
p.EmitLiteral(0) // ItemType is irrelevant
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
109
statehandler.go
109
statehandler.go
|
@ -16,125 +16,54 @@ type P struct {
|
||||||
state StateHandler // the function that handles the current state
|
state StateHandler // the function that handles the current state
|
||||||
nextState StateHandler // the function that will handle the next state
|
nextState StateHandler // the function that will handle the next state
|
||||||
routeStack []StateHandler // route stack, for handling nested parsing
|
routeStack []StateHandler // route stack, for handling nested parsing
|
||||||
input string // the scanned input
|
input string // the input that is being scanned by the parser
|
||||||
inputPos int // current byte cursor position in the input
|
inputPos int // current byte cursor position in the input
|
||||||
cursorLine int // current rune cursor row number in the input
|
cursorLine int // current rune cursor row number in the input
|
||||||
cursorColumn int // current rune cursor column position in the input
|
cursorColumn int // current rune cursor column position in the input
|
||||||
len int // the total length of the input in bytes
|
len int // the total length of the input in bytes
|
||||||
newline bool // keep track of when we have scanned a newline
|
newline bool // keep track of when we have scanned a newline
|
||||||
expecting string // a description of what the current state expects to find
|
expecting string // a description of what the current state expects to find (see P.Expects())
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values
|
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||||
items chan Item // channel of resulting Parser items
|
items chan Item // channel of resulting Parser items (see P.Emit())
|
||||||
item Item // the current item as reached by Next() and retrieved by Get()
|
item Item // the current item as reached by Next() and retrieved by Get()
|
||||||
err *Error // an error when lexing failed, retrieved by Error()
|
err *Error // an error when lexing failed, retrieved by Error()
|
||||||
|
|
||||||
LastMatch string // a string representation of the last matched input data
|
LastMatch string // a string representation of the last matched input data
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expects is used to let a state function describe what input it is expecting.
|
// peek returns but does not advance the cursor to the next rune in the input.
|
||||||
// This expectation is used in error messages to make them more descriptive.
|
|
||||||
//
|
|
||||||
// When defining an expectation inside a StateHandler, you do not need to
|
|
||||||
// handle unexpected input yourself. When the end of the function is reached
|
|
||||||
// without setting the next state, an automatic error will be emitted.
|
|
||||||
// This error can differentiate between the following issues:
|
|
||||||
//
|
|
||||||
// * there is valid data on input, but it was not accepted by the function
|
|
||||||
//
|
|
||||||
// * there is an invalid UTF8 character on input
|
|
||||||
//
|
|
||||||
// * the end of the file was reached.
|
|
||||||
func (p *P) Expects(description string) {
|
|
||||||
p.expecting = description
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
|
//
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
// (end of data or invalid UTF8 character).
|
// (end of data or invalid UTF8 character). In this case, the returned rune
|
||||||
|
// will be one of eofRune or invalidRune.
|
||||||
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
||||||
return handleRuneError(r, w)
|
return handleRuneError(r, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
// EOF is a special rune, which is used to indicate an end of file when
|
// eofRune is a special rune, which is used to indicate an end of file when
|
||||||
// reading a character from the input.
|
// reading a character from the input.
|
||||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
const eofRune rune = -1
|
||||||
// say 'I now expect the end of the file' is using something like:
|
|
||||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
|
||||||
const EOF rune = -1
|
|
||||||
|
|
||||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
// invalidRune is a special rune, which is used to indicate an invalid UTF8
|
||||||
// rune on the input.
|
// rune on the input.
|
||||||
const INVALID rune = utf8.RuneError
|
const invalidRune rune = utf8.RuneError
|
||||||
|
|
||||||
// handleRuneError is used to normale rune value in case of errors.
|
// handleRuneError is used to create specific rune values in case of errors.
|
||||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||||
// This can however indicate one of two situations:
|
// This can however indicate one of two situations:
|
||||||
// * w == 0: end of file is reached
|
// 1) w == 0: end of file is reached
|
||||||
// * w == 1: invalid UTF character on input
|
// 2) w == 1: invalid UTF character on input
|
||||||
// This function lets these two cases return respectively the
|
// This function lets these two cases return respectively the
|
||||||
// package's own EOF or INVALID runes, to make it easy for client
|
// package's own eofRune or invalidRune, to make it easy for calling code
|
||||||
// code to distinct between these two cases.
|
// to distinguish between these two cases.
|
||||||
func handleRuneError(r rune, w int) (rune, int, bool) {
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||||
if r == utf8.RuneError {
|
if r == utf8.RuneError {
|
||||||
if w == 0 {
|
if w == 0 {
|
||||||
return EOF, 0, false
|
return eofRune, 0, false
|
||||||
}
|
}
|
||||||
return INVALID, w, false
|
return invalidRune, w, false
|
||||||
}
|
}
|
||||||
return r, w, true
|
return r, w, true
|
||||||
}
|
}
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke
|
|
||||||
// in the next parsing cycle.
|
|
||||||
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
|
||||||
p.nextState = state
|
|
||||||
return &routeFollowupAction{chainAction: chainAction{p, true}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteRepeat indicates that on the next parsing cycle, the current
|
|
||||||
// StateHandler must be reinvoked.
|
|
||||||
func (p *P) RouteRepeat() *chainAction {
|
|
||||||
p.RouteTo(p.state)
|
|
||||||
return &chainAction{nil, true}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the last
|
|
||||||
// StateHandler that was pushed on the route stack must be invoked.
|
|
||||||
//
|
|
||||||
// Using this method is optional. When implementating a StateHandler that
|
|
||||||
// is used as a sort of subroutine (using constructions like
|
|
||||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
|
||||||
// providing an explicit routing decision from that handler. The parser will
|
|
||||||
// automatically assume a RouteReturn() in that case.
|
|
||||||
func (p *P) RouteReturn() *chainAction {
|
|
||||||
p.nextState = p.popRoute()
|
|
||||||
return &chainAction{nil, true}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pushRoute adds the StateHandler to the route stack.
|
|
||||||
// This is used for implementing nested parsing.
|
|
||||||
func (p *P) pushRoute(state StateHandler) {
|
|
||||||
p.routeStack = append(p.routeStack, state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// popRoute pops the last pushed StateHandler from the route stack.
|
|
||||||
func (p *P) popRoute() StateHandler {
|
|
||||||
last := len(p.routeStack) - 1
|
|
||||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
|
||||||
p.routeStack = head
|
|
||||||
return tail
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
|
||||||
// your parser expects to be at the end of the file. This will schedule
|
|
||||||
// a parsekit-provided StateHandler which will do the actual check for this.
|
|
||||||
func (p *P) ExpectEndOfFile() {
|
|
||||||
p.RouteTo(func(p *P) {
|
|
||||||
p.Expects("end of file")
|
|
||||||
if p.On(A.EndOfFile).Stay().End() {
|
|
||||||
p.Emit(ItemEOF, "EOF")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
|
@ -37,6 +37,44 @@ func (p *P) EmitLiteral(t ItemType) {
|
||||||
p.Emit(t, p.buffer.asLiteralString())
|
p.Emit(t, p.buffer.asLiteralString())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BufLiteral retrieves the contents of the parser buffer (all the runes that
|
||||||
|
// were added to it using P.Accept()) as a literal string.
|
||||||
|
//
|
||||||
|
// Literal means that if the input had for example the subsequent runes '\' and 'n'
|
||||||
|
// in it, then the literal string would have a backslash and an 'n' in it, not a
|
||||||
|
// linefeed (ASCII char 10).
|
||||||
|
//
|
||||||
|
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||||
|
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
|
||||||
|
func (p *P) BufLiteral() string {
|
||||||
|
return p.buffer.asLiteralString()
|
||||||
|
}
|
||||||
|
|
||||||
|
// BufInterpreted retrieves the contents of the parser buffer (all the runes that
|
||||||
|
// were added to it using P.Accept()) as an interpreted string.
|
||||||
|
//
|
||||||
|
// Interpreted means that the contents are treated as a Go double quoted
|
||||||
|
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). If the
|
||||||
|
// input had for example the subsequent runes '\' and 'n' in it, then the interpreted
|
||||||
|
// string would have an actual linefeed (ASCII char 10) in it.
|
||||||
|
//
|
||||||
|
// This method returns a boolean value, indicating whether or not the string
|
||||||
|
// interpretation was successful. On invalid string data, an error will
|
||||||
|
// automatically be emitted and the boolean return value will be false.
|
||||||
|
//
|
||||||
|
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||||
|
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
|
||||||
|
func (p *P) BufInterpreted() (string, bool) {
|
||||||
|
s, err := p.buffer.asInterpretedString()
|
||||||
|
if err != nil {
|
||||||
|
p.EmitError(
|
||||||
|
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
||||||
|
p.buffer.asLiteralString(), err)
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
return s, true
|
||||||
|
}
|
||||||
|
|
||||||
// EmitInterpreted passes a Parser item to the client, including accumulated
|
// EmitInterpreted passes a Parser item to the client, including accumulated
|
||||||
// string buffer data a Go double quoted interpreted string (handling escape
|
// string buffer data as a Go double quoted interpreted string (handling escape
|
||||||
// codes like \n, \t, \uXXXX, etc.)
|
// codes like \n, \t, \uXXXX, etc.)
|
||||||
|
@ -90,9 +128,9 @@ func (p *P) UnexpectedInput() {
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case ok:
|
||||||
p.EmitError("unexpected character %q%s", r, fmtExpects(p))
|
p.EmitError("unexpected character %q%s", r, fmtExpects(p))
|
||||||
case r == EOF:
|
case r == eofRune:
|
||||||
p.EmitError("unexpected end of file%s", fmtExpects(p))
|
p.EmitError("unexpected end of file%s", fmtExpects(p))
|
||||||
case r == INVALID:
|
case r == invalidRune:
|
||||||
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
|
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
|
||||||
default:
|
default:
|
||||||
panic("parsekit bug: Unhandled output from peek()")
|
panic("parsekit bug: Unhandled output from peek()")
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
// Expects is used to let a state function describe what input it is expecting.
|
||||||
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
|
//
|
||||||
|
// When defining an expectation inside a StateHandler, you do not need to
|
||||||
|
// handle unexpected input yourself. When the end of the function is reached
|
||||||
|
// without setting the next state, an automatic error will be emitted.
|
||||||
|
// This error can differentiate between the following issues:
|
||||||
|
//
|
||||||
|
// 1) there is valid data on input, but it was not accepted by the function
|
||||||
|
//
|
||||||
|
// 2) there is an invalid UTF8 character on input
|
||||||
|
//
|
||||||
|
// 3) the end of the file was reached.
|
||||||
|
func (p *P) Expects(description string) {
|
||||||
|
p.expecting = description
|
||||||
|
}
|
|
@ -1,41 +1,41 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// On checks if the current input matches the provided Matcher.
|
// On checks if the input at the current cursor position matches the provided Matcher.
|
||||||
|
// On must be chained with another method, which tells the parser what action to
|
||||||
|
// perform when a match was found:
|
||||||
//
|
//
|
||||||
// This method is the start of a chain method in which multiple things can
|
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||||
// be arranged in one go:
|
|
||||||
//
|
//
|
||||||
// 1) Checking whether or not there is a match (this is what On does)
|
// 2) On(...).Accept() - Move cursor forward, add matched runes to the string buffer.
|
||||||
//
|
//
|
||||||
// 2) Deciding what to do with the match (Stay(): do nothing, Skip(): only move
|
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
||||||
// the cursor forward, Accept(): move cursor forward and add the match in
|
|
||||||
// the parser string buffer)
|
|
||||||
//
|
//
|
||||||
// 3) Dedicing where to route to (e.g. using RouteTo() to route to a
|
// So an example chain could look like this:
|
||||||
// StateHandler by name)
|
|
||||||
//
|
//
|
||||||
// 4) Followup routing after that, when applicable (.e.g using something like
|
// p.On(parsekit.A.Whitespace).Skip()
|
||||||
// RouteTo(...).ThenTo(...))
|
|
||||||
//
|
//
|
||||||
// For every step of this chain, you can end the chain using the
|
// The chain as a whole returns a boolean, which indicates whether or not a match
|
||||||
// End() method. This will return a boolean value, indicating whether or
|
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||||
// not the initial On() method found a match in the input.
|
// will have no effect. Because of this, typical use of an On() chain is as
|
||||||
// End() is not mandatory. It is merely provided as a means to use
|
// the condition of a conditional statement (if, switch/case, for). E.g.:
|
||||||
// a chain as an expression for a switch/case or if statement (since those
|
|
||||||
// require a boolean expression).
|
|
||||||
//
|
//
|
||||||
// You can omit "what to do with the match" and go straight into a routing
|
// // Skip multiple exclamation marks.
|
||||||
// method, e.g.
|
// for p.On(parsekit.A.Excl).Skip() { }
|
||||||
//
|
//
|
||||||
// On(...).RouteTo(...)
|
// // Fork a route based on the input.
|
||||||
|
// switch {
|
||||||
|
// case p.On(parsekit.A.Excl).Stay():
|
||||||
|
// p.RouteTo(stateHandlerA)
|
||||||
|
// case p.On(parsekit.A.Colon).Stay():
|
||||||
|
// p.RouteTo(stateHandlerB)
|
||||||
|
// default:
|
||||||
|
// p.RouteTo(stateHandlerC)
|
||||||
|
// }
|
||||||
//
|
//
|
||||||
// This is functionally the same as using
|
// // When there's a "hi" on input, emit it.
|
||||||
//
|
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||||
// On(...).Stay().RouteTo(...).
|
// p.Emit(SomeItemType, p.BufLiteral())
|
||||||
//
|
// }
|
||||||
// Here's a complete example chain:
|
|
||||||
//
|
|
||||||
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
|
||||||
func (p *P) On(matcher Matcher) *matchAction {
|
func (p *P) On(matcher Matcher) *matchAction {
|
||||||
m := &MatchDialog{p: p}
|
m := &MatchDialog{p: p}
|
||||||
if matcher == nil {
|
if matcher == nil {
|
||||||
|
@ -45,39 +45,25 @@ func (p *P) On(matcher Matcher) *matchAction {
|
||||||
|
|
||||||
// Keep track of the last match, to allow parser implementations
|
// Keep track of the last match, to allow parser implementations
|
||||||
// to access it in an easy way. Typical use would be something like:
|
// to access it in an easy way. Typical use would be something like:
|
||||||
|
//
|
||||||
// if p.On(somethingBad).End() {
|
// if p.On(somethingBad).Stay() {
|
||||||
// p.Errorf("This was bad: %s", p.LastMatch)
|
// p.Errorf("This was bad: %s", p.LastMatch)
|
||||||
// }
|
// }
|
||||||
p.LastMatch = string(m.input)
|
p.LastMatch = string(m.input)
|
||||||
|
|
||||||
return &matchAction{
|
return &matchAction{
|
||||||
routeAction: routeAction{chainAction{p, ok}},
|
p: p,
|
||||||
|
ok: ok,
|
||||||
input: m.input,
|
input: m.input,
|
||||||
output: m.output,
|
output: m.output,
|
||||||
inputPos: p.inputPos + m.inputOffset,
|
inputPos: p.inputPos + m.inputOffset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// chainAction is used for building method chains for the On() method.
|
// matchAction is a struct that is used for building the On()-method chain.
|
||||||
// Every element of the method chain embeds this struct.
|
type matchAction struct {
|
||||||
type chainAction struct {
|
|
||||||
p *P
|
p *P
|
||||||
ok bool
|
ok bool
|
||||||
}
|
|
||||||
|
|
||||||
// End ends the method chain and returns a boolean indicating whether
|
|
||||||
// or not a match was found in the input.
|
|
||||||
func (a *chainAction) End() bool {
|
|
||||||
return a.ok
|
|
||||||
}
|
|
||||||
|
|
||||||
// matchAction is a struct that is used for building On()-method chains.
|
|
||||||
//
|
|
||||||
// It embeds the routeAction struct, to make it possible to go right into
|
|
||||||
// a route action, which is basically a simple way of aliasing a chain
|
|
||||||
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
|
|
||||||
type matchAction struct {
|
|
||||||
routeAction
|
|
||||||
input []rune
|
input []rune
|
||||||
output []rune
|
output []rune
|
||||||
inputPos int
|
inputPos int
|
||||||
|
@ -88,23 +74,29 @@ type matchAction struct {
|
||||||
// When no match was found, then no action is taken.
|
// When no match was found, then no action is taken.
|
||||||
// It returns a routeAction struct, which provides methods that can be used
|
// It returns a boolean value, which indicates whether or not a match was
|
||||||
// to tell the parser what state to go to next.
|
// found in the input.
|
||||||
func (a *matchAction) Accept() *routeAction {
|
func (a *matchAction) Accept() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.p.buffer.writeString(string(a.output))
|
a.p.buffer.writeString(string(a.output))
|
||||||
a.advanceCursor()
|
a.advanceCursor()
|
||||||
}
|
}
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip tells the parser to move the cursor past a match that was found,
|
// Skip tells the parser to move the cursor past a match that was found,
|
||||||
// without storing the actual match in the string buffer.
|
// without storing the actual match in the string buffer.
|
||||||
// Returns true in case a match was found.
|
// Returns true in case a match was found.
|
||||||
// When no match was found, then no action is taken and false is returned.
|
// When no match was found, then no action is taken and false is returned.
|
||||||
func (a *matchAction) Skip() *routeAction {
|
func (a *matchAction) Skip() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.advanceCursor()
|
a.advanceCursor()
|
||||||
}
|
}
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
return a.ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stay tells the parser to not move the cursor after finding a match.
|
||||||
|
// Returns true in case a match was found, false otherwise.
|
||||||
|
func (a *matchAction) Stay() bool {
|
||||||
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the input data.
|
// advanceCursor advances the rune cursor one position in the input data.
|
||||||
|
@ -122,67 +114,3 @@ func (a *matchAction) advanceCursor() {
|
||||||
a.p.newline = r == '\n'
|
a.p.newline = r == '\n'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stay tells the parser to not move the cursor after finding a match.
|
|
||||||
// Returns true in case a match was found, false otherwise.
|
|
||||||
func (a *matchAction) Stay() *routeAction {
|
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// routeAction is a struct that is used for building On() method chains.
|
|
||||||
type routeAction struct {
|
|
||||||
chainAction
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteRepeat indicates that on the next parsing cycle,
|
|
||||||
// the current StateHandler must be reinvoked.
|
|
||||||
func (a *routeAction) RouteRepeat() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteRepeat()
|
|
||||||
}
|
|
||||||
return &chainAction{nil, false}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke
|
|
||||||
// in the next parsing cycle.
|
|
||||||
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteTo(state)
|
|
||||||
}
|
|
||||||
return &routeFollowupAction{chainAction: chainAction{nil, false}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the next scheduled
|
|
||||||
// route must be invoked.
|
|
||||||
func (a *routeAction) RouteReturn() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteReturn()
|
|
||||||
}
|
|
||||||
return &chainAction{nil, false}
|
|
||||||
}
|
|
||||||
|
|
||||||
// routeFollowupAction chains parsing routes.
|
|
||||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
|
||||||
type routeFollowupAction struct {
|
|
||||||
chainAction
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
|
||||||
// StateHandler has been completed.
|
|
||||||
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
|
||||||
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
a.p.pushRoute(state)
|
|
||||||
}
|
|
||||||
return &chainAction{nil, a.ok}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenReturnHere schedules the current StateHandler to be invoked after
|
|
||||||
// the RouteTo StateHandler has been completed.
|
|
||||||
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
|
||||||
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
a.p.pushRoute(a.p.state)
|
|
||||||
}
|
|
||||||
return &chainAction{nil, a.ok}
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
// RouteTo tells the parser what StateHandler function to invoke
|
||||||
|
// in the next parsing cycle.
|
||||||
|
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
||||||
|
p.nextState = state
|
||||||
|
return &routeFollowupAction{p}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteRepeat indicates that on the next parsing cycle, the current
|
||||||
|
// StateHandler must be reinvoked.
|
||||||
|
func (p *P) RouteRepeat() {
|
||||||
|
p.RouteTo(p.state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteReturn tells the parser that on the next cycle the last
|
||||||
|
// StateHandler that was pushed on the route stack must be invoked.
|
||||||
|
//
|
||||||
|
// Using this method is optional. When implementing a StateHandler that
|
||||||
|
// is used as a sort of subroutine (using constructions like
|
||||||
|
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||||
|
// providing an explicit routing decision from that handler. The parser will
|
||||||
|
// automatically assume a RouteReturn() in that case.
|
||||||
|
func (p *P) RouteReturn() {
|
||||||
|
p.nextState = p.popRoute()
|
||||||
|
}
|
||||||
|
|
||||||
|
// routeFollowupAction chains parsing routes.
|
||||||
|
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||||
|
type routeFollowupAction struct {
|
||||||
|
p *P
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
||||||
|
// StateHandler has been completed.
|
||||||
|
// For example:
|
||||||
|
//
|
||||||
|
// p.RouteTo(handlerA).ThenTo(handlerB)
|
||||||
|
func (a *routeFollowupAction) ThenTo(state StateHandler) {
|
||||||
|
a.p.pushRoute(state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenReturnHere schedules the current StateHandler to be invoked after
|
||||||
|
// the RouteTo StateHandler has been completed.
|
||||||
|
// For example:
|
||||||
|
//
|
||||||
|
// p.RouteTo(handlerA).ThenReturnHere()
|
||||||
|
func (a *routeFollowupAction) ThenReturnHere() {
|
||||||
|
a.p.pushRoute(a.p.state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pushRoute adds the StateHandler to the route stack.
|
||||||
|
// This is used for implementing nested parsing.
|
||||||
|
func (p *P) pushRoute(state StateHandler) {
|
||||||
|
p.routeStack = append(p.routeStack, state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// popRoute pops the last pushed StateHandler from the route stack.
|
||||||
|
func (p *P) popRoute() StateHandler {
|
||||||
|
last := len(p.routeStack) - 1
|
||||||
|
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||||
|
p.routeStack = head
|
||||||
|
return tail
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
||||||
|
// your parser expects to be at the end of the file. This will schedule
|
||||||
|
// a parsekit-provided StateHandler which will do the actual check for this.
|
||||||
|
func (p *P) ExpectEndOfFile() {
|
||||||
|
p.RouteTo(func(p *P) {
|
||||||
|
p.Expects("end of file")
|
||||||
|
if p.On(A.EndOfFile).Stay() {
|
||||||
|
p.Emit(ItemEOF, "EOF")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
Loading…
Reference in New Issue