Got rid of the full On()...etc chains for both code clarity and usability clarity. Working on good examples and shaving the API's accordingly.

This commit is contained in:
Maurice Makaay 2019-05-25 22:53:04 +00:00
parent b0cd017b83
commit 2751c78003
13 changed files with 530 additions and 413 deletions

View File

@ -0,0 +1,140 @@
// Let's write a small example for parsing a really basic calculator.
// The calculator understands input that looks like:
//
// 10 + 20 - 8+4
//
// So positive numbers that can be either added or subtracted, and whitespace
// is ignored.
package parsekit_test
import (
"fmt"
"strconv"
"git.makaay.nl/mauricem/go-parsekit"
)
// createNumberMatcher builds the only Matcher that the calculator needs:
// a number, surrounded by optional whitespace.
//
// When writing a parser, a good first step is to use the parser/combinator
// functionality of parsekit to create some Matcher functions like this one.
// The parser state machine can later use them to check for matching strings
// on the input data.
//
// Skipping whitespace could be handled by the StateHandler functions below
// as well, but folding it into the Matcher keeps those handlers simple.
func createNumberMatcher() parsekit.Matcher {
	// Short aliases for the parsekit definitions.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// Optional whitespace is matched on both sides of the digits, wrapped
	// in m.Drop so that it is not part of the Matcher output.
	optionalSpace := m.Drop(c.Opt(a.Whitespace))
	digits := c.OneOrMore(a.Digit)
	return c.Seq(optionalSpace, digits, optionalSpace)
}
// calcNumber is the number Matcher, created once at package level. It is
// used by the calcWaitForNumber state handler.
var calcNumber = createNumberMatcher()
// We need to define the ItemTypes that we will use for emitting Items
// during the parsing process. The values are numbered using iota, so
// numberType is 0, addType is 1 and subtractType is 2.
const (
// numberType is emitted for every number found on the input.
numberType parsekit.ItemType = iota
// addType is emitted for a plus operator.
addType
// subtractType is emitted for a minus operator.
subtractType
)
// We also need to define the state machine for parsing the input.
// The state machine is built up from functions that match the StateHandler
// signature: func(*parsekit.P)
// The P struct holds the internal state for the parser and it provides
// some methods that form the API for your StateHandler implementation.
// calcWaitForNumber is the state in which a number is expected.
//
// When a number is found on the input, it is accepted into the output
// buffer, after which the buffer is emitted as a numberType item. The state
// machine is then told to continue with calcWaitForOperatorOrEndOfInput.
//
// When no number is found, no next state is set and the parser will emit an
// error, explaining that "a number" was expected.
func calcWaitForNumber(p *parsekit.P) {
	p.Expects("a number")
	if !p.On(calcNumber).Accept() {
		return
	}
	p.EmitLiteral(numberType)
	p.RouteTo(calcWaitForOperatorOrEndOfInput)
}
// calcWaitForOperatorOrEndOfInput is the state in which a plus or minus
// operator is expected. When one of those is found, the appropriate Item is
// emitted and the parser is sent back to calcWaitForNumber to find the next
// number on the input.
//
// When no operator is found, then the parser is told to expect the end of
// the input. When more input data is available (which is obviously wrong
// data since it does not match our syntax), the parser will emit an error.
func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
	// Declare the atoms alias locally, so this handler does not rely on an
	// identifier named a being declared somewhere else in the package.
	a := parsekit.A

	switch {
	case p.On(a.Plus).Accept():
		p.EmitLiteral(addType)
		p.RouteTo(calcWaitForNumber)
	case p.On(a.Minus).Accept():
		p.EmitLiteral(subtractType)
		p.RouteTo(calcWaitForNumber)
	default:
		p.ExpectEndOfFile()
	}
}
// All is ready for our parser. We can now create a new Parser struct.
// We need to tell it what the start state is. In our case, it is the
// calcWaitForNumber state, since the calculation must start with a number.
var calcParser = parsekit.NewParser(calcWaitForNumber)
func Example_basicCalculator() {
	// Feed the parser some input to work with.
	run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")

	// Step through the results of the parsing process by repeatedly calling
	// run.Next(), which returns either the next parse item, a parse error
	// or an end of file. The parse results are dumped and the computation
	// is performed along the way.
	total, sign := 0, +1
	for {
		item, err, ok := run.Next()
		if !ok {
			// Not ok without an error means that the end of file was reached.
			if err == nil {
				fmt.Println("End of file reached")
				fmt.Println("Outcome of computation:", total)
			} else {
				fmt.Printf("Error: %s\n", err)
			}
			return
		}

		fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
		switch item.Type {
		case addType:
			sign = +1
		case subtractType:
			sign = -1
		case numberType:
			value, convErr := strconv.Atoi(item.Value)
			if convErr != nil {
				fmt.Printf("Error: invalid number %s: %s\n", item.Value, convErr)
				return
			}
			// The last seen operator decides whether to add or subtract.
			total += sign * value
		}
	}

	// Output:
	// Type: 0, Value: "153"
	// Type: 1, Value: "+"
	// Type: 0, Value: "22"
	// Type: 1, Value: "+"
	// Type: 0, Value: "31"
	// Type: 2, Value: "-"
	// Type: 0, Value: "4"
	// Type: 2, Value: "-"
	// Type: 0, Value: "6"
	// Type: 1, Value: "+"
	// Type: 0, Value: "42"
	// End of file reached
	// Outcome of computation: 238
}

View File

@ -1,4 +1,6 @@
// In this example, a parser is created which can parse and normalize Dutch postcodes.
// In this example, a parser is created which can parse and normalize Dutch postcodes.
// The implementation uses only a Matcher function and does not implement a
// full-fledged state-based Parser for it.
package parsekit_test
import (
@ -9,7 +11,7 @@ import (
func createPostcodeMatcher() *parsekit.MatcherWrapper {
// Easy access to the parsekit definitions.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Matcher functions are created and combined to satisfy these rules:
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
@ -17,12 +19,12 @@ func createPostcodeMatcher() *parsekit.MatcherWrapper {
// - A space between letters and digits is optional.
// - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters.
var digitNotZero = c.Except(c.Rune('0'), a.Digit)
var pcDigits = c.Seq(digitNotZero, c.Rep(3, a.Digit))
var pcLetter = c.Any(a.ASCIILower, a.ASCIIUpper)
var pcLetters = m.ToUpper(c.Seq(pcLetter, pcLetter))
var space = m.Replace(c.Opt(a.Whitespace), " ")
var postcode = c.Seq(pcDigits, space, pcLetters)
digitNotZero := c.Except(c.Rune('0'), a.Digit)
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
space := m.Replace(c.Opt(a.Whitespace), " ")
postcode := c.Seq(pcDigits, space, pcLetters)
return parsekit.NewMatcher(postcode, "a Dutch postcode")
}

View File

@ -0,0 +1,58 @@
// In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it.
//
// The implementation uses only a Matcher function and does not implement a
// full-fledged state-based Parser for it. If you want to see the same kind of
// functionality, implemented using a Parser, take a look at the
// HelloWorldUsingParser example.
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// createHelloMatcher builds a Matcher-based parser for input that looks like
// "Hello, <name>!". The Matcher matches the whole input and drops all but
// the name from it.
func createHelloMatcher() *parsekit.MatcherWrapper {
	// Short aliases for the parsekit definitions.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// The building blocks of the greeting: a case-insensitive "hello", a
	// separator (either a comma with optional whitespace around it, or just
	// whitespace) and a name, which is everything up to an exclamation mark.
	helloWord := c.StrNoCase("hello")
	commaSeparator := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
	anySeparator := c.Any(commaSeparator, a.Whitespace)
	greetee := c.OneOrMore(c.Not(a.Excl))

	// Everything except for the name is wrapped in m.Drop.
	wholeGreeting := c.Seq(
		m.Drop(helloWord),
		m.Drop(anySeparator),
		greetee,
		m.Drop(a.Excl),
	)

	// Wrap the Matcher to get a ready to use Matcher-based parser.
	return parsekit.NewMatcher(wholeGreeting, "a friendly greeting")
}
func Example_helloWorldUsingMatcher() {
	parser := createHelloMatcher()

	inputs := []string{
		"Hello, world!",
		"HELLO ,Johnny!",
		"hello , Bob123!",
		"hello Pizza!",
		"Oh no!",
		"Hello, world",
	}

	// Parse every input and report either the extracted name or the error.
	for i, input := range inputs {
		output, err, ok := parser.Parse(input)
		if ok {
			fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
			continue
		}
		fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
	}

	// Output:
	// [0] Input: "Hello, world!" Output: world
	// [1] Input: "HELLO ,Johnny!" Output: Johnny
	// [2] Input: "hello , Bob123!" Output: Bob123
	// [3] Input: "hello Pizza!" Output: Pizza
	// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
	// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
}

111
example_helloparser_test.go Normal file
View File

@ -0,0 +1,111 @@
// In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it.
//
// This implementation uses a state-based Parser for it, and it does not implement
// any custom combinator/parser Matcher functions. Note that things are much easier to
// implement using custom Matchers (see the other HelloWorldUsingMatcher example
// for this). Doing this fully parser-based implementation is mainly for your
// learning pleasure.
//
// One big difference between the Matcher-based example and this one is that the
// state-based parser reports errors in a much more fine-grained way. This might
// or might not be useful for your specific application.
package parsekit_test
import (
"fmt"
"strings"
"git.makaay.nl/mauricem/go-parsekit"
)
// greeteeItem is the ItemType of the item that stateEndOfGreeting emits for
// the name that was found in the greeting.
const greeteeItem parsekit.ItemType = 1
// stateStartOfGreeting is the start state of the parser. It expects a
// case-insensitive "hello" on the input, skips it and routes to stateComma.
// When there is no match, no next state is set.
func stateStartOfGreeting(p *parsekit.P) {
	p.Expects("hello")
	hello := parsekit.C.StrNoCase("hello")
	if !p.On(hello).Skip() {
		return
	}
	p.RouteTo(stateComma)
}
// stateComma expects a comma, possibly preceded by whitespace. Whitespace is
// skipped by re-entering this same state, a comma is skipped and followed by
// a route to stateName. When neither matches, no next state is set.
func stateComma(p *parsekit.P) {
	atoms := parsekit.A
	p.Expects("comma")
	if p.On(atoms.Whitespace).Skip() {
		p.RouteRepeat()
		return
	}
	if p.On(atoms.Comma).Skip() {
		p.RouteTo(stateName)
	}
}
// stateName collects the name. An exclamation mark is skipped and ends the
// name by routing to stateEndOfGreeting. Any other rune is accepted into the
// buffer, after which this same state is re-entered. When neither matches,
// no next state is set.
func stateName(p *parsekit.P) {
	atoms := parsekit.A
	p.Expects("name")
	if p.On(atoms.Excl).Skip() {
		p.RouteTo(stateEndOfGreeting)
		return
	}
	if p.On(atoms.AnyRune).Accept() {
		p.RouteRepeat()
	}
}
// stateEndOfGreeting expects the end of the input, directly after the
// exclamation mark. When the end of file is found, the buffered name is
// stripped of surrounding whitespace and emitted as a greeteeItem. An empty
// name results in an error instead.
func stateEndOfGreeting(p *parsekit.P) {
	// Declare the atoms alias locally, like stateComma and stateName do, so
	// this handler does not rely on an identifier named a being declared
	// somewhere else in the package.
	a := parsekit.A

	p.Expects("end of greeting")
	if !p.On(a.EndOfFile).Stay() {
		return
	}

	name := strings.TrimSpace(p.BufLiteral())
	if name == "" {
		p.EmitError("The name cannot be empty")
		return
	}
	p.Emit(greeteeItem, name)
}
// createHelloParser creates the state-based parser for the greeting. Parsing
// starts in the stateStartOfGreeting state.
func createHelloParser() *parsekit.Parser {
	helloParser := parsekit.NewParser(stateStartOfGreeting)
	return helloParser
}
func Example_helloWorldUsingParser() {
	parser := createHelloParser()

	inputs := []string{
		"Hello, world!",
		"HELLO ,Johnny!",
		"hello , Bob123!",
		"hello Pizza!",
		"",
		" ",
		"hello",
		"hello,",
		"hello , ",
		"hello , Droopy",
		"hello , Droopy!",
		"hello , \t \t Droopy \t !",
		"Oh no!",
		"hello,!",
	}

	// Only the first result of every parse run is inspected: it is either
	// the item holding the name, or the error that stopped the parser.
	for i, input := range inputs {
		item, err, ok := parser.Parse(input).Next()
		if ok {
			fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value)
			continue
		}
		fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
	}

	// Output:
	// [0] Input: "Hello, world!" Output: world
	// [1] Input: "HELLO ,Johnny!" Output: Johnny
	// [2] Input: "hello , Bob123!" Output: Bob123
	// [3] Input: "hello Pizza!" Error: unexpected character 'P' (expected comma)
	// [4] Input: "" Error: unexpected end of file (expected hello)
	// [5] Input: " " Error: unexpected character ' ' (expected hello)
	// [6] Input: "hello" Error: unexpected end of file (expected comma)
	// [7] Input: "hello," Error: unexpected end of file (expected name)
	// [8] Input: "hello , " Error: unexpected end of file (expected name)
	// [9] Input: "hello , Droopy" Error: unexpected end of file (expected name)
	// [10] Input: "hello , Droopy!" Output: Droopy
	// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
	// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
	// [13] Input: "hello,!" Error: The name cannot be empty
}

View File

@ -2,194 +2,10 @@ package parsekit_test
import (
"fmt"
"strconv"
"git.makaay.nl/mauricem/go-parsekit"
)
// Example_helloWorldUsingParser is an empty stub: it has no statements and
// no "// Output:" comment.
func Example_helloWorldUsingParser() {
}
func Example_helloWorldUsingMatcher() {
	// This example creates a parser that is able to parse input that looks
	// like "Hello, <name>!", and that extracts the name from it.
	// Only a Matcher function is used for this; no full-fledged state-based
	// Parser is implemented.

	// Short aliases for the parsekit parser/combinators, atoms and modifiers.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// The building blocks of the greeting: a case-insensitive "hello", a
	// separator (either a comma with optional whitespace around it, or just
	// whitespace) and a name, which is everything up to an exclamation mark.
	helloWord := c.StrNoCase("hello")
	commaSeparator := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
	anySeparator := c.Any(commaSeparator, a.Whitespace)
	greetee := c.OneOrMore(c.Not(a.Excl))

	// The complete Matcher matches the whole input. Everything except for
	// the name is wrapped in m.Drop.
	wholeGreeting := c.Seq(
		m.Drop(helloWord),
		m.Drop(anySeparator),
		greetee,
		m.Drop(a.Excl),
	)

	// The Matcher is all that is needed to do some parsing.
	parser := parsekit.NewMatcher(wholeGreeting, "a friendly greeting")

	inputs := []string{
		"Hello, world!",
		"HELLO ,Johnny!",
		"hello , Bob123!",
		"hello Pizza!",
		"Oh no!",
		"Hello, world",
	}
	for i, input := range inputs {
		output, err, ok := parser.Parse(input)
		if ok {
			fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
			continue
		}
		fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
	}

	// Output:
	// [0] Input: "Hello, world!" Output: world
	// [1] Input: "HELLO ,Johnny!" Output: Johnny
	// [2] Input: "hello , Bob123!" Output: Bob123
	// [3] Input: "hello Pizza!" Output: Pizza
	// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
	// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
}
func Example_basicCalculator() {
	// A small example for parsing a really basic calculator.
	// The calculator understands input that looks like:
	//
	//   10 + 20 - 8+4
	//
	// So positive numbers that can be either added or subtracted, and
	// whitespace is ignored.

	// Short aliases for the parsekit parser/combinators, atoms and modifiers.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// When writing a parser, a good first step is to use the
	// parser/combinator functionality of parsekit to create some Matcher
	// functions. The parser state machine can later use them to find the
	// matching tokens on the input data.
	//
	// Here, only a definition of "number, surrounded by optional whitespace"
	// is needed. Skipping whitespace could be handled by the StateHandler
	// functions below as well, but folding it into the Matcher keeps those
	// handlers simple.
	optionalSpace := m.Drop(c.Opt(a.Whitespace))
	number := c.Seq(optionalSpace, c.OneOrMore(a.Digit), optionalSpace)

	// The types of the items that the parser will emit: one for numbers,
	// one for plus and one for minus. Using iota for automatic numbering is
	// the recommended way to define these.
	const (
		numberType parsekit.ItemType = iota
		addType
		subtractType
	)

	// The state machine for parsing the input is built up from functions
	// that match the StateHandler signature: func(*parsekit.P)
	// The P struct holds the internal state for the parser and it provides
	// some methods that form the API for a StateHandler implementation.
	//
	// (normally these would be regular functions instead of anonymous ones;
	// anonymous functions are used to keep the example in a single function)
	//
	// The operator handler is declared up front, because the two handlers
	// refer to each other.
	var operatorHandler parsekit.StateHandler

	// State: a number is expected. When a number is found on the input, it
	// is accepted into the output buffer, after which the buffer is emitted
	// as a numberType item. The state machine then continues with the
	// operatorHandler state. When no number is found, the parser will emit
	// an error, explaining that "a number" was expected.
	numberHandler := func(p *parsekit.P) {
		p.Expects("a number")
		if !p.On(number).Accept().End() {
			return
		}
		p.EmitLiteral(numberType)
		p.RouteTo(operatorHandler)
	}

	// State: a plus or minus operator is expected. When one of those is
	// found, the appropriate Item is emitted and the parser is sent back to
	// the numberHandler to find the next number on the input. When no
	// operator is found, the parser is told to expect the end of the input.
	// When more input data is available (which is obviously wrong data since
	// it does not match our syntax), the parser will emit an error.
	operatorHandler = func(p *parsekit.P) {
		if p.On(a.Plus).Accept().End() {
			p.EmitLiteral(addType)
			p.RouteTo(numberHandler)
		} else if p.On(a.Minus).Accept().End() {
			p.EmitLiteral(subtractType)
			p.RouteTo(numberHandler)
		} else {
			p.ExpectEndOfFile()
		}
	}

	// Create the parser, telling it that the start state is the number
	// state (the calculation must start with a number), and feed it some
	// input to work with.
	run := parsekit.NewParser(numberHandler).Parse(" 153+22 + 31-4 -\t 6+42 ")

	// Step through the results of the parsing process by repeatedly calling
	// run.Next(), which returns either the next parse item, a parse error
	// or an end of file. The parse results are dumped and the computation
	// is performed along the way.
	total, sign := 0, +1
	for {
		item, err, ok := run.Next()
		if !ok {
			// Not ok without an error means that the end of file was reached.
			if err == nil {
				fmt.Println("End of file reached")
				fmt.Println("Outcome of computation:", total)
			} else {
				fmt.Printf("Error: %s\n", err)
			}
			return
		}

		fmt.Printf("Type: %d, Value: %q\n", item.Type, item.Value)
		switch item.Type {
		case addType:
			sign = +1
		case subtractType:
			sign = -1
		case numberType:
			value, convErr := strconv.Atoi(item.Value)
			if convErr != nil {
				fmt.Printf("Error: invalid number %s: %s\n", item.Value, convErr)
				return
			}
			// The last seen operator decides whether to add or subtract.
			total += sign * value
		}
	}

	// Output:
	// Type: 0, Value: "153"
	// Type: 1, Value: "+"
	// Type: 0, Value: "22"
	// Type: 1, Value: "+"
	// Type: 0, Value: "31"
	// Type: 2, Value: "-"
	// Type: 0, Value: "4"
	// Type: 2, Value: "-"
	// Type: 0, Value: "6"
	// Type: 1, Value: "+"
	// Type: 0, Value: "42"
	// End of file reached
	// Outcome of computation: 238
}
func ExampleItemType() {
// Make use of positive values. Ideally, define your ItemTypes using
// iota for easy automatic value management like this:
@ -202,17 +18,18 @@ func ExampleItemType() {
}
func ExampleItem() {
var c = parsekit.C
// Easy access to the parsekit definitions.
c := parsekit.C
// You define your own item types for your specific parser.
var QuestionItem parsekit.ItemType = 42
const QuestionItem = parsekit.ItemType(42)
// A StateHandler function can use the defined item type by means of
// the p.Emit* methods on parsekit.P.
// When errors occur, or the end of the file is reached, then the built-in
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
stateHandler := func(p *parsekit.P) {
if p.On(c.Str("question")).Accept().End() {
if p.On(c.Str("question")).Accept() {
p.EmitLiteral(QuestionItem)
}
p.ExpectEndOfFile()
@ -280,11 +97,11 @@ func ExampleError_ErrorFull() {
func ExampleMatchAnyRune() {
// Easy access to the parsekit definitions.
var a = parsekit.A
a := parsekit.A
handler := func(p *parsekit.P) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept().End() {
if p.On(a.AnyRune).Accept() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}

View File

@ -410,7 +410,7 @@ func MatchEndOfFile() Matcher {
return func(m *MatchDialog) bool {
fork := m.Fork()
input, ok := fork.NextRune()
return !ok && input == EOF
return !ok && input == eofRune
}
}

View File

@ -213,7 +213,7 @@ func TestSequenceOfRunes(t *testing.T) {
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.NewParser(func(p *parsekit.P) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() {
if p.On(sequence).Accept() {
p.EmitLiteral(TestItem)
}
})

View File

@ -158,7 +158,7 @@ type MatcherWrapper struct {
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
handler := func(p *P) {
p.Expects(expects)
if p.On(matcher).Accept().End() {
if p.On(matcher).Accept() {
p.EmitLiteral(0) // ItemType is irrelevant
}
}

View File

@ -16,125 +16,54 @@ type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
items chan Item // channel of resulting Parser items (see P.Emit())
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
}
// Expects is used to let a state function describe what input it is expecting.
// The description is stored in the parser state and is used in error
// messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
// handle unexpected input yourself. When the end of the function is reached
// without setting the next state, an automatic error will be emitted.
// This error can differentiate between the following issues:
//
// * there is valid data on input, but it was not accepted by the function
//
// * there is an invalid UTF8 character on input
//
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}
// peek returns but does not advance the cursor to the next rune(s) in the input.
// peek returns but does not advance the cursor to the next rune in the input.
// Returns the rune, its width in bytes and a boolean.
//
// The boolean will be false in case no upcoming rune can be peeked
// (end of data or invalid UTF8 character).
// (end of data or invalid UTF8 character). In this case, the returned rune
// will be one of eofRune or invalidRune.
func (p *P) peek(byteOffset int) (rune, int, bool) {
	// Decode the first rune of the input that remains at the requested byte
	// offset from the cursor, and pass the decoding result straight on to
	// handleRuneError.
	remaining := p.input[p.inputPos+byteOffset:]
	return handleRuneError(utf8.DecodeRuneInString(remaining))
}
// EOF is a special rune, which is used to indicate an end of file when
// eofRune is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1
const eofRune rune = -1
// INVALID is a special rune, which is used to indicate an invalid UTF8
// invalidRune is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError
const invalidRune rune = utf8.RuneError
// handleRuneError is used to normale rune value in case of errors.
// handleRuneError is used to create specific rune value in case of errors.
// When an error occurs, then utf8.RuneError will be in the rune.
// This can however indicate one of two situations:
// * w == 0: end of file is reached
// * w == 1: invalid UTF character on input
// 1) w == 0: end of file is reached
// 2) w == 1: invalid UTF character on input
// This function lets these two cases return respectively the
// package's own EOF or INVALID runes, to make it easy for client
// code to distinct between these two cases.
// package's own eofRune or invalidRune, to make it easy for calling code
// to distinguish between these two cases.
func handleRuneError(r rune, w int) (rune, int, bool) {
if r == utf8.RuneError {
if w == 0 {
return EOF, 0, false
return eofRune, 0, false
}
return INVALID, w, false
return invalidRune, w, false
}
return r, w, true
}
// RouteTo tells the parser what StateHandler function to invoke
// in the next parsing cycle. It returns a routeFollowupAction, of which the
// chainAction refers to this parser and is marked as ok.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	chain := chainAction{p: p, ok: true}
	return &routeFollowupAction{chainAction: chain}
}
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked. It returns a chainAction that is marked
// as ok and that holds no parser reference.
func (p *P) RouteRepeat() *chainAction {
	// Routing to the current state makes it the next state as well.
	p.RouteTo(p.state)
	return &chainAction{p: nil, ok: true}
}
// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
// It returns a chainAction that is marked as ok and that holds no parser
// reference.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() *chainAction {
	returnTo := p.popRoute()
	p.nextState = returnTo
	return &chainAction{p: nil, ok: true}
}
// pushRoute adds the StateHandler to the end of the route stack.
// This is used for implementing nested parsing. The pushed StateHandler
// can be retrieved again using popRoute.
func (p *P) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute removes the last pushed StateHandler from the route stack
// and returns it.
//
// The route stack must not be empty: slicing it with a negative index
// results in a runtime panic.
func (p *P) popRoute() StateHandler {
	top := len(p.routeStack) - 1
	remaining := p.routeStack[:top]
	popped := p.routeStack[top]
	p.routeStack = remaining
	return popped
}
// ExpectEndOfFile can be used from a StateHandler function to indicate that
// your parser expects to be at the end of the file. This will schedule
// a parsekit-provided StateHandler which will do the actual check for this.
func (p *P) ExpectEndOfFile() {
	// The scheduled handler emits an ItemEOF item when the end of the file
	// is found. Otherwise it sets no next state, leaving only its "end of
	// file" expectation behind.
	endOfFileHandler := func(p *P) {
		p.Expects("end of file")
		if !p.On(A.EndOfFile).Stay().End() {
			return
		}
		p.Emit(ItemEOF, "EOF")
	}
	p.RouteTo(endOfFileHandler)
}

View File

@ -37,6 +37,44 @@ func (p *P) EmitLiteral(t ItemType) {
p.Emit(t, p.buffer.asLiteralString())
}
// BufLiteral retrieves the contents of the parser buffer (all the runes that
// were added to it using P.Accept()) as a literal string.
//
// Literal means that if the input had for example the subsequent runes '\' and 'n'
// in it, then the literal string would have a backslash and an 'n' in it, not a
// linefeed (ASCII char 10).
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
func (p *P) BufLiteral() string {
return p.buffer.asLiteralString()
}
// BufInterpreted retrieves the contents of the parser buffer (all the runes that
// were added to it using P.Accept()) as an interpreted string.
//
// Interpreted means that the contents are treated as a Go double quoted
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). If the
// input had for example the subsequent runes '\' and 'n' in it, then the
// interpreted string would have an actual linefeed (ASCII char 10) in it.
//
// This method returns a boolean value, indicating whether or not the string
// interpretation was successful. On invalid string data, an error will
// automatically be emitted, and an empty string and false are returned.
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
func (p *P) BufInterpreted() (string, bool) {
	interpreted, err := p.buffer.asInterpretedString()
	if err == nil {
		return interpreted, true
	}

	// Report the literal buffer contents, together with the reason why they
	// could not be interpreted.
	p.EmitError(
		"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
		p.buffer.asLiteralString(), err)
	return "", false
}
// EmitInterpreted passes a Parser item to the client, including accumulated
// string buffer data as a Go double quoted interpreted string (handling escape
// codes like \n, \t, \uXXXX, etc.)
@ -90,9 +128,9 @@ func (p *P) UnexpectedInput() {
switch {
case ok:
p.EmitError("unexpected character %q%s", r, fmtExpects(p))
case r == EOF:
case r == eofRune:
p.EmitError("unexpected end of file%s", fmtExpects(p))
case r == INVALID:
case r == invalidRune:
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
default:
panic("parsekit bug: Unhandled output from peek()")

18
statehandler_expects.go Normal file
View File

@ -0,0 +1,18 @@
package parsekit
// Expects is used to let a state function describe what input it is expecting.
// The description is stored in the parser state and is used in error
// messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
// handle unexpected input yourself. When the end of the function is reached
// without setting the next state, an automatic error will be emitted.
// This error can differentiate between the following issues:
//
// 1) there is valid data on input, but it was not accepted by the function
//
// 2) there is an invalid UTF8 character on input
//
// 3) the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}

View File

@ -1,41 +1,41 @@
package parsekit
// On checks if the current input matches the provided Matcher.
// On checks if the input at the current cursor position matches the provided Matcher.
// On must be chained with another method, which tells the parser what action to
// perform when a match was found:
//
// This method is the start of a chain method in which multiple things can
// be arranged in one go:
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
//
// 1) Checking whether or not there is a match (this is what On does)
// 2) On(...).Accept() - Move cursor forward, add matched runes to the string buffer.
//
// 2) Deciding what to do with the match (Stay(): do nothing, Skip(): only move
// the cursor forward, Accept(): move cursor forward and add the match in
// the parser string buffer)
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
//
// 3) Dedicing where to route to (e.g. using RouteTo() to route to a
// StateHandler by name)
// So an example chain could look like this:
//
// 4) Followup routing after that, when applicable (.e.g using something like
// RouteTo(...).ThenTo(...))
// p.On(parsekit.A.Whitespace).Skip()
//
// For every step of this chain, you can end the chain using the
// End() method. This will return a boolean value, indicating whether or
// not the initial On() method found a match in the input.
// End() is not mandatory. It is merely provided as a means to use
// a chain as an expression for a switch/case or if statement (since those
// require a boolean expression).
// The chain as a whole returns a boolean, which indicates whether or not a match
// was found. When no match was found, false is returned and Skip() and Accept()
// will have no effect. Because of this, typical use of an On() chain is as
// expression for a conditional expression (if, switch/case, for). E.g.:
//
// You can omit "what to do with the match" and go straight into a routing
// method, e.g.
// // Skip multiple exclamation marks.
// for p.On(parsekit.A.Excl).Skip() { }
//
// On(...).RouteTo(...)
// // Fork a route based on the input.
// switch {
// case p.On(parsekit.A.Excl).Stay():
// p.RouteTo(stateHandlerA)
// case p.On(parsekit.A.Colon).Stay():
// p.RouteTo(stateHandlerB)
// default:
// p.RouteTo(stateHandlerC)
// }
//
// This is functionally the same as using
//
// On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
//
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
// // When there's a "hi" on input, emit it.
// if p.On(parsekit.C.Str("hi")).Accept() {
// p.Emit(SomeItemType, p.BufLiteral())
// }
func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p}
if matcher == nil {
@ -45,39 +45,25 @@ func (p *P) On(matcher Matcher) *matchAction {
// Keep track of the last match, to allow parser implementations
// to access it in an easy way. Typical use would be something like:
// if p.On(somethingBad).End() {
// p.Errorf("This was bad: %s", p.LastMatch)
// }
//
// if p.On(somethingBad).End() {
// p.Errorf("This was bad: %s", p.LastMatch)
// }
p.LastMatch = string(m.input)
return &matchAction{
routeAction: routeAction{chainAction{p, ok}},
input: m.input,
output: m.output,
inputPos: p.inputPos + m.inputOffset,
p: p,
ok: ok,
input: m.input,
output: m.output,
inputPos: p.inputPos + m.inputOffset,
}
}
// chainAction is used for building method chains for the On() method.
// Every element of the method chain embeds this struct, so that each step
// carries the parser and the match result along.
type chainAction struct {
// p is the parser on which the chain operates. Some chain steps carry a
// nil parser here (e.g. the ones that are returned for a failed match).
p *P
// ok tells whether or not the On() call that started the chain found
// a match in the input.
ok bool
}
// End ends the method chain and returns a boolean indicating whether
// or not a match was found in the input.
//
// It does not alter any state. It merely exposes the match result, so a
// chain can be used as the condition of an if, switch/case or for statement.
func (a *chainAction) End() bool {
return a.ok
}
// matchAction is a struct that is used for building On()-method chains.
//
// It embeds the routeAction struct, to make it possible to go right into
// a route action, which is basically a simple way of aliasing a chain
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
// matchAction is a struct that is used for building the On()-method chain.
type matchAction struct {
routeAction
p *P
ok bool
input []rune
output []rune
inputPos int
@ -88,23 +74,29 @@ type matchAction struct {
// When no match was found, then no action is taken.
// It returns a routeAction struct, which provides methods that can be used
// to tell the parser what state to go to next.
func (a *matchAction) Accept() *routeAction {
func (a *matchAction) Accept() bool {
if a.ok {
a.p.buffer.writeString(string(a.output))
a.advanceCursor()
}
return &routeAction{chainAction: chainAction{a.p, a.ok}}
return a.ok
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *matchAction) Skip() *routeAction {
func (a *matchAction) Skip() bool {
if a.ok {
a.advanceCursor()
}
return &routeAction{chainAction: chainAction{a.p, a.ok}}
return a.ok
}
// Stay tells the parser to not move the cursor after finding a match.
// This method itself touches neither the cursor nor the string buffer;
// it only reports the match result.
// Returns true in case a match was found, false otherwise.
func (a *matchAction) Stay() bool {
return a.ok
}
// advanceCursor advances the rune cursor one position in the input data.
@ -122,67 +114,3 @@ func (a *matchAction) advanceCursor() {
a.p.newline = r == '\n'
}
}
// Stay tells the parser to not move the cursor after finding a match.
// It returns a routeAction that carries the parser and the match result
// along, which can be used to tell the parser what state to go to next.
func (a *matchAction) Stay() *routeAction {
	chain := chainAction{p: a.p, ok: a.ok}
	return &routeAction{chainAction: chain}
}
// routeAction is a struct that is used for building On() method chains.
// It is the chain step that offers the routing methods RouteTo(),
// RouteRepeat() and RouteReturn().
type routeAction struct {
// chainAction provides the parser, the match result and the End() method.
chainAction
}
// RouteRepeat indicates that on the next parsing cycle,
// the current StateHandler must be reinvoked.
//
// The route is only set up when the chain found a match. Without a match,
// nothing is routed and a chain step that reports "no match" is returned.
func (a *routeAction) RouteRepeat() *chainAction {
	if !a.ok {
		return &chainAction{p: nil, ok: false}
	}
	return a.p.RouteRepeat()
}
// RouteTo tells the parser what StateHandler function to invoke
// in the next parsing cycle.
//
// The route is only set up when the chain found a match. Without a match,
// nothing is routed and a followup step that reports "no match" is returned.
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
	if !a.ok {
		noMatch := chainAction{p: nil, ok: false}
		return &routeFollowupAction{chainAction: noMatch}
	}
	return a.p.RouteTo(state)
}
// RouteReturn tells the parser that on the next cycle the next scheduled
// route must be invoked.
//
// The route is only set up when the chain found a match. Without a match,
// nothing is routed and a chain step that reports "no match" is returned.
func (a *routeAction) RouteReturn() *chainAction {
	if !a.ok {
		return &chainAction{p: nil, ok: false}
	}
	return a.p.RouteReturn()
}
// routeFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
type routeFollowupAction struct {
// chainAction provides the parser, the match result and the End() method.
chainAction
}
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
// StateHandler has been completed.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
//
// The handler is only pushed on the route stack when the chain found a
// match. The returned chain step reports the match result of the chain.
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
	matched := a.ok
	if matched {
		a.p.pushRoute(state)
	}
	return &chainAction{p: nil, ok: matched}
}
// ThenReturnHere schedules the current StateHandler to be invoked after
// the RouteTo StateHandler has been completed.
// For example: p.RouteTo(handlerA).ThenReturnHere()
//
// The current handler is only pushed on the route stack when the chain found
// a match. The returned chain step reports the match result of the chain.
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
	matched := a.ok
	if matched {
		a.p.pushRoute(a.p.state)
	}
	return &chainAction{p: nil, ok: matched}
}

76
statehandler_route.go Normal file
View File

@ -0,0 +1,76 @@
package parsekit
// RouteTo tells the parser what StateHandler function to invoke
// in the next parsing cycle.
//
// The returned routeFollowupAction can be used to schedule what must
// happen after that handler, e.g. p.RouteTo(handlerA).ThenTo(handlerB).
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	followup := &routeFollowupAction{p: p}
	p.nextState = state
	return followup
}
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked.
// It does so by routing to the handler that is registered as the current
// state of the parser.
func (p *P) RouteRepeat() {
p.RouteTo(p.state)
}
// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
// That handler is taken off the route stack in the process.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() {
p.nextState = p.popRoute()
}
// routeFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
type routeFollowupAction struct {
// p is the parser for which the followup route must be scheduled.
p *P
}
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
// StateHandler has been completed. The handler is scheduled by pushing it
// on the route stack of the parser.
// For example:
//
//     p.RouteTo(handlerA).ThenTo(handlerB)
func (a *routeFollowupAction) ThenTo(state StateHandler) {
a.p.pushRoute(state)
}
// ThenReturnHere schedules the current StateHandler to be invoked after
// the RouteTo StateHandler has been completed. The current handler is
// scheduled by pushing it on the route stack of the parser.
// For example:
//
//     p.RouteTo(handlerA).ThenReturnHere()
func (a *routeFollowupAction) ThenReturnHere() {
a.p.pushRoute(a.p.state)
}
// pushRoute adds the StateHandler to the route stack.
// This is used for implementing nested parsing: the handlers on the stack
// are the ones to return to, with the most recently pushed one on top.
func (p *P) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute pops the last pushed StateHandler from the route stack and
// returns it.
//
// When the route stack is empty, there is nothing to return to. In that
// case nil is returned and the stack is left untouched, instead of
// panicking on an out-of-range slice expression.
func (p *P) popRoute() StateHandler {
	last := len(p.routeStack) - 1
	if last < 0 {
		return nil
	}
	handler := p.routeStack[last]
	// Clear the vacated slot, so the backing array does not keep the popped
	// handler (and anything its closure captured) reachable.
	p.routeStack[last] = nil
	p.routeStack = p.routeStack[:last]
	return handler
}
// ExpectEndOfFile can be used from a StateHandler function to indicate that
// your parser expects to be at the end of the file. This will schedule
// a parsekit-provided StateHandler which will do the actual check for this.
//
// The scheduled handler registers "end of file" as the expected input and
// emits an ItemEOF item when the end of the file is found. When it is not
// found, the handler returns without emitting anything.
func (p *P) ExpectEndOfFile() {
	expectEOF := func(parser *P) {
		parser.Expects("end of file")
		if !parser.On(A.EndOfFile).Stay() {
			return
		}
		parser.Emit(ItemEOF, "EOF")
	}
	p.RouteTo(expectEOF)
}