Modified all examples and tests to make use of the new ideas on how to keep parsing state. After this commit, I can clean up a lot of stuff from the emitting loop-based parser, which was basically crap for complex parsers.

This commit is contained in:
Maurice Makaay 2019-05-28 10:42:46 +00:00
parent 980c18099e
commit 3dfa99c965
16 changed files with 354 additions and 283 deletions

View File

@ -51,7 +51,7 @@ func Example_basicCalculator1() {
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Implementation of the calculator // Implementation of the parser
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// ComputeSimple interprets a simple calculation, consisting of only integers // ComputeSimple interprets a simple calculation, consisting of only integers
@ -60,7 +60,7 @@ func Example_basicCalculator1() {
func ComputeSimple(calculation string) (int64, *parsekit.Error) { func ComputeSimple(calculation string) (int64, *parsekit.Error) {
calculator := &simpleCalculator{op: +1} calculator := &simpleCalculator{op: +1}
parser := parsekit.NewParser(calculator.number) parser := parsekit.NewParser(calculator.number)
_, err, _ := parser.Parse(calculation).Next() err := parser.Execute(calculation)
return calculator.Result, err return calculator.Result, err
} }

View File

@ -62,7 +62,7 @@ func Example_basicCalculator2() {
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Implementation of the calculator // Implementation of the parser
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// calculator implements a recursive descent parser that is responsible for parsing // calculator implements a recursive descent parser that is responsible for parsing
@ -79,14 +79,13 @@ type calculator struct {
func Compute(input string) (float64, *parsekit.Error) { func Compute(input string) (float64, *parsekit.Error) {
c := &calculator{} c := &calculator{}
parser := parsekit.NewParser(c.computation) parser := parsekit.NewParser(c.computation)
_, err, _ := parser.Parse(input).Next() err := parser.Execute(input)
return c.result, err return c.result, err
} }
func (c *calculator) computation(p *parsekit.ParseAPI) { func (c *calculator) computation(p *parsekit.ParseAPI) {
p.Handle(c.expr) p.Handle(c.expr)
p.ExpectEndOfFile() p.ExpectEndOfFile()
p.Handle(c.factor)
c.result = c.interpreter.result c.result = c.interpreter.result
} }

View File

@ -9,6 +9,43 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
// Example_dutchPostcodeUsingMatcher feeds a series of inputs to the Matcher
// created by createPostcodeMatcher() and prints, for every input, either the
// matcher output or the full error message (message plus line and column).
func Example_dutchPostcodeUsingMatcher() {
parser := createPostcodeMatcher()
for i, input := range []string{
"1234 AB",
"2233Ab",
"1001\t\tab",
"1818ab",
// The inputs below are expected to be rejected (see the Output block).
"1212abc",
"1234",
"huh",
"",
"\xcd2222AB",
} {
output, err := parser.Execute(input)
if err != nil {
// ErrorFull() adds the position in the input to the error message.
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}
}
// Output:
// [0] Input: "1234 AB" Output: 1234 AB
// [1] Input: "2233Ab" Output: 2233 AB
// [2] Input: "1001\t\tab" Output: 1001 AB
// [3] Input: "1818ab" Output: 1818 AB
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
func createPostcodeMatcher() *parsekit.Matcher { func createPostcodeMatcher() *parsekit.Matcher {
// Easy access to the parsekit definitions. // Easy access to the parsekit definitions.
c, a, m := parsekit.C, parsekit.A, parsekit.M c, a, m := parsekit.C, parsekit.A, parsekit.M
@ -24,36 +61,9 @@ func createPostcodeMatcher() *parsekit.Matcher {
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
space := m.Replace(c.Opt(a.Whitespace), " ") space := m.Replace(c.Opt(a.Whitespace), " ")
postcode := c.Seq(pcDigits, space, pcLetters) postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile)
// Create a Matcher that wraps the 'postcode' TokenHandler and allows // Create a Matcher that wraps the 'postcode' TokenHandler and allows
// us to match some input against that handler. // us to match some input against that handler.
return parsekit.NewMatcher(postcode, "a Dutch postcode") return parsekit.NewMatcher(postcode, "a Dutch postcode")
} }
func Example_dutchPostcodeUsingMatcher() {
pcParser := createPostcodeMatcher()
for i, input := range []string{
"1234 AB",
"2233Ab",
"1001\t\tab",
"1818ab",
"1234",
"huh",
} {
output, err, ok := pcParser.Parse(input)
if !ok {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}
}
// Output:
// [0] Input: "1234 AB" Output: 1234 AB
// [1] Input: "2233Ab" Output: 2233 AB
// [2] Input: "1001\t\tab" Output: 1001 AB
// [3] Input: "1818ab" Output: 1818 AB
// [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode)
// [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
}

View File

@ -3,8 +3,8 @@
// //
// The implementation uses only parser/combinator TokenHandler functions and does // The implementation uses only parser/combinator TokenHandler functions and does
// not implement a full-fledged state-based Parser for it. If you want to see the // not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implementated using a Paser, take a look at the // same kind of functionality, implemented using a Parser, take a look at the
// HelloWorldUsingParser example. // HelloWorldUsingParser examples.
package parsekit_test package parsekit_test
import ( import (
@ -13,24 +13,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
func createHelloMatcher() *parsekit.Matcher {
// Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := c.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
separator := c.Any(comma, a.Whitespace)
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
// Create a Matcher that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(greeting, "a friendly greeting")
}
func Example_helloWorldUsingMatcher() { func Example_helloWorldUsingMatcher() {
parser := createHelloMatcher() parser := createHelloMatcher()
@ -43,9 +25,9 @@ func Example_helloWorldUsingMatcher() {
"Hello, world", "Hello, world",
"Hello,!", "Hello,!",
} { } {
output, err, ok := parser.Parse(input) output, err := parser.Execute(input)
if !ok { if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
} else { } else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
} }
@ -55,7 +37,29 @@ func Example_helloWorldUsingMatcher() {
// [1] Input: "HELLO ,Johnny!" Output: Johnny // [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123 // [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza // [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) // [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) // [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) // [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
func createHelloMatcher() *parsekit.Matcher {
// Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := c.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
separator := c.Any(comma, a.Whitespace)
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)
// Create a Matcher that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(greeting, "a friendly greeting")
} }

View File

@ -1,15 +1,18 @@
// In this example, a parser is created that is able to parse input that looks // In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it. // like "Hello, <name>!", and that extracts the name from it.
// //
// This implementation uses a state-based Parser for it, and it does not implement // This implementation uses a state-based Parser for it, and it does not
// any custom parser/combinator TokenHandler functions. Note that things are much // implement any custom parser/combinator TokenHandler functions. Note that
// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher // things are much easier to implement using custom TokenHandlers (see the other
// example for this). Doing this fully parser-based implementation is mainly for your // HelloWorldUsingMatcher example for this). Doing this fully parser-based
// learning pleasure. // implementation is mainly for your learning pleasure.
// //
// One big difference between the Matcher-based example and this one, is that the // One big difference between the Matcher-based example and this one, is that
// state-based parser reports errors much more fine-grained. This might or might // this parser reports errors much more fine-grained. This might or might not be
// not be useful for your specific use case. // useful for your specific use case. If you need error reporting like this,
// then also take a look at the HelloWorldUsingParser2 example, which does the
// same thing as this version, only more concise.
package parsekit_test package parsekit_test
import ( import (
@ -19,57 +22,7 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
const greeteeItem parsekit.ItemType = 1
func stateStartOfGreeting(p *parsekit.ParseAPI) {
c := parsekit.C
p.Expects("hello")
if p.On(c.StrNoCase("hello")).Skip() {
p.RouteTo(stateComma)
}
}
func stateComma(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("comma")
switch {
case p.On(a.Whitespace).Skip():
p.RouteRepeat()
case p.On(a.Comma).Skip():
p.RouteTo(stateName)
}
}
func stateName(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("name")
switch {
case p.On(a.Excl).Skip():
p.RouteTo(stateEndOfGreeting)
case p.On(a.AnyRune).Accept():
p.RouteRepeat()
}
}
func stateEndOfGreeting(p *parsekit.ParseAPI) {
p.Expects("end of greeting")
if p.On(a.EndOfFile).Stay() {
name := strings.TrimSpace(p.BufLiteral())
if name == "" {
p.EmitError("The name cannot be empty")
} else {
p.Emit(greeteeItem, name)
}
}
}
func createHelloParser() *parsekit.Parser {
return parsekit.NewParser(stateStartOfGreeting)
}
func Example_helloWorldUsingParser1() { func Example_helloWorldUsingParser1() {
parser := createHelloParser()
for i, input := range []string{ for i, input := range []string{
"Hello, world!", "Hello, world!",
"HELLO ,Johnny!", "HELLO ,Johnny!",
@ -86,11 +39,11 @@ func Example_helloWorldUsingParser1() {
"Oh no!", "Oh no!",
"hello,!", "hello,!",
} { } {
item, err, ok := parser.Parse(input).Next() name, err := (&helloparser1{}).Parse(input)
if !ok { if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else { } else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value) fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name)
} }
} }
// Output: // Output:
@ -109,3 +62,63 @@ func Example_helloWorldUsingParser1() {
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello) // [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
// [13] Input: "hello,!" Error: The name cannot be empty // [13] Input: "hello,!" Error: The name cannot be empty
} }
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// helloparser1 is a greeting parser that keeps its result on the struct
// itself, instead of emitting parser items.
type helloparser1 struct {
	greetee string // the name taken from the greeting
}

// Parse runs a new parser, starting at the start handler, against the
// provided input. It returns the collected name together with the error
// value that was returned by Parser.Execute().
func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
	err := parsekit.NewParser(h.start).Execute(input)
	return h.greetee, err
}
// start expects a case-insensitive "hello" at the start of the input.
// When it is found, it is skipped and parsing continues with the comma
// handler. Otherwise, the handler returns without taking further action.
func (h *helloparser1) start(p *parsekit.ParseAPI) {
	p.Expects("hello")
	if !p.On(parsekit.C.StrNoCase("hello")).Skip() {
		return
	}
	p.Handle(h.comma)
}
// comma expects a comma, optionally preceded by whitespace. When the
// comma is found, parsing continues with the name handler.
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
	p.Expects("comma")

	// The outcome of the whitespace check is deliberately not inspected:
	// whitespace in front of the comma is optional.
	p.On(parsekit.A.Whitespace).Skip()

	if !p.On(parsekit.A.Comma).Skip() {
		return
	}
	p.Handle(h.name)
}
// name collects the name, one rune per invocation. An exclamation mark
// is skipped and hands over control to the end handler. Any other rune is
// accepted, after which this handler is invoked again for the next rune.
func (h *helloparser1) name(p *parsekit.ParseAPI) {
	p.Expects("name")
	if p.On(parsekit.A.Excl).Skip() {
		p.Handle(h.end)
		return
	}
	if p.On(parsekit.A.AnyRune).Accept() {
		p.Handle(h.name)
	}
}
// end checks that the greeting is followed by the end of the input and
// stores the whitespace-trimmed buffer contents as the greetee.
//
// Here we could have used p.ExpectEndOfFile() as well, but a slightly
// different route was taken to implement a more friendly 'end of greeting'
// error message.
func (h *helloparser1) end(p *parsekit.ParseAPI) {
	// Declared locally, like in the other handlers, so this method does not
	// depend on an identifier 'a' that is defined elsewhere.
	a := parsekit.A

	if !p.On(a.EndOfFile).Stay() {
		p.Expects("end of greeting")
		p.UnexpectedInput()
		return
	}

	h.greetee = strings.TrimSpace(p.BufLiteral())
	if h.greetee == "" {
		p.EmitError("The name cannot be empty")
		return
	}

	// Only a non-empty name counts as a successfully completed parse.
	p.Stop()
}

View File

@ -1,7 +1,16 @@
// This is the same as the example helloWorldUsingParser1, except that in // This is the same as the example HelloWorldUsingParser1, except that in this
// this implementation the state machine is implemented using a combination // implementation the state machine is implemented using a combination of some
// of some TokenHandlers and only a single state, in which multiple // TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
// ParseAPI.On() calls are combined to do all the work in one go. // are combined to do all the work in one go.
//
// Note that things are much easier to implement using custom TokenHandlers (see
// the other HelloWorldUsingMatcher example for this). Doing this implementation
// is mainly for your learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that
// this parser reports errors much more fine-grained. This might or might not be
// useful for your specific use case.
package parsekit_test package parsekit_test
import ( import (
@ -10,43 +19,8 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
const greeteeItem2 parsekit.ItemType = 1
func stateFullGreeting(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
if !p.On(c.StrNoCase("hello")).Skip() {
p.EmitError("the greeting is not being friendly")
return
}
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
p.EmitError("the greeting is not properly separated")
return
}
if !p.On(m.Trim(c.OneOrMore(c.Except(a.Excl, a.AnyRune)), " \t")).Accept() {
p.EmitError("the greeting is targeted at thin air")
return
}
if !p.On(a.Excl).Stay() {
p.EmitError("the greeting is not loud enough")
return
}
if !p.On(a.EndOfFile).Stay() {
p.EmitError("too much stuff going on after the closing '!'")
return
}
name := p.BufLiteral()
if name == "" {
p.EmitError("the name cannot be empty")
} else {
p.Emit(greeteeItem, name)
}
p.ExpectEndOfFile()
}
func Example_helloWorldUsingParser2() { func Example_helloWorldUsingParser2() {
parser := parsekit.NewParser(stateFullGreeting) parser := &helloparser2{}
for i, input := range []string{ for i, input := range []string{
"Hello, world!", "Hello, world!",
@ -65,17 +39,17 @@ func Example_helloWorldUsingParser2() {
"hello,!", "hello,!",
"HELLO, Buster! Eat this!", "HELLO, Buster! Eat this!",
} { } {
item, err, ok := parser.Parse(input).Next() name, err := parser.Parse(input)
if !ok { if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else { } else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value) fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name)
} }
} }
// Output: // Output:
// [0] Input: "Hello, world!" Error: too much stuff going on after the closing '!' // [0] Input: "Hello, world!" Output: world
// [1] Input: "HELLO ,Johnny!" Error: too much stuff going on after the closing '!' // [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Error: too much stuff going on after the closing '!' // [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Error: the greeting is not properly separated // [3] Input: "hello Pizza!" Error: the greeting is not properly separated
// [4] Input: "" Error: the greeting is not being friendly // [4] Input: "" Error: the greeting is not being friendly
// [5] Input: " " Error: the greeting is not being friendly // [5] Input: " " Error: the greeting is not being friendly
@ -83,9 +57,54 @@ func Example_helloWorldUsingParser2() {
// [7] Input: "hello," Error: the greeting is targeted at thin air // [7] Input: "hello," Error: the greeting is targeted at thin air
// [8] Input: "hello , " Error: the greeting is targeted at thin air // [8] Input: "hello , " Error: the greeting is targeted at thin air
// [9] Input: "hello , Droopy" Error: the greeting is not loud enough // [9] Input: "hello , Droopy" Error: the greeting is not loud enough
// [10] Input: "hello , Droopy!" Error: too much stuff going on after the closing '!' // [10] Input: "hello , Droopy!" Output: Droopy
// [11] Input: "hello , \t \t Droopy \t !" Error: too much stuff going on after the closing '!' // [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
// [12] Input: "Oh no!" Error: the greeting is not being friendly // [12] Input: "Oh no!" Error: the greeting is not being friendly
// [13] Input: "hello,!" Error: the greeting is targeted at thin air // [13] Input: "hello,!" Error: the greeting is targeted at thin air
// [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!' // [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!'
} }
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// helloparser2 is a greeting parser that does all its work in a single
// handler and keeps its result on the struct itself.
type helloparser2 struct {
	greetee string // the name taken from the greeting
}

// Parse runs a new parser, starting at the start handler, against the
// provided input. It returns the collected name together with the error
// value that was returned by Parser.Execute().
func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
	err := parsekit.NewParser(h.start).Execute(input)
	return h.greetee, err
}
// start parses a complete greeting in one go, by chaining a number of
// ParseAPI.On() checks. The first check that fails sets an error message
// that is specific for that step.
//
// Note:
// The checks are chained, so evaluation ends at the first failing step.
// This is an efficiency measure only. The ParseAPI knows it should ignore
// any upcoming call after an error has been set, so when the checks would
// not have been chained, the remaining p.On() calls would be invoked, but
// they would always return false.
func (h *helloparser2) start(p *parsekit.ParseAPI) {
	c, a, m := parsekit.C, parsekit.A, parsekit.M
	switch {
	case !p.On(c.StrNoCase("hello")).Skip():
		p.EmitError("the greeting is not being friendly")
	case !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip():
		p.EmitError("the greeting is not properly separated")
	case !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept():
		p.EmitError("the greeting is targeted at thin air")
	case !p.On(a.Excl).Skip():
		p.EmitError("the greeting is not loud enough")
	case !p.On(a.EndOfFile).Stay():
		p.EmitError("too much stuff going on after the closing '!'")
	default:
		h.greetee = p.BufLiteral()
		if h.greetee == "" {
			p.EmitError("the name cannot be empty")
			return
		}
		// Only mark the parse as completed when a name was found, so a
		// failed parse is never flagged as successfully stopped.
		p.Stop()
	}
}

View File

@ -6,53 +6,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
func ExampleItemType() {
// Make use of positive values. Ideally, define your ItemTypes using
// iota for easy automatic value management like this:
const (
ItemWord parsekit.ItemType = iota
ItemNumber
ItemBlob
// ...
)
}
func ExampleItem() {
// Easy access to the parsekit definitions.
c := parsekit.C
// You define your own item types for your specific parser.
const QuestionItem = parsekit.ItemType(42)
// A ParseHandler function can use the defined item type by means of
// the p.Emit* methods on parsekit.P.
// When errors occur, or the end of the file is reached, then the built-in
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
stateHandler := func(p *parsekit.ParseAPI) {
if p.On(c.Str("question")).Accept() {
p.EmitLiteral(QuestionItem)
}
p.ExpectEndOfFile()
}
// Successful match
item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next()
fmt.Println(ok, item.Type == QuestionItem, item.Value)
// End of file reached
item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next()
fmt.Println(ok, item.Type == parsekit.ItemEOF)
// An error occurred
item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next()
fmt.Println(ok, item.Type == parsekit.ItemError, err)
// Output:
// true true question
// false true
// false true unexpected character 'a' (expected end of file)
}
func ExampleError() { func ExampleError() {
err := &parsekit.Error{ err := &parsekit.Error{
Message: "it broke down", Message: "it broke down",
@ -66,7 +19,7 @@ func ExampleError() {
// Output: // Output:
// it broke down // it broke down
// it broke down // it broke down
// it broke down after line 10, column 42 // it broke down at line 10, column 42
} }
func ExampleError_Error() { func ExampleError_Error() {
@ -92,31 +45,26 @@ func ExampleError_ErrorFull() {
fmt.Println(err.ErrorFull()) fmt.Println(err.ErrorFull())
// Output: // Output:
// it broke down after line 10, column 42 // it broke down at line 10, column 42
} }
func ExampleMatchAnyRune() { func ExampleMatchAnyRune() {
// Easy access to the parsekit definitions. // Easy access to the parsekit definitions.
a := parsekit.A a := parsekit.A
matches := []string{}
stateHandler := func(p *parsekit.ParseAPI) { stateHandler := func(p *parsekit.ParseAPI) {
p.Expects("Any valid rune") for p.On(a.AnyRune).Accept() {
if p.On(a.AnyRune).Accept() { matches = append(matches, p.BufLiteral())
p.EmitLiteral(TestItem) p.BufClear()
p.RouteRepeat()
} }
p.ExpectEndOfFile()
} }
parser := parsekit.NewParser(stateHandler) parser := parsekit.NewParser(stateHandler)
run := parser.Parse("¡Any / valid / character will dö!") err := parser.Execute("¡Any will dö!")
for i := 0; i < 5; i++ { fmt.Printf("Matches = %q, Error = %s\n", matches, err)
match, _, _ := run.Next()
fmt.Printf("Match = %q\n", match.Value)
}
// Output: // Output:
// Match = "¡" // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
// Match = "A"
// Match = "n"
// Match = "y"
// Match = " "
} }

View File

@ -25,8 +25,9 @@ type ParseAPI struct {
expecting string // a description of what the current state expects to find (see P.Expects()) expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
items []Item // a slice of resulting Parser items (see P.Emit()) items []Item // a slice of resulting Parser items (see P.Emit())
item Item // the current item as reached by Next() and retrieved by Get() item Item // the current item as reached by Next(), retrieved by Get()
err *Error // an error when parsing failed, can be retrieved by Error() err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
LastMatch string // a string representation of the last matched input data LastMatch string // a string representation of the last matched input data
} }

View File

@ -26,6 +26,7 @@ const ItemEOF ItemType = -1
const ItemError ItemType = -2 const ItemError ItemType = -2
// Emit passes a Parser item to the client, including the provided string. // Emit passes a Parser item to the client, including the provided string.
// Deprecated
func (p *ParseAPI) Emit(t ItemType, v string) { func (p *ParseAPI) Emit(t ItemType, v string) {
p.items = append(p.items, Item{t, v}) p.items = append(p.items, Item{t, v})
p.buffer.reset() p.buffer.reset()
@ -39,13 +40,14 @@ func (p *ParseAPI) Emit(t ItemType, v string) {
// linefeed (ASCII char 10). // linefeed (ASCII char 10).
// //
// Retrieving the buffer contents will not affect the buffer itself. New runes can // Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling P.Emit(), the buffer will be cleared. // still be added to it. Only when calling P.BufClear(), the buffer will be cleared.
func (p *ParseAPI) BufLiteral() string { func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString() return p.buffer.asLiteralString()
} }
// EmitLiteral passes a parser Item to the client, including the accumulated // EmitLiteral passes a parser Item to the client, including the accumulated
// string buffer data as a literal string. // string buffer data as a literal string.
// Deprecated
func (p *ParseAPI) EmitLiteral(t ItemType) { func (p *ParseAPI) EmitLiteral(t ItemType) {
p.Emit(t, p.BufLiteral()) p.Emit(t, p.BufLiteral())
} }
@ -114,25 +116,43 @@ func (err *Error) Error() string {
// ErrorFull returns the current error message, including information about // ErrorFull returns the current error message, including information about
// the position in the input where the error occurred. // the position in the input where the error occurred.
func (err *Error) ErrorFull() string { func (err *Error) ErrorFull() string {
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column) return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
} }
// EmitError emits a parser error item to the client. // EmitError sets an error message in the parser API. This error message
// will eventually be returned by the Parser.Execute() method.
func (p *ParseAPI) EmitError(format string, args ...interface{}) { func (p *ParseAPI) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...) message := fmt.Sprintf(format, args...)
p.Emit(ItemError, message) p.err = &Error{message, p.cursorLine, p.cursorColumn}
} }
// EmitEOF emits an EOF to the client. In effect, this will stop the parsing process. // Stop is used by the parser implementation to tell the API that it has
func (p *ParseAPI) EmitEOF() { // completed the parsing process successfully.
p.Emit(ItemEOF, "EOF") //
// When the parser implementation returns without stopping first, the
// Parser.Execute() will assume that something went wrong and calls
// ParseAPI.UnexpectedInput() to report an error about this.
//
// The parser implementation can define what was being expected, by
// providing a description to ParseAPI.Expects().
func (p *ParseAPI) Stop() {
p.stopped = true
} }
// UnexpectedInput is used by a ParseHandler function to emit an error item // UnexpectedInput is used to set an error that tells the user that some
// that tells the client that an unexpected rune was encountered in the input. // unexpected input was encountered.
//
// It can automatically produce an error message for a couple of situations:
// 1) input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an invalid UTF8 character on the input.
//
// The parser implementation can provide some feedback for this error by
// calling ParseAPI.Expects() to set the expectation. When set, the
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() { func (p *ParseAPI) UnexpectedInput() {
// When some previous parsing step yielded an error, skip this operation. // When some previous parsing step yielded an error, skip this operation.
if p.err != nil { if p.err != nil || p.stopped {
return return
} }
r, _, ok := p.peek(0) r, _, ok := p.peek(0)

View File

@ -16,5 +16,8 @@ package parsekit
func (p *ParseAPI) Expects(description string) { func (p *ParseAPI) Expects(description string) {
// TODO make this into some debugging tool? // TODO make this into some debugging tool?
// fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn) // fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn)
if p.err != nil || p.stopped {
return
}
p.expecting = description p.expecting = description
} }

View File

@ -38,7 +38,7 @@ package parsekit
// } // }
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction { func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
// When some previous parsing step yielded an error, skip this operation. // When some previous parsing step yielded an error, skip this operation.
if p.err != nil { if p.err != nil || p.stopped {
return &MatchAction{ return &MatchAction{
p: p, p: p,
ok: false, ok: false,

View File

@ -5,7 +5,7 @@ package parsekit
func (p *ParseAPI) Handle(handlers ...ParseHandler) { func (p *ParseAPI) Handle(handlers ...ParseHandler) {
for _, handler := range handlers { for _, handler := range handlers {
// When some previous parsing step yielded an error, skip this operation. // When some previous parsing step yielded an error, skip this operation.
if p.err != nil { if p.err != nil || p.stopped {
break break
} }
handler(p) handler(p)
@ -14,6 +14,7 @@ func (p *ParseAPI) Handle(handlers ...ParseHandler) {
// RouteTo tells the parser what ParseHandler function to invoke on // RouteTo tells the parser what ParseHandler function to invoke on
// the next parse cycle. // the next parse cycle.
// Deprecated
func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction { func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
p.nextState = handler p.nextState = handler
return &RouteFollowupAction{p} return &RouteFollowupAction{p}
@ -21,6 +22,7 @@ func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
// RouteRepeat tells the parser that on the next parsing cycle, the current // RouteRepeat tells the parser that on the next parsing cycle, the current
// ParseHandler must be reinvoked. // ParseHandler must be reinvoked.
// Deprecated
func (p *ParseAPI) RouteRepeat() { func (p *ParseAPI) RouteRepeat() {
p.RouteTo(p.state) p.RouteTo(p.state)
} }
@ -33,12 +35,14 @@ func (p *ParseAPI) RouteRepeat() {
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from // p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will // providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case. // automatically assume a RouteReturn() in that case.
// Deprecated
func (p *ParseAPI) RouteReturn() { func (p *ParseAPI) RouteReturn() {
p.nextState = p.popRoute() p.nextState = p.popRoute()
} }
// RouteFollowupAction chains parsing routes. // RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB). // It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// Deprecated
type RouteFollowupAction struct { type RouteFollowupAction struct {
p *ParseAPI p *ParseAPI
} }
@ -48,6 +52,7 @@ type RouteFollowupAction struct {
// For example: // For example:
// //
// p.RouteTo(handlerA).ThenTo(handlerB) // p.RouteTo(handlerA).ThenTo(handlerB)
// Deprecated
func (a *RouteFollowupAction) ThenTo(state ParseHandler) { func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
a.p.pushRoute(state) a.p.pushRoute(state)
} }
@ -57,17 +62,20 @@ func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
// For example: // For example:
// //
// p.RouteTo(handlerA).ThenReturnHere() // p.RouteTo(handlerA).ThenReturnHere()
// Deprecated
func (a *RouteFollowupAction) ThenReturnHere() { func (a *RouteFollowupAction) ThenReturnHere() {
a.p.pushRoute(a.p.state) a.p.pushRoute(a.p.state)
} }
// pushRoute adds the ParseHandler to the route stack. // pushRoute adds the ParseHandler to the route stack.
// This is used for implementing nested parsing. // This is used for implementing nested parsing.
// Deprecated
func (p *ParseAPI) pushRoute(state ParseHandler) { func (p *ParseAPI) pushRoute(state ParseHandler) {
p.routeStack = append(p.routeStack, state) p.routeStack = append(p.routeStack, state)
} }
// popRoute pops the last pushed ParseHandler from the route stack. // popRoute pops the last pushed ParseHandler from the route stack.
// Deprecated
func (p *ParseAPI) popRoute() ParseHandler { func (p *ParseAPI) popRoute() ParseHandler {
last := len(p.routeStack) - 1 last := len(p.routeStack) - 1
head, tail := p.routeStack[:last], p.routeStack[last] head, tail := p.routeStack[:last], p.routeStack[last]
@ -92,15 +100,16 @@ func (p *ParseAPI) popRoute() ParseHandler {
// } // }
func (p *ParseAPI) ExpectEndOfFile() { func (p *ParseAPI) ExpectEndOfFile() {
// When some previous parsing step yielded an error, skip this operation. // When some previous parsing step yielded an error, skip this operation.
if p.err == nil { if p.err != nil || p.stopped {
return
}
if p.On(A.EndOfFile).Stay() { if p.On(A.EndOfFile).Stay() {
p.EmitEOF() p.Stop()
} else { } else {
p.Expects("end of file") p.Expects("end of file")
p.UnexpectedInput() p.UnexpectedInput()
} }
} }
}
// ExpectEndOfFile can be scheduled as a ParseHandler function. // ExpectEndOfFile can be scheduled as a ParseHandler function.
// It makes sure that the input is at the end of file. // It makes sure that the input is at the end of file.
@ -115,11 +124,12 @@ func (p *ParseAPI) ExpectEndOfFile() {
// yourself too. Simply emit an ItemEOF when the end of the input was reached // yourself too. Simply emit an ItemEOF when the end of the input was reached
// to stop the parser loop: // to stop the parser loop:
// //
// p.EmitEOF() // p.Stop()
// TODO meh, get rid of this one, once we don't use state scheduling anymore. // TODO meh, get rid of this one, once we don't use state scheduling anymore.
// Deprecated
func ExpectEndOfFile(p *ParseAPI) { func ExpectEndOfFile(p *ParseAPI) {
p.Expects("end of file") p.Expects("end of file")
if p.On(A.EndOfFile).Stay() { if p.On(A.EndOfFile).Stay() {
p.EmitEOF() p.Stop()
} }
} }

View File

@ -9,7 +9,7 @@ import (
// Parser is the top-level struct that holds the configuration for a parser. // Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method. // The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct { type Parser struct {
startState ParseHandler // the function that handles the very first state startHandler ParseHandler // the function that handles the very first state
} }
// NewParser instantiates a new Parser. // NewParser instantiates a new Parser.
@ -19,28 +19,48 @@ type Parser struct {
// parsing. This style of parser is typically used for parsing programming // parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.) // languages and structured data formats (like json, xml, toml, etc.)
// //
// To start parsing input data, use the method Parser.Parse(). // To parse input data, use the method Parser.Execute().
func NewParser(startState ParseHandler) *Parser { func NewParser(startHandler ParseHandler) *Parser {
return &Parser{startState: startState} return &Parser{startHandler: startHandler}
} }
// ParseRun represents a single parse run for a Parser. // ParseRun represents a single parse run for a Parser.
// Deprecated
type ParseRun struct { type ParseRun struct {
p *ParseAPI // holds parser state and provides an API to ParseHandler functions p *ParseAPI // holds parser state and provides an API to ParseHandler functions
} }
// Parse starts a parse run on the provided input data. // Execute starts the parser for the provided input.
// To retrieve emitted parser Items from the run, make use of the ParseRun.Next() method. // When an error occurs during parsing, then this error is returned. Nil otherwise.
func (p *Parser) Parse(input string) *ParseRun { func (p *Parser) Execute(input string) *Error {
return &ParseRun{ api := &ParseAPI{
p: &ParseAPI{
input: input, input: input,
len: len(input), len: len(input),
cursorLine: 1, cursorLine: 1,
cursorColumn: 1, cursorColumn: 1,
nextState: p.startState, nextState: p.startHandler,
},
} }
p.startHandler(api)
if !api.stopped {
api.UnexpectedInput()
}
return api.err
}
// Parse starts a parse run on the provided input data.
// To retrieve emitted parser Items from the run, make use of the ParseRun.Next() method.
//
// Deprecated: use Parser.Execute() instead.
func (p *Parser) Parse(input string) *ParseRun {
panic("Parse() is deprecated, use Execute()")
// return &ParseRun{
// p: &ParseAPI{
// input: input,
// len: len(input),
// cursorLine: 1,
// cursorColumn: 1,
// nextState: p.startHandler,
// },
// }
} }
// Next retrieves the next parsed item for a parse run. // Next retrieves the next parsed item for a parse run.
@ -146,6 +166,7 @@ func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
// Matcher.Parse(). // Matcher.Parse().
type Matcher struct { type Matcher struct {
parser *Parser parser *Parser
match string
} }
// NewMatcher instantiates a new Matcher. // NewMatcher instantiates a new Matcher.
@ -157,20 +178,32 @@ type Matcher struct {
// The 'expects' parameter is used for creating an error message in case parsed // The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler. // input does not match the TokenHandler.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher { func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
stateHandler := func(p *ParseAPI) { matcher := &Matcher{}
p.Expects(expects) matcher.parser = NewParser(func(p *ParseAPI) {
if p.On(tokenHandler).Accept() { if p.On(tokenHandler).Accept() {
p.EmitLiteral(0) // ItemType is irrelevant matcher.match = p.BufLiteral()
p.Stop()
} else {
p.Expects(expects)
p.UnexpectedInput()
} }
})
return matcher
} }
return &Matcher{parser: NewParser(stateHandler)}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the matched input string and an error. When an error
// occurred during parsing, the error will be set, nil otherwise.
func (m *Matcher) Execute(input string) (string, *Error) {
err := m.parser.Execute(input)
return m.match, err
} }
// Parse checks for a match on the provided input data. // Parse checks for a match on the provided input data.
func (m *Matcher) Parse(input string) (string, *Error, bool) { func (m *Matcher) Parse(input string) (string, *Error) {
item, err, ok := m.parser.Parse(input).Next() item, err, ok := m.parser.Parse(input).Next()
if !ok { if !ok {
return "", err, false return "", err
} }
return item.Value, nil, true return item.Value, nil
} }

View File

@ -28,16 +28,16 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
} }
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) { func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input) output, err := parsekit.NewMatcher(test.tokenHandler, "a match").Execute(test.input)
if test.mustMatch { if test.mustMatch {
if !ok { if err != nil {
t.Errorf("Test %q failed with error: %s", test.input, err) t.Errorf("Test %q failed with error: %s", test.input, err)
} else if output != test.expected { } else if output != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output) t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
} }
} else { } else {
if ok { if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.input) t.Errorf("Test %q failed: should not match, but it did", test.input)
} }
} }

View File

@ -518,6 +518,7 @@ var M = struct {
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
@ -527,6 +528,7 @@ var M = struct {
Trim: ModifyTrim, Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft, TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight, TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower, ToLower: ModifyToLower,
ToUpper: ModifyToUpper, ToUpper: ModifyToUpper,
Replace: ModifyReplace, Replace: ModifyReplace,
@ -589,6 +591,13 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo
return ModifyByCallback(handler, modfunc) return ModifyByCallback(handler, modfunc)
} }
// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and whitespace characters as defined by unicode
// are trimmed from the left and right of the output.
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
return ModifyByCallback(handler, strings.TrimSpace)
}
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies. // ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and characters from the provided // If it does, then its output is taken and characters from the provided
// cutset are converted into upper case. // cutset are converted into upper case.

View File

@ -205,18 +205,20 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
) )
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
output := ""
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expects("Sequence of runes") p.Expects("Sequence of runes")
if p.On(sequence).Accept() { if p.On(sequence).Accept() {
p.EmitLiteral(TestItem) output = p.BufLiteral()
p.Stop()
} }
}) })
item, err, ok := parser.Parse(input).Next() err := parser.Execute(input)
if !ok { if err != nil {
t.Fatalf("Parsing failed: %s", err) t.Fatalf("Parsing failed: %s", err)
} }
if item.Value != input { if output != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value) t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output)
} }
} }