Modified all examples and tests to make use of the new ideas on how to keep parsing state. After this commit, I can clean up a lot of stuff from the emitting loop-based parser, which was basically crap for complex parsers.

This commit is contained in:
Maurice Makaay 2019-05-28 10:42:46 +00:00
parent 980c18099e
commit 3dfa99c965
16 changed files with 354 additions and 283 deletions

View File

@ -51,7 +51,7 @@ func Example_basicCalculator1() {
}
// ---------------------------------------------------------------------------
// Implementation of the calculator
// Implementation of the parser
// ---------------------------------------------------------------------------
// ComputeSimple interprets a simple calculation, consisting of only integers
@ -60,7 +60,7 @@ func Example_basicCalculator1() {
func ComputeSimple(calculation string) (int64, *parsekit.Error) {
calculator := &simpleCalculator{op: +1}
parser := parsekit.NewParser(calculator.number)
_, err, _ := parser.Parse(calculation).Next()
err := parser.Execute(calculation)
return calculator.Result, err
}

View File

@ -62,7 +62,7 @@ func Example_basicCalculator2() {
}
// ---------------------------------------------------------------------------
// Implementation of the calculator
// Implementation of the parser
// ---------------------------------------------------------------------------
// calculator implements a recursive descent parser that is responsible for parsing
@ -79,14 +79,13 @@ type calculator struct {
func Compute(input string) (float64, *parsekit.Error) {
c := &calculator{}
parser := parsekit.NewParser(c.computation)
_, err, _ := parser.Parse(input).Next()
err := parser.Execute(input)
return c.result, err
}
func (c *calculator) computation(p *parsekit.ParseAPI) {
p.Handle(c.expr)
p.ExpectEndOfFile()
p.Handle(c.factor)
c.result = c.interpreter.result
}

View File

@ -9,6 +9,43 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
// Example_dutchPostcodeUsingMatcher feeds a series of inputs to the Matcher
// that is returned by createPostcodeMatcher. For every input it prints either
// the output of the Matcher, or the full error message (including the
// position in the input) when the input did not match.
func Example_dutchPostcodeUsingMatcher() {
parser := createPostcodeMatcher()
for i, input := range []string{
"1234 AB",
"2233Ab",
"1001\t\tab",
"1818ab",
"1212abc",
"1234",
"huh",
"",
"\xcd2222AB",
} {
output, err := parser.Execute(input)
if err != nil {
// ErrorFull() adds the line and column to the error message.
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}
}
// Output:
// [0] Input: "1234 AB" Output: 1234 AB
// [1] Input: "2233Ab" Output: 2233 AB
// [2] Input: "1001\t\tab" Output: 1001 AB
// [3] Input: "1818ab" Output: 1818 AB
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
func createPostcodeMatcher() *parsekit.Matcher {
// Easy access to the parsekit definitions.
c, a, m := parsekit.C, parsekit.A, parsekit.M
@ -24,36 +61,9 @@ func createPostcodeMatcher() *parsekit.Matcher {
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
space := m.Replace(c.Opt(a.Whitespace), " ")
postcode := c.Seq(pcDigits, space, pcLetters)
postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile)
// Create a Matcher that wraps the 'postcode' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(postcode, "a Dutch postcode")
}
func Example_dutchPostcodeUsingMatcher() {
pcParser := createPostcodeMatcher()
for i, input := range []string{
"1234 AB",
"2233Ab",
"1001\t\tab",
"1818ab",
"1234",
"huh",
} {
output, err, ok := pcParser.Parse(input)
if !ok {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}
}
// Output:
// [0] Input: "1234 AB" Output: 1234 AB
// [1] Input: "2233Ab" Output: 2233 AB
// [2] Input: "1001\t\tab" Output: 1001 AB
// [3] Input: "1818ab" Output: 1818 AB
// [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode)
// [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode)
}

View File

@ -3,8 +3,8 @@
//
// The implementation uses only parser/combinator TokenHandler functions and does
// not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implementated using a Paser, take a look at the
// HelloWorldUsingParser example.
// same kind of functionality, implemented using a Parser, take a look at the
// HelloWorldUsingParser examples.
package parsekit_test
import (
@ -13,24 +13,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func createHelloMatcher() *parsekit.Matcher {
// Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := c.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
separator := c.Any(comma, a.Whitespace)
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
// Create a Matcher that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(greeting, "a friendly greeting")
}
func Example_helloWorldUsingMatcher() {
parser := createHelloMatcher()
@ -43,9 +25,9 @@ func Example_helloWorldUsingMatcher() {
"Hello, world",
"Hello,!",
} {
output, err, ok := parser.Parse(input)
if !ok {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
output, err := parser.Execute(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull())
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
}
@ -55,7 +37,29 @@ func Example_helloWorldUsingMatcher() {
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting)
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting)
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting)
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// createHelloMatcher builds a Matcher for greetings that look like
// "Hello, <name>!". Only the name part is kept in the output; the
// salutation, the separator and the exclamation mark are dropped.
func createHelloMatcher() *parsekit.Matcher {
	// Short aliases for the parsekit definitions.
	c, a, m := parsekit.C, parsekit.A, parsekit.M

	// The building blocks of the greeting, composed using the
	// parser/combinator support of parsekit.
	var (
		salutation = c.StrNoCase("hello")
		commaSep   = c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
		divider    = c.Any(commaSep, a.Whitespace)
		greetee    = c.OneOrMore(c.Not(a.Excl))
	)

	// The full greeting must span the whole input, hence the closing
	// end of file handler.
	fullGreeting := c.Seq(
		m.Drop(salutation),
		m.Drop(divider),
		greetee,
		m.Drop(a.Excl),
		a.EndOfFile,
	)

	// Wrap the TokenHandler in a Matcher, so input can be matched against it.
	return parsekit.NewMatcher(fullGreeting, "a friendly greeting")
}

View File

@ -1,15 +1,18 @@
// In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it.
//
// This implementation uses a state-based Parser for it, and it does not implement
// any custom parser/combinator TokenHandler functions. Note that things are much
// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher
// example for this). Doing this fully parser-based implementation is mainly for your
// learning pleasure.
// This implementation uses a state-based Parser for it, and it does not
// implement any custom parser/combinator TokenHandler functions. Note that
// things are much easier to implement using custom TokenHandlers (see the other
// HelloWorldUsingMatcher example for this). Doing this fully parser-based
// implementation is mainly for your learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that the
// state-based parser reports errors much more fine-grained. This might or might
// not be useful for your specific use case.
// One big difference between the Matcher-based example and this one, is that
// this parser reports errors much more fine-grained. This might or might not be
// useful for your specific use case. If you need error reporting like this,
// then also take a look at the HelloWorldUsingParser2 example, which does the
// same thing as this version, only more concise.
package parsekit_test
import (
@ -19,57 +22,7 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
const greeteeItem parsekit.ItemType = 1
func stateStartOfGreeting(p *parsekit.ParseAPI) {
c := parsekit.C
p.Expects("hello")
if p.On(c.StrNoCase("hello")).Skip() {
p.RouteTo(stateComma)
}
}
func stateComma(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("comma")
switch {
case p.On(a.Whitespace).Skip():
p.RouteRepeat()
case p.On(a.Comma).Skip():
p.RouteTo(stateName)
}
}
func stateName(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("name")
switch {
case p.On(a.Excl).Skip():
p.RouteTo(stateEndOfGreeting)
case p.On(a.AnyRune).Accept():
p.RouteRepeat()
}
}
func stateEndOfGreeting(p *parsekit.ParseAPI) {
p.Expects("end of greeting")
if p.On(a.EndOfFile).Stay() {
name := strings.TrimSpace(p.BufLiteral())
if name == "" {
p.EmitError("The name cannot be empty")
} else {
p.Emit(greeteeItem, name)
}
}
}
func createHelloParser() *parsekit.Parser {
return parsekit.NewParser(stateStartOfGreeting)
}
func Example_helloWorldUsingParser1() {
parser := createHelloParser()
for i, input := range []string{
"Hello, world!",
"HELLO ,Johnny!",
@ -86,11 +39,11 @@ func Example_helloWorldUsingParser1() {
"Oh no!",
"hello,!",
} {
item, err, ok := parser.Parse(input).Next()
if !ok {
name, err := (&helloparser1{}).Parse(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value)
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name)
}
}
// Output:
@ -109,3 +62,63 @@ func Example_helloWorldUsingParser1() {
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
// [13] Input: "hello,!" Error: The name cannot be empty
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// helloparser1 keeps the parsing state for the greeting parser. Its methods
// are the parse handlers that are invoked through the ParseAPI.
type helloparser1 struct {
greetee string // the name taken from the greeting, filled by the end handler
}
// Parse runs a parser that begins at the start handler on the input, and
// returns the greetee that was stored together with the parse error, if any.
func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
	err := parsekit.NewParser(h.start).Execute(input)
	return h.greetee, err
}
// start expects a case-insensitive "hello" at the beginning of the input.
// When found, it is skipped and parsing continues with the comma handler.
func (h *helloparser1) start(p *parsekit.ParseAPI) {
	p.Expects("hello")
	salutation := parsekit.C.StrNoCase("hello")
	if !p.On(salutation).Skip() {
		return
	}
	p.Handle(h.comma)
}
// comma skips whitespace when it is there and then expects a comma. When the
// comma is found, it is skipped and parsing continues with the name handler.
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
	p.Expects("comma")
	p.On(parsekit.A.Whitespace).Skip()
	if !p.On(parsekit.A.Comma).Skip() {
		return
	}
	p.Handle(h.name)
}
// name collects the runes of the name. An exclamation mark is skipped and
// hands over to the end handler; any other rune is accepted, after which
// this handler is invoked again for the next rune.
func (h *helloparser1) name(p *parsekit.ParseAPI) {
	p.Expects("name")
	if p.On(parsekit.A.Excl).Skip() {
		p.Handle(h.end)
	} else if p.On(parsekit.A.AnyRune).Accept() {
		p.Handle(h.name)
	}
}
// end checks that the input ends after the greeting and stores the greetee.
//
// p.ExpectEndOfFile() would have worked here too, but the check is written
// out by hand to get the more friendly 'end of greeting' expectation in the
// error message.
func (h *helloparser1) end(p *parsekit.ParseAPI) {
	if p.On(a.EndOfFile).Stay() {
		h.greetee = strings.TrimSpace(p.BufLiteral())
		if h.greetee != "" {
			p.Stop()
			return
		}
		p.EmitError("The name cannot be empty")
		return
	}
	p.Expects("end of greeting")
	p.UnexpectedInput()
}

View File

@ -1,7 +1,16 @@
// This is the same as the example helloWorldUsingParser1, except that in
// this implementation the state machine is implemented using a combination
// of some TokenHandlers and only a single state, in which multiple
// ParseAPI.On() calls are combined to do all the work in one go.
// This is the same as the example HelloWorldUsingParser1, except that in this
// implementation the state machine is implemented using a combination of some
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
// are combined to do all the work in one go.
//
// Note that things are much easier to implement using custom TokenHandlers (see
// the other HelloWorldUsingMatcher example for this). Doing this implementation
// is mainly for your learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that
// this parser reports errors much more fine-grained. This might or might not be
// useful for your specific use case.
package parsekit_test
import (
@ -10,43 +19,8 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
const greeteeItem2 parsekit.ItemType = 1
func stateFullGreeting(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
if !p.On(c.StrNoCase("hello")).Skip() {
p.EmitError("the greeting is not being friendly")
return
}
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
p.EmitError("the greeting is not properly separated")
return
}
if !p.On(m.Trim(c.OneOrMore(c.Except(a.Excl, a.AnyRune)), " \t")).Accept() {
p.EmitError("the greeting is targeted at thin air")
return
}
if !p.On(a.Excl).Stay() {
p.EmitError("the greeting is not loud enough")
return
}
if !p.On(a.EndOfFile).Stay() {
p.EmitError("too much stuff going on after the closing '!'")
return
}
name := p.BufLiteral()
if name == "" {
p.EmitError("the name cannot be empty")
} else {
p.Emit(greeteeItem, name)
}
p.ExpectEndOfFile()
}
func Example_helloWorldUsingParser2() {
parser := parsekit.NewParser(stateFullGreeting)
parser := &helloparser2{}
for i, input := range []string{
"Hello, world!",
@ -65,17 +39,17 @@ func Example_helloWorldUsingParser2() {
"hello,!",
"HELLO, Buster! Eat this!",
} {
item, err, ok := parser.Parse(input).Next()
if !ok {
name, err := parser.Parse(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value)
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name)
}
}
// Output:
// [0] Input: "Hello, world!" Error: too much stuff going on after the closing '!'
// [1] Input: "HELLO ,Johnny!" Error: too much stuff going on after the closing '!'
// [2] Input: "hello , Bob123!" Error: too much stuff going on after the closing '!'
// [0] Input: "Hello, world!" Output: world
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Error: the greeting is not properly separated
// [4] Input: "" Error: the greeting is not being friendly
// [5] Input: " " Error: the greeting is not being friendly
@ -83,9 +57,54 @@ func Example_helloWorldUsingParser2() {
// [7] Input: "hello," Error: the greeting is targeted at thin air
// [8] Input: "hello , " Error: the greeting is targeted at thin air
// [9] Input: "hello , Droopy" Error: the greeting is not loud enough
// [10] Input: "hello , Droopy!" Error: too much stuff going on after the closing '!'
// [11] Input: "hello , \t \t Droopy \t !" Error: too much stuff going on after the closing '!'
// [10] Input: "hello , Droopy!" Output: Droopy
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
// [12] Input: "Oh no!" Error: the greeting is not being friendly
// [13] Input: "hello,!" Error: the greeting is targeted at thin air
// [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!'
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
// helloparser2 keeps the parsing state for the single-handler greeting parser.
type helloparser2 struct {
greetee string // the name taken from the greeting, filled by the start handler
}
// Parse runs a parser that begins at the start handler on the input, and
// returns the greetee together with the parse error, if any.
//
// When an error is returned, the returned greetee is the empty string.
func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
	// A helloparser2 can be used for multiple inputs. The greetee is only
	// stored on a successful parse, so forget the one from an earlier run to
	// prevent returning a stale name alongside an error.
	h.greetee = ""
	parser := parsekit.NewParser(h.start)
	err := parser.Execute(input)
	return h.greetee, err
}
// start handles the full greeting in a single parse handler.
//
// Note:
// Every step is only tried when the previous step succeeded (the steps
// are chained using 'else if'). Because of this, only the first step
// that fails sets its error message and all steps that follow it are
// not executed.
//
// When all steps succeed, the accepted name is stored as the greetee and
// the parser is stopped.
func (h *helloparser2) start(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
if !p.On(c.StrNoCase("hello")).Skip() {
p.EmitError("the greeting is not being friendly")
} else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
p.EmitError("the greeting is not properly separated")
} else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
p.EmitError("the greeting is targeted at thin air")
} else if !p.On(a.Excl).Skip() {
p.EmitError("the greeting is not loud enough")
} else if !p.On(a.EndOfFile).Stay() {
p.EmitError("too much stuff going on after the closing '!'")
} else {
// The buffer holds the name that was accepted above.
h.greetee = p.BufLiteral()
if h.greetee == "" {
p.EmitError("the name cannot be empty")
}
p.Stop()
}
}

View File

@ -6,53 +6,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func ExampleItemType() {
// Make use of positive values. Ideally, define your ItemTypes using
// iota for easy automatic value management like this:
const (
ItemWord parsekit.ItemType = iota
ItemNumber
ItemBlob
// ...
)
}
func ExampleItem() {
// Easy access to the parsekit definitions.
c := parsekit.C
// You define your own item types for your specific parser.
const QuestionItem = parsekit.ItemType(42)
// A ParseHandler function can use the defined item type by means of
// the p.Emit* methods on parsekit.P.
// When errors occur, or the end of the file is reached, then the built-in
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
stateHandler := func(p *parsekit.ParseAPI) {
if p.On(c.Str("question")).Accept() {
p.EmitLiteral(QuestionItem)
}
p.ExpectEndOfFile()
}
// Successful match
item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next()
fmt.Println(ok, item.Type == QuestionItem, item.Value)
// End of file reached
item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next()
fmt.Println(ok, item.Type == parsekit.ItemEOF)
// An error occurred
item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next()
fmt.Println(ok, item.Type == parsekit.ItemError, err)
// Output:
// true true question
// false true
// false true unexpected character 'a' (expected end of file)
}
func ExampleError() {
err := &parsekit.Error{
Message: "it broke down",
@ -66,7 +19,7 @@ func ExampleError() {
// Output:
// it broke down
// it broke down
// it broke down after line 10, column 42
// it broke down at line 10, column 42
}
func ExampleError_Error() {
@ -92,31 +45,26 @@ func ExampleError_ErrorFull() {
fmt.Println(err.ErrorFull())
// Output:
// it broke down after line 10, column 42
// it broke down at line 10, column 42
}
func ExampleMatchAnyRune() {
// Easy access to the parsekit definitions.
a := parsekit.A
matches := []string{}
stateHandler := func(p *parsekit.ParseAPI) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
for p.On(a.AnyRune).Accept() {
matches = append(matches, p.BufLiteral())
p.BufClear()
}
p.ExpectEndOfFile()
}
parser := parsekit.NewParser(stateHandler)
run := parser.Parse("¡Any / valid / character will dö!")
err := parser.Execute("¡Any will dö!")
for i := 0; i < 5; i++ {
match, _, _ := run.Next()
fmt.Printf("Match = %q\n", match.Value)
}
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
// Output:
// Match = "¡"
// Match = "A"
// Match = "n"
// Match = "y"
// Match = " "
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}

View File

@ -25,8 +25,9 @@ type ParseAPI struct {
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
items []Item // a slice of resulting Parser items (see P.Emit())
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when parsing failed, can be retrieved by Error()
item Item // the current item as reached by Next(), retrieved by Get()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
LastMatch string // a string representation of the last matched input data
}

View File

@ -26,6 +26,7 @@ const ItemEOF ItemType = -1
const ItemError ItemType = -2
// Emit passes a Parser item to the client, including the provided string.
// Deprecated
func (p *ParseAPI) Emit(t ItemType, v string) {
p.items = append(p.items, Item{t, v})
p.buffer.reset()
@ -39,13 +40,14 @@ func (p *ParseAPI) Emit(t ItemType, v string) {
// linefeed (ASCII char 10).
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
// still be added to it. Only when calling P.BufClear(), the buffer will be cleared.
func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString()
}
// EmitLiteral passes a parser Item to the client, including the accumulated
// string buffer data as a literal string.
// Deprecated
func (p *ParseAPI) EmitLiteral(t ItemType) {
p.Emit(t, p.BufLiteral())
}
@ -114,25 +116,43 @@ func (err *Error) Error() string {
// ErrorFull returns the current error message, including information about
// the position in the input where the error occurred.
func (err *Error) ErrorFull() string {
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column)
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
}
// EmitError emits a parser error item to the client.
// EmitError sets an error message in the parser API. This error message
// will eventually be returned by the Parser.Execute() method.
func (p *ParseAPI) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...)
p.Emit(ItemError, message)
p.err = &Error{message, p.cursorLine, p.cursorColumn}
}
// EmitEOF emits an EOF to the client. In effect, this will stop the parsing process.
func (p *ParseAPI) EmitEOF() {
p.Emit(ItemEOF, "EOF")
// Stop is used by the parser implementation to tell the API that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first, the
// Parser.Execute() will assume that something went wrong and calls
// ParseAPI.UnexpectedInput() to report an error about this.
//
// The parser implementation can define what was being expected, by
// providing a description to ParseAPI.Expects().
func (p *ParseAPI) Stop() {
p.stopped = true
}
// UnexpectedInput is used by a ParseHandler function to emit an error item
// that tells the client that an unexpected rune was encountered in the input.
// UnexpectedInput is used to set an error that tells the user that some
// unexpected input was encountered.
//
// It can automatically produce an error message for a couple of situations:
// 1) input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an invalid UTF8 character on the input.
//
// The parser implementation can provide some feedback for this error by
// calling ParseAPI.Expects() to set the expectation. When set, the
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() {
// When some previous parsing step yielded an error, skip this operation.
if p.err != nil {
if p.err != nil || p.stopped {
return
}
r, _, ok := p.peek(0)

View File

@ -16,5 +16,8 @@ package parsekit
func (p *ParseAPI) Expects(description string) {
// TODO make this into some debugging tool?
// fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn)
// After an error was set or the parser was stopped, the expectation
// that is already in place is left untouched.
if p.err != nil || p.stopped {
return
}
p.expecting = description
}

View File

@ -38,7 +38,7 @@ package parsekit
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
// When some previous parsing step yielded an error, skip this operation.
if p.err != nil {
if p.err != nil || p.stopped {
return &MatchAction{
p: p,
ok: false,

View File

@ -5,7 +5,7 @@ package parsekit
func (p *ParseAPI) Handle(handlers ...ParseHandler) {
for _, handler := range handlers {
// When some previous parsing step yielded an error, skip this operation.
if p.err != nil {
if p.err != nil || p.stopped {
break
}
handler(p)
@ -14,6 +14,7 @@ func (p *ParseAPI) Handle(handlers ...ParseHandler) {
// RouteTo tells the parser what ParseHandler function to invoke on
// the next parse cycle.
// Deprecated
func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
p.nextState = handler
return &RouteFollowupAction{p}
@ -21,6 +22,7 @@ func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction {
// RouteRepeat tells the parser that on the next parsing cycle, the current
// ParseHandler must be reinvoked.
// Deprecated
func (p *ParseAPI) RouteRepeat() {
p.RouteTo(p.state)
}
@ -33,12 +35,14 @@ func (p *ParseAPI) RouteRepeat() {
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
// Deprecated
func (p *ParseAPI) RouteReturn() {
p.nextState = p.popRoute()
}
// RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// Deprecated
type RouteFollowupAction struct {
p *ParseAPI
}
@ -48,6 +52,7 @@ type RouteFollowupAction struct {
// For example:
//
// p.RouteTo(handlerA).ThenTo(handlerB)
// Deprecated
func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
a.p.pushRoute(state)
}
@ -57,17 +62,20 @@ func (a *RouteFollowupAction) ThenTo(state ParseHandler) {
// For example:
//
// p.RouteTo(handlerA).ThenReturnHere()
// Deprecated
func (a *RouteFollowupAction) ThenReturnHere() {
a.p.pushRoute(a.p.state)
}
// pushRoute adds the ParseHandler to the route stack.
// This is used for implementing nested parsing.
// Deprecated
func (p *ParseAPI) pushRoute(state ParseHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute pops the last pushed ParseHandler from the route stack.
// Deprecated
func (p *ParseAPI) popRoute() ParseHandler {
last := len(p.routeStack) - 1
head, tail := p.routeStack[:last], p.routeStack[last]
@ -92,13 +100,14 @@ func (p *ParseAPI) popRoute() ParseHandler {
// }
func (p *ParseAPI) ExpectEndOfFile() {
// When some previous parsing step yielded an error, skip this operation.
if p.err == nil {
if p.On(A.EndOfFile).Stay() {
p.EmitEOF()
} else {
p.Expects("end of file")
p.UnexpectedInput()
}
if p.err != nil || p.stopped {
return
}
if p.On(A.EndOfFile).Stay() {
p.Stop()
} else {
p.Expects("end of file")
p.UnexpectedInput()
}
}
@ -115,11 +124,12 @@ func (p *ParseAPI) ExpectEndOfFile() {
// yourself too. Simply stop the parser when the end of the input was
// reached:
//
// p.EmitEOF()
// p.Stop()
// TODO meh, get rid of this one, once we don't use state scheduling anymore.
// Deprecated
func ExpectEndOfFile(p *ParseAPI) {
p.Expects("end of file")
if p.On(A.EndOfFile).Stay() {
p.EmitEOF()
p.Stop()
}
}

View File

@ -9,7 +9,7 @@ import (
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
startState ParseHandler // the function that handles the very first state
startHandler ParseHandler // the function that handles the very first state
}
// NewParser instantiates a new Parser.
@ -19,28 +19,48 @@ type Parser struct {
// parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// To start parsing input data, use the method Parser.Parse().
func NewParser(startState ParseHandler) *Parser {
return &Parser{startState: startState}
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
return &Parser{startHandler: startHandler}
}
// ParseRun represents a single parse run for a Parser.
// Deprecated
type ParseRun struct {
p *ParseAPI // holds parser state and provides an API to ParseHandler functions
}
// Execute runs the start handler of the parser against the provided input.
// It returns the error that was set during parsing, or nil when no error
// was set.
func (p *Parser) Execute(input string) *Error {
	api := new(ParseAPI)
	api.input = input
	api.len = len(input)
	api.cursorLine = 1
	api.cursorColumn = 1
	api.nextState = p.startHandler

	p.startHandler(api)

	if api.stopped {
		return api.err
	}

	// The handler returned without stopping the parser, so have the API
	// report the input as unexpected.
	api.UnexpectedInput()
	return api.err
}
// Parse starts a parse run on the provided input data.
// To retrieve emitted parser Items from the run, make use of the ParseRun.Next() method.
// Deprecated
func (p *Parser) Parse(input string) *ParseRun {
return &ParseRun{
p: &ParseAPI{
input: input,
len: len(input),
cursorLine: 1,
cursorColumn: 1,
nextState: p.startState,
},
}
panic("Parse() is deprecated, use Execute()")
// return &ParseRun{
// p: &ParseAPI{
// input: input,
// len: len(input),
// cursorLine: 1,
// cursorColumn: 1,
// nextState: p.startHandler,
// },
// }
}
// Next retrieves the next parsed item for a parse run.
@ -146,6 +166,7 @@ func (run *ParseRun) invokeNextParseHandler(state ParseHandler) {
// Matcher.Parse().
type Matcher struct {
parser *Parser
match string
}
// NewMatcher instantiates a new Matcher.
@ -157,20 +178,32 @@ type Matcher struct {
// The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
stateHandler := func(p *ParseAPI) {
p.Expects(expects)
matcher := &Matcher{}
matcher.parser = NewParser(func(p *ParseAPI) {
if p.On(tokenHandler).Accept() {
p.EmitLiteral(0) // ItemType is irrelevant
matcher.match = p.BufLiteral()
p.Stop()
} else {
p.Expects(expects)
p.UnexpectedInput()
}
}
return &Matcher{parser: NewParser(stateHandler)}
})
return matcher
}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the matched input string and an error. When an error
// occurred during parsing, the error will be set, nil otherwise.
//
// When an error is returned, the returned string is empty.
func (m *Matcher) Execute(input string) (string, *Error) {
	// The match is stored on the Matcher by its parse handler, and only on a
	// successful match. Clear the result of any earlier run first, so a
	// failing input does not return the stale match of a previous call.
	m.match = ""
	err := m.parser.Execute(input)
	return m.match, err
}
// Parse checks for a match on the provided input data.
func (m *Matcher) Parse(input string) (string, *Error, bool) {
func (m *Matcher) Parse(input string) (string, *Error) {
item, err, ok := m.parser.Parse(input).Next()
if !ok {
return "", err, false
return "", err
}
return item.Value, nil, true
return item.Value, nil
}

View File

@ -28,16 +28,16 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
}
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input)
output, err := parsekit.NewMatcher(test.tokenHandler, "a match").Execute(test.input)
if test.mustMatch {
if !ok {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.input, err)
} else if output != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
}
} else {
if ok {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.input)
}
}

View File

@ -518,6 +518,7 @@ var M = struct {
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
@ -527,6 +528,7 @@ var M = struct {
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
@ -589,6 +591,13 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo
return ModifyByCallback(handler, modfunc)
}
// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and whitespace characters as defined by unicode
// are trimmed from the left and right of the output.
func ModifyTrimSpace(handler TokenHandler) TokenHandler {
return ModifyByCallback(handler, strings.TrimSpace)
}
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
// If it does, then its output is taken and converted into upper case.

View File

@ -205,18 +205,20 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
output := ""
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept() {
p.EmitLiteral(TestItem)
output = p.BufLiteral()
p.Stop()
}
})
item, err, ok := parser.Parse(input).Next()
if !ok {
err := parser.Execute(input)
if err != nil {
t.Fatalf("Parsing failed: %s", err)
}
if item.Value != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
if output != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output)
}
}