diff --git a/example_basiccalculator1_test.go b/example_basiccalculator1_test.go index 03a801c..4191f9d 100644 --- a/example_basiccalculator1_test.go +++ b/example_basiccalculator1_test.go @@ -51,7 +51,7 @@ func Example_basicCalculator1() { } // --------------------------------------------------------------------------- -// Implementation of the calculator +// Implementation of the parser // --------------------------------------------------------------------------- // CalculateSimple interprets a simple calculation, consisting of only integers @@ -60,7 +60,7 @@ func Example_basicCalculator1() { func ComputeSimple(calculation string) (int64, *parsekit.Error) { calculator := &simpleCalculator{op: +1} parser := parsekit.NewParser(calculator.number) - _, err, _ := parser.Parse(calculation).Next() + err := parser.Execute(calculation) return calculator.Result, err } diff --git a/example_basiccalculator2_test.go b/example_basiccalculator2_test.go index 659f1cd..1b5cda0 100644 --- a/example_basiccalculator2_test.go +++ b/example_basiccalculator2_test.go @@ -62,7 +62,7 @@ func Example_basicCalculator2() { } // --------------------------------------------------------------------------- -// Implementation of the calculator +// Implementation of the parser // --------------------------------------------------------------------------- // calculator implements a recursive descent parser that is responsible for parsing @@ -79,14 +79,13 @@ type calculator struct { func Compute(input string) (float64, *parsekit.Error) { c := &calculator{} parser := parsekit.NewParser(c.computation) - _, err, _ := parser.Parse(input).Next() + err := parser.Execute(input) return c.result, err } func (c *calculator) computation(p *parsekit.ParseAPI) { p.Handle(c.expr) p.ExpectEndOfFile() - p.Handle(c.factor) c.result = c.interpreter.result } diff --git a/example_dutchpostcode_test.go b/example_dutchpostcode_test.go index d96735a..82de3e3 100644 --- a/example_dutchpostcode_test.go +++ 
b/example_dutchpostcode_test.go @@ -9,6 +9,43 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) +func Example_dutchPostcodeUsingMatcher() { + parser := createPostcodeMatcher() + + for i, input := range []string{ + "1234 AB", + "2233Ab", + "1001\t\tab", + "1818ab", + "1212abc", + "1234", + "huh", + "", + "\xcd2222AB", + } { + output, err := parser.Execute(input) + if err != nil { + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull()) + } else { + fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) + } + } + // Output: + // [0] Input: "1234 AB" Output: 1234 AB + // [1] Input: "2233Ab" Output: 2233 AB + // [2] Input: "1001\t\tab" Output: 1001 AB + // [3] Input: "1818ab" Output: 1818 AB + // [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1 + // [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1 + // [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1 + // [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1 + // [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1 +} + +// --------------------------------------------------------------------------- +// Implementation of the parser +// --------------------------------------------------------------------------- + func createPostcodeMatcher() *parsekit.Matcher { // Easy access to the parsekit definitions. 
c, a, m := parsekit.C, parsekit.A, parsekit.M @@ -24,36 +61,9 @@ func createPostcodeMatcher() *parsekit.Matcher { pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) space := m.Replace(c.Opt(a.Whitespace), " ") - postcode := c.Seq(pcDigits, space, pcLetters) + postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile) // Create a Matcher that wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. return parsekit.NewMatcher(postcode, "a Dutch postcode") } - -func Example_dutchPostcodeUsingMatcher() { - pcParser := createPostcodeMatcher() - - for i, input := range []string{ - "1234 AB", - "2233Ab", - "1001\t\tab", - "1818ab", - "1234", - "huh", - } { - output, err, ok := pcParser.Parse(input) - if !ok { - fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) - } else { - fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) - } - } - // Output: - // [0] Input: "1234 AB" Output: 1234 AB - // [1] Input: "2233Ab" Output: 2233 AB - // [2] Input: "1001\t\tab" Output: 1001 AB - // [3] Input: "1818ab" Output: 1818 AB - // [4] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) - // [5] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) -} diff --git a/example_hellomatcher_test.go b/example_hellomatcher_test.go index 11b4988..acc33f1 100644 --- a/example_hellomatcher_test.go +++ b/example_hellomatcher_test.go @@ -3,8 +3,8 @@ // // The implementation uses only parser/combinator TokenHandler functions and does // not implement a full-fledged state-based Parser for it. If you want to see the -// same kind of functionality, implementated using a Paser, take a look at the -// HelloWorldUsingParser example. +// same kind of functionality, implemented using a Parser, take a look at the +// HelloWorldUsingParser examples. 
package parsekit_test import ( @@ -13,24 +13,6 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func createHelloMatcher() *parsekit.Matcher { - // Easy access to parsekit definition. - c, a, m := parsekit.C, parsekit.A, parsekit.M - - // Using the parser/combinator support of parsekit, we create a TokenHandler function - // that does all the work. The 'greeting' TokenHandler matches the whole input and - // drops all but the name from it. - hello := c.StrNoCase("hello") - comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace)) - separator := c.Any(comma, a.Whitespace) - name := c.OneOrMore(c.Not(a.Excl)) - greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl)) - - // Create a Matcher that wraps the 'greeting' TokenHandler and allows - // us to match some input against that handler. - return parsekit.NewMatcher(greeting, "a friendly greeting") -} - func Example_helloWorldUsingMatcher() { parser := createHelloMatcher() @@ -43,9 +25,9 @@ func Example_helloWorldUsingMatcher() { "Hello, world", "Hello,!", } { - output, err, ok := parser.Parse(input) - if !ok { - fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) + output, err := parser.Execute(input) + if err != nil { + fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.ErrorFull()) } else { fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) } @@ -55,7 +37,29 @@ func Example_helloWorldUsingMatcher() { // [1] Input: "HELLO ,Johnny!" Output: Johnny // [2] Input: "hello , Bob123!" Output: Bob123 // [3] Input: "hello Pizza!" Output: Pizza - // [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) - // [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) - // [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) + // [4] Input: "Oh no!" 
Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1 + // [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1 + // [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1 +} + +// --------------------------------------------------------------------------- +// Implementation of the parser +// --------------------------------------------------------------------------- + +func createHelloMatcher() *parsekit.Matcher { + // Easy access to parsekit definition. + c, a, m := parsekit.C, parsekit.A, parsekit.M + + // Using the parser/combinator support of parsekit, we create a TokenHandler function + // that does all the work. The 'greeting' TokenHandler matches the whole input and + // drops all but the name from it. + hello := c.StrNoCase("hello") + comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace)) + separator := c.Any(comma, a.Whitespace) + name := c.OneOrMore(c.Not(a.Excl)) + greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile) + + // Create a Matcher that wraps the 'greeting' TokenHandler and allows + // us to match some input against that handler. + return parsekit.NewMatcher(greeting, "a friendly greeting") } diff --git a/example_helloparser1_test.go b/example_helloparser1_test.go index 97fb94c..3e0abfe 100644 --- a/example_helloparser1_test.go +++ b/example_helloparser1_test.go @@ -1,15 +1,18 @@ // In this example, a parser is created that is able to parse input that looks // like "Hello, !", and that extracts the name from it. // -// This implementation uses a state-based Parser for it, and it does not implement -// any custom parser/combinator TokenHandler functions. Note that things are much -// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher -// example for this). 
Doing this fully parser-based implementation is mainly for your -// learning pleasure. +// This implementation uses a state-based Parser for it, and it does not +// implement any custom parser/combinator TokenHandler functions. Note that +// things are much easier to implement using custom TokenHandlers (see the other +// HelloWorldUsingMatcher example for this). Doing this fully parser-based +// implementation is mainly for your learning pleasure. // -// One big difference between the Matcher-based example and this one, is that the -// state-based parser reports errors much more fine-grained. This might or might -// not be useful for your specific use case. +// One big difference between the Matcher-based example and this one, is that +// this parser reports errors much more fine-grained. This might or might not be +// useful for your specific use case. If you need error reporting like this, +// then also take a look at the HelloWorldUsingParser2 example, which does the +// same thing as this version, only more concise. 
+ package parsekit_test import ( @@ -19,57 +22,7 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -const greeteeItem parsekit.ItemType = 1 - -func stateStartOfGreeting(p *parsekit.ParseAPI) { - c := parsekit.C - p.Expects("hello") - if p.On(c.StrNoCase("hello")).Skip() { - p.RouteTo(stateComma) - } -} - -func stateComma(p *parsekit.ParseAPI) { - a := parsekit.A - p.Expects("comma") - switch { - case p.On(a.Whitespace).Skip(): - p.RouteRepeat() - case p.On(a.Comma).Skip(): - p.RouteTo(stateName) - } -} - -func stateName(p *parsekit.ParseAPI) { - a := parsekit.A - p.Expects("name") - switch { - case p.On(a.Excl).Skip(): - p.RouteTo(stateEndOfGreeting) - case p.On(a.AnyRune).Accept(): - p.RouteRepeat() - } -} - -func stateEndOfGreeting(p *parsekit.ParseAPI) { - p.Expects("end of greeting") - if p.On(a.EndOfFile).Stay() { - name := strings.TrimSpace(p.BufLiteral()) - if name == "" { - p.EmitError("The name cannot be empty") - } else { - p.Emit(greeteeItem, name) - } - } -} - -func createHelloParser() *parsekit.Parser { - return parsekit.NewParser(stateStartOfGreeting) -} - func Example_helloWorldUsingParser1() { - parser := createHelloParser() - for i, input := range []string{ "Hello, world!", "HELLO ,Johnny!", @@ -86,11 +39,11 @@ func Example_helloWorldUsingParser1() { "Oh no!", "hello,!", } { - item, err, ok := parser.Parse(input).Next() - if !ok { + name, err := (&helloparser1{}).Parse(input) + if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { - fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value) + fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name) } } // Output: @@ -109,3 +62,63 @@ func Example_helloWorldUsingParser1() { // [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello) // [13] Input: "hello,!" 
Error: The name cannot be empty } + +// --------------------------------------------------------------------------- +// Implementation of the parser +// --------------------------------------------------------------------------- + +type helloparser1 struct { + greetee string +} + +func (h *helloparser1) Parse(input string) (string, *parsekit.Error) { + parser := parsekit.NewParser(h.start) + err := parser.Execute(input) + return h.greetee, err +} + +func (h *helloparser1) start(p *parsekit.ParseAPI) { + c := parsekit.C + p.Expects("hello") + if p.On(c.StrNoCase("hello")).Skip() { + p.Handle(h.comma) + } +} + +func (h *helloparser1) comma(p *parsekit.ParseAPI) { + a := parsekit.A + p.Expects("comma") + p.On(a.Whitespace).Skip() + if p.On(a.Comma).Skip() { + p.Handle(h.name) + } +} + +func (h *helloparser1) name(p *parsekit.ParseAPI) { + a := parsekit.A + p.Expects("name") + switch { + case p.On(a.Excl).Skip(): + p.Handle(h.end) + case p.On(a.AnyRune).Accept(): + p.Handle(h.name) + } +} + +// Here we could have used p.ExpectEndOfFile() as well, but a slightly +// different route was taken to implement a more friendly 'end of greeting' +// error message. +func (h *helloparser1) end(p *parsekit.ParseAPI) { + if !p.On(a.EndOfFile).Stay() { + p.Expects("end of greeting") + p.UnexpectedInput() + return + } + + h.greetee = strings.TrimSpace(p.BufLiteral()) + if h.greetee == "" { + p.EmitError("The name cannot be empty") + } else { + p.Stop() + } +} diff --git a/example_helloparser2_test.go b/example_helloparser2_test.go index a125ad4..0c3bdb2 100644 --- a/example_helloparser2_test.go +++ b/example_helloparser2_test.go @@ -1,7 +1,16 @@ -// This is the same as the example helloWorldUsingParser1, except that in -// this implementation the state machine is implemented using a combination -// of some TokenHandlers and only a single state, in which multiple -// ParseAPI.On() calls are combined to do all the work in one go. 
+// This is the same as the example HelloWorldUsingParser1, except that in this +// implementation the state machine is implemented using a combination of some +// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls +// are combined to do all the work in one go. +// +// Note that things are much easier to implement using custom TokenHandlers (see +// the other HelloWorldUsingMatcher example for this). Doing this implementation +// is mainly for your learning pleasure. +// +// One big difference between the Matcher-based example and this one, is that +// this parser reports errors much more fine-grained. This might or might not be +// useful for your specific use case. + package parsekit_test import ( @@ -10,43 +19,8 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -const greeteeItem2 parsekit.ItemType = 1 - -func stateFullGreeting(p *parsekit.ParseAPI) { - c, a, m := parsekit.C, parsekit.A, parsekit.M - if !p.On(c.StrNoCase("hello")).Skip() { - p.EmitError("the greeting is not being friendly") - return - } - if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() { - p.EmitError("the greeting is not properly separated") - return - } - if !p.On(m.Trim(c.OneOrMore(c.Except(a.Excl, a.AnyRune)), " \t")).Accept() { - p.EmitError("the greeting is targeted at thin air") - return - } - if !p.On(a.Excl).Stay() { - p.EmitError("the greeting is not loud enough") - return - } - if !p.On(a.EndOfFile).Stay() { - p.EmitError("too much stuff going on after the closing '!'") - return - } - - name := p.BufLiteral() - if name == "" { - p.EmitError("the name cannot be empty") - } else { - p.Emit(greeteeItem, name) - } - - p.ExpectEndOfFile() -} - func Example_helloWorldUsingParser2() { - parser := parsekit.NewParser(stateFullGreeting) + parser := &helloparser2{} for i, input := range []string{ "Hello, world!", @@ -65,17 +39,17 @@ func Example_helloWorldUsingParser2() { "hello,!", "HELLO, Buster! 
Eat this!", } { - item, err, ok := parser.Parse(input).Next() - if !ok { + name, err := parser.Parse(input) + if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { - fmt.Printf("[%d] Input: %q Output: %s\n", i, input, item.Value) + fmt.Printf("[%d] Input: %q Output: %s\n", i, input, name) } } // Output: - // [0] Input: "Hello, world!" Error: too much stuff going on after the closing '!' - // [1] Input: "HELLO ,Johnny!" Error: too much stuff going on after the closing '!' - // [2] Input: "hello , Bob123!" Error: too much stuff going on after the closing '!' + // [0] Input: "Hello, world!" Output: world + // [1] Input: "HELLO ,Johnny!" Output: Johnny + // [2] Input: "hello , Bob123!" Output: Bob123 // [3] Input: "hello Pizza!" Error: the greeting is not properly separated // [4] Input: "" Error: the greeting is not being friendly // [5] Input: " " Error: the greeting is not being friendly @@ -83,9 +57,54 @@ func Example_helloWorldUsingParser2() { // [7] Input: "hello," Error: the greeting is targeted at thin air // [8] Input: "hello , " Error: the greeting is targeted at thin air // [9] Input: "hello , Droopy" Error: the greeting is not loud enough - // [10] Input: "hello , Droopy!" Error: too much stuff going on after the closing '!' - // [11] Input: "hello , \t \t Droopy \t !" Error: too much stuff going on after the closing '!' + // [10] Input: "hello , Droopy!" Output: Droopy + // [11] Input: "hello , \t \t Droopy \t !" Output: Droopy // [12] Input: "Oh no!" Error: the greeting is not being friendly // [13] Input: "hello,!" Error: the greeting is targeted at thin air // [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!' 
} + +// --------------------------------------------------------------------------- +// Implementation of the parser +// --------------------------------------------------------------------------- + +type helloparser2 struct { + greetee string +} + +func (h *helloparser2) Parse(input string) (string, *parsekit.Error) { + parser := parsekit.NewParser(h.start) + err := parser.Execute(input) + return h.greetee, err +} + +// Note: +// To stop at the first failing step, we could have either: +// +// 1) added a return after every call to p.EmitError() +// 2) done an 'else if' for every 'if' after the first +// +// This implementation uses option 2, so only the first failing step +// sets its error message. In addition, the ParseAPI makes p.On() a +// no-op after an error has been set: it will be invoked, however it +// will always return false. +func (h *helloparser2) start(p *parsekit.ParseAPI) { + c, a, m := parsekit.C, parsekit.A, parsekit.M + if !p.On(c.StrNoCase("hello")).Skip() { + p.EmitError("the greeting is not being friendly") + } else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() { + p.EmitError("the greeting is not properly separated") + } else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() { + p.EmitError("the greeting is targeted at thin air") + } else if !p.On(a.Excl).Skip() { + p.EmitError("the greeting is not loud enough") + } else if !p.On(a.EndOfFile).Stay() { + p.EmitError("too much stuff going on after the closing '!'") + } else { + h.greetee = p.BufLiteral() + if h.greetee == "" { + p.EmitError("the name cannot be empty") + } + p.Stop() + } +} diff --git a/examples_test.go b/examples_test.go index a1a1a2d..1211225 100644 --- a/examples_test.go +++ b/examples_test.go @@ -6,53 +6,6 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func ExampleItemType() { - // Make use of positive values. 
Ideally, define your ItemTypes using - // iota for easy automatic value management like this: - const ( - ItemWord parsekit.ItemType = iota - ItemNumber - ItemBlob - // ... - ) -} - -func ExampleItem() { - // Easy access to the parsekit definitions. - c := parsekit.C - - // You define your own item types for your specific parser. - const QuestionItem = parsekit.ItemType(42) - - // A ParseHandler function can use the defined item type by means of - // the p.Emit* methods on parsekit.P. - // When errors occur, or the end of the file is reached, then the built-in - // types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit. - stateHandler := func(p *parsekit.ParseAPI) { - if p.On(c.Str("question")).Accept() { - p.EmitLiteral(QuestionItem) - } - p.ExpectEndOfFile() - } - - // Successful match - item, _, ok := parsekit.NewParser(stateHandler).Parse("question").Next() - fmt.Println(ok, item.Type == QuestionItem, item.Value) - - // End of file reached - item, _, ok = parsekit.NewParser(stateHandler).Parse("").Next() - fmt.Println(ok, item.Type == parsekit.ItemEOF) - - // An error occurred - item, err, ok := parsekit.NewParser(stateHandler).Parse("answer").Next() - fmt.Println(ok, item.Type == parsekit.ItemError, err) - - // Output: - // true true question - // false true - // false true unexpected character 'a' (expected end of file) -} - func ExampleError() { err := &parsekit.Error{ Message: "it broke down", @@ -66,7 +19,7 @@ func ExampleError() { // Output: // it broke down // it broke down - // it broke down after line 10, column 42 + // it broke down at line 10, column 42 } func ExampleError_Error() { @@ -92,31 +45,26 @@ func ExampleError_ErrorFull() { fmt.Println(err.ErrorFull()) // Output: - // it broke down after line 10, column 42 + // it broke down at line 10, column 42 } func ExampleMatchAnyRune() { // Easy access to the parsekit definitions. 
a := parsekit.A + matches := []string{} + stateHandler := func(p *parsekit.ParseAPI) { - p.Expects("Any valid rune") - if p.On(a.AnyRune).Accept() { - p.EmitLiteral(TestItem) - p.RouteRepeat() + for p.On(a.AnyRune).Accept() { + matches = append(matches, p.BufLiteral()) + p.BufClear() } + p.ExpectEndOfFile() } parser := parsekit.NewParser(stateHandler) - run := parser.Parse("¡Any / valid / character will dö!") + err := parser.Execute("¡Any will dö!") - for i := 0; i < 5; i++ { - match, _, _ := run.Next() - fmt.Printf("Match = %q\n", match.Value) - } + fmt.Printf("Matches = %q, Error = %s\n", matches, err) // Output: - // Match = "¡" - // Match = "A" - // Match = "n" - // Match = "y" - // Match = " " + // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = } diff --git a/parsehandler.go b/parsehandler.go index 27b7783..00ecb8c 100644 --- a/parsehandler.go +++ b/parsehandler.go @@ -25,8 +25,9 @@ type ParseAPI struct { expecting string // a description of what the current state expects to find (see P.Expects()) buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) items []Item // a slice of resulting Parser items (see P.Emit()) - item Item // the current item as reached by Next() and retrieved by Get() - err *Error // an error when parsing failed, can be retrieved by Error() + item Item // the current item as reached by Next(), retrieved by Get() + err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored + stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored LastMatch string // a string representation of the last matched input data } diff --git a/parsehandler_emit.go b/parsehandler_emit.go index e8cfb6f..c1c9c8e 100644 --- a/parsehandler_emit.go +++ b/parsehandler_emit.go @@ -26,6 +26,7 @@ const ItemEOF ItemType = -1 const ItemError ItemType = -2 // Emit passes a Parser item to the client, including the provided string. 
+// Deprecated func (p *ParseAPI) Emit(t ItemType, v string) { p.items = append(p.items, Item{t, v}) p.buffer.reset() @@ -39,13 +40,14 @@ func (p *ParseAPI) Emit(t ItemType, v string) { // linefeed (ASCII char 10). // // Retrieving the buffer contents will not affect the buffer itself. New runes can -// still be added to it. Only when calling P.Emit(), the buffer will be cleared. +// still be added to it. Only when calling P.BufClear(), the buffer will be cleared. func (p *ParseAPI) BufLiteral() string { return p.buffer.asLiteralString() } // EmitLiteral passes a parser Item to the client, including the accumulated // string buffer data as a literal string. +// Deprecated func (p *ParseAPI) EmitLiteral(t ItemType) { p.Emit(t, p.BufLiteral()) } @@ -114,25 +116,43 @@ func (err *Error) Error() string { // ErrorFull returns the current error message, including information about // the position in the input where the error occurred. func (err *Error) ErrorFull() string { - return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column) + return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column) } -// EmitError emits a parser error item to the client. +// EmitError sets an error message in the parser API. This error message +// will eventually be returned by the Parser.Execute() method. func (p *ParseAPI) EmitError(format string, args ...interface{}) { message := fmt.Sprintf(format, args...) - p.Emit(ItemError, message) + p.err = &Error{message, p.cursorLine, p.cursorColumn} } -// EmitEOF emits an EOF to the client. In effect, this will stop the parsing process. -func (p *ParseAPI) EmitEOF() { - p.Emit(ItemEOF, "EOF") +// Stop is used by the parser implementation to tell the API that it has +// completed the parsing process successfully. +// +// When the parser implementation returns without stopping first, the +// Parser.Execute() will assume that something went wrong and calls +// ParseAPI.UnexpectedInput() to report an error about this. 
+// +// The parser implementation can define what was being expected, by +// providing a description to ParseAPI.Expects(). +func (p *ParseAPI) Stop() { + p.stopped = true } -// UnexpectedInput is used by a ParseHandler function to emit an error item -// that tells the client that an unexpected rune was encountered in the input. +// UnexpectedInput is used to set an error that tells the user that some +// unexpected input was encountered. +// +// It can automatically produce an error message for a couple of situations: +// 1) input simply didn't match the expectation +// 2) the end of the input was reached +// 3) there was an invalid UTF8 character on the input. +// +// The parser implementation can provide some feedback for this error by +// calling ParseAPI.Expects() to set the expectation. When set, the +// expectation is included in the error message. func (p *ParseAPI) UnexpectedInput() { // When some previous parsing step yielded an error, skip this operation. - if p.err != nil { + if p.err != nil || p.stopped { return } r, _, ok := p.peek(0) diff --git a/parsehandler_expects.go b/parsehandler_expects.go index 7e6d80a..266510e 100644 --- a/parsehandler_expects.go +++ b/parsehandler_expects.go @@ -16,5 +16,8 @@ package parsekit func (p *ParseAPI) Expects(description string) { // TODO make this into some debugging tool? // fmt.Printf("Expecting %s @ line %d, col %d\n", description, p.cursorLine, p.cursorColumn) + if p.err != nil || p.stopped { + return + } p.expecting = description } diff --git a/parsehandler_on.go b/parsehandler_on.go index 7066ac3..f0d65a4 100644 --- a/parsehandler_on.go +++ b/parsehandler_on.go @@ -38,7 +38,7 @@ package parsekit // } func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction { // When some previous parsing step yielded an error, skip this operation. 
- if p.err != nil { + if p.err != nil || p.stopped { return &MatchAction{ p: p, ok: false, diff --git a/parsehandler_route.go b/parsehandler_route.go index d66eae3..52356a5 100644 --- a/parsehandler_route.go +++ b/parsehandler_route.go @@ -5,7 +5,7 @@ package parsekit func (p *ParseAPI) Handle(handlers ...ParseHandler) { for _, handler := range handlers { // When some previous parsing step yielded an error, skip this operation. - if p.err != nil { + if p.err != nil || p.stopped { break } handler(p) @@ -14,6 +14,7 @@ func (p *ParseAPI) Handle(handlers ...ParseHandler) { // RouteTo tells the parser what ParseHandler function to invoke on // the next parse cycle. +// Deprecated func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction { p.nextState = handler return &RouteFollowupAction{p} @@ -21,6 +22,7 @@ func (p *ParseAPI) RouteTo(handler ParseHandler) *RouteFollowupAction { // RouteRepeat tells the parser that on the next parsing cycle, the current // ParseHandler must be reinvoked. +// Deprecated func (p *ParseAPI) RouteRepeat() { p.RouteTo(p.state) } @@ -33,12 +35,14 @@ func (p *ParseAPI) RouteRepeat() { // p.RouteTo(subroutine).ThenReturnHere()), you can refrain from // providing an explicit routing decision from that handler. The parser will // automatically assume a RouteReturn() in that case. +// Deprecated func (p *ParseAPI) RouteReturn() { p.nextState = p.popRoute() } // RouteFollowupAction chains parsing routes. // It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB). 
+// Deprecated type RouteFollowupAction struct { p *ParseAPI } @@ -48,6 +52,7 @@ type RouteFollowupAction struct { // For example: // // p.RouteTo(handlerA).ThenTo(handlerB) +// Deprecated func (a *RouteFollowupAction) ThenTo(state ParseHandler) { a.p.pushRoute(state) } @@ -57,17 +62,20 @@ func (a *RouteFollowupAction) ThenTo(state ParseHandler) { // For example: // // p.RouteTo(handlerA).ThenReturnHere() +// Deprecated func (a *RouteFollowupAction) ThenReturnHere() { a.p.pushRoute(a.p.state) } // pushRoute adds the ParseHandler to the route stack. // This is used for implementing nested parsing. +// Deprecated func (p *ParseAPI) pushRoute(state ParseHandler) { p.routeStack = append(p.routeStack, state) } // popRoute pops the last pushed ParseHandler from the route stack. +// Deprecated func (p *ParseAPI) popRoute() ParseHandler { last := len(p.routeStack) - 1 head, tail := p.routeStack[:last], p.routeStack[last] @@ -92,13 +100,14 @@ func (p *ParseAPI) popRoute() ParseHandler { // } func (p *ParseAPI) ExpectEndOfFile() { // When some previous parsing step yielded an error, skip this operation. - if p.err == nil { - if p.On(A.EndOfFile).Stay() { - p.EmitEOF() - } else { - p.Expects("end of file") - p.UnexpectedInput() - } + if p.err != nil || p.stopped { + return + } + if p.On(A.EndOfFile).Stay() { + p.Stop() + } else { + p.Expects("end of file") + p.UnexpectedInput() } } @@ -115,11 +124,12 @@ func (p *ParseAPI) ExpectEndOfFile() { // yourself too. Simply emit an ItemEOF when the end of the input was reached // to stop the parser loop: // -// p.EmitEOF() +// p.Stop() // TODO meh, get rid of this one, once we don't use state scheduling anymore. 
+// Deprecated func ExpectEndOfFile(p *ParseAPI) { p.Expects("end of file") if p.On(A.EndOfFile).Stay() { - p.EmitEOF() + p.Stop() } } diff --git a/parsekit.go b/parsekit.go index a18f939..f3d9ff2 100644 --- a/parsekit.go +++ b/parsekit.go @@ -9,7 +9,7 @@ import ( // Parser is the top-level struct that holds the configuration for a parser. // The Parser can be instantiated using the parsekit.NewParser() method. type Parser struct { - startState ParseHandler // the function that handles the very first state + startHandler ParseHandler // the function that handles the very first state } // NewParser instantiates a new Parser. @@ -19,28 +19,48 @@ type Parser struct { // parsing. This style of parser is typically used for parsing programming // languages and structured data formats (like json, xml, toml, etc.) // -// To start parsing input data, use the method Parser.Parse(). -func NewParser(startState ParseHandler) *Parser { - return &Parser{startState: startState} +// To parse input data, use the method Parser.Execute(). +func NewParser(startHandler ParseHandler) *Parser { + return &Parser{startHandler: startHandler} } // ParseRun represents a single parse run for a Parser. +// Deprecated type ParseRun struct { p *ParseAPI // holds parser state and provides an API to ParseHandler functions } +// Execute starts the parser for the provided input. +// When an error occurs during parsing, then this error is returned. Nil otherwise. +func (p *Parser) Execute(input string) *Error { + api := &ParseAPI{ + input: input, + len: len(input), + cursorLine: 1, + cursorColumn: 1, + nextState: p.startHandler, + } + p.startHandler(api) + if !api.stopped { + api.UnexpectedInput() + } + return api.err +} + // Parse starts a parse run on the provided input data. // To retrieve emitted parser Items from the run, make use of the ParseRun.Next() method. 
+// Deprecated func (p *Parser) Parse(input string) *ParseRun { - return &ParseRun{ - p: &ParseAPI{ - input: input, - len: len(input), - cursorLine: 1, - cursorColumn: 1, - nextState: p.startState, - }, - } + panic("Parse() is deprecated, use Execute()") + // return &ParseRun{ + // p: &ParseAPI{ + // input: input, + // len: len(input), + // cursorLine: 1, + // cursorColumn: 1, + // nextState: p.startHandler, + // }, + // } } // Next retrieves the next parsed item for a parse run. @@ -146,6 +166,7 @@ func (run *ParseRun) invokeNextParseHandler(state ParseHandler) { // Matcher.Parse(). type Matcher struct { parser *Parser + match string } // NewMatcher instantiates a new Matcher. @@ -157,20 +178,32 @@ type Matcher struct { // The 'expects' parameter is used for creating an error message in case parsed // input does not match the TokenHandler. func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher { - stateHandler := func(p *ParseAPI) { - p.Expects(expects) + matcher := &Matcher{} + matcher.parser = NewParser(func(p *ParseAPI) { if p.On(tokenHandler).Accept() { - p.EmitLiteral(0) // ItemType is irrelevant + matcher.match = p.BufLiteral() + p.Stop() + } else { + p.Expects(expects) + p.UnexpectedInput() } - } - return &Matcher{parser: NewParser(stateHandler)} + }) + return matcher +} + +// Execute feeds the input to the wrapped TokenHandler function. +// It returns the matched input string and an error. When an error +// occurred during parsing, the error will be set, nil otherwise. +func (m *Matcher) Execute(input string) (string, *Error) { + err := m.parser.Execute(input) + return m.match, err } // Parse checks for a match on the provided input data. 
-func (m *Matcher) Parse(input string) (string, *Error, bool) { +func (m *Matcher) Parse(input string) (string, *Error) { item, err, ok := m.parser.Parse(input).Next() if !ok { - return "", err, false + return "", err } - return item.Value, nil, true + return item.Value, nil } diff --git a/parsekit_test.go b/parsekit_test.go index f40a4d5..f70c416 100644 --- a/parsekit_test.go +++ b/parsekit_test.go @@ -28,16 +28,16 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) { } func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) { - output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input) + output, err := parsekit.NewMatcher(test.tokenHandler, "a match").Execute(test.input) if test.mustMatch { - if !ok { + if err != nil { t.Errorf("Test %q failed with error: %s", test.input, err) } else if output != test.expected { t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output) } } else { - if ok { + if err == nil { t.Errorf("Test %q failed: should not match, but it did", test.input) } } diff --git a/tokenhandlers_builtin.go b/tokenhandlers_builtin.go index b18ece2..bfd5ce0 100644 --- a/tokenhandlers_builtin.go +++ b/tokenhandlers_builtin.go @@ -518,6 +518,7 @@ var M = struct { Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? + TrimSpace func(handler TokenHandler) TokenHandler ToLower func(TokenHandler) TokenHandler ToUpper func(TokenHandler) TokenHandler Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? 
@@ -527,6 +528,7 @@ var M = struct { Trim: ModifyTrim, TrimLeft: ModifyTrimLeft, TrimRight: ModifyTrimRight, + TrimSpace: ModifyTrimSpace, ToLower: ModifyToLower, ToUpper: ModifyToUpper, Replace: ModifyReplace, @@ -589,6 +591,13 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo return ModifyByCallback(handler, modfunc) } +// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies. +// If it does, then its output is taken and whitespace characters as defined by Unicode +// are trimmed from the left and right of the output. +func ModifyTrimSpace(handler TokenHandler) TokenHandler { + return ModifyByCallback(handler, strings.TrimSpace) +} + // ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies. // If it does, then its output is taken and characters from the provided // cutset are converted into upper case. diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 4d86970..f7e4c38 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -205,18 +205,20 @@ func TestSequenceOfRunes(t *testing.T) { a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, ) input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + output := "" parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { p.Expects("Sequence of runes") if p.On(sequence).Accept() { - p.EmitLiteral(TestItem) + output = p.BufLiteral() + p.Stop() } }) - item, err, ok := parser.Parse(input).Next() - if !ok { + err := parser.Execute(input) + if err != nil { t.Fatalf("Parsing failed: %s", err) } - if item.Value != input { - t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value) + if output != input { + t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output) } }