From 75373e5ed5da6e88a8e01b850b68d2f1c40a9191 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Tue, 4 Jun 2019 23:15:02 +0000 Subject: [PATCH] Big simplification run once more, cleaned up code, added tests and examples, made stuff unexported where possible, to slim down the exported interface. --- assert/assert_comparison.go | 19 - assert/assert_panic.go | 34 -- assertions_test.go | 125 +++++++ cursor.go | 24 +- cursor_test.go | 35 +- error.go | 31 ++ error_test.go | 46 +++ .../example_basiccalculator1_test.go | 10 +- .../example_basiccalculator2_test.go | 34 +- .../example_dutchpostcode_test.go | 42 ++- .../example_helloManyStateParser_test.go | 3 +- .../example_helloParserCombinator_test.go | 12 +- .../example_helloSingleStateParser_test.go | 2 +- examples/examples.go | 5 + .../examples_state_test.go | 2 +- examples_test.go | 94 ----- parseapi.go | 274 ++++++++++++++- parsehandler.go | 9 - parsehandler_error.go | 37 -- parsehandler_on.go | 128 ------- parsehandler_routing.go | 99 ------ parsehandler_test.go | 106 ------ parsekit.go | 78 ----- parsekit_test.go | 101 ------ parser.go | 64 ++++ parser_test.go | 327 ++++++++++++++++++ reader.go | 46 +-- reader_test.go | 116 ++----- tokenapi.go | 130 ++++--- tokenapi_example_test.go | 70 ++++ tokenapi_result.go | 106 ------ tokenapi_result_test.go | 27 -- tokenapi_test.go | 288 --------------- tokenhandler.go | 139 -------- tokenhandler_test.go | 51 +-- tokenhandlers_builtin.go | 10 +- tokenhandlers_builtin_test.go | 44 ++- tokenizer.go | 49 +++ tokenizer_test.go | 257 ++++++++++++++ tokenresult.go | 116 +++++++ tokenresult_test.go | 25 ++ 41 files changed, 1662 insertions(+), 1553 deletions(-) delete mode 100644 assert/assert_comparison.go delete mode 100644 assert/assert_panic.go create mode 100644 assertions_test.go create mode 100644 error.go create mode 100644 error_test.go rename example_basiccalculator1_test.go => examples/example_basiccalculator1_test.go (95%) rename example_basiccalculator2_test.go => 
examples/example_basiccalculator2_test.go (86%) rename example_dutchpostcode_test.go => examples/example_dutchpostcode_test.go (59%) rename example_helloManyStateParser_test.go => examples/example_helloManyStateParser_test.go (99%) rename example_helloParserCombinator_test.go => examples/example_helloParserCombinator_test.go (88%) rename example_helloSingleStateParser_test.go => examples/example_helloSingleStateParser_test.go (99%) create mode 100644 examples/examples.go rename examples_state_test.go => examples/examples_state_test.go (97%) delete mode 100644 examples_test.go delete mode 100644 parsehandler.go delete mode 100644 parsehandler_error.go delete mode 100644 parsehandler_on.go delete mode 100644 parsehandler_routing.go delete mode 100644 parsehandler_test.go delete mode 100644 parsekit.go delete mode 100644 parsekit_test.go create mode 100644 parser.go create mode 100644 parser_test.go create mode 100644 tokenapi_example_test.go delete mode 100644 tokenapi_result.go delete mode 100644 tokenapi_result_test.go delete mode 100644 tokenapi_test.go delete mode 100644 tokenhandler.go create mode 100644 tokenizer.go create mode 100644 tokenizer_test.go create mode 100644 tokenresult.go create mode 100644 tokenresult_test.go diff --git a/assert/assert_comparison.go b/assert/assert_comparison.go deleted file mode 100644 index 5f62ba1..0000000 --- a/assert/assert_comparison.go +++ /dev/null @@ -1,19 +0,0 @@ -package assert - -import ( - "testing" -) - -func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) { - if expected != actual { - t.Errorf( - "Unexpected value for %s:\nexpected: %q\nactual: %q", - forWhat, expected, actual) - } -} - -func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) { - if notExpected == actual { - t.Errorf("Unexpected value for %s: %q", forWhat, actual) - } -} diff --git a/assert/assert_panic.go b/assert/assert_panic.go deleted file mode 100644 index 573d426..0000000 --- 
a/assert/assert_panic.go +++ /dev/null @@ -1,34 +0,0 @@ -package assert - -import ( - "regexp" - "testing" -) - -type PanicT struct { - Function func() - Expect string - Regexp bool -} - -func Panic(t *testing.T, p PanicT) { - defer func() { - if r := recover(); r != nil { - mismatch := false - if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) { - mismatch = true - } - if !p.Regexp && p.Expect != r.(string) { - mismatch = true - } - if mismatch { - t.Errorf( - "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q", - p.Expect, r) - } - } else { - t.Errorf("Function did not panic (expected panic message: %s)", p.Expect) - } - }() - p.Function() -} diff --git a/assertions_test.go b/assertions_test.go new file mode 100644 index 0000000..d05ab0e --- /dev/null +++ b/assertions_test.go @@ -0,0 +1,125 @@ +package parsekit + +// This file contains some tools that are used for writing parsekit tests. + +import ( + "regexp" + "testing" +) + +func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) { + if expected != actual { + t.Errorf( + "Unexpected value for %s:\nexpected: %q\nactual: %q", + forWhat, expected, actual) + } +} + +func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) { + if notExpected == actual { + t.Errorf("Unexpected value for %s: %q", forWhat, actual) + } +} + +func AssertTrue(t *testing.T, b bool, assertion string) { + if !b { + t.Errorf("Assertion %s is false", assertion) + } +} + +type PanicT struct { + Function func() + Regexp bool + Expect string +} + +func AssertPanics(t *testing.T, testSet []PanicT) { + for _, test := range testSet { + AssertPanic(t, test) + } +} + +func AssertPanic(t *testing.T, p PanicT) { + defer func() { + if r := recover(); r != nil { + mismatch := false + if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) { + mismatch = true + } + if !p.Regexp && p.Expect != r.(string) { + mismatch = true + 
} + if mismatch { + t.Errorf( + "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q", + p.Expect, r) + } + } else { + t.Errorf("Function did not panic (expected panic message: %s)", p.Expect) + } + }() + p.Function() +} + +type TokenHandlerT struct { + Input string + TokenHandler TokenHandler + MustMatch bool + Expected string +} + +func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { + for _, test := range testSet { + AssertTokenHandler(t, test) + } +} + +func AssertTokenHandler(t *testing.T, test TokenHandlerT) { + result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input) + if test.MustMatch { + if err != nil { + t.Errorf("Test %q failed with error: %s", test.Input, err) + } else if output := result.String(); output != test.Expected { + t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output) + } + } else { + if err == nil { + t.Errorf("Test %q failed: should not match, but it did", test.Input) + } + } +} + +type TokenMakerT struct { + Input string + TokenHandler TokenHandler + Expected []Token +} + +func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { + for _, test := range testSet { + AssertTokenMaker(t, test) + } +} + +func AssertTokenMaker(t *testing.T, test TokenMakerT) { + result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input) + if err != nil { + t.Errorf("Test %q failed with error: %s", test.Input, err) + } else { + if len(result.Tokens()) != len(test.Expected) { + t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens())) + } + for i, expected := range test.Expected { + actual := result.Token(i) + if expected.Type != actual.Type { + t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type) + } + if string(expected.Runes) != string(actual.Runes) { + 
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes) + } + if expected.Value != actual.Value { + t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value) + } + } + } +} diff --git a/cursor.go b/cursor.go index d5b03c1..01a424f 100644 --- a/cursor.go +++ b/cursor.go @@ -2,20 +2,31 @@ package parsekit import "fmt" -// Cursor represents the position of the input cursor in various ways. +// Cursor represents the position of a cursor in various ways. type Cursor struct { - Byte int // The cursor offset in bytes - Rune int // The cursor offset in UTF8 runes + Byte int // The cursor offset in bytes, relative to start of file + Rune int // The cursor offset in UTF8 runes, relative to start of file Column int // The column at which the cursor is (0-indexed) Line int // The line at which the cursor is (0-indexed) } -func (c *Cursor) String() string { +// String produces a string representation of the cursor position. +func (c Cursor) String() string { + if c.Line == 0 && c.Column == 0 { + return fmt.Sprintf("start of file") + } return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1) } -// move updates the position of the cursor, based on the provided input string. -func (c *Cursor) move(input string) { +// Move updates the position of the cursor, based on the provided input string. +// The input string represents the runes that has been skipped over. This +// method will take newlines into account to keep track of line numbers and +// column positions automatically. +// +// Note: when you are writing a parser using parsekit, it's unlikely +// that you will use this method directly. The parsekit package takes care +// of calling it at the correct time. 
+func (c *Cursor) Move(input string) *Cursor { c.Byte += len(input) for _, r := range input { c.Rune++ @@ -26,4 +37,5 @@ func (c *Cursor) move(input string) { c.Column++ } } + return c } diff --git a/cursor_test.go b/cursor_test.go index 58b26c8..ce52048 100644 --- a/cursor_test.go +++ b/cursor_test.go @@ -1,9 +1,38 @@ -package parsekit +package parsekit_test import ( + "fmt" "testing" + + "git.makaay.nl/mauricem/go-parsekit" ) +func ExampleCursor_Move() { + c := &parsekit.Cursor{} + fmt.Printf("after initialization : %s\n", c) + fmt.Printf("after 'some words' : %s\n", c.Move("some words")) + fmt.Printf("after '\\n' : %s\n", c.Move("\n")) + fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.Move("\r\nskip\nlines")) + + // Output: + // after initialization : start of file + // after 'some words' : line 1, column 11 + // after '\n' : line 2, column 1 + // after '\r\nskip\nlines' : line 4, column 6 +} + +func ExampleCursor_String() { + c := &parsekit.Cursor{} + fmt.Println(c.String()) + + c.Move("\nfoobar") + fmt.Println(c.String()) + + // Output: + // start of file + // line 2, column 7 +} + func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { for _, test := range []struct { name string @@ -22,9 +51,9 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, } { - c := Cursor{} + c := parsekit.Cursor{} for _, s := range test.input { - c.move(s) + c.Move(s) } if c.Byte != test.byte { t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte) diff --git a/error.go b/error.go new file mode 100644 index 0000000..5cc6e4b --- /dev/null +++ b/error.go @@ -0,0 +1,31 @@ +package parsekit + +import ( + "fmt" +) + +// Error is used as the error type when parsing errors occur. +// The error includes some context information to allow for useful +// error messages to the user. 
+type Error struct { + Message string + Cursor Cursor +} + +func (err *Error) Error() string { + if err == nil { + _, linepos := getCaller(1) + panic(fmt.Sprintf("parsekit.Error.Error(): method called with nil error at %s", linepos)) + } + return err.Message +} + +// Full returns the current error message, including information about +// the position in the input where the error occurred. +func (err *Error) Full() string { + if err == nil { + _, linepos := getCaller(1) + panic(fmt.Sprintf("parsekit.Error.Full(): method called with nil error at %s", linepos)) + } + return fmt.Sprintf("%s at %s", err, err.Cursor) +} diff --git a/error_test.go b/error_test.go new file mode 100644 index 0000000..06f92ef --- /dev/null +++ b/error_test.go @@ -0,0 +1,46 @@ +package parsekit_test + +import ( + "fmt" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func ExampleError() { + err := &parsekit.Error{ + Message: "it broke down", + Cursor: parsekit.Cursor{Line: 9, Column: 41}, + } + + fmt.Println(err.Error()) + fmt.Printf("%s\n", err) + fmt.Println(err.Full()) + // Output: + // it broke down + // it broke down + // it broke down at line 10, column 42 +} + +func ExampleError_Error() { + err := &parsekit.Error{ + Message: "it broke down", + Cursor: parsekit.Cursor{Line: 9, Column: 41}, + } + + fmt.Println(err.Error()) + fmt.Printf("%s\n", err) + // Output: + // it broke down + // it broke down +} + +func ExampleError_Full() { + err := &parsekit.Error{ + Message: "it broke down", + Cursor: parsekit.Cursor{Line: 9, Column: 41}, + } + + fmt.Println(err.Full()) + // Output: + // it broke down at line 10, column 42 +} diff --git a/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go similarity index 95% rename from example_basiccalculator1_test.go rename to examples/example_basiccalculator1_test.go index 4180aa2..0686acd 100644 --- a/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -5,7 +5,7 @@ // // So positive numbers that can be 
either added or substracted, and whitespace // is ignored. -package parsekit_test +package examples import ( "fmt" @@ -83,15 +83,15 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) { } func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { - var a = parsekit.A + var A = parsekit.A switch { - case p.On(a.Add).Skip(): + case p.On(A.Add).Skip(): c.op = +1 p.Handle(c.number) - case p.On(a.Subtract).Skip(): + case p.On(A.Subtract).Skip(): c.op = -1 p.Handle(c.number) - case !p.On(a.EndOfFile).Stay(): + case !p.On(A.EndOfFile).Stay(): p.Expects("operator, '+' or '-'") p.UnexpectedInput() default: diff --git a/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go similarity index 86% rename from example_basiccalculator2_test.go rename to examples/example_basiccalculator2_test.go index a15fa15..fda7184 100644 --- a/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -10,7 +10,7 @@ // = ( | (MUL|DIV) ) // = ( (SPACE|TAB) | "") // = (FLOAT | LPAREN RPAREN) -package parsekit_test +package examples import ( "fmt" @@ -40,7 +40,7 @@ func Example_basicCalculator2() { output, err := Compute(c.input) output = math.Round(output*1000000) / 1000000 // to make the expectation comparisons usable if err != nil { - fmt.Printf("Input: %q, got error: %s\n", c.input, err) + fmt.Printf("Input: %q, got error: %s\n", c.input, err.Full()) } else { fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", c.input, output, output == c.expected) } @@ -53,11 +53,11 @@ func Example_basicCalculator2() { // Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true // Input: "8.10 + 999/233", got outcome: 12.387554, correct = true // Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", got outcome: -20.168000, correct = true - // Input: "", got error: unexpected end of file - // Input: "(", got error: unexpected end of file - // Input: "10+20-", got error: unexpected end of file - // Input: "10+20-(4*10))", got 
error: unexpected character ')' (expected end of file) - // Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') + // Input: "", got error: unexpected end of file at start of file + // Input: "(", got error: unexpected end of file at line 1, column 2 + // Input: "10+20-", got error: unexpected end of file at line 1, column 7 + // Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) at line 1, column 13 + // Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19 } // --------------------------------------------------------------------------- @@ -94,9 +94,9 @@ func (c *calculator) calculation(p *parsekit.ParseAPI) { func (c *calculator) expr(p *parsekit.ParseAPI) { c.interpreter.push() - var pc, a = parsekit.C, parsekit.A + var C, A = parsekit.C, parsekit.A if p.Handle(c.term) { - for p.On(pc.Any(a.Add, a.Subtract)).Accept() { + for p.On(C.Any(A.Add, A.Subtract)).Accept() { op := p.Result().Rune(0) if !p.Handle(c.term) { return @@ -112,9 +112,9 @@ func (c *calculator) expr(p *parsekit.ParseAPI) { func (c *calculator) term(p *parsekit.ParseAPI) { c.interpreter.push() - var pc, a = parsekit.C, parsekit.A + var C, A = parsekit.C, parsekit.A if p.Handle(c.factor) { - for p.On(pc.Any(a.Multiply, a.Divide)).Accept() { + for p.On(C.Any(A.Multiply, A.Divide)).Accept() { op := p.Result().Rune(0) if !p.Handle(c.factor) { return @@ -129,17 +129,17 @@ func (c *calculator) term(p *parsekit.ParseAPI) { // = ( (SPACE|TAB) | "") // = (FLOAT | LPAREN RPAREN) func (c *calculator) factor(p *parsekit.ParseAPI) { - var a, tok = parsekit.A, parsekit.T - p.On(a.Whitespace).Skip() + var A, T = parsekit.A, parsekit.T + p.On(A.Whitespace).Skip() switch { - case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept(): + case p.On(T.Float64(nil, A.Signed(A.Float))).Accept(): value := p.Result().Value(0).(float64) c.interpreter.pushValue(value) - case p.On(a.LeftParen).Skip(): + case 
p.On(A.LeftParen).Skip(): if !p.Handle(c.expr) { return } - if !p.On(a.RightParen).Skip() { + if !p.On(A.RightParen).Skip() { p.Expects("')'") p.UnexpectedInput() return @@ -148,7 +148,7 @@ func (c *calculator) factor(p *parsekit.ParseAPI) { p.UnexpectedInput() return } - p.On(a.Whitespace).Skip() + p.On(A.Whitespace).Skip() } // --------------------------------------------------------------------------- diff --git a/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go similarity index 59% rename from example_dutchpostcode_test.go rename to examples/example_dutchpostcode_test.go index 752dbad..73a07e4 100644 --- a/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -2,7 +2,7 @@ // The implementation uses only TokenHandler functions and does not implement a // full-fledged state-based Parser for it. -package parsekit_test +package examples import ( "fmt" @@ -10,8 +10,8 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func Example_dutchPostcodeUsingMatcher() { - parser := createPostcodeMatcher() +func Example_dutchPostcodeUsingTokenizer() { + parser := createPostcodeTokenizer() for i, input := range []string{ "1234 AB", @@ -24,18 +24,22 @@ func Example_dutchPostcodeUsingMatcher() { "", "\xcd2222AB", } { - output, err := parser.Execute(input) + result, err := parser.Execute(input) if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full()) } else { - fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) + fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result) + for _, t := range result.Tokens() { + fmt.Printf(" %s(%s)", t.Type, t.Value) + } + fmt.Printf("\n") } } // Output: - // [0] Input: "1234 AB" Output: 1234 AB - // [1] Input: "2233Ab" Output: 2233 AB - // [2] Input: "1001\t\tab" Output: 1001 AB - // [3] Input: "1818ab" Output: 1818 AB + // [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB) + // [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB) + // [2] 
Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB) + // [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB) // [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file // [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file // [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file @@ -47,9 +51,9 @@ func Example_dutchPostcodeUsingMatcher() { // Implementation of the parser // --------------------------------------------------------------------------- -func createPostcodeMatcher() *parsekit.Matcher { +func createPostcodeTokenizer() *parsekit.Tokenizer { // Easy access to the parsekit definitions. - c, a, m := parsekit.C, parsekit.A, parsekit.M + C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T // TokenHandler functions are created and combined to satisfy these rules: // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). @@ -57,14 +61,14 @@ func createPostcodeMatcher() *parsekit.Matcher { // - A space between letters and digits is optional. // - It is good form to write the letters in upper case. // - It is good form to use a single space between digits and letters. 
- digitNotZero := c.Except(a.Rune('0'), a.Digit) - pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) - pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) - pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) - space := m.Replace(c.Opt(a.Whitespace), " ") - postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile) + digitNotZero := C.Except(A.Rune('0'), A.Digit) + pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit)) + pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper) + pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter)) + space := M.Replace(C.Opt(A.Whitespace), " ") + postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile) - // Create a Matcher that wraps the 'postcode' TokenHandler and allows + // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. - return parsekit.NewMatcher(postcode, "a Dutch postcode") + return parsekit.NewTokenizer(postcode, "a Dutch postcode") } diff --git a/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go similarity index 99% rename from example_helloManyStateParser_test.go rename to examples/example_helloManyStateParser_test.go index 29a2543..2308557 100644 --- a/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -13,7 +13,7 @@ // like this, then also take a look at the helloSingleState example, which does // the same thing as this version, only more concise. -package parsekit_test +package examples import ( "fmt" @@ -131,6 +131,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { // different route was taken to implement a more friendly 'end of greeting' // error message. 
func (h *helloparser1) end(p *parsekit.ParseAPI) { + var a = parsekit.A if !p.On(a.EndOfFile).Stay() { p.Expects("end of greeting") p.UnexpectedInput() diff --git a/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go similarity index 88% rename from example_helloParserCombinator_test.go rename to examples/example_helloParserCombinator_test.go index 45033b1..11e714e 100644 --- a/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -5,7 +5,7 @@ // not implement a full-fledged state-based Parser for it. If you want to see the // same kind of functionality, implementated using a Parser, take a look at the // other hello examples. -package parsekit_test +package examples import ( "fmt" @@ -13,8 +13,8 @@ import ( "git.makaay.nl/mauricem/go-parsekit" ) -func Example_helloWorldUsingMatcher() { - parser := createHelloMatcher() +func Example_helloWorldUsingTokenizer() { + parser := createHelloTokenizer() for i, input := range []string{ "Hello, world!", @@ -46,7 +46,7 @@ func Example_helloWorldUsingMatcher() { // Implementation of the parser // --------------------------------------------------------------------------- -func createHelloMatcher() *parsekit.Matcher { +func createHelloTokenizer() *parsekit.Tokenizer { // Easy access to parsekit definition. c, a, m := parsekit.C, parsekit.A, parsekit.M @@ -59,7 +59,7 @@ func createHelloMatcher() *parsekit.Matcher { name := c.OneOrMore(c.Not(a.Excl)) greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile) - // Create a Matcher that wraps the 'greeting' TokenHandler and allows + // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows // us to match some input against that handler. 
- return parsekit.NewMatcher(greeting, "a friendly greeting") + return parsekit.NewTokenizer(greeting, "a friendly greeting") } diff --git a/example_helloSingleStateParser_test.go b/examples/example_helloSingleStateParser_test.go similarity index 99% rename from example_helloSingleStateParser_test.go rename to examples/example_helloSingleStateParser_test.go index c5d3208..a798da2 100644 --- a/example_helloSingleStateParser_test.go +++ b/examples/example_helloSingleStateParser_test.go @@ -11,7 +11,7 @@ // is that this parser reports errors much more fine-grained. This might or // might not be useful for your specific use case. -package parsekit_test +package examples import ( "fmt" diff --git a/examples/examples.go b/examples/examples.go new file mode 100644 index 0000000..124daf9 --- /dev/null +++ b/examples/examples.go @@ -0,0 +1,5 @@ +// Package examples contains various examples for the parsekit module. +// These examples have been moved into their own package, because they +// are quite numerous and quite big. Too big in my opinion to make them +// all available from within the parsekit package godocs. +package examples diff --git a/examples_state_test.go b/examples/examples_state_test.go similarity index 97% rename from examples_state_test.go rename to examples/examples_state_test.go index c8e9282..7dfbe83 100644 --- a/examples_state_test.go +++ b/examples/examples_state_test.go @@ -5,7 +5,7 @@ // for []string. We add a ParseHandler method directly to that type // and let the parsing code fill the slice with strings during parsing. 
-package parsekit_test +package examples import ( "fmt" diff --git a/examples_test.go b/examples_test.go deleted file mode 100644 index c765bbb..0000000 --- a/examples_test.go +++ /dev/null @@ -1,94 +0,0 @@ -package parsekit_test - -import ( - "fmt" - - "git.makaay.nl/mauricem/go-parsekit" -) - -func ExampleError() { - err := &parsekit.Error{ - Message: "it broke down", - Line: 10, - Column: 42, - } - - fmt.Println(err.Error()) - fmt.Printf("%s\n", err) - fmt.Println(err.Full()) - // Output: - // it broke down - // it broke down - // it broke down at line 10, column 42 -} - -func ExampleError_Error() { - err := &parsekit.Error{ - Message: "it broke down", - Line: 10, - Column: 42, - } - - fmt.Println(err.Error()) - fmt.Printf("%s\n", err) - // Output: - // it broke down - // it broke down -} - -func ExampleError_Full() { - err := &parsekit.Error{ - Message: "it broke down", - Line: 10, - Column: 42, - } - - fmt.Println(err.Full()) - // Output: - // it broke down at line 10, column 42 -} - -func ExampleMatchAnyRune_usingAcceptedRunes() { - // Easy access to the parsekit definitions. - a := parsekit.A - - matches := []string{} - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - for p.On(a.AnyRune).Accept() { - matches = append(matches, p.Result().String()) - } - p.ExpectEndOfFile() - }) - err := parser.Execute("¡Any will dö!") - - fmt.Printf("Matches = %q, Error = %s\n", matches, err) - // Output: - // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = -} - -func ExampleMatchAnyRune_usingTokens() { - // Easy access to the parsekit definitions. 
- c, a, tok := parsekit.C, parsekit.A, parsekit.T - - var tokens []*parsekit.Token - var accepted string - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() { - tokens = p.Result().Tokens() - accepted = p.Result().String() - } - p.ExpectEndOfFile() - }) - parser.Execute("¡Any will dö!") - - fmt.Printf("Runes accepted: %q\n", accepted) - fmt.Printf("Token values: ") - for _, t := range tokens { - fmt.Printf("%c ", t.Value) - } - // Output: - // Runes accepted: "¡Any will dö!" - // Token values: ¡ A n y w i l l d ö ! -} diff --git a/parseapi.go b/parseapi.go index f2bbc95..c6f4ffe 100644 --- a/parseapi.go +++ b/parseapi.go @@ -2,7 +2,7 @@ package parsekit import ( "fmt" - "runtime" + "io" "strings" ) @@ -12,7 +12,7 @@ type ParseAPI struct { tokenAPI *TokenAPI // the input reader loopCheck map[string]bool // used for parser loop detection expecting string // a description of what the current state expects to find (see Expects()) - result *Result // TokenHandler result, as received from On(...).Accept() + result *TokenResult // Last TokenHandler result as retrieved by On(...).Accept() err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored } @@ -29,17 +29,20 @@ func (p *ParseAPI) panicWhenStoppedOrInError() { return } - called, _ := p.getCaller(1) + called, _ := getCaller(1) parts := strings.Split(called, ".") calledShort := parts[len(parts)-1] - caller, filepos := p.getCaller(2) + _, filepos := getCaller(2) after := "Error()" if p.stopped { after = "Stop()" } - panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after)) + panic(fmt.Sprintf( + "parsekit.ParseAPI.%s(): Illegal call to %s() at %s: "+ + "no calls allowed after ParseAPI.%s", + calledShort, calledShort, filepos, after)) } func (p 
*ParseAPI) isStoppedOrInError() bool { @@ -51,18 +54,261 @@ func (p *ParseAPI) initLoopCheck() { } func (p *ParseAPI) checkForLoops() { - caller, filepos := p.getCaller(2) + _, filepos := getCaller(2) if _, ok := p.loopCheck[filepos]; ok { - panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos)) + panic(fmt.Sprintf("parsekit.ParseAPI: Loop detected in parser at %s", filepos)) } p.loopCheck[filepos] = true } -// TODO delete this one -func (p *ParseAPI) getCaller(depth int) (string, string) { - // No error handling, because we call this method ourselves with safe depth values. - pc, file, line, _ := runtime.Caller(depth + 1) - filepos := fmt.Sprintf("%s:%d", file, line) - caller := runtime.FuncForPC(pc) - return caller.Name(), filepos +// On checks if the input at the current cursor position matches the provided +// TokenHandler. On must be chained with another method that tells the parser +// what action to perform when a match was found: +// +// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes. +// +// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer. +// +// 3) On(...).Stay() - Do nothing, the cursor stays at the same position. +// +// So an example chain could look like this: +// +// p.On(parsekit.A.Whitespace).Skip() +// +// The chain as a whole returns a boolean that indicates whether or not at match +// was found. When no match was found, false is returned and Skip() and Accept() +// will have no effect. Because of this, typical use of an On() chain is as +// expression for a conditional statement (if, switch/case, for). E.g.: +// +// // Skip multiple exclamation marks. +// for p.On(parsekit.A.Excl).Skip() { } +// +// // Fork a route based on the input. 
+// switch { +// case p.On(parsekit.A.Excl).Stay() +// p.RouteTo(stateHandlerA) +// case p.On(parsekit.A.Colon).Stay(): +// p.RouteTo(stateHandlerB) +// default: +// p.RouteTo(stateHandlerC) +// } +// +// // When there's a "hi" on input, then say hello. +// if p.On(parsekit.C.Str("hi")).Accept() { +// fmt.Println("Hello!") +// } +func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction { + p.panicWhenStoppedOrInError() + p.checkForLoops() + if tokenHandler == nil { + _, filepos := getCaller(1) + panic(fmt.Sprintf( + "parsekit.ParseAPI.On(): On() called with nil "+ + "tokenHandler argument at %s", filepos)) + } + + p.result = nil + p.tokenAPI.result = newTokenResult() + fork := p.tokenAPI.Fork() + ok := tokenHandler(fork) + + return &ParseAPIOnAction{ + parseAPI: p, + tokenAPI: fork, + ok: ok, + } +} + +// ParseAPIOnAction is a struct that is used for building the On()-method chain. +// The On() method will return an initialized struct of this type. +type ParseAPIOnAction struct { + parseAPI *ParseAPI + tokenAPI *TokenAPI + ok bool +} + +// Accept tells the parser to move the cursor past a match that was found, +// and to make the TokenResult from the TokenAPI available in the ParseAPI +// through the Result() method. +// +// Returns true in case a match was found. +// When no match was found, then no action is taken and false is returned. +func (a *ParseAPIOnAction) Accept() bool { + if a.ok { + a.tokenAPI.Merge() + a.parseAPI.result = a.tokenAPI.root.result + a.flushTokenAPI() + a.flushReader() //a.flush() + + } + return a.ok +} + +// Skip tells the parser to move the cursor past a match that was found, +// without making the results available through the ParseAPI. +// +// Note that functionally, you could call Accept() just as well, simply +// ignoring the results. However, the Skip() call is a bit more efficient +// than the Accept() call and (more important if you ask me) the code +// expresses more clearly that your intent is to skip the match. 
+// +// Returns true in case a match was found. +// When no match was found, then no action is taken and false is returned. +func (a *ParseAPIOnAction) Skip() bool { + if a.ok { + a.tokenAPI.root.cursor = a.tokenAPI.cursor + a.parseAPI.result = nil + a.flushTokenAPI() + a.flushReader() + } + return a.ok +} + +// Stay tells the parser to not move the cursor after finding a match. +// +// A typical use of Stay() is to let one ParseHandler detect the start +// of some kind of token, but without moving the read cursor forward. +// When a match is found, it hands off control to another ParseHandler +// to take care of the actual token parsing. +// +// Returns true in case a match was found, false otherwise. +func (a *ParseAPIOnAction) Stay() bool { + if a.ok { + a.parseAPI.result = nil + a.flushTokenAPI() + } + return a.ok +} + +func (a *ParseAPIOnAction) flushTokenAPI() { + a.tokenAPI.root.result = newTokenResult() + a.tokenAPI.root.detachChilds() +} + +func (a *ParseAPIOnAction) flushReader() { + if a.tokenAPI.offset > 0 { + a.tokenAPI.root.reader.flush(a.tokenAPI.offset) + a.tokenAPI.root.offset = 0 + a.parseAPI.initLoopCheck() + } +} + +// Result returns a TokenResult struct, containing results as produced by the +// last ParseAPI.On().Accept() call. +func (p *ParseAPI) Result() *TokenResult { + result := p.result + if p.result == nil { + _, filepos := getCaller(1) + panic(fmt.Sprintf( + "parsekit.ParseAPI.TokenResult(): TokenResult() called at %s without "+ + "calling ParseAPI.Accept() on beforehand", filepos)) + } + return result +} + +// Handle is used to execute other ParseHandler functions from within your +// ParseHandler function. +// +// The boolean return value is true when the parser can still continue. +// It will be false when either an error was set (using ParseAPI.Error()), +// or the parser was stopped (using ParseAPI.Stop()). 
+func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { + p.panicWhenStoppedOrInError() + p.panicWhenParseHandlerNil(parseHandler) + parseHandler(p) + return !p.isStoppedOrInError() +} + +func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { + if parseHandler == nil { + _, filepos := getCaller(2) + panic(fmt.Sprintf("parsekit.ParseAPI.Handle(): Handle() called with nil input at %s", filepos)) + } +} + +// Expects is used to let a ParseHandler function describe what input it is +// expecting. This expectation is used in error messages to provide some +// context to them. +// +// When defining an expectation inside a ParseHandler, you do not need to +// handle unexpected input yourself. When the end of the parser is reached +// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(), +// an automatic error will be emitted using ParseAPI.UnexpectedInput(). +func (p *ParseAPI) Expects(description string) { + p.panicWhenStoppedOrInError() + p.expecting = description +} + +// Stop is used by the parser implementation to tell the ParseAPI that it has +// completed the parsing process successfully. +// +// When the parser implementation returns without stopping first (and +// without running into an error), the Parser.Execute() will call +// ParseAPI.ExpectEndOfFile() to check if the end of the file was reached. +// If not, then things will end in an UnexpectedError(). +// Even though this fallback mechanism will work in a lot of cases, try to make +// your parser explicit about things and call Stop() actively yourself. +// +// After stopping, no more calls to ParseAPI methods are allowed. +// Calling a method in this state will result in a panic. 
+func (p *ParseAPI) Error(format string, args ...interface{}) { + // No call to p.panicWhenStoppedOrInError(), to allow a parser to + // set a different error message when needed. + message := fmt.Sprintf(format, args...) + p.err = &Error{message, p.tokenAPI.Cursor()} +} + +// ExpectEndOfFile can be used to check if the input is at end of file. +// +// When it finds that the end of the file was indeed reached, then the +// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected +// input is reported through ParseAPI.UnexpectedInput() with "end of file" +// as the expectation. +func (p *ParseAPI) ExpectEndOfFile() { + p.panicWhenStoppedOrInError() + if p.On(A.EndOfFile).Stay() { + p.Stop() + } else { + p.Expects("end of file") + p.UnexpectedInput() + } +} + +// UnexpectedInput is used to set an error that tells the user that some +// unexpected input was encountered. +// +// It can automatically produce an error message for a couple of situations: +// 1) input simply didn't match the expectation +// 2) the end of the input was reached +// 3) there was an invalid UTF8 character on the input. +// +// The parser implementation can provide some feedback for this error by +// calling ParseAPI.Expects() to set the expectation. When set, the +// expectation is included in the error message. 
+func (p *ParseAPI) UnexpectedInput() { + p.panicWhenStoppedOrInError() + r, err := p.tokenAPI.NextRune() + switch { + case err == nil: + p.Error("unexpected character %q%s", r, fmtExpects(p)) + case err == io.EOF: + p.Error("unexpected end of file%s", fmtExpects(p)) + default: + p.Error("unexpected error '%s'%s", err, fmtExpects(p)) + } +} + +func fmtExpects(p *ParseAPI) string { + if p.expecting == "" { + return "" + } + return fmt.Sprintf(" (expected %s)", p.expecting) } diff --git a/parsehandler.go b/parsehandler.go deleted file mode 100644 index e035439..0000000 --- a/parsehandler.go +++ /dev/null @@ -1,9 +0,0 @@ -package parsekit - -// ParseHandler defines the type of function that must be implemented to handle -// a parsing state in a Parser state machine. -// -// A ParseHandler function gets a ParseAPI struct as its input. This struct holds -// all the internal state for the parsing state machine and provides the -// interface that the ParseHandler uses to interact with the parser. -type ParseHandler func(*ParseAPI) diff --git a/parsehandler_error.go b/parsehandler_error.go deleted file mode 100644 index 5fdf9ab..0000000 --- a/parsehandler_error.go +++ /dev/null @@ -1,37 +0,0 @@ -package parsekit - -import ( - "fmt" -) - -// Error is used as the error type when parsing errors occur. -// The error includes some context information to allow for useful -// error messages to the user. -type Error struct { - Message string - Line int - Column int -} - -func (err *Error) Error() string { - return err.Message -} - -// Full returns the current error message, including information about -// the position in the input where the error occurred. -func (err *Error) Full() string { - if err.Line == 0 { - return fmt.Sprintf("%s at start of file", err) - } else { - return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column) - } -} - -// Error sets the error message in the parser API. 
This error message -// will eventually be returned by the Parser.Execute() method. -func (p *ParseAPI) Error(format string, args ...interface{}) { - // No call to p.panicWhenStoppedOrInError(), to allow a parser to - // set a different error message when needed. - message := fmt.Sprintf(format, args...) - p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column} -} diff --git a/parsehandler_on.go b/parsehandler_on.go deleted file mode 100644 index 1773cfb..0000000 --- a/parsehandler_on.go +++ /dev/null @@ -1,128 +0,0 @@ -package parsekit - -import "fmt" - -// On checks if the input at the current cursor position matches the provided -// TokenHandler. On must be chained with another method that tells the parser -// what action to perform when a match was found: -// -// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes. -// -// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer. -// -// 3) On(...).Stay() - Do nothing, the cursor stays at the same position. -// -// So an example chain could look like this: -// -// p.On(parsekit.A.Whitespace).Skip() -// -// The chain as a whole returns a boolean that indicates whether or not at match -// was found. When no match was found, false is returned and Skip() and Accept() -// will have no effect. Because of this, typical use of an On() chain is as -// expression for a conditional statement (if, switch/case, for). E.g.: -// -// // Skip multiple exclamation marks. -// for p.On(parsekit.A.Excl).Skip() { } -// -// // Fork a route based on the input. -// switch { -// case p.On(parsekit.A.Excl).Stay() -// p.RouteTo(stateHandlerA) -// case p.On(parsekit.A.Colon).Stay(): -// p.RouteTo(stateHandlerB) -// default: -// p.RouteTo(stateHandlerC) -// } -// -// // When there's a "hi" on input, then say hello. 
-// if p.On(parsekit.C.Str("hi")).Accept() { -// fmt.Println("Hello!") -// } -func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction { - p.panicWhenStoppedOrInError() - p.checkForLoops() - if tokenHandler == nil { - panic("ParseHandler bug: tokenHandler argument for On() is nil") - } - - p.result = nil - p.tokenAPI.result = NewResult() - fork := p.tokenAPI.Fork() - ok := tokenHandler(fork) - - return &ParseAPIOnAction{ - parseAPI: p, - tokenAPI: fork, - ok: ok, - } -} - -// ParseAPIOnAction is a struct that is used for building the On()-method chain. -// The On() method will return an initialized struct of this type. -type ParseAPIOnAction struct { - parseAPI *ParseAPI - tokenAPI *TokenAPI - ok bool -} - -// Accept tells the parser to move the cursor past a match that was found, -// and to make the TokenHandler results available in the ParseAPI through -// the Result() method. -// -// Returns true in case a match was found. -// When no match was found, then no action is taken and false is returned. -func (a *ParseAPIOnAction) Accept() bool { - if a.ok { - a.tokenAPI.Merge() - a.parseAPI.result = a.tokenAPI.root.result - a.tokenAPI.root.result = NewResult() - a.tokenAPI.root.detachChilds() - if a.tokenAPI.offset > 0 { - a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset) - a.parseAPI.initLoopCheck() - } - } - return a.ok -} - -// Skip tells the parser to move the cursor past a match that was found, -// without making the results available through the ParseAPI. -// -// Returns true in case a match was found. -// When no match was found, then no action is taken and false is returned. 
-func (a *ParseAPIOnAction) Skip() bool { - if a.ok { - a.tokenAPI.root.cursor = a.tokenAPI.cursor - a.tokenAPI.root.result = NewResult() - a.tokenAPI.root.detachChilds() - if a.tokenAPI.offset > 0 { - a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset) - a.parseAPI.initLoopCheck() - } - } - return a.ok -} - -// Stay tells the parser to not move the cursor after finding a match. -// Returns true in case a match was found, false otherwise. -func (a *ParseAPIOnAction) Stay() bool { - if a.ok { - a.tokenAPI.root.result = NewResult() - a.tokenAPI.root.detachChilds() - } - return a.ok -} - -// Result returns a Result struct, containing results as produced by the -// last ParseAPI.On() call. -func (p *ParseAPI) Result() *Result { - result := p.result - if p.result == nil { - caller, filepos := getCaller(1) - panic(fmt.Sprintf( - "parsekit.ParseAPI.Result(): Result() called without calling "+ - "ParseAPI.Accept() on beforehand to make the result available "+ - "from %s at %s", caller, filepos)) - } - return result -} diff --git a/parsehandler_routing.go b/parsehandler_routing.go deleted file mode 100644 index 862b8f3..0000000 --- a/parsehandler_routing.go +++ /dev/null @@ -1,99 +0,0 @@ -package parsekit - -import ( - "fmt" - "io" -) - -// Handle is used to execute other ParseHandler functions from within your -// ParseHandler function. -// -// The boolean return value is true when the parser can still continue. -// It will be false when either an error was set (using ParseAPI.Error()), -// or the parser was stopped (using ParseAPI.Stop()). 
-func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { - p.panicWhenStoppedOrInError() - p.panicWhenParseHandlerNil(parseHandler) - parseHandler(p) - return !p.isStoppedOrInError() -} - -func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { - if parseHandler == nil { - caller, filepos := p.getCaller(2) - panic(fmt.Sprintf("ParseAPI.Handle() called with nil input from %s at %s", caller, filepos)) - } -} - -// Expects is used to let a ParseHandler function describe what input it is -// expecting. This expectation is used in error messages to provide some -// context to them. -// -// When defining an expectation inside a ParseHandler, you do not need to -// handle unexpected input yourself. When the end of the parser is reached -// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(), -// an automatic error will be emitted using ParseAPI.UnexpectedInput(). -func (p *ParseAPI) Expects(description string) { - p.panicWhenStoppedOrInError() - p.expecting = description -} - -// Stop is used by the parser impementation to tell the API that it has -// completed the parsing process successfully. -// -// When the parser implementation returns without stopping first, the -// Parser.Execute() will assume that something went wrong and calls -// ParserAPI.UnexpectedInput() to report an error about this. -// -// The parser implementation can define what was being expected, by -// providing a description to ParseAPI.Expecting(). -func (p *ParseAPI) Stop() { - p.stopped = true -} - -// ExpectEndOfFile can be used to check if the input is at end of file. -// -// When it finds that the end of the file was indeed reached, then the -// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected -// input is reported through ParseAPI.UnexpectedInput() with "end of file" -// as the expectation. 
-func (p *ParseAPI) ExpectEndOfFile() { - p.panicWhenStoppedOrInError() - if p.On(A.EndOfFile).Stay() { - p.Stop() - } else { - p.Expects("end of file") - p.UnexpectedInput() - } -} - -// UnexpectedInput is used to set an error that tells the user that some -// unexpected input was encountered. -// -// It can automatically produce an error message for a couple of situations: -// 1) input simply didn't match the expectation -// 2) the end of the input was reached -// 3) there was an invalid UTF8 character on the input. -// -// The parser implementation can provide some feedback for this error by -// calling ParseAPI.Expects() to set the expectation. When set, the -// expectation is included in the error message. -func (p *ParseAPI) UnexpectedInput() { - p.panicWhenStoppedOrInError() - r, err := p.tokenAPI.NextRune() - switch { - case err == nil: - p.Error("unexpected character %q%s", r, fmtExpects(p)) - case err == io.EOF: - p.Error("unexpected end of file%s", fmtExpects(p)) - default: - p.Error("unexpected error '%s'%s", err, fmtExpects(p)) - } -} - -func fmtExpects(p *ParseAPI) string { - if p.expecting == "" { - return "" - } - return fmt.Sprintf(" (expected %s)", p.expecting) -} diff --git a/parsehandler_test.go b/parsehandler_test.go deleted file mode 100644 index 4ec35b8..0000000 --- a/parsehandler_test.go +++ /dev/null @@ -1,106 +0,0 @@ -package parsekit_test - -import ( - "testing" - - "git.makaay.nl/mauricem/go-parsekit" -) - -func TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.On(nil) - }) - RunPanicTest(t, PanicTest{ - func() { p.Execute("") }, - `ParseHandler bug: tokenHandler argument for On\(\) is nil`}) -} - -func TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics(t *testing.T) { - otherHandler := func(p *parsekit.ParseAPI) { - panic("This is not the handler you're looking for") - } - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Stop() - 
p.Handle(otherHandler) - }) - RunPanicTest(t, PanicTest{ - func() { p.Execute("") }, - `Illegal call to ParseAPI.Handle\(\) from .*ParsekitPanics.func.* at ` + - `.*/parsehandler_test.go:\d+: no calls allowed after ParseAPI.Stop\(\)`}) -} - -func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) { - otherHandler := func(p *parsekit.ParseAPI) { - panic("This is not the handler you're looking for") - } - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Error("It ends here") - p.Handle(otherHandler) - }) - RunPanicTest(t, PanicTest{ - func() { p.Execute("") }, - `Illegal call to ParseAPI\.Handle\(\) from .*ParsekitPanics\.func2 at ` + - `.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`}) -} - -type parserWithLoop struct { - loopCounter int -} - -func (l *parserWithLoop) first(p *parsekit.ParseAPI) { - p.On(parsekit.A.ASCII).Accept() - p.Handle(l.second) -} - -func (l *parserWithLoop) second(p *parsekit.ParseAPI) { - p.On(parsekit.A.ASCII).Accept() - p.Handle(l.third) -} - -func (l *parserWithLoop) third(p *parsekit.ParseAPI) { - if l.loopCounter++; l.loopCounter > 100 { - p.Error("Loop not detected by parsekit") - return - } - p.On(parsekit.A.ASCII).Accept() - p.Handle(l.first) -} - -func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { - looper := &parserWithLoop{} - parser := parsekit.NewParser(looper.first) - RunPanicTest(t, PanicTest{ - func() { parser.Execute("Het houdt niet op, niet vanzelf") }, - `Loop detected in parser in .*\(\*parserWithLoop\).second at .*/parsehandler_test\.go:\d+`}) -} - -// This test incorporates an actual loop bug that I dropped on myself and -// that I could not easily spot in my code. It sounded so logical: -// I want to get chunks of 5 chars from the input, so I simply loop on: -// -// p.On(c.Max(5, a.AnyRune)) -// -// The problem here is that Max(5, ...) will also match when there is -// no more input, since Max(5, ---) is actually MinMax(0, 5, ...). 
-// Therefore the loop will never stop. Solving the loop was simple: -// -// p.On(c.MinMax(1, 5, a.AnyRune)) -// -// Now the loop stops when the parser finds no more matching input data. -func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - for p.On(c.Max(5, a.AnyRune)).Accept() { - } - p.Stop() - }) - RunPanicTest(t, PanicTest{ - func() { parser.Execute("This will end soon") }, - `Loop detected in parser in .*ParserPanics.* at .*/parsehandler_test.go:\d+`}) -} - -func TestGivenNullHandler_HandlePanics(t *testing.T) { - parser := parsekit.NewParser(nil) - RunPanicTest(t, PanicTest{ - func() { parser.Execute("") }, - `ParseAPI.Handle\(\) called with nil input from .*\(\*Parser\).Execute at .*/parsekit\.go:\d+`}) -} diff --git a/parsekit.go b/parsekit.go deleted file mode 100644 index fe258b5..0000000 --- a/parsekit.go +++ /dev/null @@ -1,78 +0,0 @@ -package parsekit - -import ( - "strings" -) - -// Parser is the top-level struct that holds the configuration for a parser. -// The Parser can be instantiated using the parsekit.NewParser() method. -type Parser struct { - startHandler ParseHandler // the function that handles the very first state -} - -// NewParser instantiates a new Parser. -// -// The Parser is a state machine-style recursive descent parser, in which -// ParseHandler functions are used to move the state machine forward during -// parsing. This style of parser is typically used for parsing programming -// languages and structured data formats (like json, xml, toml, etc.) -// -// To parse input data, use the method Parser.Execute(). -func NewParser(startHandler ParseHandler) *Parser { - return &Parser{startHandler: startHandler} -} - -// Execute starts the parser for the provided input. -// When an error occurs during parsing, then this error is returned. Nil otherwise. 
-func (p *Parser) Execute(input string) *Error { - api := &ParseAPI{ - tokenAPI: NewTokenAPI(strings.NewReader(input)), - loopCheck: map[string]bool{}, - } - api.Handle(p.startHandler) - if !api.stopped && api.err == nil { - api.UnexpectedInput() - } - return api.err -} - -// Matcher is the top-level struct that holds the configuration for -// a parser that is based solely on a TokenHandler function. -// The Matcher can be instantiated using the parsekit.NewMatcher() -// method. -// TODO Rename to Tokenizer -type Matcher struct { - parser *Parser - result *Result -} - -// NewMatcher instantiates a new Matcher. -// -// This is a simple wrapper around a TokenHandler function. It can be used to -// match an input string against that TokenHandler function and retrieve the -// results in a straight forward way. -// -// The 'expects' parameter is used for creating an error message in case parsed -// input does not match the TokenHandler. -// TODO Rename to NewTokenizer, and make matcher Tokeninzer, also see if we can use a Reader straight away, no ParseAPI. -func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher { - matcher := &Matcher{} - matcher.parser = NewParser(func(p *ParseAPI) { - if p.On(tokenHandler).Accept() { - matcher.result = p.Result() - p.Stop() - } else { - p.Expects(expects) - p.UnexpectedInput() - } - }) - return matcher -} - -// Execute feeds the input to the wrapped TokenHandler function. -// It returns the TokenHandler's results. When an error occurred during parsing, -// the error will be set, nil otherwise. -func (m *Matcher) Execute(input string) (*Result, *Error) { - err := m.parser.Execute(input) - return m.result, err -} diff --git a/parsekit_test.go b/parsekit_test.go deleted file mode 100644 index 3440e50..0000000 --- a/parsekit_test.go +++ /dev/null @@ -1,101 +0,0 @@ -package parsekit_test - -// This file only provides building blocks for writing tests. -// No actual tests belong in this file. 
- -import ( - "regexp" - "testing" - - "git.makaay.nl/mauricem/go-parsekit" -) - -// Easy access to the parsekit definitions. -var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T - -type TokenHandlerTest struct { - Input string - TokenHandler parsekit.TokenHandler - MustMatch bool - Expected string -} - -func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) { - for _, test := range testSet { - RunTokenHandlerTest(t, test) - } -} - -func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) { - result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input) - if test.MustMatch { - if err != nil { - t.Errorf("Test %q failed with error: %s", test.Input, err) - } else if output := result.String(); output != test.Expected { - t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output) - } - } else { - if err == nil { - t.Errorf("Test %q failed: should not match, but it did", test.Input) - } - } -} - -type TokenMakerTest struct { - Input string - TokenHandler parsekit.TokenHandler - Expected []parsekit.Token -} - -func RunTokenMakerTest(t *testing.T, test TokenMakerTest) { - result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input) - if err != nil { - t.Errorf("Test %q failed with error: %s", test.Input, err) - } else { - if len(result.Tokens()) != len(test.Expected) { - t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens())) - } - for i, expected := range test.Expected { - actual := result.Token(i) - if expected.Type != actual.Type { - t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type) - } - if string(expected.Runes) != string(actual.Runes) { - t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes) - } - if expected.Value != 
actual.Value { - t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value) - } - } - } -} - -func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) { - for _, test := range testSet { - RunTokenMakerTest(t, test) - } -} - -type PanicTest struct { - function func() - expected string -} - -func RunPanicTest(t *testing.T, p PanicTest) { - defer func() { - if r := recover(); r != nil { - if !regexp.MustCompile(p.expected).MatchString(r.(string)) { - t.Errorf("Function did panic, but unexpected panic message received:\nexpected: %q\nactual: %q\n", p.expected, r) - } - } else { - t.Errorf("Function did not panic (expected panic message: %s)", p.expected) - } - }() - p.function() -} - -func RunPanicTests(t *testing.T, testSet []PanicTest) { - for _, test := range testSet { - RunPanicTest(t, test) - } -} diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..7c4195a --- /dev/null +++ b/parser.go @@ -0,0 +1,64 @@ +package parsekit + +import ( + "fmt" + "runtime" + "strings" +) + +// Parser is the top-level struct that holds the configuration for a parser. +// The Parser can be instantiated using the parsekit.NewParser() method. +type Parser struct { + startHandler ParseHandler // the function that handles the very first state +} + +// ParseHandler defines the type of function that must be implemented to handle +// a parsing state in a Parser state machine. +// +// A ParseHandler function gets a ParseAPI struct as its input. This struct holds +// all the internal state for the parsing state machine and provides the +// interface that the ParseHandler uses to interact with the parser. +type ParseHandler func(*ParseAPI) + +// NewParser instantiates a new Parser. +// +// The Parser is a state machine-style recursive descent parser, in which +// ParseHandler functions are used to move the state machine forward during +// parsing. 
This style of parser is typically used for parsing programming +// languages and structured data formats (like json, xml, toml, etc.) +// +// To parse input data, use the method Parser.Execute(). +func NewParser(startHandler ParseHandler) *Parser { + if startHandler == nil { + _, filepos := getCaller(1) + panic(fmt.Sprintf("parsekit.NewParser(): NewParser() called with nil input at %s", filepos)) + } + return &Parser{startHandler: startHandler} +} + +// Execute starts the parser for the provided input. +// When an error occurs during parsing, then this error is returned. Nil otherwise. +func (p *Parser) Execute(input string) *Error { + api := &ParseAPI{ + tokenAPI: NewTokenAPI(strings.NewReader(input)), + loopCheck: map[string]bool{}, + } + if api.Handle(p.startHandler) { + // Handle indicated that parsing could still continue, meaning that there + // was no error and that the parsing has not actively been Stop()-ed. + // However, at this point, the parsing really should have stopped. + // We'll see what happens when we tell the parser that EOF was expected. + // This might work if we're indeed at EOF. Otherwise, an error will be + // generated. + api.ExpectEndOfFile() + } + return api.err +} + +func getCaller(depth int) (string, string) { + // No error handling, because we call this method ourselves with safe depth values. + pc, file, line, _ := runtime.Caller(depth + 1) + filepos := fmt.Sprintf("%s:%d", file, line) + caller := runtime.FuncForPC(pc) + return caller.Name(), filepos +} diff --git a/parser_test.go b/parser_test.go new file mode 100644 index 0000000..5df49f8 --- /dev/null +++ b/parser_test.go @@ -0,0 +1,327 @@ +package parsekit_test + +import ( + "fmt" + "testing" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func ExampleParser_usingAcceptedRunes() { + // Easy access to the parsekit definitions. 
+ a := parsekit.A + + matches := []string{} + + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + for p.On(a.AnyRune).Accept() { + matches = append(matches, p.Result().String()) + } + p.ExpectEndOfFile() + }) + err := parser.Execute("¡Any will dö!") + + fmt.Printf("Matches = %q, Error = %s\n", matches, err) + // Output: + // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = +} + +func ExampleParser_usingTokens() { + // Easy access to the parsekit definitions. + c, a, tok := parsekit.C, parsekit.A, parsekit.T + + var tokens []*parsekit.Token + var accepted string + + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() { + tokens = p.Result().Tokens() + accepted = p.Result().String() + } + p.ExpectEndOfFile() + }) + parser.Execute("¡Any will dö!") + + fmt.Printf("Runes accepted: %q\n", accepted) + fmt.Printf("Token values: ") + for _, t := range tokens { + fmt.Printf("%c ", t.Value) + } + // Output: + // Runes accepted: "¡Any will dö!" + // Token values: ¡ A n y w i l l d ö ! +} + +func ExampleParseAPI_UnexpectedInput() { + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Expects("a thing") + p.UnexpectedInput() + }) + err := parser.Execute("Whatever, this parser will never be happy...") + fmt.Println(err.Full()) + + // Output: + // unexpected character 'W' (expected a thing) at start of file +} + +func ExampleParseAPIOnAction_Accept() { + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + // When a case-insensitive match on "Yowza!" is found by the + // tokenizer, then Accept() will make the result available + // through ParseAPI.Result() + if p.On(parsekit.A.StrNoCase("Yowza!")).Accept() { + // Result.String() returns a string containing all + // accepted runes that were matched against. + fmt.Println(p.Result().String()) + } + }) + parser.Execute("YOWZA!") + + // Output: + // YOWZA! 
+} + +func ExampleParseAPIOnAction_Skip() { + var result string + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + for loop := true; loop; { + switch { + case p.On(parsekit.A.Rune('X')).Skip(): + // NOOP, skip this rune + case p.On(parsekit.A.AnyRune).Accept(): + result += p.Result().String() + default: + loop = false + } + } + }) + parser.Execute("HXeXllXoXX, XXwoXrlXXXd!") + fmt.Println(result) + + // Output: + // Hello, world! +} + +func ExampleParseAPI_Stop() { + C, A := parsekit.C, parsekit.A + + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + fmt.Printf("First word: ") + for p.On(C.Not(A.Space)).Accept() { + fmt.Printf("%s", p.Result()) + } + p.Stop() + }) + parser.Execute("Input with spaces") + + // Output: + // First word: Input +} + +func ExampleParseAPI_Stop_notCalledAndNoInputPending() { + C, A := parsekit.C, parsekit.A + + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + fmt.Printf("Word: ") + for p.On(C.Not(A.Space)).Accept() { + fmt.Printf("%s", p.Result()) + } + fmt.Printf("\n") + }) + err := parser.Execute("Troglodyte") + fmt.Printf("Error is nil: %t\n", err == nil) + + // Output: + // Word: Troglodyte + // Error is nil: true +} + +func ExampleParseAPI_Stop_notCalledButInputPending() { + C, A := parsekit.C, parsekit.A + + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + fmt.Printf("First word: ") + for p.On(C.Not(A.Space)).Accept() { + fmt.Printf("%s", p.Result()) + } + fmt.Printf("\n") + }) + err := parser.Execute("Input with spaces") + fmt.Printf("Error: %s\n", err.Full()) + + // Output: + // First word: Input + // Error: unexpected character ' ' (expected end of file) at line 1, column 6 +} + +func ExampleParseAPIOnAction_Stay() { + // Definition of a fantasy serial number format. + C, A := parsekit.C, parsekit.A + serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits) + + // This handler is able to handle serial numbers. 
+ serialnrHandler := func(p *parsekit.ParseAPI) { + if p.On(serialnr).Accept() { + fmt.Println(p.Result().String()) + } + } + + // Start could function as a sort of dispatcher, handing over + // control to the correct ParseHandler function, based on the input. + start := func(p *parsekit.ParseAPI) { + if p.On(parsekit.A.Asterisk).Stay() { + p.Handle(serialnrHandler) + return + } + // ... other cases could go here ... + } + + parser := parsekit.NewParser(start) + parser.Execute("#XX1234") + parser.Execute("*ay432566") + parser.Execute("*ZD987112") + + // Output: + // *ZD987112 +} + +func TestGivenNullHandler_NewParserPanics(t *testing.T) { + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { parsekit.NewParser(nil) }, + Regexp: true, + Expect: `parsekit\.NewParser\(\): NewParser\(\) called ` + + `with nil input at /.*/parser_test\.go:\d+`}) +} + +func TestGivenNullHandler_HandlePanics(t *testing.T) { + brokenParseHandler := func(p *parsekit.ParseAPI) { + p.Handle(nil) + } + parser := parsekit.NewParser(brokenParseHandler) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { parser.Execute("") }, + Regexp: true, + Expect: `parsekit\.ParseAPI\.Handle\(\): Handle\(\) called with nil input ` + + `at /.*/parser_test\.go:\d+`}) +} +func TestGivenNilTokenHandler_OnPanics(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.On(nil) + }) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { p.Execute("") }, + Regexp: true, + Expect: `parsekit\.ParseAPI\.On\(\): On\(\) called with nil ` + + `tokenHandler argument at /.*/parser_test\.go:\d+`}) +} + +func TestGivenStoppedParser_HandlePanics(t *testing.T) { + otherHandler := func(p *parsekit.ParseAPI) { + panic("This is not the handler you're looking for") + } + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Stop() + p.Handle(otherHandler) + }) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { p.Execute("") }, + Regexp: true, + Expect: 
`parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` + + `at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Stop\(\)`}) +} + +func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) { + otherHandler := func(p *parsekit.ParseAPI) { + panic("This is not the handler you're looking for") + } + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Error("It ends here") + p.Handle(otherHandler) + }) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { p.Execute("") }, + Regexp: true, + Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` + + `at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`}) +} + +func TestGivenParserWithoutCallToAccept_ResultPanics(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) { + p.Result() + }) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { p.Execute("") }, + Regexp: true, + Expect: `parsekit\.ParseAPI\.TokenResult\(\): TokenResult\(\) called at ` + + `/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`}) +} + +func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) {}) + err := p.Execute("") + parsekit.AssertTrue(t, err == nil, "err") +} + +func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) { + p := parsekit.NewParser(func(p *parsekit.ParseAPI) {}) + err := p.Execute("x") + parsekit.AssertEqual(t, "unexpected character 'x' (expected end of file) at start of file", err.Full(), "err") +} + +type parserWithLoop struct { + loopCounter int +} + +func (l *parserWithLoop) first(p *parsekit.ParseAPI) { + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.second) +} + +func (l *parserWithLoop) second(p *parsekit.ParseAPI) { + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.third) +} + +func (l *parserWithLoop) third(p *parsekit.ParseAPI) { + if l.loopCounter++; l.loopCounter > 100 { + p.Error("Loop not 
detected by parsekit") + return + } + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.first) +} + +func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { + looper := &parserWithLoop{} + parser := parsekit.NewParser(looper.first) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { parser.Execute("Het houdt niet op, niet vanzelf") }, + Regexp: true, + Expect: `parsekit\.ParseAPI: Loop detected in parser at /.*/parser_test.go:\d+`}) +} + +// This test incorporates an actual loop bug that I dropped on myself and +// that I could not easily spot in my code. It sounded so logical: +// I want to get chunks of 5 chars from the input, so I simply loop on: +// +// p.On(c.Max(5, a.AnyRune)) +// +// The problem here is that Max(5, ...) will also match when there is +// no more input, since Max(5, ---) is actually MinMax(0, 5, ...). +// Therefore the loop will never stop. Solving the loop was simple: +// +// p.On(c.MinMax(1, 5, a.AnyRune)) +// +// Now the loop stops when the parser finds no more matching input data. +func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { + var c, a = parsekit.C, parsekit.A + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + for p.On(c.Max(5, a.AnyRune)).Accept() { + } + p.Stop() + }) + parsekit.AssertPanic(t, parsekit.PanicT{ + Function: func() { parser.Execute("This will end soon") }, + Regexp: true, + Expect: `parsekit\.ParseAPI: Loop detected in parser at .*/parser_test.go:\d+`}) +} diff --git a/reader.go b/reader.go index 2d7fb56..3f37779 100644 --- a/reader.go +++ b/reader.go @@ -7,29 +7,31 @@ import ( "unicode/utf8" ) -// Reader wraps around an io.Reader and provides buffering to allows us to read +// reader wraps around an io.Reader and provides buffering to allows us to read // the same runes over and over again. This is useful for implementing a parser // that must be able to do lookahead on the input, returning to the original // input position after finishing that lookahead). 
// // To minimze memory use, it is also possible to flush the buffer when there is // no more need to go back to previously read runes. -type Reader struct { +// +// The reader is used internally by parsekit.TokenAPI. +type reader struct { bufio *bufio.Reader // Used for ReadRune() buffer []rune // Input buffer, holding runes that were read from input bufferOffset int // The offset of the buffer, relative to the start of the input bufferLen int // Input size, the number of runes in the buffer } -// NewReader initializes a new Reader struct, wrapped around the provided io.Reader. -func NewReader(r io.Reader) *Reader { - return &Reader{ +// newReader initializes a new reader struct, wrapped around the provided io.Reader. +func newReader(r io.Reader) *reader { + return &reader{ bufio: bufio.NewReader(r), buffer: []rune{}, } } -// RuneAt reads the rune at the provided rune offset. +// runeAt reads the rune at the provided rune offset. // // This offset is relative to the current starting position of the buffer in // the reader. When starting reading, offset 0 will point at the start of the @@ -43,7 +45,7 @@ func NewReader(r io.Reader) *Reader { // When reading failed, the rune will be utf8.RuneError. One special read // fail is actually a normal situation: end of file reached. In that case, // the returned error wille be io.EOF. -func (r *Reader) RuneAt(offset int) (rune, error) { +func (r *reader) runeAt(offset int) (rune, error) { // Rune at provided offset is not yet available in the input buffer. // Read runes until we have enough runes to satisfy the offset. for r.bufferLen <= offset { @@ -66,37 +68,11 @@ func (r *Reader) RuneAt(offset int) (rune, error) { return r.buffer[offset], nil } -// RunesAt reads a slice of runes of length 'len', starting from offset 'offset'. -// -// This offset is relative to the current starting position of the buffer in -// the reader. When starting reading, offset 0 will point at the start of the -// input. 
After flushing, offset 0 will point at the input up to where -// the flush was done. -// -// When an error is encountered during reading (EOF or other error), then the -// error return value will be set. In case of an error, any runes that could be -// successfully read are returned along with the error. -// TODO Do I actually use this interface? -func (r *Reader) RunesAt(start int, len int) ([]rune, error) { - if len == 0 { - return r.buffer[0:0], nil - } - end := start + len - _, err := r.RuneAt(end) - if err != nil { - if end > r.bufferLen { - end = r.bufferLen - } - return r.buffer[start:end], err - } - return r.buffer[start:end], nil -} - // Flush deletes the provided number of runes from the start of the -// reader buffer. After flushing the buffer, offset 0 as used by RuneAt() +// reader buffer. After flushing the buffer, offset 0 as used by runeAt() // will point to the rune that comes after the flushed runes. // So what this basically does is turn the Reader into a sliding window. -func (r *Reader) Flush(numberOfRunes int) { +func (r *reader) flush(numberOfRunes int) { if numberOfRunes > r.bufferLen { panic(fmt.Sprintf( "parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+ diff --git a/reader_test.go b/reader_test.go index 672fbca..f9d2967 100644 --- a/reader_test.go +++ b/reader_test.go @@ -5,14 +5,12 @@ import ( "io" "strings" "testing" - - "git.makaay.nl/mauricem/go-parsekit/assert" ) -func ExampleNewReader() { +func ExamplenewReader() { in := strings.NewReader("Hello, world!") - r := NewReader(in) - at := func(i int) rune { r, _ := r.RuneAt(i); return r } + r := newReader(in) + at := func(i int) rune { r, _ := r.runeAt(i); return r } fmt.Printf("%c", at(0)) fmt.Printf("%c", at(12)) @@ -21,114 +19,76 @@ func ExampleNewReader() { // H! 
} -func ExampleReader_RuneAt() { +func TestReader_runeAt(t *testing.T) { in := strings.NewReader("Hello, world!") - r := NewReader(in) - at := func(i int) rune { r, _ := r.RuneAt(i); return r } + r := newReader(in) + at := func(i int) rune { r, _ := r.runeAt(i); return r } // It is possible to go back and forth while reading the input. - fmt.Printf("%c", at(0)) - fmt.Printf("%c", at(12)) - fmt.Printf("%c", at(7)) - fmt.Printf("%c", at(0)) - - // Output: - // H!wH + result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) + AssertEqual(t, "H!wH", result, "result") } -func ExampleReader_RuneAt_endOfFile() { +func TestReader_runeAt_endOfFile(t *testing.T) { in := strings.NewReader("Hello, world!") - r := NewReader(in) + r := newReader(in) - rn, err := r.RuneAt(13) - fmt.Printf("%q %s %t\n", rn, err, err == io.EOF) + rn, err := r.runeAt(13) + result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) + AssertEqual(t, "'�' EOF true", result, "result") - rn, err = r.RuneAt(20) - fmt.Printf("%q %s %t\n", rn, err, err == io.EOF) - - // Output: - // '�' EOF true - // '�' EOF true + rn, err = r.runeAt(20) + result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) + AssertEqual(t, "'�' EOF true", result, "result") } -func ExampleReader_RuneAt_invalidRune() { +func TestReader_runeAt_invalidRune(t *testing.T) { in := strings.NewReader("Hello, \xcdworld!") - r := NewReader(in) + r := newReader(in) + at := func(i int) rune { r, _ := r.runeAt(i); return r } - rn, err := r.RuneAt(6) - fmt.Printf("%q %t\n", rn, err == nil) - rn, err = r.RuneAt(7) - fmt.Printf("%q %t\n", rn, err == nil) - rn, err = r.RuneAt(8) - fmt.Printf("%q %t\n", rn, err == nil) - rn, err = r.RuneAt(9) - fmt.Printf("%q %t\n", rn, err == nil) - - // Output: - // ' ' true - // '�' true - // 'w' true - // 'o' true -} - -func ExampleReader_RunesAt() { - in := strings.NewReader("Hello, \xcdworld!") - r := NewReader(in) - - rs, err := r.RunesAt(4, 6) - fmt.Printf("%q %t\n", string(rs), err == nil) - rs, err = 
r.RunesAt(4, 0) - fmt.Printf("%q %t\n", string(rs), err == nil) - rs, err = r.RunesAt(8, 100) - fmt.Printf("%q %t\n", string(rs), err == io.EOF) - - // Output: - // "o, �wo" true - // "" true - // "world!" true + result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) + AssertEqual(t, " �wo", result, "result") } func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { in := strings.NewReader("\uFEFFBommetje!") - r := NewReader(in) - b, _ := r.RuneAt(0) - o, _ := r.RuneAt(1) - m, _ := r.RuneAt(2) + r := newReader(in) + b, _ := r.runeAt(0) + o, _ := r.runeAt(1) + m, _ := r.runeAt(2) bom := fmt.Sprintf("%c%c%c", b, o, m) - assert.Equal(t, "Bom", bom, "first three runes") + AssertEqual(t, "Bom", bom, "first three runes") } -func ExampleReader_Flush() { +func TestReader_Flush(t *testing.T) { in := strings.NewReader("Hello, world!") - r := NewReader(in) - at := func(i int) rune { r, _ := r.RuneAt(i); return r } - rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r } + r := newReader(in) + at := func(i int) rune { r, _ := r.runeAt(i); return r } // Fills the buffer with the first 8 runes on the input: "Hello, w" - fmt.Printf("%c\n", at(7)) + result := fmt.Sprintf("%c", at(7)) + AssertEqual(t, "w", result, "first read") // Now flush the first 4 runes from the buffer (dropping "Hell" from it) - r.Flush(4) + r.flush(4) // Rune 0 is now pointing at what originally was rune offset 4. // We can continue reading from there. - fmt.Printf("%s", string(rb(0, 8))) - - // Output: - // w - // o, world + result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) + AssertEqual(t, "o, wor", result, "second read") } func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { in := strings.NewReader("Hello, world!") - r := NewReader(in) + r := newReader(in) // Fill buffer with "Hello, worl", the first 11 runes. - r.RuneAt(10) + r.runeAt(10) // However, we flush 12 runes, which exceeds the buffer size. 
- assert.Panic(t, assert.PanicT{ - Function: func() { r.Flush(12) }, + AssertPanic(t, PanicT{ + Function: func() { r.flush(12) }, Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)", }) } diff --git a/tokenapi.go b/tokenapi.go index f085a23..1508861 100644 --- a/tokenapi.go +++ b/tokenapi.go @@ -5,7 +5,7 @@ import ( "io" ) -// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and +// TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and // to report back results. For easy lookahead support, a forking strategy is // provided. // @@ -14,54 +14,64 @@ import ( // To retrieve the next rune from the TokenAPI, call the NextRune() method. // // When the rune is to be accepted as input, call the method Accept(). The rune -// is then added to the result buffer of the TokenAPI struct. +// is then added to the results of the TokenAPI and the read cursor is moved +// forward. Runes collected this way can later on be retrieved using for +// example the method Result().Runes(). +// // It is mandatory to call Accept() after retrieving a rune, before calling // NextRune() again. Failing to do so will result in a panic. // -// By invoking NextRune() + Accept() multiple times, the result buffer is extended +// By invoking NextRune() + Accept() multiple times, the result can be extended // with as many runes as needed. // +// Next to adding runes to the output, it is also possible to modify the +// already collected runes or to produce lexical Tokens. For all things +// concerning results, take a look at the Result struct, which can be +// accessed though the method Result(). +// // FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT: // // Sometimes, we must be able to perform a lookahead, which might either -// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be -// brought back to the original state, so we can try a different route. +// succeed or fail. 
In case of a failing lookahead, the state of the +// TokenAPI must be brought back to the original state, so we can try +// a different route. // -// The way in which this is supported, is by forking a TokenAPI struct by calling -// Fork(). This will return a forked child TokenAPI, with an empty result buffer, -// but using the same input cursor position as the forked parent. +// The way in which this is supported, is by forking a TokenAPI struct by +// calling method Fork(). This will return a forked child TokenAPI, with +// an empty result buffer, but using the same read cursor position as the +// forked parent. // // After forking, the same interface as described for BASIC OPERATION can be // used to fill the result buffer. When the lookahead was successful, then // Merge() can be called on the forked child to append the child's result -// buffer to the parent's result buffer, and to move the input cursor position +// buffer to the parent's result buffer, and to move the read cursor position // to that of the child. // -// When the lookahead was unsuccessful, then the forked child TokenAPI can simply -// be discarded. The parent TokenAPI was never modified, so it can safely be used -// as if the lookahead never happened. +// When the lookahead was unsuccessful, then the forked child TokenAPI can +// simply be discarded. The parent TokenAPI was never modified, so it can +// safely be used as if the lookahead never happened. // // Note: // Many tokenizers/parsers take a different approach on lookaheads by using -// peeks and by moving the input cursor position back and forth, or by putting +// peeks and by moving the read cursor position back and forth, or by putting // read input back on the input stream. That often leads to code that is -// efficient, however, in my opinion, not very untuitive to read. +// efficient, however, in my opinion, not very intuitive to read. type TokenAPI struct { - reader *Reader - cursor *Cursor // current read cursor position, rel. 
to the input start - offset int // current rune offset rel. to the Reader's sliding window - result *Result // results as produced by a TokenHandler (runes, Tokens) - root *TokenAPI // the root TokenAPI - parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child - child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent + reader *reader + cursor *Cursor // current read cursor position, rel. to the input start + offset int // current rune offset rel. to the Reader's sliding window + result *TokenResult // results as produced by a TokenHandler (runes, Tokens) + root *TokenAPI // the root TokenAPI + parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child + child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent } // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. func NewTokenAPI(r io.Reader) *TokenAPI { input := &TokenAPI{ - reader: NewReader(r), + reader: newReader(r), cursor: &Cursor{}, - result: NewResult(), + result: newTokenResult(), } input.root = input return input @@ -78,13 +88,14 @@ func NewTokenAPI(r io.Reader) *TokenAPI { // without explicitly accepting, this method will panic. func (i *TokenAPI) NextRune() (rune, error) { if i.result.lastRune != nil { - caller, linepos := getCaller(1) - panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+ - "to Accept() from %s at %s", caller, linepos)) + _, linepos := getCaller(1) + panic(fmt.Sprintf( + "parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+ + "prior call to Accept()", linepos)) } i.detachChilds() - readRune, err := i.reader.RuneAt(i.offset) + readRune, err := i.reader.runeAt(i.offset) i.result.lastRune = &runeInfo{r: readRune, err: err} return readRune, err } @@ -96,24 +107,38 @@ func (i *TokenAPI) NextRune() (rune, error) { // returned an error. Calling Accept() in such case will result in a panic. 
func (i *TokenAPI) Accept() { if i.result.lastRune == nil { - caller, linepos := getCaller(1) + _, linepos := getCaller(1) panic(fmt.Sprintf( - "parsekit.TokenAPI.Accept(): Accept() called without first "+ - "calling NextRune() from %s at %s", caller, linepos)) + "parsekit.TokenAPI.Accept(): Accept() called at %s without "+ + "first calling NextRune()", linepos)) } else if i.result.lastRune.err != nil { - caller, linepos := getCaller(1) + _, linepos := getCaller(1) panic(fmt.Sprintf( - "parsekit.TokenAPI.Accept(): Accept() called while the previous "+ - "call to NextRune() failed from %s at %s", caller, linepos)) + "parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+ + "prior call to NextRune() failed", linepos)) } i.result.runes = append(i.result.runes, i.result.lastRune.r) - i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r)) + i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r)) i.offset++ i.result.lastRune = nil } // Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and // read cursor position, but for the rest this is a fresh TokenAPI. +// +// By forking a TokenAPI, you can freely work with the forked child, without +// affecting the parent TokenAPI. This is for example useful when you must perform +// some form of lookahead. +// +// When such lookahead turned out successful and you want to accept the results +// into the parent TokenAPI, you can call TokenAPI.Merge() on the forked +// child. This will add the runes in the result buffer to the result buffer of +// the parent. It also updates the read cursor position of the parent to that +// of the child. +// +// When the lookahead failed, or you don't need the results as produced by that +// lookahead, the forked child can simply be discarded. You can continue to work +// with the parent TokenAPI as if nothing ever happened. 
func (i *TokenAPI) Fork() *TokenAPI { i.detachChilds() @@ -125,18 +150,27 @@ func (i *TokenAPI) Fork() *TokenAPI { root: i.root, parent: i, } - child.result = NewResult() + child.result = newTokenResult() *child.cursor = *i.cursor i.child = child i.result.lastRune = nil return child } -// Merge adds the data of the forked child TokenAPI that Merge() is called on to the -// data of its parent (results and read cursor position). +// Merge appends the Result of a forked child TokenAPI to the Result of its +// parent. The read cursor position of the parent is also updated to that of +// the forked child. +// +// After the merge operation, the child is reset so it can immediately be +// reused for performing another match. This means that all Result data are +// cleared, but the read cursor position is kept at its current position. +// This allows a child to feed results in chunks to its parent. func (i *TokenAPI) Merge() { if i.parent == nil { - panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI") + _, filepos := getCaller(1) + panic(fmt.Sprintf( + "parsekit.TokenAPI.Merge(): Merge() called at %s "+ + "on a non-forked TokenAPI", filepos)) } i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) @@ -145,12 +179,12 @@ func (i *TokenAPI) Merge() { i.parent.cursor = i.cursor i.detachChilds() - i.result = NewResult() + i.result = newTokenResult() } -// Result returns the result data for the TokenAPI. The returned struct -// can be used to retrieve and modify the result data. -func (i *TokenAPI) Result() *Result { +// Result returns the TokenResult data for the TokenAPI. The returned struct +// can be used to retrieve and to modify result data. +func (i *TokenAPI) Result() *TokenResult { return i.result } @@ -160,18 +194,6 @@ func (i *TokenAPI) Cursor() Cursor { return *i.cursor } -// FlushReaderBuffer delegates to the Flush() method of the contained -// parsekit.TokenAPI.Reader. 
It flushes the provided number of runes from the -// reader cache. -func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) { - if i != i.root { - panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child") - } - i.detachChilds() - i.reader.Flush(numberOfRunes) - i.offset = 0 -} - func (i *TokenAPI) detachChilds() { if i.child != nil { i.child.detachChildsRecurse() diff --git a/tokenapi_example_test.go b/tokenapi_example_test.go new file mode 100644 index 0000000..888cfd8 --- /dev/null +++ b/tokenapi_example_test.go @@ -0,0 +1,70 @@ +package parsekit_test + +import ( + "fmt" + + "git.makaay.nl/mauricem/go-parsekit" +) + +func ExampleTokenAPI_Fork() { + // This custom TokenHandler checks for a sequence of runes: "abcd" + // This is done in 4 steps and only after finishing all steps, + // the TokenHandler will confirm a successful match. + abcdSequence := func(t *parsekit.TokenAPI) bool { + child := t.Fork() // fork, so we won't change parent t + for _, checkRune := range "abcd" { + readRune, err := child.NextRune() + if err != nil || readRune != checkRune { + return false // report mismatch, parent t is left untouched + } + child.Accept() // add rune to child output + } + child.Merge() // we have a match, add resulting output to parent + return true // and report the successful match + } + + // Note: a custom TokenHandler is normally not what you need. + // You can make use of the parser/combinator tooling to do things + // a lot simpler. 
The handler from above can be replaced with: + simpler := parsekit.A.Str("abcd") + + result, err := parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcdefgh") + fmt.Println(result, err) + result, err = parsekit.NewTokenizer(simpler, "abcd").Execute("abcdefgh") + fmt.Println(result, err) + result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcx") + fmt.Println(result, err) + result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("xyz") + fmt.Println(result, err) + + // Output: + // abcd + // abcd + // unexpected character 'a' (expected abcd) + // unexpected character 'x' (expected abcd) +} + +func ExampleTokenAPI_Merge() { + tokenHandler := func(t *parsekit.TokenAPI) bool { + child1 := t.Fork() + child1.NextRune() // reads 'H' + child1.Accept() + child1.NextRune() // reads 'i' + child1.Accept() + + child2 := child1.Fork() + child2.NextRune() // reads ' ' + child2.Accept() + child2.NextRune() // reads 'd' + child2.Accept() + + child1.Merge() // We merge child1, which has read 'H' and 'i' only. + return true + } + + result, _ := parsekit.NewTokenizer(tokenHandler, "a match").Execute("Hi mister X!") + fmt.Println(result) + + // Output: + // Hi +} diff --git a/tokenapi_result.go b/tokenapi_result.go deleted file mode 100644 index ef90761..0000000 --- a/tokenapi_result.go +++ /dev/null @@ -1,106 +0,0 @@ -package parsekit - -import ( - "fmt" -) - -// Result holds results as produced by a TokenHandler. -type Result struct { - lastRune *runeInfo // Information about the last rune read using NextRune() - runes []rune - tokens []*Token -} - -type runeInfo struct { - r rune - err error -} - -// Token defines a lexical token as produced by TokenHandlers. -type Token struct { - Type interface{} // token type, can be any type that a parser author sees fit - Runes []rune // the runes that make up the token - Value interface{} // an optional value of any type -} - -// NewResult initializes an empty result struct. 
-func NewResult() *Result { - return &Result{ - runes: []rune{}, - tokens: []*Token{}, - } -} - -// ClearRunes clears the runes in the Result. -func (r *Result) ClearRunes() { - r.runes = []rune{} -} - -// SetRunes replaces the Runes from the Result with the provided input. -func (r *Result) SetRunes(s interface{}) { - r.ClearRunes() - r.AddRunes(s) -} - -// AddRunes is used to add runes to the Result. -func (r *Result) AddRunes(s interface{}) { - switch s := s.(type) { - case string: - r.runes = append(r.runes, []rune(s)...) - case []rune: - r.runes = append(r.runes, s...) - case rune: - r.runes = append(r.runes, s) - default: - panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", s)) - } -} - -// Runes retrieves the Runes from the Result. -func (r *Result) Runes() []rune { - return r.runes -} - -// Rune retrieve a single rune from the Result at the specified index. -func (r *Result) Rune(idx int) rune { - return r.runes[idx] -} - -// String returns the Runes from the Result as a string. -func (r *Result) String() string { - return string(r.runes) -} - -// ClearTokens clears the tokens in the Result. -func (r *Result) ClearTokens() { - r.tokens = []*Token{} -} - -// AddToken is used to add a Token to the results. -func (r *Result) AddToken(t *Token) { - r.tokens = append(r.tokens, t) -} - -// Tokens retrieves the Tokens from the Result. -func (r *Result) Tokens() []*Token { - return r.tokens -} - -// Token retrieves a single Token from the Result at the specified index. -func (r *Result) Token(idx int) *Token { - return r.tokens[idx] -} - -// Values retrieves a slice containing only the Values for the Result Tokens. -func (r *Result) Values() []interface{} { - values := make([]interface{}, len(r.tokens)) - for i, tok := range r.tokens { - values[i] = tok.Value - } - return values -} - -// Value retrieves a single Value from the Result Token at the specified index. 
-func (r *Result) Value(idx int) interface{} { - return r.tokens[idx].Value -} diff --git a/tokenapi_result_test.go b/tokenapi_result_test.go deleted file mode 100644 index 3ccec52..0000000 --- a/tokenapi_result_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package parsekit - -import ( - "testing" - - "git.makaay.nl/mauricem/go-parsekit/assert" -) - -func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { - i := mkInput() - i.Result().SetRunes("string") - assert.Equal(t, "string", string(i.Result().String()), "i.Result() with string input") - i.Result().SetRunes([]rune("rune slice")) - assert.Equal(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input") - i.Result().SetRunes('X') - assert.Equal(t, "X", string(i.Result().String()), "i.Result() with rune input") -} - -func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - i.Result().SetRunes(1234567) - }, - Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used", - }) -} diff --git a/tokenapi_test.go b/tokenapi_test.go deleted file mode 100644 index 59e78f9..0000000 --- a/tokenapi_test.go +++ /dev/null @@ -1,288 +0,0 @@ -package parsekit - -import ( - "io" - "strings" - "testing" - "unicode/utf8" - - "git.makaay.nl/mauricem/go-parsekit/assert" -) - -func TestCallingNextRune_ReturnsNextRune(t *testing.T) { - r, _ := mkInput().NextRune() - assert.Equal(t, 'T', r, "first rune") -} - -func TestInputCanAcceptRunesFromReader(t *testing.T) { - i := mkInput() - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()") -} - -func TestCallingNextRuneTwice_Panics(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - i.NextRune() - i.NextRune() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` + - `a prior call to Accept\(\) from 
.*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`, - }) -} - -func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: mkInput().Accept, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` + - `first calling NextRune\(\) from .* at /.*:\d+`, - }) -} - -func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - i.Merge() - }, - Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI", - }) -} - -func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - f := i.Fork() - i.NextRune() - f.Merge() - }, - Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI", - }) -} - -func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - f := i.Fork() - i.Fork() - f.Merge() - }, - Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI", - }) -} - -func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) { - i := mkInput() - f1 := i.Fork() - f2 := f1.Fork() - f3 := f2.Fork() - f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3 - f5 := f4.Fork() - assert.Equal(t, true, i.parent == nil, "i.parent == nil") - assert.Equal(t, true, i.child == f1, "i.child == f1") - assert.Equal(t, true, f1.parent == i, "f1.parent == i") - assert.Equal(t, true, f1.child == f4, "f1.child == f4") - assert.Equal(t, true, f2.child == nil, "f2.child == nil") - assert.Equal(t, true, f2.parent == nil, "f2.parent == nil") - assert.Equal(t, true, f3.child == nil, "f3.child == nil") - assert.Equal(t, true, f3.parent == nil, "f3.parent == nil") - assert.Equal(t, true, f4.parent == f1, "f4.parent == f1") - assert.Equal(t, true, f4.child == 
f5, "f4.child == f5") - assert.Equal(t, true, f5.parent == f4, "f5.parent == f4") - assert.Equal(t, true, f5.child == nil, "f5.child == nil") - - i.NextRune() - - assert.Equal(t, true, i.parent == nil, "i.parent == nil") - assert.Equal(t, true, i.child == nil, "i.child == nil") - assert.Equal(t, true, f1.parent == nil, "f1.parent == nil") - assert.Equal(t, true, f1.child == nil, "f1.child == nil") - assert.Equal(t, true, f2.child == nil, "f2.child == nil") - assert.Equal(t, true, f2.parent == nil, "f2.parent == nil") - assert.Equal(t, true, f3.child == nil, "f3.child == nil") - assert.Equal(t, true, f3.parent == nil, "f3.parent == nil") - assert.Equal(t, true, f4.parent == nil, "f4.parent == nil") - assert.Equal(t, true, f4.child == nil, "f4.child == nil") - assert.Equal(t, true, f5.parent == nil, "f5.parent == nil") - assert.Equal(t, true, f5.child == nil, "f5.child == nil") -} - -func TestForkingInput_ClearsLastRune(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - i.NextRune() - i.Fork() - i.Accept() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` + - `first calling NextRune\(\) from .* at /.*:\d+`, - }) -} - -func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { - i := mkInput() - r, _ := i.NextRune() - assert.Equal(t, 'T', r, "result from 1st call to NextRune()") - // TODO still (*runeInfo) case needed? 
- assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()") - i.Accept() - assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()") - assert.Equal(t, 1, i.offset, "Input.offset") - assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]") - r, _ = i.NextRune() - assert.Equal(t, 'e', r, "result from 2nd call to NextRune()") -} - -func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) { - i := mkInput() - for j := 0; j < 7; j++ { - i.NextRune() - i.Accept() - } - assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer") - assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()") -} - -func TestAccept_UpdatesCursor(t *testing.T) { - i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines")) - assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1") - for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" - i.NextRune() - i.Accept() - } - assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2") - i.NextRune() // read "\n", cursor ends up at start of new line - i.Accept() - assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3") - for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" - i.NextRune() - i.Accept() - } - assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4") - assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()") -} - -func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { - // Create input, accept the first rune. - i := mkInput() - i.NextRune() - i.Accept() // T - assert.Equal(t, "T", i.Result().String(), "accepted rune in input") - // Fork - f := i.Fork() - assert.Equal(t, f, i.child, "Input.child (must be f)") - assert.Equal(t, i, f.parent, "Input.parent (must be i)") - assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte") - assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte") - // Accept two runes via fork. 
- f.NextRune() - f.Accept() // e - f.NextRune() - f.Accept() // s - assert.Equal(t, "es", f.Result().String(), "result runes in fork") - assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte") - assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte") - // Merge fork back into parent - f.Merge() - assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()") - assert.Equal(t, 3, i.cursor.Byte, "i.child.cursor.Byte") -} - -func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { - i := mkInput() - i.NextRune() - i.Accept() - f1 := i.Fork() - f1.NextRune() - f1.Accept() - f2 := f1.Fork() - f2.NextRune() - f2.Accept() - assert.Equal(t, "T", i.Result().String(), "i.Result().String()") - assert.Equal(t, 1, i.offset, "i.offset") - assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()") - assert.Equal(t, 2, f1.offset, "f1.offset") - assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()") - assert.Equal(t, 3, f2.offset, "f2.offset") - f2.Merge() - assert.Equal(t, "T", i.Result().String(), "i.Result().String()") - assert.Equal(t, 1, i.offset, "i.offset") - assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()") - assert.Equal(t, 3, f1.offset, "f1.offset") - assert.Equal(t, "", f2.Result().String(), "f2.Result().String()") - assert.Equal(t, 3, f2.offset, "f2.offset") - f1.Merge() - assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()") - assert.Equal(t, 3, i.offset, "i.offset") - assert.Equal(t, "", f1.Result().String(), "f1.Result().String()") - assert.Equal(t, 3, f1.offset, "f1.offset") - assert.Equal(t, "", f2.Result().String(), "f2.Result().String()") - assert.Equal(t, 3, f2.offset, "f2.offset") -} - -func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) { - assert.Panic(t, assert.PanicT{ - Function: func() { - i := mkInput() - f := i.Fork() - f.FlushReaderBuffer(1) - }, - Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " 
+ - "can only be called on the root TokenAPI, not on a forked child", - }) -} - -func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) { - i := mkInput() - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - i.FlushReaderBuffer(2) - assert.Equal(t, "Te", i.Result().String(), "i.Result()") - assert.Equal(t, 0, i.offset, "i.offset") - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - assert.Equal(t, 2, i.offset, "i.offset") - i.FlushReaderBuffer(2) - assert.Equal(t, "Test", i.Result().String(), "i.Result()") - assert.Equal(t, 0, i.offset, "i.offset") -} - -func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) { - i := NewTokenAPI(strings.NewReader("X")) - i.NextRune() - i.Accept() - r, err := i.NextRune() - assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()") - assert.Equal(t, true, err == io.EOF, "returned error from NextRune()") -} -func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { - i := NewTokenAPI(strings.NewReader("X")) - f := i.Fork() - f.NextRune() - f.Accept() - r, err := f.NextRune() - assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") - r, err = i.NextRune() - assert.Equal(t, 'X', r, "returned rune from 2nd NextRune()") - assert.Equal(t, true, err == nil, "returned error from 2nd NextRune()") -} - -func mkInput() *TokenAPI { - return NewTokenAPI(strings.NewReader("Testing")) -} diff --git a/tokenhandler.go b/tokenhandler.go deleted file mode 100644 index 997e4ed..0000000 --- a/tokenhandler.go +++ /dev/null @@ -1,139 +0,0 @@ -package parsekit - -import ( - "fmt" - "runtime" -) - -// TokenHandler is the function type that is involved in turning a low level -// stream of UTF8 runes into lexical tokens. Its purpose is to check if input -// data matches some kind of pattern and to report back the token(s). 
-// -// A TokenHandler function gets a TokenAPI as its input and returns a boolean to -// indicate whether or not it found a match on the input. The TokenAPI is used -// for retrieving input data to match against and for reporting back results. -type TokenHandler func(t *TokenAPI) bool - -// NextRune retrieves the next rune from the input. -// -// It returns the rune and a boolean. The boolean will be false in case an -// invalid UTF8 rune or the end of the file was encountered. -// -// After retrieving a rune, Accept() or Skip() can be called to respectively add -// the rune to the TokenAPIold's string buffer or to fully ignore it. This way, -// a TokenHandler has full control over what runes are significant for the -// resulting output of that TokenHandler. -// -// After using NextRune(), this method can not be reinvoked, until the last read -// rune is explicitly accepted or skipped as described above. -// func (t *TokenAPIold) NextRune() (rune, bool) { -// if t.lastRune != nil { -// caller, filepos := getCaller(1) -// panic(fmt.Sprintf( -// "TokenHandler bug: NextRune() was called from %s at %s "+ -// "without accepting or skipping the previously read rune", caller, filepos)) -// } -// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset) -// t.lastRune = &runeInfo{r, w, ok} -// if ok { -// t.result.Input = append(t.result.Input, r) -// } -// return r, ok -// } - -// Fork splits off a child TokenAPIold, containing the same input cursor position -// as the parent TokenAPIold, but with all other data in a fresh state. -// -// By forking, a TokenHandler function can freely work with a TokenAPIold, without -// affecting the parent TokenAPIold. This is for example useful when the -// TokenHandler function must perform some form of lookahead. -// -// When a successful match was found, the TokenHandler function can call -// TokenAPIold.Merge() on the forked child to have the resulting output added -// to the parent TokenAPIold. 
-// -// When no match was found, the forked child can simply be discarded. -// -// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'. -// This is done in 4 steps and only after finishing all steps, the TokenHandler -// function can confirm a successful match. The TokenHandler function for this -// case could look like this (yes, it's naive, but it shows the point): -// TODO make proper tested example -// -// func MatchAbcd(t *TokenAPIold) bool { -// child := t.Fork() // fork to keep m from input untouched -// for _, letter := []rune {'a', 'b', 'c', 'd'} { -// if r, ok := t.NextRune(); !ok || r != letter { -// return false // report mismatch, t is left untouched -// } -// child.Accept() // add rune to child output -// } -// child.Merge() // we have a match, add resulting output to parent -// return true // and report the successful match -// } - -// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting -// output of the TokenAPIold. -// func (t *TokenAPIold) Accept() { -// t.checkAllowedCall("Accept()") -// t.buffer = append(t.buffer, t.lastRune.Rune) -// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune) -// t.inputOffset += t.lastRune.ByteSize -// t.lastRune = nil -// } - -// Skip will ignore the last rune as read by NextRune(). 
-// func (t *TokenAPIold) Skip() { -// t.checkAllowedCall("Skip()") -// t.inputOffset += t.lastRune.ByteSize -// t.lastRune = nil -// } - -// func (t *TokenAPIold) checkAllowedCall(name string) { -// if t.lastRune == nil { -// caller, filepos := getCaller(2) -// panic(fmt.Sprintf( -// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()", -// name, caller, filepos)) -// } -// if !t.lastRune.OK { -// caller, filepos := getCaller(2) -// panic(fmt.Sprintf( -// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+ -// "did not return OK (EOF or invalid rune)", name, caller, filepos)) -// } -// } - -// AddToken is used to add a token to the results of the TokenHandler. -// func (t *TokenAPIold) AddToken(tok *Token) { -// t.result.Tokens = append(t.result.Tokens, tok) -// } - -// Merge merges the resulting output from a forked child TokenAPIold back into -// its parent: The runes that are accepted in the child are added to the parent -// runes and the parent's input cursor position is advanced to the child's -// cursor position. -// -// After the merge, the child TokenAPIold is reset so it can immediately be -// reused for performing another match (all data are cleared, except for the -// input offset which is kept at its current position). -// func (t *TokenAPIold) Merge() bool { -// if t.parent == nil { -// panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog") -// } -// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...) -// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...) -// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...) -// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...) -// t.parent.inputOffset = t.inputOffset -// t.result = &TokResult{} -// return true -// } - -func getCaller(depth int) (string, string) { - // No error handling, because we call this method ourselves with safe depth values. 
- pc, file, line, _ := runtime.Caller(depth + 1) - filepos := fmt.Sprintf("%s:%d", file, line) - caller := runtime.FuncForPC(pc) - return caller.Name(), filepos -} diff --git a/tokenhandler_test.go b/tokenhandler_test.go index 04f005c..3d0af65 100644 --- a/tokenhandler_test.go +++ b/tokenhandler_test.go @@ -4,11 +4,10 @@ import ( "testing" "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/assert" ) func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) { - parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { for i := 0; i < 20; i++ { t.NextRune() t.Accept() @@ -22,7 +21,7 @@ func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) { } func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { - parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { t.Result().AddToken(&parsekit.Token{ Type: "PI", Runes: []rune("π"), @@ -60,8 +59,9 @@ func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { } func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) { - fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII)) - parser := parsekit.NewMatcher(fooToken, "something") + var tok, c, a = parsekit.T, parsekit.C, parsekit.A + fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII)) + parser := parsekit.NewTokenizer(fooToken, "something") input := "This is fine ASCII Åltho hère öt endĩt!" 
result, err := parser.Execute(input) @@ -74,15 +74,16 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) { } func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { + var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A fooToken := c.Seq( m.Drop(c.ZeroOrMore(a.Asterisk)), - tok.StrLiteral("COMBI", c.Seq( - tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))), - tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))), + tok.Str("COMBI", c.Seq( + tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))), + tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))), )), m.Drop(c.ZeroOrMore(a.Asterisk)), ) - parser := parsekit.NewMatcher(fooToken, "something") + parser := parsekit.NewTokenizer(fooToken, "something") input := "*** This is fine ASCII Åltho hère öt endĩt! ***" output := "This is fine ASCIIÅltho hère öt endĩt!" result, err := parser.Execute(input) @@ -108,50 +109,50 @@ func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { } func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) { - parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { t.Accept() return false }, "test") - assert.Panic(t, assert.PanicT{ + parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("input string") }, Regexp: true, - Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` + - `calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`, - }) + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` + + `/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`}) } func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) { - parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { t.NextRune() t.NextRune() return false }, "test") - assert.Panic(t, 
assert.PanicT{ + parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("input string") }, Regexp: true, - Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` + - `a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`}) + Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` + + `/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`}) } func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) { - parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { + parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { t.NextRune() t.Accept() return false }, "test") - assert.Panic(t, assert.PanicT{ + parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { parser.Execute("") }, Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` + - `NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`}) + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` + + `/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`}) } func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) { - assert.Panic(t, assert.PanicT{ + parsekit.AssertPanic(t, parsekit.PanicT{ Function: func() { a := parsekit.TokenAPI{} a.Merge() }, - Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`, - }) + Regexp: true, + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + + `/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`}) } diff --git a/tokenhandlers_builtin.go b/tokenhandlers_builtin.go index ea003d5..e348450 100644 --- a/tokenhandlers_builtin.go +++ b/tokenhandlers_builtin.go @@ -215,7 +215,7 @@ var A = struct { // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. 
var T = struct { - StrLiteral func(interface{}, TokenHandler) TokenHandler + Str func(interface{}, TokenHandler) TokenHandler StrInterpreted func(interface{}, TokenHandler) TokenHandler Byte func(interface{}, TokenHandler) TokenHandler Rune func(interface{}, TokenHandler) TokenHandler @@ -234,7 +234,7 @@ var T = struct { Boolean func(interface{}, TokenHandler) TokenHandler ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler }{ - StrLiteral: MakeStrLiteralToken, + Str: MakeStrLiteralToken, StrInterpreted: MakeStrInterpretedToken, Byte: MakeByteToken, Rune: MakeRuneToken, @@ -343,13 +343,13 @@ func MatchOpt(handler TokenHandler) TokenHandler { } // MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be -// applied in their exact order. Only if all matcher apply, the sequence +// applied in their exact order. Only if all TokenHandlers apply, the sequence // reports successful match. func MatchSeq(handlers ...TokenHandler) TokenHandler { return func(t *TokenAPI) bool { child := t.Fork() - for _, matcher := range handlers { - if !matcher(child) { + for _, handler := range handlers { + if !handler(child) { return false } } diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index 27d08b8..132cc3a 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -8,7 +8,8 @@ import ( ) func TestCombinators(t *testing.T) { - RunTokenHandlerTests(t, []TokenHandlerTest{ + var c, a, m = parsekit.C, parsekit.A, parsekit.M + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ {"abc", c.Not(a.Rune('b')), true, "a"}, {"bcd", c.Not(a.Rune('b')), false, ""}, {"bcd", c.Not(a.Rune('b')), false, ""}, @@ -67,24 +68,26 @@ func TestCombinators(t *testing.T) { } func TestCombinatorPanics(t *testing.T) { - RunPanicTests(t, []PanicTest{ - {func() { a.RuneRange('z', 'a') }, + var c, a = parsekit.C, parsekit.A + parsekit.AssertPanics(t, []parsekit.PanicT{ + {func() { a.RuneRange('z', 'a') }, false, 
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"}, - {func() { c.MinMax(-1, 1, parsekit.A.Space) }, + {func() { c.MinMax(-1, 1, parsekit.A.Space) }, false, "TokenHandler bug: MatchMinMax definition error: min must be >= 0"}, - {func() { c.MinMax(1, -1, parsekit.A.Space) }, + {func() { c.MinMax(1, -1, parsekit.A.Space) }, false, "TokenHandler bug: MatchMinMax definition error: max must be >= 0"}, - {func() { c.MinMax(10, 5, parsekit.A.Space) }, + {func() { c.MinMax(10, 5, parsekit.A.Space) }, false, "TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"}, - {func() { c.Min(-10, parsekit.A.Space) }, + {func() { c.Min(-10, parsekit.A.Space) }, false, "TokenHandler bug: MatchMin definition error: min must be >= 0"}, - {func() { c.Max(-42, parsekit.A.Space) }, + {func() { c.Max(-42, parsekit.A.Space) }, false, "TokenHandler bug: MatchMax definition error: max must be >= 0"}, }) } func TestAtoms(t *testing.T) { - RunTokenHandlerTests(t, []TokenHandlerTest{ + var a = parsekit.A + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ {"dd", a.RuneRange('b', 'e'), true, "d"}, {"ee", a.RuneRange('b', 'e'), true, "e"}, {"ff", a.RuneRange('b', 'e'), false, ""}, @@ -223,7 +226,8 @@ func TestAtoms(t *testing.T) { } func TestModifiers(t *testing.T) { - RunTokenHandlerTests(t, []TokenHandlerTest{ + var c, a, m = parsekit.C, parsekit.A, parsekit.M + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, @@ -242,21 +246,23 @@ func TestModifiers(t *testing.T) { // follow the correct pattern. Therefore, tokenmakers will panic when the // input cannot be processed successfully. 
func TestTokenMakerErrorHandling(t *testing.T) { + var a, tok = parsekit.A, parsekit.T invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() - parser := parsekit.NewMatcher(invalid, "boolean") - RunPanicTest(t, PanicTest{ - func() { parser.Execute("no") }, + parser := parsekit.NewTokenizer(invalid, "boolean") + parsekit.AssertPanic(t, parsekit.PanicT{ + func() { parser.Execute("no") }, false, `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + - `invalid syntax \(only use a type conversion token maker, when the input has been validated on beforehand\)`, + `invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`, }) } func TestTokenMakers(t *testing.T) { - RunTokenMakerTests(t, []TokenMakerTest{ - {`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)), + var c, a, tok = parsekit.C, parsekit.A, parsekit.T + parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{ + {`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)), []parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}}, - {`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)), + {`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)), []parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}}, {`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)), @@ -313,6 +319,7 @@ func TestTokenMakers(t *testing.T) { } func TestSequenceOfRunes(t *testing.T) { + var c, a = parsekit.C, parsekit.A sequence := c.Seq( a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen, a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash, @@ -340,6 +347,7 @@ func TestSequenceOfRunes(t *testing.T) { // I know, this is hell, but that's the whole point for this test :-> func TestCombination(t *testing.T) { + var c, a, m = parsekit.C, parsekit.A, parsekit.M demonic := c.Seq( c.Opt(a.SquareOpen), m.Trim( @@ 
-360,7 +368,7 @@ func TestCombination(t *testing.T) { c.Opt(a.SquareClose), ) - RunTokenHandlerTests(t, []TokenHandlerTest{ + parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ {"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"}, {"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"}, {">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"}, diff --git a/tokenizer.go b/tokenizer.go new file mode 100644 index 0000000..6eed57a --- /dev/null +++ b/tokenizer.go @@ -0,0 +1,49 @@ +package parsekit + +// Tokenizer is the top-level struct that holds the configuration for +// a parser that is based solely on a TokenHandler function. +// The Tokenizer can be instantiated using the parsekit.NewTokenizer() +// method. +type Tokenizer struct { + parser *Parser + result *TokenResult +} + +// TokenHandler is the function type that is involved in turning a low level +// stream of UTF8 runes into lexical tokens. Its purpose is to check if input +// data matches some kind of pattern and to report back the results. +// +// A TokenHandler function gets a TokenAPI as its input and returns a boolean to +// indicate whether or not it found a match on the input. The TokenAPI is used +// for retrieving input data to match against and for reporting back results. +type TokenHandler func(t *TokenAPI) bool + +// NewTokenizer instantiates a new Tokenizer. +// +// This is a simple wrapper around a TokenHandler function. It can be used to +// match an input string against that TokenHandler function and retrieve the +// results in a straight forward way. +// +// The 'expects' parameter is used for creating an error message in case parsed +// input does not match the TokenHandler. 
+func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer { + tokenizer := &Tokenizer{} + tokenizer.parser = NewParser(func(p *ParseAPI) { + if p.On(tokenHandler).Accept() { + tokenizer.result = p.Result() + p.Stop() + } else { + p.Expects(expects) + p.UnexpectedInput() + } + }) + return tokenizer +} + +// Execute feeds the input to the wrapped TokenHandler function. +// It returns the TokenHandler's TokenResult. When an error occurred +// during parsing, the error will be set, nil otherwise. +func (t *Tokenizer) Execute(input string) (*TokenResult, *Error) { + err := t.parser.Execute(input) + return t.result, err +} diff --git a/tokenizer_test.go b/tokenizer_test.go new file mode 100644 index 0000000..052891d --- /dev/null +++ b/tokenizer_test.go @@ -0,0 +1,257 @@ +package parsekit + +import ( + "io" + "strings" + "testing" + "unicode/utf8" +) + +func TestCallingNextRune_ReturnsNextRune(t *testing.T) { + r, _ := mkInput().NextRune() + AssertEqual(t, 'T', r, "first rune") +} + +func TestInputCanAcceptRunesFromReader(t *testing.T) { + i := mkInput() + i.NextRune() + i.Accept() + i.NextRune() + i.Accept() + i.NextRune() + i.Accept() + AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()") +} + +func TestCallingNextRuneTwice_Panics(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + i.NextRune() + i.NextRune() + }, + Regexp: true, + Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` + + `/.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, + }) +} + +func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { + AssertPanic(t, PanicT{ + Function: mkInput().Accept, + Regexp: true, + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` + + `at /.*/assertions_test\.go:\d+ without first calling NextRune()`, + }) +} + +func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + i.Merge() + }, + Regexp: true, + 
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + + `/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) +} + +func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + f := i.Fork() + i.NextRune() + f.Merge() + }, + Regexp: true, + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + + `/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) +} + +func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + f := i.Fork() + i.Fork() + f.Merge() + }, + Regexp: true, + Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + + `/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) +} + +func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) { + i := mkInput() + f1 := i.Fork() + f2 := f1.Fork() + f3 := f2.Fork() + f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3 + f5 := f4.Fork() + AssertEqual(t, true, i.parent == nil, "i.parent == nil") + AssertEqual(t, true, i.child == f1, "i.child == f1") + AssertEqual(t, true, f1.parent == i, "f1.parent == i") + AssertEqual(t, true, f1.child == f4, "f1.child == f4") + AssertEqual(t, true, f2.child == nil, "f2.child == nil") + AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") + AssertEqual(t, true, f3.child == nil, "f3.child == nil") + AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") + AssertEqual(t, true, f4.parent == f1, "f4.parent == f1") + AssertEqual(t, true, f4.child == f5, "f4.child == f5") + AssertEqual(t, true, f5.parent == f4, "f5.parent == f4") + AssertEqual(t, true, f5.child == nil, "f5.child == nil") + + i.NextRune() + + AssertEqual(t, true, i.parent == nil, "i.parent == nil") + AssertEqual(t, true, i.child == nil, "i.child == nil") + AssertEqual(t, true, f1.parent == nil, "f1.parent == nil") + AssertEqual(t, true, f1.child == nil, "f1.child == 
nil") + AssertEqual(t, true, f2.child == nil, "f2.child == nil") + AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") + AssertEqual(t, true, f3.child == nil, "f3.child == nil") + AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") + AssertEqual(t, true, f4.parent == nil, "f4.parent == nil") + AssertEqual(t, true, f4.child == nil, "f4.child == nil") + AssertEqual(t, true, f5.parent == nil, "f5.parent == nil") + AssertEqual(t, true, f5.child == nil, "f5.child == nil") +} + +func TestForkingInput_ClearsLastRune(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + i.NextRune() + i.Fork() + i.Accept() + }, + Regexp: true, + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` + + `at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, + }) +} + +func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { + i := mkInput() + r, _ := i.NextRune() + AssertEqual(t, 'T', r, "result from 1st call to NextRune()") + AssertTrue(t, i.result.lastRune != nil, "Input.lastRune after NextRune() is not nil") + i.Accept() + AssertTrue(t, i.result.lastRune == nil, "Input.lastRune after Accept() is nil") + AssertEqual(t, 1, i.offset, "Input.offset") + AssertEqual(t, 'T', i.reader.buffer[0], "Input.buffer[0]") + r, _ = i.NextRune() + AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") +} + +func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) { + i := mkInput() + for j := 0; j < 7; j++ { + i.NextRune() + i.Accept() + } + AssertEqual(t, "Testing", string(i.reader.buffer), "reader input buffer") + AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()") +} + +func TestAccept_UpdatesCursor(t *testing.T) { + i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines")) + AssertEqual(t, "start of file", i.cursor.String(), "cursor 1") + for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" + i.NextRune() + i.Accept() + } + AssertEqual(t, "line 1, column 
7", i.cursor.String(), "cursor 2") + i.NextRune() // read "\n", cursor ends up at start of new line + i.Accept() + AssertEqual(t, "line 2, column 1", i.cursor.String(), "cursor 3") + for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" + i.NextRune() + i.Accept() + } + AssertEqual(t, "line 3, column 5", i.cursor.String(), "cursor 4") + AssertEqual(t, *i.cursor, i.Cursor(), "i.Cursor()") +} + +func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { + // Create input, accept the first rune. + i := mkInput() + i.NextRune() + i.Accept() // T + AssertEqual(t, "T", i.Result().String(), "accepted rune in input") + // Fork + f := i.Fork() + AssertEqual(t, f, i.child, "Input.child (must be f)") + AssertEqual(t, i, f.parent, "Input.parent (must be i)") + AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte") + AssertEqual(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte") + // Accept two runes via fork. + f.NextRune() + f.Accept() // e + f.NextRune() + f.Accept() // s + AssertEqual(t, "es", f.Result().String(), "result runes in fork") + AssertEqual(t, 1, i.cursor.Byte, "i.child.cursor.Byte") + AssertEqual(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte") + // Merge fork back into parent + f.Merge() + AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()") + AssertEqual(t, 3, i.cursor.Byte, "i.child.cursor.Byte") +} + +func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { + i := mkInput() + i.NextRune() + i.Accept() + f1 := i.Fork() + f1.NextRune() + f1.Accept() + f2 := f1.Fork() + f2.NextRune() + f2.Accept() + AssertEqual(t, "T", i.Result().String(), "i.Result().String()") + AssertEqual(t, 1, i.offset, "i.offset") + AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()") + AssertEqual(t, 2, f1.offset, "f1.offset") + AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()") + AssertEqual(t, 3, f2.offset, "f2.offset") + f2.Merge() + AssertEqual(t, "T", 
i.Result().String(), "i.Result().String()") + AssertEqual(t, 1, i.offset, "i.offset") + AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()") + AssertEqual(t, 3, f1.offset, "f1.offset") + AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") + AssertEqual(t, 3, f2.offset, "f2.offset") + f1.Merge() + AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()") + AssertEqual(t, 3, i.offset, "i.offset") + AssertEqual(t, "", f1.Result().String(), "f1.Result().String()") + AssertEqual(t, 3, f1.offset, "f1.offset") + AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") + AssertEqual(t, 3, f2.offset, "f2.offset") +} + +func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { + i := NewTokenAPI(strings.NewReader("X")) + i.NextRune() + i.Accept() + r, err := i.NextRune() + AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") + AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") +} +func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { + i := NewTokenAPI(strings.NewReader("X")) + f := i.Fork() + f.NextRune() + f.Accept() + r, err := f.NextRune() + AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") + r, err = i.NextRune() + AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") + AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") +} + +func mkInput() *TokenAPI { + return NewTokenAPI(strings.NewReader("Testing")) +} diff --git a/tokenresult.go b/tokenresult.go new file mode 100644 index 0000000..99f2158 --- /dev/null +++ b/tokenresult.go @@ -0,0 +1,116 @@ +package parsekit + +import ( + "fmt" +) + +// TokenResult holds results as produced by a TokenHandler. 
+type TokenResult struct { + lastRune *runeInfo // Information about the last rune read using NextRune() + runes []rune + tokens []*Token +} + +type runeInfo struct { + r rune + err error +} + +// Token defines a lexical token as produced by TokenHandlers. +type Token struct { + Type interface{} // token type, can be any type that a parser author sees fit + Runes []rune // the runes that make up the token + Value interface{} // an optional value of any type +} + +// newTokenResult initializes an empty result struct. +func newTokenResult() *TokenResult { + return &TokenResult{ + runes: []rune{}, + tokens: []*Token{}, + } +} + +// ClearRunes clears the runes in the TokenResult. +func (r *TokenResult) ClearRunes() { + r.runes = []rune{} +} + +// SetRunes replaces the Runes from the TokenResult with the provided input. +func (r *TokenResult) SetRunes(s interface{}) { + r.ClearRunes() + r.AddRunes(s) +} + +// AddRunes is used to add runes to the TokenResult. +func (r *TokenResult) AddRunes(set ...interface{}) { + for _, s := range set { + switch s := s.(type) { + case string: + r.runes = append(r.runes, []rune(s)...) + case []rune: + r.runes = append(r.runes, s...) + case rune: + r.runes = append(r.runes, s) + default: + panic(fmt.Sprintf("parsekit.TokenResult.SetRunes(): unsupported type '%T' used", s)) + } + } +} + +// Runes retrieves the Runes from the TokenResult. +func (r *TokenResult) Runes() []rune { + return r.runes +} + +// Rune retrieves a single rune from the TokenResult at the specified index. +func (r *TokenResult) Rune(idx int) rune { + return r.runes[idx] +} + +// String returns the Runes from the TokenResult as a string. +func (r *TokenResult) String() string { + return string(r.runes) +} + +// ClearTokens clears the tokens in the TokenResult. +func (r *TokenResult) ClearTokens() { + r.tokens = []*Token{} +} + +// SetTokens replaces the Tokens from the TokenResult with the provided input. 
+func (r *TokenResult) SetTokens(tokens []*Token) { + r.ClearTokens() + for _, t := range tokens { + r.AddToken(t) + } +} + +// AddToken is used to add a Token to the TokenResult. +func (r *TokenResult) AddToken(t *Token) { + r.tokens = append(r.tokens, t) +} + +// Tokens retrieves the Tokens from the TokenResult. +func (r *TokenResult) Tokens() []*Token { + return r.tokens +} + +// Token retrieves a single Token from the TokenResult at the specified index. +func (r *TokenResult) Token(idx int) *Token { + return r.tokens[idx] +} + +// Values retrieves a slice containing only the Values for the TokenResult Tokens. +func (r *TokenResult) Values() []interface{} { + values := make([]interface{}, len(r.tokens)) + for i, tok := range r.tokens { + values[i] = tok.Value + } + return values +} + +// Value retrieves a single Value from the TokenResult Token at the specified index. +func (r *TokenResult) Value(idx int) interface{} { + return r.tokens[idx].Value +} diff --git a/tokenresult_test.go b/tokenresult_test.go new file mode 100644 index 0000000..fc94cef --- /dev/null +++ b/tokenresult_test.go @@ -0,0 +1,25 @@ +package parsekit + +import ( + "testing" +) + +func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { + i := mkInput() + i.Result().SetRunes("string") + AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input") + i.Result().SetRunes([]rune("rune slice")) + AssertEqual(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input") + i.Result().SetRunes('X') + AssertEqual(t, "X", string(i.Result().String()), "i.Result() with rune input") +} + +func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { + AssertPanic(t, PanicT{ + Function: func() { + i := mkInput() + i.Result().SetRunes(1234567) + }, + Expect: "parsekit.TokenResult.SetRunes(): unsupported type 'int' used", + }) +}