From 1f0e0fcc1704deec813dc830d73255b6ed1118e1 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Tue, 11 Jun 2019 22:23:30 +0000 Subject: [PATCH] Splitting up functionality in packages, intermediate step. --- assertions_test.go | 12 +- cursor.go => common/cursor.go | 2 +- cursor_test.go => common/cursor_test.go | 10 +- error.go => common/error.go | 8 +- error_test.go => common/error_test.go | 8 +- examples/example_basiccalculator1_test.go | 12 +- examples/example_basiccalculator2_test.go | 10 +- examples/example_dutchpostcode_test.go | 23 +- examples/example_helloManyStateParser_test.go | 16 +- .../example_helloParserCombinator_test.go | 14 +- .../example_helloSingleStateParser_test.go | 6 +- examples/examples_state_test.go | 6 +- parseapi.go | 49 +-- parser.go | 11 +- parser_test.go | 29 +- {reader => read}/reader.go | 12 +- {reader => read}/reader_test.go | 40 +-- tokenhandler_test.go | 155 ---------- tokenize/assertions_test.go | 127 ++++++++ tokenapi.go => tokenize/tokenapi.go | 39 +-- .../tokenapi_example_test.go | 24 +- tokenizer.go => tokenize/tokenhandler.go | 39 +-- .../tokenhandlerresult.go | 16 +- .../tokenhandlerresult_test.go | 18 +- .../tokenhandlers_builtin.go | 26 +- .../tokenhandlers_builtin_test.go | 132 ++++---- tokenize/tokenizer.go | 34 ++ tokenize/tokenizer_test.go | 179 +++++++++++ tokenize/tokenizer_unexported_test.go | 125 ++++++++ tokenizer_test.go | 291 ------------------ 30 files changed, 727 insertions(+), 746 deletions(-) rename cursor.go => common/cursor.go (98%) rename cursor_test.go => common/cursor_test.go (93%) rename error.go => common/error.go (82%) rename error_test.go => common/error_test.go (61%) rename {reader => read}/reader.go (94%) rename {reader => read}/reader_test.go (88%) delete mode 100644 tokenhandler_test.go create mode 100644 tokenize/assertions_test.go rename tokenapi.go => tokenize/tokenapi.go (88%) rename tokenapi_example_test.go => tokenize/tokenapi_example_test.go (69%) rename tokenizer.go => tokenize/tokenhandler.go (58%) rename tokenhandlerresult.go => tokenize/tokenhandlerresult.go (89%) rename tokenresult_test.go => tokenize/tokenhandlerresult_test.go (72%) rename tokenhandlers_builtin.go => tokenize/tokenhandlers_builtin.go (97%) rename tokenhandlers_builtin_test.go => tokenize/tokenhandlers_builtin_test.go (81%) create mode 100644 tokenize/tokenizer.go create mode 100644 tokenize/tokenizer_test.go create mode 100644 tokenize/tokenizer_unexported_test.go delete mode 100644 tokenizer_test.go diff --git a/assertions_test.go b/assertions_test.go index 76a9bd1..7d9a365 100644 --- a/assertions_test.go +++ b/assertions_test.go @@ -5,6 +5,8 @@ package parsekit import ( "regexp" "testing" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) { @@ -63,7 +65,7 @@ func AssertPanic(t *testing.T, p PanicT) { type TokenHandlerT struct { Input string - TokenHandler TokenHandler + TokenHandler tokenize.TokenHandler MustMatch bool Expected string } @@ -75,7 +77,7 @@ func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { } func AssertTokenHandler(t *testing.T, test TokenHandlerT) { - result, err := NewTokenizer(test.TokenHandler).Execute(test.Input) + result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) if test.MustMatch { if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) @@ -91,8 +93,8 @@ func AssertTokenHandler(t *testing.T, test TokenHandlerT) { type TokenMakerT struct { Input string - TokenHandler 
TokenHandler - Expected []Token + TokenHandler tokenize.TokenHandler + Expected []tokenize.Token } func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { @@ -102,7 +104,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { } func AssertTokenMaker(t *testing.T, test TokenMakerT) { - result, err := NewTokenizer(test.TokenHandler).Execute(test.Input) + result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) } else { diff --git a/cursor.go b/common/cursor.go similarity index 98% rename from cursor.go rename to common/cursor.go index 01a424f..77e8c8e 100644 --- a/cursor.go +++ b/common/cursor.go @@ -1,4 +1,4 @@ -package parsekit +package common import "fmt" diff --git a/cursor_test.go b/common/cursor_test.go similarity index 93% rename from cursor_test.go rename to common/cursor_test.go index ce52048..ff29405 100644 --- a/cursor_test.go +++ b/common/cursor_test.go @@ -1,14 +1,14 @@ -package parsekit_test +package common_test import ( "fmt" "testing" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" ) func ExampleCursor_Move() { - c := &parsekit.Cursor{} + c := &common.Cursor{} fmt.Printf("after initialization : %s\n", c) fmt.Printf("after 'some words' : %s\n", c.Move("some words")) fmt.Printf("after '\\n' : %s\n", c.Move("\n")) @@ -22,7 +22,7 @@ func ExampleCursor_Move() { } func ExampleCursor_String() { - c := &parsekit.Cursor{} + c := &common.Cursor{} fmt.Println(c.String()) c.Move("\nfoobar") @@ -51,7 +51,7 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, } { - c := parsekit.Cursor{} + c := common.Cursor{} for _, s := range test.input { c.Move(s) } diff --git a/error.go b/common/error.go similarity index 82% rename from error.go rename to common/error.go index eb1ddf5..de5c415 100644 --- a/error.go +++ b/common/error.go @@ -1,4 +1,4 @@ -package parsekit +package common import ( "fmt" @@ -16,12 +16,12 @@ type Error struct { func (err *Error) Error() string { if err == nil { - callerPanic(1, "parsekit.Error.Error(): method called with nil error at {caller}") + CallerPanic(1, "common.Error.Error(): method called with nil error at {caller}") } return fmt.Sprintf("%s at %s", err.Message, err.Cursor) } -func callerFunc(depth int) string { +func CallerFunc(depth int) string { // No error handling, because we call this method ourselves with safe depth values. pc, _, _, _ := runtime.Caller(depth + 1) caller := runtime.FuncForPC(pc) @@ -36,7 +36,7 @@ func callerFilepos(depth int) string { return fmt.Sprintf("%s:%d", file, line) } -func callerPanic(depth int, f string, args ...interface{}) { +func CallerPanic(depth int, f string, args ...interface{}) { filepos := callerFilepos(depth + 1) m := fmt.Sprintf(f, args...) 
m = strings.Replace(m, "{caller}", filepos, 1) diff --git a/error_test.go b/common/error_test.go similarity index 61% rename from error_test.go rename to common/error_test.go index 697b6fd..28107f7 100644 --- a/error_test.go +++ b/common/error_test.go @@ -1,15 +1,15 @@ -package parsekit_test +package common_test import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" ) func ExampleError() { - err := &parsekit.Error{ + err := &common.Error{ Message: "it broke down", - Cursor: parsekit.Cursor{Line: 9, Column: 41}, + Cursor: common.Cursor{Line: 9, Column: 41}, } fmt.Println(err.Error()) diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index 91edab2..896f1fb 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -11,6 +11,8 @@ import ( "fmt" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_basicCalculator1() { @@ -54,7 +56,7 @@ func Example_basicCalculator1() { // ComputeSimple interprets a simple calculation, consisting of only integers // and add or subtract operators. It returns the result of the calculation. // An error is returned in case the calculation failed. -func ComputeSimple(calculation string) (int64, *parsekit.Error) { +func ComputeSimple(calculation string) (int64, *common.Error) { calculator := &simpleCalculator{op: +1} parser := parsekit.NewParser(calculator.number) err := parser.Execute(calculation) @@ -70,9 +72,9 @@ type simpleCalculator struct { } // A definition of an int64, which conveniently drops surrounding blanks. -var dropBlank = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Blanks)) -var bareInteger = parsekit.C.Seq(dropBlank, parsekit.A.Integer, dropBlank) -var int64Token = parsekit.T.Int64(nil, bareInteger) +var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks)) +var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank) +var int64Token = tokenize.T.Int64(nil, bareInteger) func (c *simpleCalculator) number(p *parsekit.ParseAPI) { if p.Accept(int64Token) { @@ -84,7 +86,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) { } func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { - var A = parsekit.A + var A = tokenize.A switch { case p.Accept(A.Add): c.op = +1 diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index 2a5dc7d..1ea5fda 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -17,6 +17,8 @@ import ( "math" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_basicCalculator2() { @@ -75,7 +77,7 @@ type calculator struct { // Compute takes a calculation string as input and returns the interpreted result // value for the calculation. An error can be returned as well, in case the // calculation fails for some reason. 
-func Compute(input string) (float64, *parsekit.Error) { +func Compute(input string) (float64, *common.Error) { calc := &calculator{} parser := parsekit.NewParser(calc.calculation) err := parser.Execute(input) @@ -94,7 +96,7 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) { func (calc *calculator) expr(p *parsekit.ParseAPI) { calc.interpreter.push() - var A = parsekit.A + var A = tokenize.A if p.Handle(calc.term) { for p.Accept(A.Add.Or(A.Subtract)) { op := p.Result().Rune(0) @@ -112,7 +114,7 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) { func (calc *calculator) term(p *parsekit.ParseAPI) { calc.interpreter.push() - var A = parsekit.A + var A = tokenize.A if p.Handle(calc.factor) { for p.Accept(A.Multiply.Or(A.Divide)) { op := p.Result().Rune(0) @@ -129,7 +131,7 @@ func (calc *calculator) term(p *parsekit.ParseAPI) { // = ( (SPACE|TAB) | "") // = (FLOAT | LPAREN RPAREN) func (calc *calculator) factor(p *parsekit.ParseAPI) { - var A, T = parsekit.A, parsekit.T + var A, T = tokenize.A, tokenize.T p.Accept(A.Blanks) switch { case p.Accept(T.Float64(nil, A.Signed(A.Float))): diff --git a/examples/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go index f8e5923..65bc810 100644 --- a/examples/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -7,7 +7,7 @@ package examples import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_dutchPostcodeUsingTokenizer() { @@ -40,20 +40,20 @@ func Example_dutchPostcodeUsingTokenizer() { // [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB) // [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB) // [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB) - // [4] Input: "1212abc" Error: unexpected input at start of file - // [5] Input: "1234" Error: unexpected input at start of file - // [6] Input: "huh" Error: unexpected input at start of file - // [7] Input: "" Error: unexpected end of file at start of file - // [8] Input: "\xcd2222AB" Error: unexpected input at start of file + // [4] Input: "1212abc" Error: mismatch at start of file + // [5] Input: "1234" Error: mismatch at start of file + // [6] Input: "huh" Error: mismatch at start of file + // [7] Input: "" Error: mismatch at start of file + // [8] Input: "\xcd2222AB" Error: mismatch at start of file } // --------------------------------------------------------------------------- // Implementation of the parser // --------------------------------------------------------------------------- -func createPostcodeTokenizer() *parsekit.Tokenizer { +func createPostcodeTokenizer() *tokenize.Tokenizer { // Easy access to the parsekit definitions. - C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T + C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T // TokenHandler functions are created and combined to satisfy these rules: // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). @@ -61,14 +61,13 @@ func createPostcodeTokenizer() *parsekit.Tokenizer { // - A space between letters and digits is optional. // - It is good form to write the letters in upper case. // - It is good form to use a single space between digits and letters. 
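 	//
 	// Mapping the rules onto the handlers defined below (a sketch, not part
 	// of this change): pcDigits accepts "2233" but rejects "0233" because of
 	// the no-leading-zero rule, and space turns any optional run of blanks
 	// (even none at all) into a single " " in the output.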
- digitNotZero := C.Except(A.Rune('0'), A.Digit) - pcDigits := C.Seq(digitNotZero, A.Digit.Times(3)) + pcDigits := A.DigitNotZero.Then(A.Digit.Times(3)) pcLetter := A.ASCIILower.Or(A.ASCIIUpper) pcLetters := M.ToUpper(pcLetter.Times(2)) - space := M.Replace(C.Opt(A.Blanks), " ") + space := M.Replace(A.Blanks.Optional(), " ") postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile) // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows // us to match some input against that handler. - return parsekit.NewTokenizer(postcode) + return tokenize.NewTokenizer(postcode) } diff --git a/examples/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go index c019ba6..eaaddb7 100644 --- a/examples/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -20,6 +20,8 @@ import ( "strings" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_helloWorldUsingParser1() { @@ -74,14 +76,14 @@ type helloparser1 struct { greetee string } -func (h *helloparser1) Parse(input string) (string, *parsekit.Error) { +func (h *helloparser1) Parse(input string) (string, *common.Error) { parser := parsekit.NewParser(h.start) err := parser.Execute(input) return h.greetee, err } func (h *helloparser1) start(p *parsekit.ParseAPI) { - a := parsekit.A + a := tokenize.A if p.Accept(a.StrNoCase("hello")) { p.Handle(h.comma) } else { @@ -90,7 +92,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) { } func (h *helloparser1) comma(p *parsekit.ParseAPI) { - a := parsekit.A + a := tokenize.A switch { case p.Accept(a.Blanks): p.Handle(h.comma) @@ -102,7 +104,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) { } func (h *helloparser1) startName(p *parsekit.ParseAPI) { - a := parsekit.A + a := tokenize.A p.Accept(a.Blanks) if p.Peek(a.AnyRune) { p.Handle(h.name) @@ -112,7 +114,7 @@ func (h *helloparser1) startName(p *parsekit.ParseAPI) { } func (h *helloparser1) name(p *parsekit.ParseAPI) { - a := parsekit.A + a := tokenize.A switch { case p.Peek(a.Excl): p.Handle(h.exclamation) @@ -125,7 +127,7 @@ func (h *helloparser1) name(p *parsekit.ParseAPI) { } func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { - a := parsekit.A + a := tokenize.A if p.Accept(a.Excl) { p.Handle(h.end) } else { @@ -137,7 +139,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { // different route was taken to implement a more friendly 'end of greeting' // error message. func (h *helloparser1) end(p *parsekit.ParseAPI) { - var a = parsekit.A + var a = tokenize.A if !p.Accept(a.EndOfFile) { p.Expected("end of greeting") return diff --git a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index 59d3631..f7b295d 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -10,7 +10,7 @@ package examples import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_helloWorldUsingTokenizer() { @@ -37,18 +37,18 @@ func Example_helloWorldUsingTokenizer() { // [1] Input: "HELLO ,Johnny!" Output: Johnny // [2] Input: "hello , Bob123!" Output: Bob123 // [3] Input: "hello Pizza!" Output: Pizza - // [4] Input: "Oh no!" Error: unexpected input at start of file - // [5] Input: "Hello, world" Error: unexpected input at start of file - // [6] Input: "Hello,!" 
Error: unexpected input at start of file + // [4] Input: "Oh no!" Error: mismatch at start of file + // [5] Input: "Hello, world" Error: mismatch at start of file + // [6] Input: "Hello,!" Error: mismatch at start of file } // --------------------------------------------------------------------------- // Implementation of the parser // --------------------------------------------------------------------------- -func createHelloTokenizer() *parsekit.Tokenizer { +func createHelloTokenizer() *tokenize.Tokenizer { // Easy access to parsekit definition. - c, a, m := parsekit.C, parsekit.A, parsekit.M + c, a, m := tokenize.C, tokenize.A, tokenize.M // Using the parser/combinator support of parsekit, we create a TokenHandler function // that does all the work. The 'greeting' TokenHandler matches the whole input and @@ -65,5 +65,5 @@ func createHelloTokenizer() *parsekit.Tokenizer { // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows // us to match some input against that handler. - return parsekit.NewTokenizer(greeting) + return tokenize.NewTokenizer(greeting) } diff --git a/examples/example_helloSingleStateParser_test.go b/examples/example_helloSingleStateParser_test.go index b889caa..94b9d81 100644 --- a/examples/example_helloSingleStateParser_test.go +++ b/examples/example_helloSingleStateParser_test.go @@ -17,6 +17,8 @@ import ( "fmt" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_helloWorldUsingParser2() { @@ -72,14 +74,14 @@ type helloparser2 struct { greetee string } -func (h *helloparser2) Parse(input string) (string, *parsekit.Error) { +func (h *helloparser2) Parse(input string) (string, *common.Error) { parser := parsekit.NewParser(h.start) err := parser.Execute(input) return h.greetee, err } func (h *helloparser2) start(p *parsekit.ParseAPI) { - c, a, m := parsekit.C, parsekit.A, parsekit.M + c, a, m := tokenize.C, tokenize.A, tokenize.M if !p.Accept(a.StrNoCase("hello")) { p.Error("the greeting is not being friendly") return diff --git a/examples/examples_state_test.go b/examples/examples_state_test.go index c54b5c1..dcc6b9b 100644 --- a/examples/examples_state_test.go +++ b/examples/examples_state_test.go @@ -11,12 +11,14 @@ import ( "fmt" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) type Chunks []string -func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error { - c, a := parsekit.C, parsekit.A +func (l *Chunks) AddChopped(s string, chunkSize int) *common.Error { + c, a := tokenize.C, tokenize.A chunkOfRunes := c.MinMax(1, chunkSize, a.AnyRune) parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { diff --git a/parseapi.go b/parseapi.go index 9407f5d..c7cc242 100644 --- a/parseapi.go +++ b/parseapi.go @@ -3,16 +3,19 @@ package parsekit import ( "fmt" "io" + + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) // ParseAPI holds the internal state of a parse run and provides an API that // ParseHandler methods can use to communicate with the parser. 
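 //
 // A minimal sketch of how a ParseHandler typically drives this API (not part
 // of this change; it only uses ParseAPI methods defined in this file and the
 // tokenize shorthands introduced by this patch):
 //
 //	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
 //		if p.Accept(tokenize.A.Digits) {
 //			fmt.Println(p.Result().String())
 //			p.ExpectEndOfFile()
 //			return
 //		}
 //		p.Expected("digits")
 //	})
 //	err := parser.Execute("12345") // prints "12345"; err is nil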
type ParseAPI struct { - tokenAPI *TokenAPI // the TokenAPI, used for communicating with TokenHandler functions - loopCheck map[string]bool // used for parser loop detection - result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept() - err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored - stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored + tokenAPI *tokenize.TokenAPI // the TokenAPI, used for communicating with TokenHandler functions + loopCheck map[string]bool // used for parser loop detection + result *tokenize.TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept() + err *common.Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored + stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored } // Peek checks if the upcoming input data matches the provided TokenHandler. @@ -22,13 +25,13 @@ type ParseAPI struct { // // After calling this method, you can retrieve the produced TokenHandlerResult // using the ParseAPI.Result() method. -func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool { +func (p *ParseAPI) Peek(tokenHandler tokenize.TokenHandler) bool { p.result = nil forkedTokenAPI, ok := p.invokeTokenHandler("Peek", tokenHandler) if ok { p.result = forkedTokenAPI.Result() - p.tokenAPI.clearResults() - p.tokenAPI.detachChilds() + p.tokenAPI.ClearResults() + p.tokenAPI.DetachChilds() } return ok } @@ -39,29 +42,29 @@ func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool { // // After calling this method, you can retrieve the produced TokenHandlerResult // using the ParseAPI.Result() method. -func (p *ParseAPI) Accept(tokenHandler TokenHandler) bool { +func (p *ParseAPI) Accept(tokenHandler tokenize.TokenHandler) bool { p.result = nil forkedTokenAPI, ok := p.invokeTokenHandler("Accept", tokenHandler) if ok { forkedTokenAPI.Merge() p.result = p.tokenAPI.Result() - p.tokenAPI.detachChilds() - if p.tokenAPI.flushReader() { + p.tokenAPI.DetachChilds() + if p.tokenAPI.FlushReader() { p.initLoopCheck() } } return ok } -func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler TokenHandler) (*TokenAPI, bool) { +func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler tokenize.TokenHandler) (*tokenize.TokenAPI, bool) { p.panicWhenStoppedOrInError() p.checkForLoops() if tokenHandler == nil { - callerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name) + common.CallerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name) } p.result = nil - p.tokenAPI.clearResults() + p.tokenAPI.ClearResults() child := p.tokenAPI.Fork() ok := tokenHandler(child) @@ -80,14 +83,14 @@ func (p *ParseAPI) panicWhenStoppedOrInError() { return } - called := callerFunc(1) + called := common.CallerFunc(1) after := "Error()" if p.stopped { after = "Stop()" } - callerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+ + common.CallerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+ "no calls allowed after ParseAPI.%s", called, called, after) } @@ -100,9 +103,9 @@ func (p *ParseAPI) initLoopCheck() { } func (p *ParseAPI) checkForLoops() { - filepos := callerFilepos(3) + filepos := common.CallerFilePos(3) if _, ok := p.loopCheck[filepos]; ok { - callerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}") + common.CallerPanic(3, "parsekit.ParseAPI: Loop detected in parser at 
{caller}") } p.loopCheck[filepos] = true } @@ -112,10 +115,10 @@ func (p *ParseAPI) checkForLoops() { // // When Result() is called without first doing a Peek() or Accept(), then no // result will be available and the method will panic. -func (p *ParseAPI) Result() *TokenHandlerResult { +func (p *ParseAPI) Result() *tokenize.TokenHandlerResult { result := p.result if p.result == nil { - callerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+ + common.CallerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+ "at {caller} without calling ParseAPI.Peek() or ParseAPI.Accept() on beforehand") } return result @@ -136,7 +139,7 @@ func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { if parseHandler == nil { - callerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}") + common.CallerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}") } } @@ -164,7 +167,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) { // No call to p.panicWhenStoppedOrInError(), to allow a parser to // set a different error message when needed. message := fmt.Sprintf(format, args...) - p.err = &Error{message, *p.tokenAPI.result.cursor} + p.err = &common.Error{message, *p.tokenAPI.Result().Cursor()} } // ExpectEndOfFile can be used to check if the input is at end of file. @@ -175,7 +178,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) { // as the expectation. func (p *ParseAPI) ExpectEndOfFile() { p.panicWhenStoppedOrInError() - if p.Peek(A.EndOfFile) { + if p.Peek(tokenize.A.EndOfFile) { p.Stop() } else { p.Expected("end of file") diff --git a/parser.go b/parser.go index 903df2b..877b6fa 100644 --- a/parser.go +++ b/parser.go @@ -1,5 +1,10 @@ package parsekit +import ( + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + // Parser is the top-level struct that holds the configuration for a parser. // The Parser can be instantiated using the parsekit.NewParser() method. type Parser struct { @@ -27,7 +32,7 @@ type ParseHandler func(*ParseAPI) // To parse input data, use the method Parser.Execute(). func NewParser(startHandler ParseHandler) *Parser { if startHandler == nil { - callerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}") + common.CallerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}") } return &Parser{startHandler: startHandler} } @@ -36,9 +41,9 @@ func NewParser(startHandler ParseHandler) *Parser { // For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). // // When an error occurs during parsing, then this error is returned, nil otherwise. -func (p *Parser) Execute(input interface{}) *Error { +func (p *Parser) Execute(input interface{}) *common.Error { api := &ParseAPI{ - tokenAPI: NewTokenAPI(input), + tokenAPI: tokenize.NewTokenAPI(input), loopCheck: map[string]bool{}, } if api.Handle(p.startHandler) { diff --git a/parser_test.go b/parser_test.go index 1f3acba..cb32552 100644 --- a/parser_test.go +++ b/parser_test.go @@ -5,11 +5,12 @@ import ( "testing" "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func ExampleParser_usingAcceptedRunes() { // Easy access to the parsekit definitions. 
- a := parsekit.A + a := tokenize.A matches := []string{} @@ -28,7 +29,7 @@ func ExampleParser_usingAcceptedRunes() { func ExampleParser_usingTokens() { // Easy access to the parsekit definitions. - c, a, tok := parsekit.C, parsekit.A, parsekit.T + c, a, tok := tokenize.C, tokenize.A, tokenize.T parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) { @@ -60,7 +61,7 @@ func ExampleParseAPI_Accept_inIfStatement() { // When a case-insensitive match on "Yowza!" is found by the // tokenizer, then Accept() will make the result available // through ParseAPI.Result() - if p.Accept(parsekit.A.StrNoCase("Yowza!")) { + if p.Accept(tokenize.A.StrNoCase("Yowza!")) { // Result.String() returns a string containing all // accepted runes that were matched against. fmt.Println(p.Result().String()) @@ -77,9 +78,9 @@ func ExampleParseAPI_Accept_inSwitchStatement() { parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { for loop := true; loop; { switch { - case p.Accept(parsekit.A.Rune('X')): + case p.Accept(tokenize.A.Rune('X')): // NOOP, skip this rune - case p.Accept(parsekit.A.AnyRune): + case p.Accept(tokenize.A.AnyRune): result += p.Result().String() default: loop = false @@ -94,7 +95,7 @@ func ExampleParseAPI_Accept_inSwitchStatement() { } func ExampleParseAPI_Stop() { - C, A := parsekit.C, parsekit.A + C, A := tokenize.C, tokenize.A parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { fmt.Printf("First word: ") @@ -110,7 +111,7 @@ func ExampleParseAPI_Stop() { } func ExampleParseAPI_Stop_notCalledAndNoInputPending() { - C, A := parsekit.C, parsekit.A + C, A := tokenize.C, tokenize.A parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { fmt.Printf("Word: ") @@ -128,7 +129,7 @@ func ExampleParseAPI_Stop_notCalledAndNoInputPending() { } func ExampleParseAPI_Stop_notCalledButInputPending() { - C, A := parsekit.C, parsekit.A + C, A := tokenize.C, tokenize.A parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { fmt.Printf("First word: ") @@ -147,7 +148,7 @@ func ExampleParseAPI_Stop_notCalledButInputPending() { func ExampleParseAPI_Peek() { // Definition of a fantasy serial number format. - C, A := parsekit.C, parsekit.A + C, A := tokenize.C, tokenize.A serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits) // This handler is able to handle serial numbers. @@ -160,7 +161,7 @@ func ExampleParseAPI_Peek() { // Start could function as a sort of dispatcher, handing over // control to the correct ParseHandler function, based on the input. start := func(p *parsekit.ParseAPI) { - if p.Peek(parsekit.A.Asterisk) { + if p.Peek(tokenize.A.Asterisk) { p.Handle(serialnrHandler) return } @@ -275,12 +276,12 @@ type parserWithLoop struct { } func (l *parserWithLoop) first(p *parsekit.ParseAPI) { - p.Accept(parsekit.A.ASCII) + p.Accept(tokenize.A.ASCII) p.Handle(l.second) } func (l *parserWithLoop) second(p *parsekit.ParseAPI) { - p.Accept(parsekit.A.ASCII) + p.Accept(tokenize.A.ASCII) p.Handle(l.third) } @@ -289,7 +290,7 @@ func (l *parserWithLoop) third(p *parsekit.ParseAPI) { p.Error("Loop not detected by parsekit") return } - p.Accept(parsekit.A.ASCII) + p.Accept(tokenize.A.ASCII) p.Handle(l.first) } @@ -316,7 +317,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { // // Now the loop stops when the parser finds no more matching input data. 
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
-	var c, a = parsekit.C, parsekit.A
+	var c, a = tokenize.C, tokenize.A
 	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
 		for p.Accept(c.Max(5, a.AnyRune)) {
 		}
diff --git a/reader/reader.go b/read/reader.go
similarity index 94%
rename from reader/reader.go
rename to read/reader.go
index b80063a..6d9afd8 100644
--- a/reader/reader.go
+++ b/read/reader.go
@@ -1,4 +1,4 @@
-// Package reader provides a buffered Reader that wraps around an io.Reader.
+// Package read provides a buffered Reader that wraps around an io.Reader.
 //
 // Functionally, it provides an input buffer in the form of a sliding window.
 // Let's say we've got the following input coming up in the io.Reader that is
@@ -37,7 +37,7 @@
 //
 // So after a flush, the first upcoming rune after the flushed runes
 // will always be at offset 0.
-package reader
+package read
 
 import (
 	"bufio"
@@ -57,7 +57,7 @@ import (
 // To minimize memory use, it is also possible to flush the read buffer when there is
 // no more need to go back to previously read runes.
 //
-// The parserkit.reader.Reader is used internally by parsekit.TokenAPI.
+// The parsekit.read.Reader is used internally by tokenize.TokenAPI.
 type Reader struct {
 	bufio  *bufio.Reader // Used for ReadRune()
 	buffer []rune        // Input buffer, holding runes that were read from input
@@ -89,7 +89,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
 	case string:
 		return bufio.NewReader(strings.NewReader(input))
 	default:
-		panic(fmt.Sprintf("parsekit.reader.New(): no support for input of type %T", input))
+		panic(fmt.Sprintf("parsekit.read.New(): no support for input of type %T", input))
 	}
 }
 
@@ -153,7 +153,7 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
 const smallBufferSize = 64
 
 // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
-var ErrTooLarge = errors.New("parsekit.reader: too large")
+var ErrTooLarge = errors.New("parsekit.read: too large")
 
 // grow grows the buffer to guarantee space for n more bytes.
 // It returns the index where bytes should be written.
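A short standalone sketch (not part of this patch; it assumes only the read package as renamed in this change) shows the sliding window from the package documentation above end to end:

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/read"
	)

	func main() {
		r := read.New(strings.NewReader("parsekit"))
		c, _ := r.RuneAt(4)   // 'e'; offsets 0..4 are now buffered
		fmt.Printf("%c", c)
		r.Flush(5)            // drop "parse" from the buffer
		c, _ = r.RuneAt(0)    // offset 0 now maps to 'k'
		fmt.Printf("%c\n", c) // prints: ek
	}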
@@ -200,7 +200,7 @@ func makeSlice(n int) []rune { func (r *Reader) Flush(numberOfRunes int) { if numberOfRunes > len(r.buffer) { panic(fmt.Sprintf( - "parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+ + "parsekit.read.Reader.Flush(): number of runes to flush (%d) "+ "exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer))) } r.buffer = r.buffer[numberOfRunes:] diff --git a/reader/reader_test.go b/read/reader_test.go similarity index 88% rename from reader/reader_test.go rename to read/reader_test.go index d32977a..28f9c45 100644 --- a/reader/reader_test.go +++ b/read/reader_test.go @@ -1,4 +1,4 @@ -package reader_test +package read_test import ( "bufio" @@ -8,12 +8,12 @@ import ( "testing" "unicode/utf8" - "git.makaay.nl/mauricem/go-parsekit/reader" + "git.makaay.nl/mauricem/go-parsekit/read" "github.com/stretchr/testify/assert" ) func ExampleNew() { - r := reader.New(strings.NewReader("Hello, world!")) + r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } fmt.Printf("%c", at(0)) @@ -33,7 +33,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { {"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))}, {"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))}, } { - r := reader.New(test.input) + r := read.New(test.input) firstRune, _ := r.RuneAt(0) if firstRune != 'H' { t.Errorf("[%s] first rune not 'H'", test.name) @@ -47,12 +47,12 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { func TestNew_UnhandledInputType_Panics(t *testing.T) { assert.PanicsWithValue(t, - "parsekit.reader.New(): no support for input of type int", - func() { reader.New(12345) }) + "parsekit.read.New(): no support for input of type int", + func() { read.New(12345) }) } func TestReader_RuneAt(t *testing.T) { - r := reader.New(strings.NewReader("Hello, world!")) + r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // It is possible to go back and forth while reading the input. 
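RuneAt() reports end of input through its error return value rather than through a sentinel rune, and as the tests below show, reading past the end is safe to repeat. A fragment under the same assumptions as the sketch above:

	r := read.New(strings.NewReader("hi"))
	if _, err := r.RuneAt(2); err == io.EOF {
		fmt.Println("end of input") // offset 2 is one past the last rune
	}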
@@ -61,7 +61,7 @@
 }
 
 func TestReader_RuneAt_endOfFile(t *testing.T) {
-	r := reader.New(strings.NewReader("Hello, world!"))
+	r := read.New(strings.NewReader("Hello, world!"))
 
 	rn, err := r.RuneAt(13)
 	result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
@@ -73,7 +73,7 @@
 }
 
 func TestReader_RuneAt_invalidRune(t *testing.T) {
-	r := reader.New(strings.NewReader("Hello, \xcdworld!"))
+	r := read.New(strings.NewReader("Hello, \xcdworld!"))
 	at := func(i int) rune { r, _ := r.RuneAt(i); return r }
 
 	result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
@@ -81,7 +81,7 @@
 }
 
 func ExampleReader_RuneAt() {
-	reader := reader.New(strings.NewReader("Hello, world!"))
+	reader := read.New(strings.NewReader("Hello, world!"))
 
 	fmt.Printf("Runes: ")
 	for i := 0; ; i++ {
@@ -99,7 +99,7 @@
 }
 
 func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
-	r := reader.New(strings.NewReader("\uFEFFBommetje!"))
+	r := read.New(strings.NewReader("\uFEFFBommetje!"))
 	b, _ := r.RuneAt(0)
 	o, _ := r.RuneAt(1)
 	m, _ := r.RuneAt(2)
@@ -108,7 +108,7 @@
 }
 
 func TestReader_Flush(t *testing.T) {
-	r := reader.New(strings.NewReader("Hello, world!"))
+	r := read.New(strings.NewReader("Hello, world!"))
 	at := func(i int) rune { r, _ := r.RuneAt(i); return r }
 
 	// Fills the buffer with the first 8 runes on the input: "Hello, w"
@@ -125,7 +125,7 @@
 }
 
 func ExampleReader_Flush() {
-	r := reader.New(strings.NewReader("dog eat dog!"))
+	r := read.New(strings.NewReader("dog eat dog!"))
 	at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
 
 	// Read from the first 4 runes of the input.
@@ -148,20 +148,20 @@
 }
 
 func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
-	r := reader.New(strings.NewReader("Hello, world!"))
+	r := read.New(strings.NewReader("Hello, world!"))
 
 	// Fill buffer with "Hello, worl", the first 11 runes.
 	r.RuneAt(10)
 
 	// However, we flush 12 runes, which exceeds the buffer size.
 	assert.PanicsWithValue(t,
-		"parsekit.Input.Reader.Flush(): number of runes to flush "+
+		"parsekit.read.Reader.Flush(): number of runes to flush "+
 			"(12) exceeds size of the buffer (11)",
 		func() { r.Flush(12) })
 }
 
 func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
-	r := reader.New(strings.NewReader("Hello, world!"))
+	r := read.New(strings.NewReader("Hello, world!"))
 
 	_, err := r.RuneAt(13)
 	assert.Equal(t, err.Error(), "EOF")
 	_, err = r.RuneAt(13)
@@ -188,7 +188,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
 			io.ErrUnexpectedEOF, // This error must never pop up in the tests below.
 		},
 	}
-	r := reader.New(input)
+	r := read.New(input)
 
 	// Read the last available rune.
readRune, _ := r.RuneAt(3) @@ -233,7 +233,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) { func TestInputLargerThanDefaultBufSize64(t *testing.T) { input, size := makeLargeStubReader() - r := reader.New(input) + r := read.New(input) readRune, err := r.RuneAt(0) assert.Equal(t, 'X', readRune) @@ -247,7 +247,7 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) { func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) { input, size := makeLargeStubReader() - r := reader.New(input) + r := read.New(input) readRune, _ := r.RuneAt(size - 200) assert.Equal(t, 'X', readRune) @@ -257,7 +257,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) { input, size := makeLargeStubReader() - r := reader.New(input) + r := read.New(input) readRune, _ := r.RuneAt(size - 1) assert.Equal(t, 'Y', readRune) diff --git a/tokenhandler_test.go b/tokenhandler_test.go deleted file mode 100644 index e597c4b..0000000 --- a/tokenhandler_test.go +++ /dev/null @@ -1,155 +0,0 @@ -package parsekit_test - -import ( - "testing" - - "git.makaay.nl/mauricem/go-parsekit" -) - -func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) { - parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { - for i := 0; i < 20; i++ { - t.NextRune() - t.Accept() - } - return true - }) - result, _ := parser.Execute("This is some random data to parse") - if result.String() != "This is some random " { - t.Fatalf("Got unexpected output from TokenHandler: %s", result.String()) - } -} - -func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { - parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { - t.Result().AddToken(&parsekit.Token{ - Type: "PI", - Runes: []rune("π"), - Value: 3.1415, - }) - t.Result().AddToken(&parsekit.Token{ - Type: nil, - Runes: []rune("yes"), - Value: true, - }) - return true - }) - result, _ := parser.Execute("doesn't matter") - if len(result.Tokens()) != 2 { - t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens())) - } - if result.Token(0).Value != 3.1415 { - t.Fatal("Token 0 value not 3.1415") - } - if string(result.Token(0).Runes) != "π" { - t.Fatal("Token 0 runes not \"π\"") - } - if result.Token(0).Type != "PI" { - t.Fatal("Token 0 type not \"PI\"") - } - if result.Token(1).Value != true { - t.Fatal("Token 1 value not true") - } - if string(result.Token(1).Runes) != "yes" { - t.Fatal("Token 1 runes not \"yes\"") - } - if result.Token(1).Type != nil { - t.Fatal("Token 1 type not nil") - } -} - -func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) { - var tok, c, a = parsekit.T, parsekit.C, parsekit.A - fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII)) - parser := parsekit.NewTokenizer(fooToken) - input := "This is fine ASCII Åltho hère öt endĩt!" 
- result, err := parser.Execute(input) - - if err != nil { - t.Fatalf("Unexpected error from parser: %s", err) - } - if result.String() != "This is fine ASCII " { - t.Fatalf("result.String() contains unexpected data: %s", result.String()) - } -} - -func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { - var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A - ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))) - utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))) - stars := m.Drop(c.ZeroOrMore(a.Asterisk)) - fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars) - parser := parsekit.NewTokenizer(fooToken) - - input := "*** This is fine ASCII Åltho hère öt endĩt! ***" - output := "This is fine ASCIIÅltho hère öt endĩt!" - result, err := parser.Execute(input) - - if err != nil { - t.Fatalf("Unexpected error from parser: %s", err) - } - if result.String() != output { - t.Fatalf("result.String() contains unexpected data: %s", result.String()) - } - if result.Token(0).Type != "COMBI" { - t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type) - } - if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" { - t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value) - } - if result.Token(1).Value != "This is fine ASCII" { - t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value) - } - if result.Token(2).Value != "Åltho hère öt endĩt!" { - t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value) - } -} - -func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) { - parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { - t.Accept() - return false - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("input string") }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` + - `/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`}) -} - -func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) { - parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { - t.NextRune() - t.NextRune() - return false - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("input string") }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` + - `/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`}) -} - -func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) { - parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool { - t.NextRune() - t.Accept() - return false - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("") }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` + - `/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`}) -} - -func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) { - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { - a := parsekit.TokenAPI{} - a.Merge() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` + - `/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`}) -} diff --git a/tokenize/assertions_test.go b/tokenize/assertions_test.go new file mode 100644 index 0000000..0b7477c --- /dev/null +++ b/tokenize/assertions_test.go @@ -0,0 +1,127 @@ +package tokenize_test + +// This file contains some tools that are used for writing parsekit tests. 
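+//
+// For example, a table-driven TokenHandler test written with these helpers
+// could look like this (a sketch, not part of this change; the field order
+// follows the TokenHandlerT struct defined below):
+//
+//	func TestDigits(t *testing.T) {
+//		AssertTokenHandlers(t, []TokenHandlerT{
+//			{"123abc", tokenize.A.Digits, true, "123"},
+//			{"abc", tokenize.A.Digits, false, ""},
+//		})
+//	}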
+ +import ( + "regexp" + "testing" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) { + if expected != actual { + t.Errorf( + "Unexpected value for %s:\nexpected: %q\nactual: %q", + forWhat, expected, actual) + } +} + +// func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) { +// if notExpected == actual { +// t.Errorf("Unexpected value for %s: %q", forWhat, actual) +// } +// } + +func AssertTrue(t *testing.T, b bool, assertion string) { + if !b { + t.Errorf("Assertion %s is false", assertion) + } +} + +type PanicT struct { + Function func() + Regexp bool + Expect string +} + +func AssertPanics(t *testing.T, testSet []PanicT) { + for _, test := range testSet { + AssertPanic(t, test) + } +} + +func AssertPanic(t *testing.T, p PanicT) { + defer func() { + if r := recover(); r != nil { + mismatch := false + if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) { + mismatch = true + } + if !p.Regexp && p.Expect != r.(string) { + mismatch = true + } + if mismatch { + t.Errorf( + "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q", + p.Expect, r) + } + } else { + t.Errorf("Function did not panic (expected panic message: %s)", p.Expect) + } + }() + p.Function() +} + +type TokenHandlerT struct { + Input string + TokenHandler tokenize.TokenHandler + MustMatch bool + Expected string +} + +func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { + for _, test := range testSet { + AssertTokenHandler(t, test) + } +} + +func AssertTokenHandler(t *testing.T, test TokenHandlerT) { + result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) + if test.MustMatch { + if err != nil { + t.Errorf("Test %q failed with error: %s", test.Input, err) + } else if output := result.String(); output != test.Expected { + t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output) + } + } else { + if err == nil { + t.Errorf("Test %q failed: should not match, but it did", test.Input) + } + } +} + +type TokenMakerT struct { + Input string + TokenHandler tokenize.TokenHandler + Expected []tokenize.Token +} + +func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { + for _, test := range testSet { + AssertTokenMaker(t, test) + } +} + +func AssertTokenMaker(t *testing.T, test TokenMakerT) { + result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) + if err != nil { + t.Errorf("Test %q failed with error: %s", test.Input, err) + } else { + if len(result.Tokens()) != len(test.Expected) { + t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens())) + } + for i, expected := range test.Expected { + actual := result.Token(i) + if expected.Type != actual.Type { + t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type) + } + if string(expected.Runes) != string(actual.Runes) { + t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes) + } + if expected.Value != actual.Value { + t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value) + } + } + } +} diff --git a/tokenapi.go b/tokenize/tokenapi.go similarity index 88% rename from tokenapi.go rename to 
tokenize/tokenapi.go index 5d9c7a4..7419f2c 100644 --- a/tokenapi.go +++ b/tokenize/tokenapi.go @@ -1,13 +1,14 @@ -package parsekit +package tokenize import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit/reader" + "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/read" ) // TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from -// a parsekit.reader.Reader and to report back tokenizing results. For easy +// a parsekit.read.Reader and to report back tokenizing results. For easy // lookahead support, a forking strategy is provided. // // BASIC OPERATION: @@ -61,7 +62,7 @@ import ( // can lead to hard to track bugs. I much prefer this forking method, since // no bookkeeping has to be implemented when implementing a parser. type TokenAPI struct { - reader *reader.Reader + reader *read.Reader parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position) @@ -70,7 +71,7 @@ type TokenAPI struct { // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. func NewTokenAPI(input interface{}) *TokenAPI { return &TokenAPI{ - reader: reader.New(input), + reader: read.New(input), result: newTokenHandlerResult(), } } @@ -86,10 +87,10 @@ func NewTokenAPI(input interface{}) *TokenAPI { // without explicitly accepting, this method will panic. func (i *TokenAPI) NextRune() (rune, error) { if i.result.lastRune != nil { - callerPanic(1, "parsekit.TokenAPI.NextRune(): NextRune() called at {caller} "+ + common.CallerPanic(1, "tokenize.TokenAPI.NextRune(): NextRune() called at {caller} "+ "without a prior call to Accept()") } - i.detachChilds() + i.DetachChilds() readRune, err := i.reader.RuneAt(i.result.offset) i.result.lastRune = &runeInfo{r: readRune, err: err} @@ -103,9 +104,9 @@ func (i *TokenAPI) NextRune() (rune, error) { // returned an error. Calling Accept() in such case will result in a panic. func (i *TokenAPI) Accept() { if i.result.lastRune == nil { - callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()") + common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()") } else if i.result.lastRune.err != nil { - callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") + common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") } i.result.runes = append(i.result.runes, i.result.lastRune.r) i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r)) @@ -131,7 +132,7 @@ func (i *TokenAPI) Accept() { // with the parent TokenAPI as if nothing ever happened. func (i *TokenAPI) Fork() *TokenAPI { // Cleanup current forking / reading state. - i.detachChilds() + i.DetachChilds() i.result.lastRune = nil // Create the new fork. @@ -155,12 +156,12 @@ func (i *TokenAPI) Fork() *TokenAPI { // This allows a child to feed results in chunks to its parent. 
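 //
 // A sketch of that chunked pattern (not part of this change; it uses only
 // TokenAPI methods from this file):
 //
 //	child := tokenAPI.Fork()
 //	if r, err := child.NextRune(); err == nil && r == '+' {
 //		child.Accept()
 //		child.Merge() // the '+' now belongs to the parent's pending output
 //	}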
func (i *TokenAPI) Merge() { if i.parent == nil { - callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") + common.CallerPanic(1, "tokenize.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") } i.addResultsToParent() i.syncCursorTo(i.parent) - i.clearResults() - i.detachChilds() + i.ClearResults() + i.DetachChilds() } func (i *TokenAPI) addResultsToParent() { @@ -173,29 +174,29 @@ func (i *TokenAPI) syncCursorTo(to *TokenAPI) { *to.result.cursor = *i.result.cursor } -func (i *TokenAPI) clearResults() { +func (i *TokenAPI) ClearResults() { i.result.lastRune = nil i.result.runes = []rune{} i.result.tokens = []*Token{} i.result.err = nil } -func (i *TokenAPI) detachChilds() { +func (i *TokenAPI) DetachChilds() { if i.child != nil { - i.child.detachChildsRecurse() + i.child.DetachChildsRecurse() i.child = nil } } -func (i *TokenAPI) detachChildsRecurse() { +func (i *TokenAPI) DetachChildsRecurse() { if i.child != nil { - i.child.detachChildsRecurse() + i.child.DetachChildsRecurse() } i.child = nil i.parent = nil } -func (i *TokenAPI) flushReader() bool { +func (i *TokenAPI) FlushReader() bool { if i.result.offset > 0 { i.reader.Flush(i.result.offset) i.result.offset = 0 diff --git a/tokenapi_example_test.go b/tokenize/tokenapi_example_test.go similarity index 69% rename from tokenapi_example_test.go rename to tokenize/tokenapi_example_test.go index d838c2f..bf2ae90 100644 --- a/tokenapi_example_test.go +++ b/tokenize/tokenapi_example_test.go @@ -1,15 +1,15 @@ -package parsekit_test +package tokenize_test import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func ExampleTokenAPI_Fork() { // This custom TokenHandler checks for input 'a', 'b' or 'c'. - abcHandler := func(t *parsekit.TokenAPI) bool { - a := parsekit.A + abcHandler := func(t *tokenize.TokenAPI) bool { + a := tokenize.A for _, r := range []rune{'a', 'b', 'c'} { child := t.Fork() // fork, so we won't change parent t if a.Rune(r)(child) { @@ -26,26 +26,26 @@ func ExampleTokenAPI_Fork() { // You can make use of the parser/combinator tooling to do things // a lot simpler and take care of forking at the appropriate places. 
// The handler from above can be replaced with: - simpler := parsekit.A.RuneRange('a', 'c') + simpler := tokenize.A.RuneRange('a', 'c') - result, err := parsekit.NewTokenizer(abcHandler).Execute("another test") + result, err := tokenize.NewTokenizer(abcHandler).Execute("another test") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(simpler).Execute("curious") + result, err = tokenize.NewTokenizer(simpler).Execute("curious") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!") + result, err = tokenize.NewTokenizer(abcHandler).Execute("bang on!") fmt.Println(result, err) - result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match") + result, err = tokenize.NewTokenizer(abcHandler).Execute("not a match") fmt.Println(result, err) // Output: // a // c // b - // unexpected input at start of file + // mismatch at start of file } func ExampleTokenAPI_Merge() { - tokenHandler := func(t *parsekit.TokenAPI) bool { + tokenHandler := func(t *tokenize.TokenAPI) bool { child1 := t.Fork() child1.NextRune() // reads 'H' child1.Accept() @@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() { return true } - result, _ := parsekit.NewTokenizer(tokenHandler).Execute("Hi mister X!") + result, _ := tokenize.NewTokenizer(tokenHandler).Execute("Hi mister X!") fmt.Println(result) // Output: diff --git a/tokenizer.go b/tokenize/tokenhandler.go similarity index 58% rename from tokenizer.go rename to tokenize/tokenhandler.go index 75f863d..1d1c891 100644 --- a/tokenizer.go +++ b/tokenize/tokenhandler.go @@ -1,13 +1,4 @@ -package parsekit - -// Tokenizer is the top-level struct that holds the configuration for -// a parser that is based solely on a TokenHandler function. -// The Tokenizer can be instantiated using the parsekit.NewTokenizer() -// method. -type Tokenizer struct { - parser *Parser - result *TokenHandlerResult -} +package tokenize // TokenHandler is the function type that is involved in turning a low level // stream of UTF8 runes into lexical tokens. Its purpose is to check if input @@ -47,31 +38,3 @@ func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHand func (handler TokenHandler) Optional() TokenHandler { return MatchOpt(handler) } - -// NewTokenizer instantiates a new Tokenizer. -// -// This is a simple wrapper around a TokenHandler function. It can be used to -// match an input string against that TokenHandler function and retrieve the -// results in a straight forward way. -func NewTokenizer(tokenHandler TokenHandler) *Tokenizer { - tokenizer := &Tokenizer{} - tokenizer.parser = NewParser(func(p *ParseAPI) { - if p.Accept(tokenHandler) { - tokenizer.result = p.Result() - p.Stop() - } else { - p.Expected("") - } - }) - return tokenizer -} - -// Execute feeds the input to the wrapped TokenHandler function. -// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). -// -// It returns the TokenHandler's TokenHandlerResult. When an error occurred -// during parsing, the error will be set, nil otherwise. 
-func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *Error) { - err := t.parser.Execute(input) - return t.result, err -} diff --git a/tokenhandlerresult.go b/tokenize/tokenhandlerresult.go similarity index 89% rename from tokenhandlerresult.go rename to tokenize/tokenhandlerresult.go index bef2d32..62ed1ca 100644 --- a/tokenhandlerresult.go +++ b/tokenize/tokenhandlerresult.go @@ -1,8 +1,10 @@ -package parsekit +package tokenize import ( "fmt" "strings" + + "git.makaay.nl/mauricem/go-parsekit/common" ) // TokenHandlerResult is a struct that is used for holding tokenizing results @@ -12,9 +14,9 @@ type TokenHandlerResult struct { lastRune *runeInfo // Information about the last rune read using NextRune() runes []rune tokens []*Token - cursor *Cursor // current read cursor position, relative to the start of the file - offset int // current rune offset relative to the Reader's sliding window - err *Error // can be used by a TokenHandler to report a specific issue with the input + cursor *common.Cursor // current read cursor position, relative to the start of the file + offset int // current rune offset relative to the Reader's sliding window + err *common.Error // can be used by a TokenHandler to report a specific issue with the input } type runeInfo struct { @@ -59,7 +61,7 @@ func newTokenHandlerResult() *TokenHandlerResult { return &TokenHandlerResult{ runes: []rune{}, tokens: []*Token{}, - cursor: &Cursor{}, + cursor: &common.Cursor{}, } } @@ -90,7 +92,7 @@ func (r *TokenHandlerResult) addRunes(set ...interface{}) { case rune: r.runes = append(r.runes, s) default: - callerPanic(2, "parsekit.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s) + common.CallerPanic(2, "tokenize.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s) } } } @@ -167,6 +169,6 @@ func (r *TokenHandlerResult) Value(idx int) interface{} { // Cursor retrieves the read cursor from the TokenHandlerResult. This is the // first cursor position after the runes that were read by the TokenHandler. 
-func (r *TokenHandlerResult) Cursor() *Cursor { +func (r *TokenHandlerResult) Cursor() *common.Cursor { return r.cursor } diff --git a/tokenresult_test.go b/tokenize/tokenhandlerresult_test.go similarity index 72% rename from tokenresult_test.go rename to tokenize/tokenhandlerresult_test.go index bd379ce..6972884 100644 --- a/tokenresult_test.go +++ b/tokenize/tokenhandlerresult_test.go @@ -1,17 +1,19 @@ -package parsekit +package tokenize_test import ( "fmt" "strings" "testing" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func ExampleToken() { - t0 := Token{ + t0 := tokenize.Token{ Runes: []rune("10.1.2.3"), } - t1 := Token{ + t1 := tokenize.Token{ Runes: []rune("two hundred and twenty four"), Type: "Number", Value: 224, @@ -19,12 +21,12 @@ func ExampleToken() { const TName = 1 - t2 := Token{ + t2 := tokenize.Token{ Runes: []rune("John"), Type: TName, } - t3 := Token{ + t3 := tokenize.Token{ Runes: []rune("The answer"), Value: 42, } @@ -39,7 +41,7 @@ func ExampleToken() { } func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { - i := NewTokenAPI(strings.NewReader("Testing")) + i := tokenize.NewTokenAPI(strings.NewReader("Testing")) i.Result().SetRunes("string") AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input") i.Result().SetRunes([]rune("rune slice")) @@ -51,10 +53,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := NewTokenAPI(strings.NewReader("Testing")) + i := tokenize.NewTokenAPI(strings.NewReader("Testing")) i.Result().SetRunes(1234567) }, Regexp: true, - Expect: `parsekit\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`, + Expect: `tokenize\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenhandlerresult_test.go:\d+`, }) } diff --git a/tokenhandlers_builtin.go b/tokenize/tokenhandlers_builtin.go similarity index 97% rename from tokenhandlers_builtin.go rename to tokenize/tokenhandlers_builtin.go index b7a1a38..9fd84bf 100644 --- a/tokenhandlers_builtin.go +++ b/tokenize/tokenhandlers_builtin.go @@ -1,4 +1,4 @@ -package parsekit +package tokenize import ( "fmt" @@ -9,6 +9,8 @@ import ( "strings" "unicode" "unicode/utf8" + + "git.makaay.nl/mauricem/go-parsekit/common" ) // C provides convenient access to a range of parser/combinators that can be @@ -22,7 +24,7 @@ import ( // When using C in your own parser, then it is advised to create a variable // to reference it: // -// var c = parsekit.C +// var c = tokenize.C // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var C = struct { @@ -58,7 +60,7 @@ var C = struct { // When using A in your own parser, then it is advised to create a variable // to reference it: // -// var a = parsekit.A +// var a = tokenize.A // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var A = struct { @@ -236,7 +238,7 @@ var A = struct { // When using M in your own parser, then it is advised to create a variable // to reference it: // -// var m = parsekit.M +// var m = tokenize.M // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var M = struct { @@ -268,7 +270,7 @@ var M = struct { // When using T in your own parser, then it is advised to create a variable // to reference it: // -// var t = parsekit.T +// var t = tokenize.T // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. 
var T = struct { @@ -332,7 +334,7 @@ func MatchRunes(expected ...rune) TokenHandler { // creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'. func MatchRuneRange(start rune, end rune) TokenHandler { if end < start { - callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end) + common.CallerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end) } return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end }) } @@ -485,7 +487,7 @@ func MatchRep(times int, handler TokenHandler) TokenHandler { // When more matches are possible, these will be included in the output. func MatchMin(min int, handler TokenHandler) TokenHandler { if min < 0 { - callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0") + common.CallerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0") } return matchMinMax(min, -1, handler, "MatchMin") } @@ -496,7 +498,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler { // Zero matches are considered a successful match. func MatchMax(max int, handler TokenHandler) TokenHandler { if max < 0 { - callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0") + common.CallerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0") } return matchMinMax(0, max, handler, "MatchMax") } @@ -519,17 +521,17 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler { // inclusive. All matches will be included in the output. func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler { if max < 0 { - callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0") + common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0") } if min < 0 { - callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0") + common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0") } return matchMinMax(min, max, handler, "MatchMinMax") } func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler { if max >= 0 && min > max { - callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min) + common.CallerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min) } return func(t *TokenAPI) bool { total := 0 @@ -594,7 +596,7 @@ func MatchSigned(handler TokenHandler) TokenHandler { // ranging from -9223372036854775808 to 9223372036854775807. 
func MatchIntegerBetween(min int64, max int64) TokenHandler { if max < min { - callerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min) + common.CallerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min) } digits := MatchSigned(MatchDigits()) return func(t *TokenAPI) bool { diff --git a/tokenhandlers_builtin_test.go b/tokenize/tokenhandlers_builtin_test.go similarity index 81% rename from tokenhandlers_builtin_test.go rename to tokenize/tokenhandlers_builtin_test.go index 68e01f5..080ab28 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenize/tokenhandlers_builtin_test.go @@ -1,15 +1,15 @@ -package parsekit_test +package tokenize_test import ( "fmt" "testing" - "git.makaay.nl/mauricem/go-parsekit" + "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func TestCombinators(t *testing.T) { - var c, a, m = parsekit.C, parsekit.A, parsekit.M - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var c, a, m = tokenize.C, tokenize.A, tokenize.M + AssertTokenHandlers(t, []TokenHandlerT{ {"abc", c.Not(a.Rune('b')), true, "a"}, {"bcd", c.Not(a.Rune('b')), false, ""}, {"bcd", c.Not(a.Rune('b')), false, ""}, @@ -68,19 +68,19 @@ func TestCombinators(t *testing.T) { } func TestCombinatorPanics(t *testing.T) { - var c, a = parsekit.C, parsekit.A - parsekit.AssertPanics(t, []parsekit.PanicT{ + var c, a = tokenize.C, tokenize.A + AssertPanics(t, []PanicT{ {func() { a.RuneRange('z', 'a') }, true, `TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, - {func() { c.MinMax(-1, 1, parsekit.A.Space) }, true, + {func() { c.MinMax(-1, 1, a.Space) }, true, `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, - {func() { c.MinMax(1, -1, parsekit.A.Space) }, true, + {func() { c.MinMax(1, -1, a.Space) }, true, `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, - {func() { c.MinMax(10, 5, parsekit.A.Space) }, true, + {func() { c.MinMax(10, 5, a.Space) }, true, `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`}, - {func() { c.Min(-10, parsekit.A.Space) }, true, + {func() { c.Min(-10, a.Space) }, true, `TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, - {func() { c.Max(-42, parsekit.A.Space) }, true, + {func() { c.Max(-42, a.Space) }, true, `TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, {func() { a.IntegerBetween(10, -10) }, true, `TokenHandler: MatchIntegerBetween definition error at /.*/tokenhandlers_builtin_test.go:\d+: max -10 must not be < min 10`}, @@ -88,8 +88,8 @@ func TestCombinatorPanics(t *testing.T) { } func TestAtoms(t *testing.T) { - var a = parsekit.A - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var a = tokenize.A + AssertTokenHandlers(t, []TokenHandlerT{ {"dd", a.RuneRange('b', 'e'), true, "d"}, {"ee", a.RuneRange('b', 'e'), true, "e"}, {"ff", a.RuneRange('b', 'e'), false, ""}, @@ -225,8 +225,8 @@ func TestAtoms(t *testing.T) { } func TestIPv4Atoms(t *testing.T) { - var a = parsekit.A - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var a = tokenize.A + AssertTokenHandlers(t, []TokenHandlerT{ {"0X", a.Octet, true, "0"}, {"00X", a.Octet, true, "00"}, {"000X", a.Octet, true, "000"}, @@ -257,8 +257,8 
@@ func TestIPv4Atoms(t *testing.T) { } func TestIPv6Atoms(t *testing.T) { - var a = parsekit.A - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var a = tokenize.A + AssertTokenHandlers(t, []TokenHandlerT{ {"", a.IPv6, false, ""}, {"::", a.IPv6, true, "::"}, {"1::", a.IPv6, true, "1::"}, @@ -286,8 +286,8 @@ func TestIPv6Atoms(t *testing.T) { } func TestModifiers(t *testing.T) { - var c, a, m = parsekit.C, parsekit.A, parsekit.M - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var c, a, m = tokenize.C, tokenize.A, tokenize.M + AssertTokenHandlers(t, []TokenHandlerT{ {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, @@ -306,10 +306,10 @@ func TestModifiers(t *testing.T) { // follow the correct pattern. Therefore, tokenmakers will panic when the // input cannot be processed successfully. func TestTokenMakerErrorHandling(t *testing.T) { - var a, tok = parsekit.A, parsekit.T + var a, tok = tokenize.A, tokenize.T invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() - parser := parsekit.NewTokenizer(invalid) - parsekit.AssertPanic(t, parsekit.PanicT{ + parser := tokenize.NewTokenizer(invalid) + AssertPanic(t, PanicT{ func() { parser.Execute("no") }, false, `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + `invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`, @@ -317,19 +317,19 @@ func TestTokenMakerErrorHandling(t *testing.T) { } func TestTokenMakers(t *testing.T) { - var c, a, tok = parsekit.C, parsekit.A, parsekit.T - parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{ + var c, a, tok = tokenize.C, tokenize.A, tokenize.T + AssertTokenMakers(t, []TokenMakerT{ {`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)), - []parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}}, + []tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}}, {`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)), - []parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}}, + []tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}}, {`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)), - []parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}}, + []tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}}, - {"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}}, - {"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{ + {"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}}, + {"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{ {Type: "bar", Runes: []rune("R"), Value: byte('R')}, {Type: "bar", Runes: []rune("O"), Value: byte('O')}, {Type: "bar", Runes: []rune("C"), Value: byte('C')}, @@ -337,28 +337,28 @@ func TestTokenMakers(t *testing.T) { {Type: "bar", Runes: []rune("S"), Value: byte('S')}, }}, - {"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}}, + {"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}}, - {`2147483647XYZ`, tok.Int("D", a.Integer), 
[]parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}}, - {`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}}, - {`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}}, - {`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}}, - {`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}}, - {`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}}, - {`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}}, - {`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}}, - {`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}}, + {`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}}, + {`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}}, + {`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}}, + {`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}}, + {`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}}, + {`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}}, + {`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}}, + {`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}}, + {`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}}, - {`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}}, - {`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}}, - {`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}}, - {`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}}, - {`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}}, + {`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}}, + {`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}}, + {`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}}, + {`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}}, + 
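+		// 18446744073709551615 is the largest value a uint64 can hold (2^64 - 1).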
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}}, - {`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}}, - {`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}}, + {`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}}, + {`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}}, - {`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{ + {`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{ {Type: "P", Runes: []rune("1"), Value: true}, {Type: "P", Runes: []rune("t"), Value: true}, {Type: "P", Runes: []rune("T"), Value: true}, @@ -367,7 +367,7 @@ func TestTokenMakers(t *testing.T) { {Type: "P", Runes: []rune("True"), Value: true}, }}, - {`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{ + {`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{ {Type: "P", Runes: []rune("0"), Value: false}, {Type: "P", Runes: []rune("f"), Value: false}, {Type: "P", Runes: []rune("F"), Value: false}, @@ -379,8 +379,8 @@ func TestTokenMakers(t *testing.T) { } func TestSyntacticSugar(t *testing.T) { - var a = parsekit.A - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + var a = tokenize.A + AssertTokenHandlers(t, []TokenHandlerT{ {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"}, {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"}, {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"}, @@ -391,37 +391,9 @@ func TestSyntacticSugar(t *testing.T) { }) } -func TestSequenceOfRunes(t *testing.T) { - var c, a = parsekit.C, parsekit.A - sequence := c.Seq( - a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen, - a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash, - a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question, - a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore, - a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, - ) - input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - output := "" - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - if p.Accept(sequence) { - output = p.Result().String() - p.Stop() - } else { - p.Expected("sequence of runes") - } - }) - err := parser.Execute(input) - if err != nil { - t.Fatalf("Parsing failed: %s", err) - } - if output != input { - t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output) - } -} - // I know, this is hell, but that's the whole point for this test :-> func TestCombination(t *testing.T) { - var c, a, m = parsekit.C, parsekit.A, parsekit.M + var c, a, m = tokenize.C, tokenize.A, tokenize.M demonic := c.Seq( c.Opt(a.SquareOpen), m.Trim( @@ -442,7 +414,7 @@ func TestCombination(t *testing.T) { c.Opt(a.SquareClose), ) - parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{ + AssertTokenHandlers(t, []TokenHandlerT{ {"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"}, {"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"}, {">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"}, diff --git a/tokenize/tokenizer.go b/tokenize/tokenizer.go new file mode 100644 index 0000000..1ae1a1d --- /dev/null +++ b/tokenize/tokenizer.go @@ 
-0,0 +1,34 @@
+package tokenize
+
+import (
+	"git.makaay.nl/mauricem/go-parsekit/common"
+)
+
+// Tokenizer is the top-level struct that holds the configuration for
+// a parser that is based solely on a TokenHandler function.
+// A Tokenizer can be instantiated using the tokenize.NewTokenizer()
+// function.
+type Tokenizer struct {
+	handler TokenHandler
+}
+
+// NewTokenizer instantiates a new Tokenizer, based on the provided TokenHandler.
+func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
+	return &Tokenizer{tokenHandler}
+}
+
+// Execute feeds the input to the wrapped TokenHandler function.
+// For an overview of allowed inputs, take a look at the documentation for read.New().
+//
+// It returns the TokenHandler's TokenHandlerResult. When an error occurs
+// during tokenizing, the returned error will be set; otherwise it will be nil.
+func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *common.Error) {
+	api := NewTokenAPI(input)
+	ok := t.handler(api)
+
+	if !ok {
+		err := &common.Error{Message: "mismatch", Cursor: common.Cursor{}}
+		return nil, err
+	}
+	return api.Result(), nil
+}
diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go
new file mode 100644
index 0000000..b24fba7
--- /dev/null
+++ b/tokenize/tokenizer_test.go
@@ -0,0 +1,179 @@
+package tokenize_test
+
+import (
+	"fmt"
+	"io"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"git.makaay.nl/mauricem/go-parsekit/tokenize"
+)
+
+// TODO For error handling, it would be really cool if for example the
+// 10.0.300.1/24 case would return an actual error stating that
+// 300 is not a valid octet for an IPv4 address.
+// The biggest thing to take care of here is that errors should not stop
+// a Parser flow (since we might be trying to match different cases in
+// sequence), but a Parser flow should optionally be able to make use
+// of the actual error.
+// The same goes for a Tokenizer, since those can also make use of
+// optional matching using tokenize.C.Any(...) for example. If matching
+// for Any(IPv4, Digits), the example case should simply end up with 10
+// after the IPv4 mismatch.
+func ExampleTokenizer_Execute() {
+	// Build the tokenizer for ip/mask.
+	var c, a, t = tokenize.C, tokenize.A, tokenize.T
+	ip := t.Str("ip", a.IPv4)
+	mask := t.Int8("mask", a.IPv4CIDRMask)
+	cidr := c.Seq(ip, a.Slash, mask)
+	tokenizer := tokenize.NewTokenizer(cidr)
+
+	for _, input := range []string{
+		"000.000.000.000/000",
+		"192.168.0.1/24",
+		"255.255.255.255/32",
+		"10.0.300.1/24",
+		"not an IPv4 CIDR",
+	} {
+		// Execute returns a TokenHandlerResult and an error, which is nil on success.
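+		// On a mismatch, the returned *common.Error carries a cursor position;
+		// printing it yields e.g. "mismatch at start of file".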
+		result, err := tokenizer.Execute(input)
+
+		if err == nil {
+			fmt.Printf("Result: %s\n", result.Tokens())
+		} else {
+			fmt.Printf("Error: %s\n", err)
+		}
+	}
+	// Output:
+	// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
+	// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
+	// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
+	// Error: mismatch at start of file
+	// Error: mismatch at start of file
+}
+
+func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
+	r, _ := mkInput().NextRune()
+	AssertEqual(t, 'T', r, "first rune")
+}
+
+func TestInputCanAcceptRunesFromReader(t *testing.T) {
+	i := mkInput()
+	i.NextRune()
+	i.Accept()
+	i.NextRune()
+	i.Accept()
+	i.NextRune()
+	i.Accept()
+	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
+}
+
+func TestCallingNextRuneTwice_Panics(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := mkInput()
+			i.NextRune()
+			i.NextRune()
+		},
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
+	})
+}
+
+func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: mkInput().Accept,
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
+	})
+}
+
+func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := mkInput()
+			i.Merge()
+		},
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
+}
+
+func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := mkInput()
+			f := i.Fork()
+			i.NextRune()
+			f.Merge()
+		},
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
+}
+
+func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := mkInput()
+			f := i.Fork()
+			i.Fork()
+			f.Merge()
+		},
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
+}
+
+func TestForkingInput_ClearsLastRune(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := mkInput()
+			i.NextRune()
+			i.Fork()
+			i.Accept()
+		},
+		Regexp: true,
+		Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
+	})
+}
+
+func TestAccept_UpdatesCursor(t *testing.T) {
+	i := tokenize.NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
+	AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
+	for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
+		i.NextRune()
+		i.Accept()
+	}
+	AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
+	i.NextRune() // read "\n", cursor ends up at start of new line
+	i.Accept()
+	AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
+	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
+		i.NextRune()
+		i.Accept()
+	}
+	AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
}

+func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
+	i := tokenize.NewTokenAPI(strings.NewReader("X"))
+	i.NextRune()
+	i.Accept()
+	r, err := i.NextRune()
+	AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
+	AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
+}
+
+func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
+	i := tokenize.NewTokenAPI(strings.NewReader("X"))
+	f := i.Fork()
+	f.NextRune()
+	f.Accept()
+	r, err := f.NextRune()
+	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
+	r, err = i.NextRune()
+	AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
+	AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
+}
+
+func mkInput() *tokenize.TokenAPI {
+	return tokenize.NewTokenAPI("Testing")
+}
diff --git a/tokenize/tokenizer_unexported_test.go b/tokenize/tokenizer_unexported_test.go
new file mode 100644
index 0000000..df69d0b
--- /dev/null
+++ b/tokenize/tokenizer_unexported_test.go
@@ -0,0 +1,125 @@
+package tokenize
+
+import (
+	"testing"
+)
+
+func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
+	// Create input, accept the first rune.
+	i := NewTokenAPI("Testing")
+	i.NextRune()
+	i.Accept() // T
+	AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
+	// Fork
+	f := i.Fork()
+	AssertEqual(t, f, i.child, "Input.child (must be f)")
+	AssertEqual(t, i, f.parent, "Input.parent (must be i)")
+	AssertEqual(t, 1, i.result.cursor.Byte, "i.result.cursor.Byte")
+	AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.result.cursor.Byte")
+	// Accept two runes via fork.
+	f.NextRune()
+	f.Accept() // e
+	f.NextRune()
+	f.Accept() // s
+	AssertEqual(t, "es", f.Result().String(), "result runes in fork")
+	AssertEqual(t, 1, i.result.cursor.Byte, "i.result.cursor.Byte")
+	AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.result.cursor.Byte")
+	// Merge fork back into parent
+	f.Merge()
+	AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
+	AssertEqual(t, 3, i.result.cursor.Byte, "i.result.cursor.Byte")
+}
+
+func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
+	i := NewTokenAPI("Testing")
+	i.NextRune()
+	i.Accept()
+	f1 := i.Fork()
+	f1.NextRune()
+	f1.Accept()
+	f2 := f1.Fork()
+	f2.NextRune()
+	f2.Accept()
+	AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
+	AssertEqual(t, 1, i.result.offset, "i.offset A")
+	AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
+	AssertEqual(t, 2, f1.result.offset, "f1.offset A")
+	AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
+	AssertEqual(t, 3, f2.result.offset, "f2.offset A")
+	f2.Merge()
+	AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
+	AssertEqual(t, 1, i.result.offset, "i.offset B")
+	AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
+	AssertEqual(t, 3, f1.result.offset, "f1.offset B")
+	AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
+	AssertEqual(t, 3, f2.result.offset, "f2.offset B")
+	f1.Merge()
+	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
+	AssertEqual(t, 3, i.result.offset, "i.offset C")
+	AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
+	AssertEqual(t, 3, f1.result.offset, "f1.offset C")
+	AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
+	AssertEqual(t, 3, f2.result.offset, "f2.offset C")
+}
+
+func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
+	i := NewTokenAPI("Testing")
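+	// Fork several levels deep (i -> f1 -> f2 -> f3). Forking f4 from f1 below
+	// detaches both f2 and f3, and NextRune() on the root input then detaches
+	// every remaining fork.
+	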
f1 := i.Fork() + f2 := f1.Fork() + f3 := f2.Fork() + f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3 + f5 := f4.Fork() + AssertEqual(t, true, i.parent == nil, "i.parent == nil") + AssertEqual(t, true, i.child == f1, "i.child == f1") + AssertEqual(t, true, f1.parent == i, "f1.parent == i") + AssertEqual(t, true, f1.child == f4, "f1.child == f4") + AssertEqual(t, true, f2.child == nil, "f2.child == nil") + AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") + AssertEqual(t, true, f3.child == nil, "f3.child == nil") + AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") + AssertEqual(t, true, f4.parent == f1, "f4.parent == f1") + AssertEqual(t, true, f4.child == f5, "f4.child == f5") + AssertEqual(t, true, f5.parent == f4, "f5.parent == f4") + AssertEqual(t, true, f5.child == nil, "f5.child == nil") + + i.NextRune() + + AssertEqual(t, true, i.parent == nil, "i.parent == nil") + AssertEqual(t, true, i.child == nil, "i.child == nil") + AssertEqual(t, true, f1.parent == nil, "f1.parent == nil") + AssertEqual(t, true, f1.child == nil, "f1.child == nil") + AssertEqual(t, true, f2.child == nil, "f2.child == nil") + AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") + AssertEqual(t, true, f3.child == nil, "f3.child == nil") + AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") + AssertEqual(t, true, f4.parent == nil, "f4.parent == nil") + AssertEqual(t, true, f4.child == nil, "f4.child == nil") + AssertEqual(t, true, f5.parent == nil, "f5.parent == nil") + AssertEqual(t, true, f5.child == nil, "f5.child == nil") +} + +func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { + i := NewTokenAPI("Testing") + r, _ := i.NextRune() + AssertEqual(t, 'T', r, "result from 1st call to NextRune()") + AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil") + i.Accept() + AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil") + AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset") + r, _ = i.NextRune() + AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") +} + +func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) { + if expected != actual { + t.Errorf( + "Unexpected value for %s:\nexpected: %q\nactual: %q", + forWhat, expected, actual) + } +} + +func AssertTrue(t *testing.T, b bool, assertion string) { + if !b { + t.Errorf("Assertion %s is false", assertion) + } +} diff --git a/tokenizer_test.go b/tokenizer_test.go deleted file mode 100644 index 737cbc9..0000000 --- a/tokenizer_test.go +++ /dev/null @@ -1,291 +0,0 @@ -package parsekit - -import ( - "fmt" - "io" - "strings" - "testing" - "unicode/utf8" -) - -// TODO For error handling, it would be really cool if for example the -// 10.0.300.1/24 case would return an actual error stating that -// 300 is not a valid octet for an IPv4 address. -// Biggest thing to take care of here, is that errors should not stop -// a Parser flow (since we might be trying to match different cases in -// sequence), but a Parser flow should optionally be able to make use -// of the actual error. -// The same goes for a Tokenizer, since those can also make use of -// optional matching using parsekit.C.Any(...) for example. If matching -// for Any(IPv4, Digits), the example case should simply end up with 10 -// after the IPv4 mismatch. -func ExampleTokenizer_Execute() { - // Build the tokenizer for ip/mask. 
- ip := T.Str("ip", A.IPv4) - mask := T.Int8("mask", A.IPv4CIDRMask) - cidr := C.Seq(ip, A.Slash, mask) - tokenizer := NewTokenizer(cidr) - - for _, input := range []string{ - "000.000.000.000/000", - "192.168.0.1/24", - "255.255.255.255/32", - "10.0.300.1/24", - "not an IPv4 CIDR", - } { - // Execute returns a TokenHandlerResult and an error, which is nil on success. - result, err := tokenizer.Execute(input) - - if err == nil { - fmt.Printf("Result: %s\n", result.Tokens()) - } else { - fmt.Printf("Error: %s\n", err) - } - } - // Output: - // Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0) - // Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24) - // Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32) - // Error: unexpected input at start of file - // Error: unexpected input at start of file -} - -func TestCallingNextRune_ReturnsNextRune(t *testing.T) { - r, _ := mkInput().NextRune() - AssertEqual(t, 'T', r, "first rune") -} - -func TestInputCanAcceptRunesFromReader(t *testing.T) { - i := mkInput() - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - i.NextRune() - i.Accept() - AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()") -} - -func TestCallingNextRuneTwice_Panics(t *testing.T) { - AssertPanic(t, PanicT{ - Function: func() { - i := mkInput() - i.NextRune() - i.NextRune() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, - }) -} - -func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { - AssertPanic(t, PanicT{ - Function: mkInput().Accept, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`, - }) -} - -func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { - AssertPanic(t, PanicT{ - Function: func() { - i := mkInput() - i.Merge() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) -} - -func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { - AssertPanic(t, PanicT{ - Function: func() { - i := mkInput() - f := i.Fork() - i.NextRune() - f.Merge() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) -} - -func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { - AssertPanic(t, PanicT{ - Function: func() { - i := mkInput() - f := i.Fork() - i.Fork() - f.Merge() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) -} - -func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) { - i := mkInput() - f1 := i.Fork() - f2 := f1.Fork() - f3 := f2.Fork() - f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3 - f5 := f4.Fork() - AssertEqual(t, true, i.parent == nil, "i.parent == nil") - AssertEqual(t, true, i.child == f1, "i.child == f1") - AssertEqual(t, true, f1.parent == i, "f1.parent == i") - AssertEqual(t, true, f1.child == f4, "f1.child == f4") - AssertEqual(t, true, f2.child == nil, "f2.child == nil") - AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") - AssertEqual(t, true, f3.child == nil, "f3.child == nil") - AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") - AssertEqual(t, true, f4.parent == f1, 
"f4.parent == f1") - AssertEqual(t, true, f4.child == f5, "f4.child == f5") - AssertEqual(t, true, f5.parent == f4, "f5.parent == f4") - AssertEqual(t, true, f5.child == nil, "f5.child == nil") - - i.NextRune() - - AssertEqual(t, true, i.parent == nil, "i.parent == nil") - AssertEqual(t, true, i.child == nil, "i.child == nil") - AssertEqual(t, true, f1.parent == nil, "f1.parent == nil") - AssertEqual(t, true, f1.child == nil, "f1.child == nil") - AssertEqual(t, true, f2.child == nil, "f2.child == nil") - AssertEqual(t, true, f2.parent == nil, "f2.parent == nil") - AssertEqual(t, true, f3.child == nil, "f3.child == nil") - AssertEqual(t, true, f3.parent == nil, "f3.parent == nil") - AssertEqual(t, true, f4.parent == nil, "f4.parent == nil") - AssertEqual(t, true, f4.child == nil, "f4.child == nil") - AssertEqual(t, true, f5.parent == nil, "f5.parent == nil") - AssertEqual(t, true, f5.child == nil, "f5.child == nil") -} - -func TestForkingInput_ClearsLastRune(t *testing.T) { - AssertPanic(t, PanicT{ - Function: func() { - i := mkInput() - i.NextRune() - i.Fork() - i.Accept() - }, - Regexp: true, - Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, - }) -} - -func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { - i := mkInput() - r, _ := i.NextRune() - AssertEqual(t, 'T', r, "result from 1st call to NextRune()") - AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil") - i.Accept() - AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil") - AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset") - r, _ = i.NextRune() - AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") -} - -func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) { - i := mkInput() - for j := 0; j < 7; j++ { - i.NextRune() - i.Accept() - } - AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()") -} - -func TestAccept_UpdatesCursor(t *testing.T) { - i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines")) - AssertEqual(t, "start of file", i.result.cursor.String(), "cursor 1") - for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" - i.NextRune() - i.Accept() - } - AssertEqual(t, "line 1, column 7", i.result.cursor.String(), "cursor 2") - i.NextRune() // read "\n", cursor ends up at start of new line - i.Accept() - AssertEqual(t, "line 2, column 1", i.result.cursor.String(), "cursor 3") - for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" - i.NextRune() - i.Accept() - } - AssertEqual(t, "line 3, column 5", i.result.cursor.String(), "cursor 4") -} - -func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { - // Create input, accept the first rune. - i := mkInput() - i.NextRune() - i.Accept() // T - AssertEqual(t, "T", i.Result().String(), "accepted rune in input") - // Fork - f := i.Fork() - AssertEqual(t, f, i.child, "Input.child (must be f)") - AssertEqual(t, i, f.parent, "Input.parent (must be i)") - AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte") - AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte") - // Accept two runes via fork. 
- f.NextRune() - f.Accept() // e - f.NextRune() - f.Accept() // s - AssertEqual(t, "es", f.Result().String(), "result runes in fork") - AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte") - AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte") - // Merge fork back into parent - f.Merge() - AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()") - AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte") -} - -func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { - i := mkInput() - i.NextRune() - i.Accept() - f1 := i.Fork() - f1.NextRune() - f1.Accept() - f2 := f1.Fork() - f2.NextRune() - f2.Accept() - AssertEqual(t, "T", i.Result().String(), "i.Result().String()") - AssertEqual(t, 1, i.result.offset, "i.offset A") - AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()") - AssertEqual(t, 2, f1.result.offset, "f1.offset A") - AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()") - AssertEqual(t, 3, f2.result.offset, "f2.offset A") - f2.Merge() - AssertEqual(t, "T", i.Result().String(), "i.Result().String()") - AssertEqual(t, 1, i.result.offset, "i.offset B") - AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()") - AssertEqual(t, 3, f1.result.offset, "f1.offset B") - AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") - AssertEqual(t, 3, f2.result.offset, "f2.offset B") - f1.Merge() - AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()") - AssertEqual(t, 3, i.result.offset, "i.offset C") - AssertEqual(t, "", f1.Result().String(), "f1.Result().String()") - AssertEqual(t, 3, f1.result.offset, "f1.offset C") - AssertEqual(t, "", f2.Result().String(), "f2.Result().String()") - AssertEqual(t, 3, f2.result.offset, "f2.offset C") -} - -func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { - i := NewTokenAPI(strings.NewReader("X")) - i.NextRune() - i.Accept() - r, err := i.NextRune() - AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") - AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") -} -func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { - i := NewTokenAPI(strings.NewReader("X")) - f := i.Fork() - f.NextRune() - f.Accept() - r, err := f.NextRune() - AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()") - r, err = i.NextRune() - AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()") - AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") -} - -func mkInput() *TokenAPI { - return NewTokenAPI(strings.NewReader("Testing")) -}
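Usage sketch (not part of the diff above): a minimal client program against the new tokenize package. It composes only names that this patch exports (tokenize.C, tokenize.A, NewTokenizer, Execute, c.OneOrMore, a.Digit); the input string and printed output are illustrative.

	package main

	import (
		"fmt"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		var c, a = tokenize.C, tokenize.A

		// Match one or more digits at the start of the input.
		tokenizer := tokenize.NewTokenizer(c.OneOrMore(a.Digit))

		result, err := tokenizer.Execute("12345 apples")
		if err != nil {
			fmt.Println("Error:", err) // e.g. "mismatch at start of file"
			return
		}
		fmt.Println(result) // the matched digits: 12345
	}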