From 56b8df3aab1ff2f81c9915906c40072a8a40318b Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 12 Jul 2019 12:33:18 +0000 Subject: [PATCH] Removed loop protection code. This is useful, but it puts a performance burden on the code when doing it by keeping track of actual callers through the call stack. Maybe to be reintroduced in a future version with something like a simple counter and a maximum depth-style protection. --- parse/api.go | 35 ++++------------------ parse/parse.go | 4 +-- parse/parse_test.go | 58 ------------------------------------ read/read.go | 5 +++- tokenize/api.go | 52 ++++++++++++++++++++------------ tokenize/api_test.go | 8 ++--- tokenize/cursor.go | 34 +++++---------------- tokenize/cursor_test.go | 39 ++++-------------------- tokenize/handlers_builtin.go | 3 +- tokenize/tokenize.go | 2 +- tokenize/tokenizer_test.go | 8 ++--- 11 files changed, 70 insertions(+), 178 deletions(-) diff --git a/parse/api.go b/parse/api.go index 8d44ec2..d404057 100644 --- a/parse/api.go +++ b/parse/api.go @@ -16,11 +16,10 @@ import ( // // • call other parse.Handler functions, the core of recursive-descent parsing (Handle) type API struct { - tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions - Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call - loopCheck map[uintptr]bool // used for parser loop detection - err error // parse error, retrieved by Error(), using API methods is denied when set - stopped bool // a boolean set to true by Stop() + tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions + Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call + err error // parse error, retrieved by Error(), using API methods is denied when set + stopped bool // a boolean set to true by Stop() } // Peek checks if the upcoming input data matches the provided tokenize.Handler. @@ -55,9 +54,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool { p.tokenAPI.Dispose(forkedAPI) // And flush the input reader buffer. - if p.tokenAPI.FlushInput() { - p.initLoopCheck() - } + p.tokenAPI.FlushInput() } else { p.tokenAPI.Dispose(forkedAPI) } @@ -66,12 +63,11 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool { func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) { p.panicWhenStoppedOrInError(name) - p.checkForLoops(name) if tokenHandler == nil { callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") } - p.tokenAPI.Reset() + p.tokenAPI.Reset() // TODO uh, why did I do this again? Just for i.runeRead = false ? child := p.tokenAPI.Fork() ok := tokenHandler(p.tokenAPI) @@ -105,25 +101,6 @@ func (p *API) IsStoppedOrInError() bool { return p.stopped || p.err != nil } -// initLoopCheck clears the loop check data, a map in which we keep -// track of the lines of code from which Accept() and/or Peek() are called. -// When Accept() is called, and the parser moved forward in the input data, -// this method is called to reset the map for the new read cursor position. -func (p *API) initLoopCheck() { - p.loopCheck = make(map[uintptr]bool) -} - -// checkForLoops checks if the line of code from which Accept() or Peek() -// was called has been seen before for the current read cursor position. -// If yes, then the parser is in a loop and the method will panic. -func (p *API) checkForLoops(name string) { - filepos := callerPointer(3) - if _, ok := p.loopCheck[filepos]; ok { - callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}") - } - p.loopCheck[filepos] = true -} - // Handle executes other parse.Handler functions from within the active // parse.Handler function. // diff --git a/parse/parse.go b/parse/parse.go index 227d487..0a2bbf8 100644 --- a/parse/parse.go +++ b/parse/parse.go @@ -31,8 +31,8 @@ func New(startHandler Handler) Func { } return func(input interface{}) error { api := &API{ - tokenAPI: tokenize.NewAPI(input), - loopCheck: make(map[uintptr]bool), + tokenAPI: tokenize.NewAPI(input), + // NOOPCHECK loopCheck: make(map[uintptr]bool), } if api.Handle(startHandler) { // Handle returned true, indicating that parsing could still continue. diff --git a/parse/parse_test.go b/parse/parse_test.go index 255c2ca..693c669 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -266,61 +266,3 @@ func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) err := p("x") parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err") } - -type parserWithLoop struct { - loopCounter int -} - -func (l *parserWithLoop) first(p *parse.API) { - p.Accept(tokenize.A.ASCII) - p.Handle(l.second) -} - -func (l *parserWithLoop) second(p *parse.API) { - p.Accept(tokenize.A.ASCII) - p.Handle(l.third) -} - -func (l *parserWithLoop) third(p *parse.API) { - if l.loopCounter++; l.loopCounter > 100 { - p.Error("Loop not detected by parsekit") - return - } - p.Accept(tokenize.A.ASCII) - p.Handle(l.first) -} - -func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { - looper := &parserWithLoop{} - parser := parse.New(looper.first) - parse.AssertPanic(t, parse.PanicT{ - Function: func() { parser("Het houdt niet op, niet vanzelf") }, - Regexp: true, - Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`}) -} - -// This test incorporates an actual loop bug that I dropped on myself and -// that I could not easily spot in my code. It sounded so logical: -// I want to get chunks of 5 chars from the input, so I simply loop on: -// -// p.On(c.Max(5, a.AnyRune)) -// -// The problem here is that Max(5, ...) will also match when there is -// no more input, since Max(5, ...) is actually MinMax(0, 5, ...). -// Therefore the loop will never stop. Solving the loop was simple: -// -// p.On(c.MinMax(1, 5, a.AnyRune)) -// -// Now the loop stops when the parser finds no more matching input data. -func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { - var c, a = tokenize.C, tokenize.A - parser := parse.New(func(p *parse.API) { - for p.Accept(c.Max(5, a.AnyRune)) { - } - p.Stop() - }) - parse.AssertPanic(t, parse.PanicT{ - Function: func() { parser("This will end soon") }, - Regexp: true, - Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`}) -} diff --git a/read/read.go b/read/read.go index ae082d6..33f6875 100644 --- a/read/read.go +++ b/read/read.go @@ -129,8 +129,10 @@ func (r *Buffer) RuneAt(offset int) (rune, error) { if n > 0 { r.grow(n) + var readRune rune + var err error for writeAt := l; writeAt <= offset; writeAt++ { - readRune, _, err := r.bufio.ReadRune() + readRune, _, err = r.bufio.ReadRune() // Skip BOM. if !r.firstReadDone { @@ -150,6 +152,7 @@ func (r *Buffer) RuneAt(offset int) (rune, error) { r.buffer[writeAt] = readRune } + return readRune, nil } return r.buffer[offset], nil } diff --git a/tokenize/api.go b/tokenize/api.go index 8faa4ae..ce62a61 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -1,6 +1,8 @@ package tokenize import ( + "fmt" + "git.makaay.nl/mauricem/go-parsekit/read" ) @@ -80,12 +82,13 @@ type API struct { } type stackFrame struct { - offset int // current rune offset relative to the Reader's sliding window - runeStart int - runeEnd int - tokenStart int - tokenEnd int - cursor Cursor + offset int // current rune read offset relative to the Reader's sliding window + column int // The column at which the cursor is (0-indexed) + line int // The line at which the cursor is (0-indexed) + runeStart int // the starting point in the API.runes slice for runes produced by this stack level + runeEnd int // the end point in the API.runes slice for runes produced by this stack level + tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level + tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level // TODO err error // can be used by a Handler to report a specific issue with the input @@ -173,7 +176,7 @@ func (i *API) accept(runes ...rune) { for offset, r := range runes { i.runes[curRuneEnd+offset] = r - i.stackFrame.cursor.moveByRune(r) + i.stackFrame.moveCursorByRune(r) } i.stackFrame.runeEnd = newRuneEnd i.stackFrame.offset += len(runes) @@ -213,16 +216,23 @@ func (i *API) Fork() int { i.stackLevel++ i.runeRead = false - parent := i.stackFrame + // A + // i.stackFrames[i.stackLevel] = *i.stackFrame + // i.stackFrame = &i.stackFrames[i.stackLevel] + // i.stackFrame.runeStart = i.stackFrame.runeEnd + // i.stackFrame.tokenStart = i.stackFrame.tokenEnd - f := &i.stackFrames[i.stackLevel] - f.offset = parent.offset - f.cursor = parent.cursor - f.runeStart = parent.runeEnd - f.runeEnd = parent.runeEnd - f.tokenStart = parent.tokenEnd - f.tokenEnd = parent.tokenEnd - i.stackFrame = f + // B + parent := i.stackFrame + child := &i.stackFrames[i.stackLevel] + child.offset = parent.offset + child.column = parent.column + child.line = parent.line + child.runeStart = parent.runeEnd + child.runeEnd = parent.runeEnd + child.tokenStart = parent.tokenEnd + child.tokenEnd = parent.tokenEnd + i.stackFrame = child return i.stackLevel } @@ -267,7 +277,8 @@ func (i *API) Merge(stackLevel int) { i.stackFrame.tokenStart = i.stackFrame.tokenEnd parent.offset = i.stackFrame.offset - parent.cursor = i.stackFrame.cursor + parent.line = i.stackFrame.line + parent.column = i.stackFrame.column i.stackFrame.err = nil i.runeRead = false @@ -375,8 +386,11 @@ func (i *API) SetString(s string) { i.SetRunes([]rune(s)...) } -func (i *API) Cursor() Cursor { - return i.stackFrame.cursor +func (i *API) Cursor() string { + if i.stackFrame.line == 0 && i.stackFrame.column == 0 { + return fmt.Sprintf("start of file") + } + return fmt.Sprintf("line %d, column %d", i.stackFrame.line+1, i.stackFrame.column+1) } func (i *API) Tokens() []Token { diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 4a25b41..941b8bc 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -213,7 +213,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { AssertEqual(t, 'c', r, "child4 rune 3") api.Accept() AssertEqual(t, "c", api.String(), "child4 runes after rune 1") - AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") // Merge "c" from child4 to child3. api.Merge(child4) @@ -223,7 +223,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { // Child3 should now have the compbined results "abc" from child4's work. AssertEqual(t, "abc", api.String(), "child3 after merge of child4") - AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4") + AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") // Now read some data from child3. r, _ = api.NextRune() @@ -259,7 +259,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { api.Dispose(child3) AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3") - AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3") + AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") // Merge child2 to child1 and dispose of it. api.Merge(child2) @@ -279,7 +279,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) { api.Accept() AssertEqual(t, "abcdefg", api.String(), "api string end result") - AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result") + AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") } func TestClearRunes(t *testing.T) { diff --git a/tokenize/cursor.go b/tokenize/cursor.go index 1f7c02c..8043641 100644 --- a/tokenize/cursor.go +++ b/tokenize/cursor.go @@ -1,40 +1,22 @@ package tokenize -import ( - "fmt" -) - -// Cursor represents the position of a cursor in various ways. -type Cursor struct { - Column int // The column at which the cursor is (0-indexed) - Line int // The line at which the cursor is (0-indexed) -} - -// String produces a string representation of the cursor position. -func (c Cursor) String() string { - if c.Line == 0 && c.Column == 0 { - return fmt.Sprintf("start of file") - } - return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1) -} - // move updates the position of the cursor, based on the provided input string. // The input string represents the runes that the cursor must be moved over. // This method will take newlines into account to keep track of line numbers and // column positions automatically. -func (c *Cursor) move(input string) *Cursor { +func (f *stackFrame) moveCursor(input string) *stackFrame { for _, r := range input { - c.moveByRune(r) + f.moveCursorByRune(r) } - return c + return f } -func (c *Cursor) moveByRune(r rune) *Cursor { +func (f *stackFrame) moveCursorByRune(r rune) *stackFrame { if r == '\n' { - c.Column = 0 - c.Line++ + f.column = 0 + f.line++ } else { - c.Column++ + f.column++ } - return c + return f } diff --git a/tokenize/cursor_test.go b/tokenize/cursor_test.go index 5fba54c..f89508f 100644 --- a/tokenize/cursor_test.go +++ b/tokenize/cursor_test.go @@ -1,36 +1,9 @@ package tokenize import ( - "fmt" "testing" ) -func ExampleCursor_move() { - c := Cursor{} - fmt.Printf("after initialization : %s\n", c) - fmt.Printf("after 'some words' : %s\n", c.move("some words")) - fmt.Printf("after '\\n' : %s\n", c.move("\n")) - fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines")) - - // Output: - // after initialization : start of file - // after 'some words' : line 1, column 11 - // after '\n' : line 2, column 1 - // after '\r\nskip\nlines' : line 4, column 6 -} - -func ExampleCursor_String() { - c := Cursor{} - fmt.Println(c.String()) - - c.move("\nfoobar") - fmt.Println(c.String()) - - // Output: - // start of file - // line 2, column 7 -} - func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { for _, test := range []struct { name string @@ -49,15 +22,15 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, } { - c := Cursor{} + api := NewAPI("") for _, s := range test.input { - c.move(s) + api.stackFrame.moveCursor(s) } - if c.Line != test.line { - t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line) + if api.stackFrame.line != test.line { + t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line) } - if c.Column != test.column { - t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column) + if api.stackFrame.column != test.column { + t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column) } } } diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 8f4b735..e4a0d35 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -1303,7 +1303,8 @@ func ModifyDrop(handler Handler) Handler { // Otherwise we'd have to do a Reset() + Merge() call to get the same result. parent := &t.stackFrames[t.stackLevel-1] parent.offset = t.stackFrame.offset - parent.cursor = t.stackFrame.cursor + parent.line = t.stackFrame.line + parent.column = t.stackFrame.column t.Dispose(child) return true } diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go index a06ab8b..d7d0d42 100644 --- a/tokenize/tokenize.go +++ b/tokenize/tokenize.go @@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func { ok := tokenHandler(api) if !ok { - err := fmt.Errorf("mismatch at %s", Cursor{}) + err := fmt.Errorf("mismatch at %s", api.Cursor()) return nil, err } result := &Result{ diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index 0786fc4..733aa70 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -181,20 +181,20 @@ func TestForkingInput_ClearsLastRune(t *testing.T) { func TestAccept_UpdatesCursor(t *testing.T) { i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) - AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1") + AssertEqual(t, "start of file", i.Cursor(), "cursor 1") for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" i.NextRune() i.Accept() } - AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2") + AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") i.NextRune() // read "\n", cursor ends up at start of new line i.Accept() - AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3") + AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" i.NextRune() i.Accept() } - AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4") + AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") } func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {