Removed loop protection code. This is useful, but it puts a performance burden on the code when doing it by keeping track of actual callers through the call stack. Maybe to be reintroduced in a future version with something like a simple counter and a maximum depth-style protection.

2019-07-12 12:33:18 +00:00 · 2019-07-12 12:33:18 +00:00 · 56b8df3aab
parent 09746c0d2e
commit 56b8df3aab
11 changed files with 70 additions and 178 deletions
--- a/parse/api.go
+++ b/parse/api.go
@ -16,11 +16,10 @@ import (
 //
 // • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
 type API struct {
-	tokenAPI  *tokenize.API    // the tokenize.API, used for communicating with tokenize.Handler functions
-	Result    tokenize.Result  // a struct, holding the results of the last Peek() or Accept() call
-	loopCheck map[uintptr]bool // used for parser loop detection
-	err       error            // parse error, retrieved by Error(), using API methods is denied when set
-	stopped   bool             // a boolean set to true by Stop()
+	tokenAPI *tokenize.API   // the tokenize.API, used for communicating with tokenize.Handler functions
+	Result   tokenize.Result // a struct, holding the results of the last Peek() or Accept() call
+	err      error           // parse error, retrieved by Error(), using API methods is denied when set
+	stopped  bool            // a boolean set to true by Stop()
 }

 // Peek checks if the upcoming input data matches the provided tokenize.Handler.
@ -55,9 +54,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
 		p.tokenAPI.Dispose(forkedAPI)

 		// And flush the input reader buffer.
-		if p.tokenAPI.FlushInput() {
-			p.initLoopCheck()
-		}
+		p.tokenAPI.FlushInput()
 	} else {
 		p.tokenAPI.Dispose(forkedAPI)
 	}
@ -66,12 +63,11 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {

 func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
 	p.panicWhenStoppedOrInError(name)
-	p.checkForLoops(name)
 	if tokenHandler == nil {
 		callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
 	}

-	p.tokenAPI.Reset()
+	p.tokenAPI.Reset() // TODO uh, why did I do this again? Just for i.runeRead = false ?
 	child := p.tokenAPI.Fork()
 	ok := tokenHandler(p.tokenAPI)

@ -105,25 +101,6 @@ func (p *API) IsStoppedOrInError() bool {
 	return p.stopped || p.err != nil
 }

-// initLoopCheck clears the loop check data, a map in which we keep
-// track of the lines of code from which Accept() and/or Peek() are called.
-// When Accept() is called, and the parser moved forward in the input data,
-// this method is called to reset the map for the new read cursor position.
-func (p *API) initLoopCheck() {
-	p.loopCheck = make(map[uintptr]bool)
-}
-
-// checkForLoops checks if the line of code from which Accept() or Peek()
-// was called has been seen before for the current read cursor position.
-// If yes, then the parser is in a loop and the method will panic.
-func (p *API) checkForLoops(name string) {
-	filepos := callerPointer(3)
-	if _, ok := p.loopCheck[filepos]; ok {
-		callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
-	}
-	p.loopCheck[filepos] = true
-}
-
 // Handle executes other parse.Handler functions from within the active
 // parse.Handler function.
 //
--- a/parse/parse.go
+++ b/parse/parse.go
@ -31,8 +31,8 @@ func New(startHandler Handler) Func {
 	}
 	return func(input interface{}) error {
 		api := &API{
-			tokenAPI:  tokenize.NewAPI(input),
-			loopCheck: make(map[uintptr]bool),
+			tokenAPI: tokenize.NewAPI(input),
+			// NOOPCHECK loopCheck: make(map[uintptr]bool),
 		}
 		if api.Handle(startHandler) {
 			// Handle returned true, indicating that parsing could still continue.
--- a/parse/parse_test.go
+++ b/parse/parse_test.go
@ -266,61 +266,3 @@ func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T)
 	err := p("x")
 	parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err")
 }
-
-type parserWithLoop struct {
-	loopCounter int
-}
-
-func (l *parserWithLoop) first(p *parse.API) {
-	p.Accept(tokenize.A.ASCII)
-	p.Handle(l.second)
-}
-
-func (l *parserWithLoop) second(p *parse.API) {
-	p.Accept(tokenize.A.ASCII)
-	p.Handle(l.third)
-}
-
-func (l *parserWithLoop) third(p *parse.API) {
-	if l.loopCounter++; l.loopCounter > 100 {
-		p.Error("Loop not detected by parsekit")
-		return
-	}
-	p.Accept(tokenize.A.ASCII)
-	p.Handle(l.first)
-}
-
-func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
-	looper := &parserWithLoop{}
-	parser := parse.New(looper.first)
-	parse.AssertPanic(t, parse.PanicT{
-		Function: func() { parser("Het houdt niet op, niet vanzelf") },
-		Regexp:   true,
-		Expect:   `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`})
-}
-
-// This test incorporates an actual loop bug that I dropped on myself and
-// that I could not easily spot in my code. It sounded so logical:
-// I want to get chunks of 5 chars from the input, so I simply loop on:
-//
-//    p.On(c.Max(5, a.AnyRune))
-//
-// The problem here is that Max(5, ...) will also match when there is
-// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
-// Therefore the loop will never stop. Solving the loop was simple:
-//
-//    p.On(c.MinMax(1, 5, a.AnyRune))
-//
-// Now the loop stops when the parser finds no more matching input data.
-func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
-	var c, a = tokenize.C, tokenize.A
-	parser := parse.New(func(p *parse.API) {
-		for p.Accept(c.Max(5, a.AnyRune)) {
-		}
-		p.Stop()
-	})
-	parse.AssertPanic(t, parse.PanicT{
-		Function: func() { parser("This will end soon") },
-		Regexp:   true,
-		Expect:   `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`})
-}
--- a/read/read.go
+++ b/read/read.go
@ -129,8 +129,10 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {

 	if n > 0 {
 		r.grow(n)
+		var readRune rune
+		var err error
 		for writeAt := l; writeAt <= offset; writeAt++ {
-			readRune, _, err := r.bufio.ReadRune()
+			readRune, _, err = r.bufio.ReadRune()

 			// Skip BOM.
 			if !r.firstReadDone {
@ -150,6 +152,7 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {

 			r.buffer[writeAt] = readRune
 		}
+		return readRune, nil
 	}
 	return r.buffer[offset], nil
 }
--- a/tokenize/api.go
+++ b/tokenize/api.go
@ -1,6 +1,8 @@
 package tokenize

 import (
+	"fmt"
+
 	"git.makaay.nl/mauricem/go-parsekit/read"
 )

@ -80,12 +82,13 @@ type API struct {
 }

 type stackFrame struct {
-	offset     int // current rune offset relative to the Reader's sliding window
-	runeStart  int
-	runeEnd    int
-	tokenStart int
-	tokenEnd   int
-	cursor     Cursor
+	offset     int // current rune read offset relative to the Reader's sliding window
+	column     int // The column at which the cursor is (0-indexed)
+	line       int // The line at which the cursor is (0-indexed)
+	runeStart  int // the starting point in the API.runes slice for runes produced by this stack level
+	runeEnd    int // the end point in the API.runes slice for runes produced by this stack level
+	tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
+	tokenEnd   int // the end point in the API.tokens slice for tokens produced by this stack level

 	// TODO
 	err error // can be used by a Handler to report a specific issue with the input
@ -173,7 +176,7 @@ func (i *API) accept(runes ...rune) {

 	for offset, r := range runes {
 		i.runes[curRuneEnd+offset] = r
-		i.stackFrame.cursor.moveByRune(r)
+		i.stackFrame.moveCursorByRune(r)
 	}
 	i.stackFrame.runeEnd = newRuneEnd
 	i.stackFrame.offset += len(runes)
@ -213,16 +216,23 @@ func (i *API) Fork() int {
 	i.stackLevel++
 	i.runeRead = false

-	parent := i.stackFrame
+	// A
+	// i.stackFrames[i.stackLevel] = *i.stackFrame
+	// i.stackFrame = &i.stackFrames[i.stackLevel]
+	// i.stackFrame.runeStart = i.stackFrame.runeEnd
+	// i.stackFrame.tokenStart = i.stackFrame.tokenEnd

-	f := &i.stackFrames[i.stackLevel]
-	f.offset = parent.offset
-	f.cursor = parent.cursor
-	f.runeStart = parent.runeEnd
-	f.runeEnd = parent.runeEnd
-	f.tokenStart = parent.tokenEnd
-	f.tokenEnd = parent.tokenEnd
-	i.stackFrame = f
+	// B
+	parent := i.stackFrame
+	child := &i.stackFrames[i.stackLevel]
+	child.offset = parent.offset
+	child.column = parent.column
+	child.line = parent.line
+	child.runeStart = parent.runeEnd
+	child.runeEnd = parent.runeEnd
+	child.tokenStart = parent.tokenEnd
+	child.tokenEnd = parent.tokenEnd
+	i.stackFrame = child

 	return i.stackLevel
 }
@ -267,7 +277,8 @@ func (i *API) Merge(stackLevel int) {
 	i.stackFrame.tokenStart = i.stackFrame.tokenEnd

 	parent.offset = i.stackFrame.offset
-	parent.cursor = i.stackFrame.cursor
+	parent.line = i.stackFrame.line
+	parent.column = i.stackFrame.column

 	i.stackFrame.err = nil
 	i.runeRead = false
@ -375,8 +386,11 @@ func (i *API) SetString(s string) {
 	i.SetRunes([]rune(s)...)
 }

-func (i *API) Cursor() Cursor {
-	return i.stackFrame.cursor
+func (i *API) Cursor() string {
+	if i.stackFrame.line == 0 && i.stackFrame.column == 0 {
+		return fmt.Sprintf("start of file")
+	}
+	return fmt.Sprintf("line %d, column %d", i.stackFrame.line+1, i.stackFrame.column+1)
 }

 func (i *API) Tokens() []Token {
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@ -213,7 +213,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
 	AssertEqual(t, 'c', r, "child4 rune 3")
 	api.Accept()
 	AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
-	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
+	AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")

 	// Merge "c" from child4 to child3.
 	api.Merge(child4)
@ -223,7 +223,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {

 	// Child3 should now have the compbined results "abc" from child4's work.
 	AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
-	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
+	AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")

 	// Now read some data from child3.
 	r, _ = api.NextRune()
@ -259,7 +259,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
 	api.Dispose(child3)

 	AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
-	AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
+	AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")

 	// Merge child2 to child1 and dispose of it.
 	api.Merge(child2)
@ -279,7 +279,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
 	api.Accept()

 	AssertEqual(t, "abcdefg", api.String(), "api string end result")
-	AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
+	AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
 }

 func TestClearRunes(t *testing.T) {
--- a/tokenize/cursor.go
+++ b/tokenize/cursor.go
@ -1,40 +1,22 @@
 package tokenize

-import (
-	"fmt"
-)
-
-// Cursor represents the position of a cursor in various ways.
-type Cursor struct {
-	Column int // The column at which the cursor is (0-indexed)
-	Line   int // The line at which the cursor is (0-indexed)
-}
-
-// String produces a string representation of the cursor position.
-func (c Cursor) String() string {
-	if c.Line == 0 && c.Column == 0 {
-		return fmt.Sprintf("start of file")
-	}
-	return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
-}
-
 // move updates the position of the cursor, based on the provided input string.
 // The input string represents the runes that the cursor must be moved over.
 // This method will take newlines into account to keep track of line numbers and
 // column positions automatically.
-func (c *Cursor) move(input string) *Cursor {
+func (f *stackFrame) moveCursor(input string) *stackFrame {
 	for _, r := range input {
-		c.moveByRune(r)
+		f.moveCursorByRune(r)
 	}
-	return c
+	return f
 }

-func (c *Cursor) moveByRune(r rune) *Cursor {
+func (f *stackFrame) moveCursorByRune(r rune) *stackFrame {
 	if r == '\n' {
-		c.Column = 0
-		c.Line++
+		f.column = 0
+		f.line++
 	} else {
-		c.Column++
+		f.column++
 	}
-	return c
+	return f
 }
--- a/tokenize/cursor_test.go
+++ b/tokenize/cursor_test.go
@ -1,36 +1,9 @@
 package tokenize

 import (
-	"fmt"
 	"testing"
 )

-func ExampleCursor_move() {
-	c := Cursor{}
-	fmt.Printf("after initialization    : %s\n", c)
-	fmt.Printf("after 'some words'      : %s\n", c.move("some words"))
-	fmt.Printf("after '\\n'              : %s\n", c.move("\n"))
-	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
-
-	// Output:
-	// after initialization    : start of file
-	// after 'some words'      : line 1, column 11
-	// after '\n'              : line 2, column 1
-	// after '\r\nskip\nlines' : line 4, column 6
-}
-
-func ExampleCursor_String() {
-	c := Cursor{}
-	fmt.Println(c.String())
-
-	c.move("\nfoobar")
-	fmt.Println(c.String())
-
-	// Output:
-	// start of file
-	// line 2, column 7
-}
-
 func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
 	for _, test := range []struct {
 		name   string
@ -49,15 +22,15 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
 		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
 		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
 	} {
-		c := Cursor{}
+		api := NewAPI("")
 		for _, s := range test.input {
-			c.move(s)
+			api.stackFrame.moveCursor(s)
 		}
-		if c.Line != test.line {
-			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
+		if api.stackFrame.line != test.line {
+			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
 		}
-		if c.Column != test.column {
-			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
+		if api.stackFrame.column != test.column {
+			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column)
 		}
 	}
 }
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@ -1303,7 +1303,8 @@ func ModifyDrop(handler Handler) Handler {
 			// Otherwise we'd have to do a Reset() + Merge() call to get the same result.
 			parent := &t.stackFrames[t.stackLevel-1]
 			parent.offset = t.stackFrame.offset
-			parent.cursor = t.stackFrame.cursor
+			parent.line = t.stackFrame.line
+			parent.column = t.stackFrame.column
 			t.Dispose(child)
 			return true
 		}
--- a/tokenize/tokenize.go
+++ b/tokenize/tokenize.go
@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func {
 		ok := tokenHandler(api)

 		if !ok {
-			err := fmt.Errorf("mismatch at %s", Cursor{})
+			err := fmt.Errorf("mismatch at %s", api.Cursor())
 			return nil, err
 		}
 		result := &Result{
--- a/tokenize/tokenizer_test.go
+++ b/tokenize/tokenizer_test.go
@ -181,20 +181,20 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {

 func TestAccept_UpdatesCursor(t *testing.T) {
 	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
-	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
+	AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
 	for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
 		i.NextRune()
 		i.Accept()
 	}
-	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
+	AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
 	i.NextRune() // read "\n", cursor ends up at start of new line
 	i.Accept()
-	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
+	AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
 	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
 		i.NextRune()
 		i.Accept()
 	}
-	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
+	AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
 }

 func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {