Removed the loop protection code. Loop protection is useful, but the current implementation puts a performance burden on the parser, because it keeps track of the actual callers through the call stack. It may be reintroduced in a future version using something cheaper, such as a simple counter combined with a maximum-depth style of protection.

This commit is contained in:
Maurice Makaay 2019-07-12 12:33:18 +00:00
parent 09746c0d2e
commit 56b8df3aab
11 changed files with 70 additions and 178 deletions

View File

@ -16,11 +16,10 @@ import (
//
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call
loopCheck map[uintptr]bool // used for parser loop detection
err error // parse error, retrieved by Error(), using API methods is denied when set
stopped bool // a boolean set to true by Stop()
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call
err error // parse error, retrieved by Error(), using API methods is denied when set
stopped bool // a boolean set to true by Stop()
}
// Peek checks if the upcoming input data matches the provided tokenize.Handler.
@ -55,9 +54,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
p.tokenAPI.Dispose(forkedAPI)
// And flush the input reader buffer.
if p.tokenAPI.FlushInput() {
p.initLoopCheck()
}
p.tokenAPI.FlushInput()
} else {
p.tokenAPI.Dispose(forkedAPI)
}
@ -66,12 +63,11 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
p.panicWhenStoppedOrInError(name)
p.checkForLoops(name)
if tokenHandler == nil {
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
}
p.tokenAPI.Reset()
p.tokenAPI.Reset() // TODO uh, why did I do this again? Just for i.runeRead = false ?
child := p.tokenAPI.Fork()
ok := tokenHandler(p.tokenAPI)
@ -105,25 +101,6 @@ func (p *API) IsStoppedOrInError() bool {
return p.stopped || p.err != nil
}
// initLoopCheck clears the loop check data, a map in which we keep
// track of the lines of code from which Accept() and/or Peek() are called.
// When Accept() is called, and the parser moved forward in the input data,
// this method is called to reset the map for the new read cursor position.
func (p *API) initLoopCheck() {
p.loopCheck = make(map[uintptr]bool)
}
// checkForLoops checks if the line of code from which Accept() or Peek()
// was called has been seen before for the current read cursor position.
// If yes, then the parser is in a loop and the method will panic.
func (p *API) checkForLoops(name string) {
filepos := callerPointer(3)
if _, ok := p.loopCheck[filepos]; ok {
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
// Handle executes other parse.Handler functions from within the active
// parse.Handler function.
//

View File

@ -31,8 +31,8 @@ func New(startHandler Handler) Func {
}
return func(input interface{}) error {
api := &API{
tokenAPI: tokenize.NewAPI(input),
loopCheck: make(map[uintptr]bool),
tokenAPI: tokenize.NewAPI(input),
// NOOPCHECK loopCheck: make(map[uintptr]bool),
}
if api.Handle(startHandler) {
// Handle returned true, indicating that parsing could still continue.

View File

@ -266,61 +266,3 @@ func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T)
err := p("x")
parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err")
}
type parserWithLoop struct {
loopCounter int
}
func (l *parserWithLoop) first(p *parse.API) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.second)
}
func (l *parserWithLoop) second(p *parse.API) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.third)
}
func (l *parserWithLoop) third(p *parse.API) {
if l.loopCounter++; l.loopCounter > 100 {
p.Error("Loop not detected by parsekit")
return
}
p.Accept(tokenize.A.ASCII)
p.Handle(l.first)
}
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
looper := &parserWithLoop{}
parser := parse.New(looper.first)
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("Het houdt niet op, niet vanzelf") },
Regexp: true,
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
// that I could not easily spot in my code. It sounded so logical:
// I want to get chunks of 5 chars from the input, so I simply loop on:
//
// p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
// p.On(c.MinMax(1, 5, a.AnyRune))
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A
parser := parse.New(func(p *parse.API) {
for p.Accept(c.Max(5, a.AnyRune)) {
}
p.Stop()
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("This will end soon") },
Regexp: true,
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`})
}

View File

@ -129,8 +129,10 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
if n > 0 {
r.grow(n)
var readRune rune
var err error
for writeAt := l; writeAt <= offset; writeAt++ {
readRune, _, err := r.bufio.ReadRune()
readRune, _, err = r.bufio.ReadRune()
// Skip BOM.
if !r.firstReadDone {
@ -150,6 +152,7 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
r.buffer[writeAt] = readRune
}
return readRune, nil
}
return r.buffer[offset], nil
}

View File

@ -1,6 +1,8 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
@ -80,12 +82,13 @@ type API struct {
}
type stackFrame struct {
offset int // current rune offset relative to the Reader's sliding window
runeStart int
runeEnd int
tokenStart int
tokenEnd int
cursor Cursor
offset int // current rune read offset relative to the Reader's sliding window
column int // The column at which the cursor is (0-indexed)
line int // The line at which the cursor is (0-indexed)
runeStart int // the starting point in the API.runes slice for runes produced by this stack level
runeEnd int // the end point in the API.runes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
// TODO
err error // can be used by a Handler to report a specific issue with the input
@ -173,7 +176,7 @@ func (i *API) accept(runes ...rune) {
for offset, r := range runes {
i.runes[curRuneEnd+offset] = r
i.stackFrame.cursor.moveByRune(r)
i.stackFrame.moveCursorByRune(r)
}
i.stackFrame.runeEnd = newRuneEnd
i.stackFrame.offset += len(runes)
@ -213,16 +216,23 @@ func (i *API) Fork() int {
i.stackLevel++
i.runeRead = false
parent := i.stackFrame
// A
// i.stackFrames[i.stackLevel] = *i.stackFrame
// i.stackFrame = &i.stackFrames[i.stackLevel]
// i.stackFrame.runeStart = i.stackFrame.runeEnd
// i.stackFrame.tokenStart = i.stackFrame.tokenEnd
f := &i.stackFrames[i.stackLevel]
f.offset = parent.offset
f.cursor = parent.cursor
f.runeStart = parent.runeEnd
f.runeEnd = parent.runeEnd
f.tokenStart = parent.tokenEnd
f.tokenEnd = parent.tokenEnd
i.stackFrame = f
// B
parent := i.stackFrame
child := &i.stackFrames[i.stackLevel]
child.offset = parent.offset
child.column = parent.column
child.line = parent.line
child.runeStart = parent.runeEnd
child.runeEnd = parent.runeEnd
child.tokenStart = parent.tokenEnd
child.tokenEnd = parent.tokenEnd
i.stackFrame = child
return i.stackLevel
}
@ -267,7 +277,8 @@ func (i *API) Merge(stackLevel int) {
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
parent.offset = i.stackFrame.offset
parent.cursor = i.stackFrame.cursor
parent.line = i.stackFrame.line
parent.column = i.stackFrame.column
i.stackFrame.err = nil
i.runeRead = false
@ -375,8 +386,11 @@ func (i *API) SetString(s string) {
i.SetRunes([]rune(s)...)
}
func (i *API) Cursor() Cursor {
return i.stackFrame.cursor
func (i *API) Cursor() string {
if i.stackFrame.line == 0 && i.stackFrame.column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", i.stackFrame.line+1, i.stackFrame.column+1)
}
func (i *API) Tokens() []Token {

View File

@ -213,7 +213,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
AssertEqual(t, 'c', r, "child4 rune 3")
api.Accept()
AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
// Merge "c" from child4 to child3.
api.Merge(child4)
@ -223,7 +223,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// Child3 should now have the combined results "abc" from child4's work.
AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3.
r, _ = api.NextRune()
@ -259,7 +259,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api.Dispose(child3)
AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
// Merge child2 to child1 and dispose of it.
api.Merge(child2)
@ -279,7 +279,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api.Accept()
AssertEqual(t, "abcdefg", api.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
}
func TestClearRunes(t *testing.T) {

View File

@ -1,40 +1,22 @@
package tokenize
import (
"fmt"
)
// Cursor represents the position of a cursor in various ways.
type Cursor struct {
Column int // The column at which the cursor is (0-indexed)
Line int // The line at which the cursor is (0-indexed)
}
// String produces a string representation of the cursor position.
func (c Cursor) String() string {
if c.Line == 0 && c.Column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}
// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
func (f *stackFrame) moveCursor(input string) *stackFrame {
for _, r := range input {
c.moveByRune(r)
f.moveCursorByRune(r)
}
return c
return f
}
func (c *Cursor) moveByRune(r rune) *Cursor {
func (f *stackFrame) moveCursorByRune(r rune) *stackFrame {
if r == '\n' {
c.Column = 0
c.Line++
f.column = 0
f.line++
} else {
c.Column++
f.column++
}
return c
return f
}

View File

@ -1,36 +1,9 @@
package tokenize
import (
"fmt"
"testing"
)
func ExampleCursor_move() {
c := Cursor{}
fmt.Printf("after initialization : %s\n", c)
fmt.Printf("after 'some words' : %s\n", c.move("some words"))
fmt.Printf("after '\\n' : %s\n", c.move("\n"))
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
// Output:
// after initialization : start of file
// after 'some words' : line 1, column 11
// after '\n' : line 2, column 1
// after '\r\nskip\nlines' : line 4, column 6
}
func ExampleCursor_String() {
c := Cursor{}
fmt.Println(c.String())
c.move("\nfoobar")
fmt.Println(c.String())
// Output:
// start of file
// line 2, column 7
}
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
for _, test := range []struct {
name string
@ -49,15 +22,15 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
c := Cursor{}
api := NewAPI("")
for _, s := range test.input {
c.move(s)
api.stackFrame.moveCursor(s)
}
if c.Line != test.line {
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
if api.stackFrame.line != test.line {
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
}
if c.Column != test.column {
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
if api.stackFrame.column != test.column {
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column)
}
}
}

View File

@ -1303,7 +1303,8 @@ func ModifyDrop(handler Handler) Handler {
// Otherwise we'd have to do a Reset() + Merge() call to get the same result.
parent := &t.stackFrames[t.stackLevel-1]
parent.offset = t.stackFrame.offset
parent.cursor = t.stackFrame.cursor
parent.line = t.stackFrame.line
parent.column = t.stackFrame.column
t.Dispose(child)
return true
}

View File

@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func {
ok := tokenHandler(api)
if !ok {
err := fmt.Errorf("mismatch at %s", Cursor{})
err := fmt.Errorf("mismatch at %s", api.Cursor())
return nil, err
}
result := &Result{

View File

@ -181,20 +181,20 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {