Removed loop protection code. This is useful, but it puts a performance burden on the code when doing it by keeping track of actual callers through the call stack. Maybe to be reintroduced in a future version with something like a simple counter and a maximum depth-style protection.
This commit is contained in:
parent
09746c0d2e
commit
56b8df3aab
35
parse/api.go
35
parse/api.go
|
@ -16,11 +16,10 @@ import (
|
|||
//
|
||||
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
|
||||
type API struct {
|
||||
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
|
||||
Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call
|
||||
loopCheck map[uintptr]bool // used for parser loop detection
|
||||
err error // parse error, retrieved by Error(), using API methods is denied when set
|
||||
stopped bool // a boolean set to true by Stop()
|
||||
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
|
||||
Result tokenize.Result // a struct, holding the results of the last Peek() or Accept() call
|
||||
err error // parse error, retrieved by Error(), using API methods is denied when set
|
||||
stopped bool // a boolean set to true by Stop()
|
||||
}
|
||||
|
||||
// Peek checks if the upcoming input data matches the provided tokenize.Handler.
|
||||
|
@ -55,9 +54,7 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
|
|||
p.tokenAPI.Dispose(forkedAPI)
|
||||
|
||||
// And flush the input reader buffer.
|
||||
if p.tokenAPI.FlushInput() {
|
||||
p.initLoopCheck()
|
||||
}
|
||||
p.tokenAPI.FlushInput()
|
||||
} else {
|
||||
p.tokenAPI.Dispose(forkedAPI)
|
||||
}
|
||||
|
@ -66,12 +63,11 @@ func (p *API) Accept(tokenHandler tokenize.Handler) bool {
|
|||
|
||||
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
|
||||
p.panicWhenStoppedOrInError(name)
|
||||
p.checkForLoops(name)
|
||||
if tokenHandler == nil {
|
||||
callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}")
|
||||
}
|
||||
|
||||
p.tokenAPI.Reset()
|
||||
p.tokenAPI.Reset() // TODO uh, why did I do this again? Just for i.runeRead = false ?
|
||||
child := p.tokenAPI.Fork()
|
||||
ok := tokenHandler(p.tokenAPI)
|
||||
|
||||
|
@ -105,25 +101,6 @@ func (p *API) IsStoppedOrInError() bool {
|
|||
return p.stopped || p.err != nil
|
||||
}
|
||||
|
||||
// initLoopCheck clears the loop check data, a map in which we keep
|
||||
// track of the lines of code from which Accept() and/or Peek() are called.
|
||||
// When Accept() is called, and the parser moved forward in the input data,
|
||||
// this method is called to reset the map for the new read cursor position.
|
||||
func (p *API) initLoopCheck() {
|
||||
p.loopCheck = make(map[uintptr]bool)
|
||||
}
|
||||
|
||||
// checkForLoops checks if the line of code from which Accept() or Peek()
|
||||
// was called has been seen before for the current read cursor position.
|
||||
// If yes, then the parser is in a loop and the method will panic.
|
||||
func (p *API) checkForLoops(name string) {
|
||||
filepos := callerPointer(3)
|
||||
if _, ok := p.loopCheck[filepos]; ok {
|
||||
callerPanic(name, "parsekit.parse.API.{name}(): Loop detected in parser at {caller}")
|
||||
}
|
||||
p.loopCheck[filepos] = true
|
||||
}
|
||||
|
||||
// Handle executes other parse.Handler functions from within the active
|
||||
// parse.Handler function.
|
||||
//
|
||||
|
|
|
@ -31,8 +31,8 @@ func New(startHandler Handler) Func {
|
|||
}
|
||||
return func(input interface{}) error {
|
||||
api := &API{
|
||||
tokenAPI: tokenize.NewAPI(input),
|
||||
loopCheck: make(map[uintptr]bool),
|
||||
tokenAPI: tokenize.NewAPI(input),
|
||||
// NOOPCHECK loopCheck: make(map[uintptr]bool),
|
||||
}
|
||||
if api.Handle(startHandler) {
|
||||
// Handle returned true, indicating that parsing could still continue.
|
||||
|
|
|
@ -266,61 +266,3 @@ func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T)
|
|||
err := p("x")
|
||||
parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err")
|
||||
}
|
||||
|
||||
type parserWithLoop struct {
|
||||
loopCounter int
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) first(p *parse.API) {
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.second)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) second(p *parse.API) {
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.third)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) third(p *parse.API) {
|
||||
if l.loopCounter++; l.loopCounter > 100 {
|
||||
p.Error("Loop not detected by parsekit")
|
||||
return
|
||||
}
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.first)
|
||||
}
|
||||
|
||||
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||
looper := &parserWithLoop{}
|
||||
parser := parse.New(looper.first)
|
||||
parse.AssertPanic(t, parse.PanicT{
|
||||
Function: func() { parser("Het houdt niet op, niet vanzelf") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at /.*/parse_test.go:\d+`})
|
||||
}
|
||||
|
||||
// This test incorporates an actual loop bug that I dropped on myself and
|
||||
// that I could not easily spot in my code. It sounded so logical:
|
||||
// I want to get chunks of 5 chars from the input, so I simply loop on:
|
||||
//
|
||||
// p.On(c.Max(5, a.AnyRune))
|
||||
//
|
||||
// The problem here is that Max(5, ...) will also match when there is
|
||||
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
|
||||
// Therefore the loop will never stop. Solving the loop was simple:
|
||||
//
|
||||
// p.On(c.MinMax(1, 5, a.AnyRune))
|
||||
//
|
||||
// Now the loop stops when the parser finds no more matching input data.
|
||||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
parser := parse.New(func(p *parse.API) {
|
||||
for p.Accept(c.Max(5, a.AnyRune)) {
|
||||
}
|
||||
p.Stop()
|
||||
})
|
||||
parse.AssertPanic(t, parse.PanicT{
|
||||
Function: func() { parser("This will end soon") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.parse\.API.Accept\(\): Loop detected in parser at .*/parse_test.go:\d+`})
|
||||
}
|
||||
|
|
|
@ -129,8 +129,10 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
|
|||
|
||||
if n > 0 {
|
||||
r.grow(n)
|
||||
var readRune rune
|
||||
var err error
|
||||
for writeAt := l; writeAt <= offset; writeAt++ {
|
||||
readRune, _, err := r.bufio.ReadRune()
|
||||
readRune, _, err = r.bufio.ReadRune()
|
||||
|
||||
// Skip BOM.
|
||||
if !r.firstReadDone {
|
||||
|
@ -150,6 +152,7 @@ func (r *Buffer) RuneAt(offset int) (rune, error) {
|
|||
|
||||
r.buffer[writeAt] = readRune
|
||||
}
|
||||
return readRune, nil
|
||||
}
|
||||
return r.buffer[offset], nil
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||
)
|
||||
|
||||
|
@ -80,12 +82,13 @@ type API struct {
|
|||
}
|
||||
|
||||
type stackFrame struct {
|
||||
offset int // current rune offset relative to the Reader's sliding window
|
||||
runeStart int
|
||||
runeEnd int
|
||||
tokenStart int
|
||||
tokenEnd int
|
||||
cursor Cursor
|
||||
offset int // current rune read offset relative to the Reader's sliding window
|
||||
column int // The column at which the cursor is (0-indexed)
|
||||
line int // The line at which the cursor is (0-indexed)
|
||||
runeStart int // the starting point in the API.runes slice for runes produced by this stack level
|
||||
runeEnd int // the end point in the API.runes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level
|
||||
|
||||
// TODO
|
||||
err error // can be used by a Handler to report a specific issue with the input
|
||||
|
@ -173,7 +176,7 @@ func (i *API) accept(runes ...rune) {
|
|||
|
||||
for offset, r := range runes {
|
||||
i.runes[curRuneEnd+offset] = r
|
||||
i.stackFrame.cursor.moveByRune(r)
|
||||
i.stackFrame.moveCursorByRune(r)
|
||||
}
|
||||
i.stackFrame.runeEnd = newRuneEnd
|
||||
i.stackFrame.offset += len(runes)
|
||||
|
@ -213,16 +216,23 @@ func (i *API) Fork() int {
|
|||
i.stackLevel++
|
||||
i.runeRead = false
|
||||
|
||||
parent := i.stackFrame
|
||||
// A
|
||||
// i.stackFrames[i.stackLevel] = *i.stackFrame
|
||||
// i.stackFrame = &i.stackFrames[i.stackLevel]
|
||||
// i.stackFrame.runeStart = i.stackFrame.runeEnd
|
||||
// i.stackFrame.tokenStart = i.stackFrame.tokenEnd
|
||||
|
||||
f := &i.stackFrames[i.stackLevel]
|
||||
f.offset = parent.offset
|
||||
f.cursor = parent.cursor
|
||||
f.runeStart = parent.runeEnd
|
||||
f.runeEnd = parent.runeEnd
|
||||
f.tokenStart = parent.tokenEnd
|
||||
f.tokenEnd = parent.tokenEnd
|
||||
i.stackFrame = f
|
||||
// B
|
||||
parent := i.stackFrame
|
||||
child := &i.stackFrames[i.stackLevel]
|
||||
child.offset = parent.offset
|
||||
child.column = parent.column
|
||||
child.line = parent.line
|
||||
child.runeStart = parent.runeEnd
|
||||
child.runeEnd = parent.runeEnd
|
||||
child.tokenStart = parent.tokenEnd
|
||||
child.tokenEnd = parent.tokenEnd
|
||||
i.stackFrame = child
|
||||
|
||||
return i.stackLevel
|
||||
}
|
||||
|
@ -267,7 +277,8 @@ func (i *API) Merge(stackLevel int) {
|
|||
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
|
||||
|
||||
parent.offset = i.stackFrame.offset
|
||||
parent.cursor = i.stackFrame.cursor
|
||||
parent.line = i.stackFrame.line
|
||||
parent.column = i.stackFrame.column
|
||||
|
||||
i.stackFrame.err = nil
|
||||
i.runeRead = false
|
||||
|
@ -375,8 +386,11 @@ func (i *API) SetString(s string) {
|
|||
i.SetRunes([]rune(s)...)
|
||||
}
|
||||
|
||||
func (i *API) Cursor() Cursor {
|
||||
return i.stackFrame.cursor
|
||||
func (i *API) Cursor() string {
|
||||
if i.stackFrame.line == 0 && i.stackFrame.column == 0 {
|
||||
return fmt.Sprintf("start of file")
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", i.stackFrame.line+1, i.stackFrame.column+1)
|
||||
}
|
||||
|
||||
func (i *API) Tokens() []Token {
|
||||
|
|
|
@ -213,7 +213,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
|
|||
AssertEqual(t, 'c', r, "child4 rune 3")
|
||||
api.Accept()
|
||||
AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
|
||||
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
|
||||
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3")
|
||||
|
||||
// Merge "c" from child4 to child3.
|
||||
api.Merge(child4)
|
||||
|
@ -223,7 +223,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
|
|||
|
||||
// Child3 should now have the compbined results "abc" from child4's work.
|
||||
AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
|
||||
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
|
||||
AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4")
|
||||
|
||||
// Now read some data from child3.
|
||||
r, _ = api.NextRune()
|
||||
|
@ -259,7 +259,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
|
|||
api.Dispose(child3)
|
||||
|
||||
AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
|
||||
AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
|
||||
AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3")
|
||||
|
||||
// Merge child2 to child1 and dispose of it.
|
||||
api.Merge(child2)
|
||||
|
@ -279,7 +279,7 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
|
|||
api.Accept()
|
||||
|
||||
AssertEqual(t, "abcdefg", api.String(), "api string end result")
|
||||
AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
|
||||
AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result")
|
||||
}
|
||||
|
||||
func TestClearRunes(t *testing.T) {
|
||||
|
|
|
@ -1,40 +1,22 @@
|
|||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Cursor represents the position of a cursor in various ways.
|
||||
type Cursor struct {
|
||||
Column int // The column at which the cursor is (0-indexed)
|
||||
Line int // The line at which the cursor is (0-indexed)
|
||||
}
|
||||
|
||||
// String produces a string representation of the cursor position.
|
||||
func (c Cursor) String() string {
|
||||
if c.Line == 0 && c.Column == 0 {
|
||||
return fmt.Sprintf("start of file")
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
|
||||
}
|
||||
|
||||
// move updates the position of the cursor, based on the provided input string.
|
||||
// The input string represents the runes that the cursor must be moved over.
|
||||
// This method will take newlines into account to keep track of line numbers and
|
||||
// column positions automatically.
|
||||
func (c *Cursor) move(input string) *Cursor {
|
||||
func (f *stackFrame) moveCursor(input string) *stackFrame {
|
||||
for _, r := range input {
|
||||
c.moveByRune(r)
|
||||
f.moveCursorByRune(r)
|
||||
}
|
||||
return c
|
||||
return f
|
||||
}
|
||||
|
||||
func (c *Cursor) moveByRune(r rune) *Cursor {
|
||||
func (f *stackFrame) moveCursorByRune(r rune) *stackFrame {
|
||||
if r == '\n' {
|
||||
c.Column = 0
|
||||
c.Line++
|
||||
f.column = 0
|
||||
f.line++
|
||||
} else {
|
||||
c.Column++
|
||||
f.column++
|
||||
}
|
||||
return c
|
||||
return f
|
||||
}
|
||||
|
|
|
@ -1,36 +1,9 @@
|
|||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func ExampleCursor_move() {
|
||||
c := Cursor{}
|
||||
fmt.Printf("after initialization : %s\n", c)
|
||||
fmt.Printf("after 'some words' : %s\n", c.move("some words"))
|
||||
fmt.Printf("after '\\n' : %s\n", c.move("\n"))
|
||||
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
|
||||
|
||||
// Output:
|
||||
// after initialization : start of file
|
||||
// after 'some words' : line 1, column 11
|
||||
// after '\n' : line 2, column 1
|
||||
// after '\r\nskip\nlines' : line 4, column 6
|
||||
}
|
||||
|
||||
func ExampleCursor_String() {
|
||||
c := Cursor{}
|
||||
fmt.Println(c.String())
|
||||
|
||||
c.move("\nfoobar")
|
||||
fmt.Println(c.String())
|
||||
|
||||
// Output:
|
||||
// start of file
|
||||
// line 2, column 7
|
||||
}
|
||||
|
||||
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
name string
|
||||
|
@ -49,15 +22,15 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
|||
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
|
||||
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
|
||||
} {
|
||||
c := Cursor{}
|
||||
api := NewAPI("")
|
||||
for _, s := range test.input {
|
||||
c.move(s)
|
||||
api.stackFrame.moveCursor(s)
|
||||
}
|
||||
if c.Line != test.line {
|
||||
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
|
||||
if api.stackFrame.line != test.line {
|
||||
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
|
||||
}
|
||||
if c.Column != test.column {
|
||||
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
|
||||
if api.stackFrame.column != test.column {
|
||||
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1303,7 +1303,8 @@ func ModifyDrop(handler Handler) Handler {
|
|||
// Otherwise we'd have to do a Reset() + Merge() call to get the same result.
|
||||
parent := &t.stackFrames[t.stackLevel-1]
|
||||
parent.offset = t.stackFrame.offset
|
||||
parent.cursor = t.stackFrame.cursor
|
||||
parent.line = t.stackFrame.line
|
||||
parent.column = t.stackFrame.column
|
||||
t.Dispose(child)
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ func New(tokenHandler Handler) Func {
|
|||
ok := tokenHandler(api)
|
||||
|
||||
if !ok {
|
||||
err := fmt.Errorf("mismatch at %s", Cursor{})
|
||||
err := fmt.Errorf("mismatch at %s", api.Cursor())
|
||||
return nil, err
|
||||
}
|
||||
result := &Result{
|
||||
|
|
|
@ -181,20 +181,20 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
|
|||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
|
||||
AssertEqual(t, "start of file", i.Cursor(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
|
||||
AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
|
||||
AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
|
||||
AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
|
|
Loading…
Reference in New Issue