Added crude loop protection to the parser, which should prevent parsers from running in circles (this happened to me a few times too).

This commit is contained in:
Maurice Makaay 2019-05-28 23:01:23 +00:00
parent 7aff3fc43e
commit d31d09abf0
7 changed files with 76 additions and 20 deletions

View File

@ -1,6 +1,7 @@
// In this example, a Parser is created that can parse and normalize Dutch postcodes.
// The implementation uses only TokenHandler functions and does not implement a
// full-fledged state-based Parser for it.
package parsekit_test
import (

View File

@ -15,20 +15,22 @@ import (
type letterCollection []string
func (l *letterCollection) parseStart(p *parsekit.ParseAPI) {
for p.On(parsekit.C.MinMax(1, 3, parsekit.A.AnyRune)).Accept() {
*l = append(*l, p.BufLiteral())
p.BufClear()
}
p.ExpectEndOfFile()
// AddChopped runs a one-off parser over s and appends every piece that the
// parser accepts to the letterCollection.
//
// Each piece is whatever C.MinMax(1, chunkSize, A.AnyRune) matches, so the
// input is consumed in consecutive chunks until the matcher no longer
// accepts anything. The value returned by the parser's Execute call is
// passed back to the caller unchanged.
func (l *letterCollection) AddChopped(s string, chunkSize int) *parsekit.Error {
	chop := func(p *parsekit.ParseAPI) {
		for {
			accepted := p.On(parsekit.C.MinMax(1, chunkSize, parsekit.A.AnyRune)).Accept()
			if !accepted {
				return
			}
			// Store the accepted chunk and empty the buffer for the next one.
			*l = append(*l, p.BufLiteral())
			p.BufClear()
		}
	}
	chopper := parsekit.NewParser(chop)
	return chopper.Execute(s)
}
func Example_usingSliceAsParserState() {
letters := &letterCollection{}
parser := parsekit.NewParser(letters.parseStart)
err := parser.Execute("¡Any will dö!")
letters.AddChopped("This string will", 4)
letters.AddChopped("be cut to bits!!!!!!", 8)
fmt.Printf("Matches = %q, Error = %s\n", *letters, err)
fmt.Printf("Matches = %q", *letters)
// Output:
// Matches = ["¡An" "y w" "ill" " dö" "!"], Error = <nil>
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
}

View File

@ -18,16 +18,17 @@ type ParseHandler func(*ParseAPI)
// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser.
type ParseAPI struct {
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
loopCheck map[string]bool // used for parser loop detection
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
LastMatch string // a string representation of the last matched input data
}
@ -62,6 +63,16 @@ func (p *ParseAPI) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
// checkForLoops provides crude protection against parser definitions that
// keep running in circles.
//
// It looks up the call site two stack frames above itself (the code that
// called the ParseAPI method which in turn called checkForLoops) and
// identifies it by "file:line". The first time a call site is seen, it is
// recorded in p.loopCheck. When the same call site shows up again while it
// is still recorded, the function panics with a "Loop detected" message
// naming the calling function, file and line. advanceCursor replaces
// p.loopCheck with an empty map whenever the input position changes, so
// only repeated calls without cursor movement trigger the panic.
func (p *ParseAPI) checkForLoops() {
	pc, file, line, ok := runtime.Caller(2)
	if !ok {
		// Without caller information every call would share the same empty
		// id, which would report loops that do not exist. Skip the check.
		return
	}
	if p.loopCheck == nil {
		// Writing to a nil map panics; allocate lazily so that a ParseAPI
		// that was not initialized with a loopCheck map still works.
		p.loopCheck = map[string]bool{}
	}
	id := fmt.Sprintf("%s:%d", file, line)
	if _, seen := p.loopCheck[id]; seen {
		caller := runtime.FuncForPC(pc)
		panic(fmt.Sprintf("Loop detected in parser in %s at %s, line %d", caller.Name(), file, line))
	}
	p.loopCheck[id] = true
}
// peek returns but does not advance the cursor to the next rune in the input.
// Returns the rune, its width in bytes and a boolean.
//

View File

@ -38,6 +38,7 @@ package parsekit
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
p.panicWhenStoppedOrInError()
p.checkForLoops()
// Perform the matching operation.
m := &TokenAPI{p: p}
@ -109,6 +110,10 @@ func (a *ParseAPIOnAction) Stay() bool {
// While doing so, it keeps track of newlines that are encountered, so we
// can report on line + column positions on error.
func (a *ParseAPIOnAction) advanceCursor() {
if a.p.inputPos == a.inputPos {
return
}
a.p.loopCheck = map[string]bool{}
a.p.inputPos = a.inputPos
for _, r := range a.input {
if a.p.newline {

View File

@ -73,3 +73,36 @@ func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *te
t.Fatalf("Got unexpected error: %s", err.Error())
}
}
type parserWithLoop struct { // Test fixture: its handlers first, second and third hand off to each other in a cycle.
loopCounter int // Incremented by third, which reports an error once the count exceeds 100.
}
func (l *parserWithLoop) first(p *parsekit.ParseAPI) { // Entry handler of the first -> second -> third -> first cycle.
p.On(parsekit.A.ASCII).Accept() // Try to match and accept input using the A.ASCII handler.
p.Handle(l.second) // Hand off to the next handler in the cycle.
}
func (l *parserWithLoop) second(p *parsekit.ParseAPI) { // Middle handler of the cycle; the panic test expects the loop to be reported in this function.
p.On(parsekit.A.ASCII).Accept() // NOTE(review): the panic test expects "line 87", presumably this call; avoid shifting its line number.
p.Handle(l.third) // Hand off to the next handler in the cycle.
}
// third is the last handler of the first -> second -> third -> first cycle.
// It counts its own invocations in l.loopCounter. For the first 100
// invocations it behaves like the other handlers (On + Accept, then hand
// off to first); after that it reports a parse error instead, so a test
// using this fixture terminates even when no loop is detected.
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
	l.loopCounter++
	if l.loopCounter <= 100 {
		p.On(parsekit.A.ASCII).Accept()
		p.Handle(l.first)
		return
	}
	p.Error("Loop not detected by parsekit")
}
// TestGivenLoopingParserDefinition_ParserPanics executes a parser whose
// handlers call each other in a cycle and checks, via RunPanicTest, that
// execution panics with the "Loop detected in parser" message.
//
// NOTE(review): the expected message embeds an absolute file path and the
// line number 87, so it depends on where the repository is checked out and
// on the exact line layout of this test file.
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
	fixture := &parserWithLoop{}
	parser := parsekit.NewParser(fixture.first)

	runParser := func() { parser.Execute("Het houdt niet op, niet vanzelf") }
	wantPanic := "Loop detected in parser in git.makaay.nl/mauricem/go-parsekit_test." +
		"(*parserWithLoop).second at /home/ubuntu/Projects/Parsekit/go-parsekit" +
		"/parsehandler_test.go, line 87"

	RunPanicTest(t, PanicTest{runParser, wantPanic})
}

View File

@ -26,6 +26,7 @@ func (p *Parser) Execute(input string) *Error {
len: len(input),
cursorLine: 1,
cursorColumn: 1,
loopCheck: map[string]bool{},
}
api.Handle(p.startHandler)
if !api.stopped && api.err == nil {

View File

@ -46,6 +46,9 @@ func TestCombinators(t *testing.T) {
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},