Added crude loop protection to the parser, which should prevent parsers from running in circles (this happened to me a few times too).
This commit is contained in:
parent
7aff3fc43e
commit
d31d09abf0
|
@ -1,6 +1,7 @@
|
|||
// In this example, a Parser is created that can parse and normalize Dutch postcodes
|
||||
// The implementation uses only TokenHandler functions and does not implement a
|
||||
// full-fledged state-based Parser for it.
|
||||
|
||||
package parsekit_test
|
||||
|
||||
import (
|
||||
|
|
|
@ -15,20 +15,22 @@ import (
|
|||
|
||||
// letterCollection is a string slice that is used as parser state: its parse
// methods append every matched chunk of input to it.
type letterCollection []string
|
||||
|
||||
func (l *letterCollection) parseStart(p *parsekit.ParseAPI) {
|
||||
for p.On(parsekit.C.MinMax(1, 3, parsekit.A.AnyRune)).Accept() {
|
||||
*l = append(*l, p.BufLiteral())
|
||||
p.BufClear()
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
func (l *letterCollection) AddChopped(s string, chunkSize int) *parsekit.Error {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(parsekit.C.MinMax(1, chunkSize, parsekit.A.AnyRune)).Accept() {
|
||||
*l = append(*l, p.BufLiteral())
|
||||
p.BufClear()
|
||||
}
|
||||
})
|
||||
return parser.Execute(s)
|
||||
}
|
||||
|
||||
func Example_usingSliceAsParserState() {
|
||||
letters := &letterCollection{}
|
||||
parser := parsekit.NewParser(letters.parseStart)
|
||||
err := parser.Execute("¡Any will dö!")
|
||||
letters.AddChopped("This string will", 4)
|
||||
letters.AddChopped("be cut to bits!!!!!!", 8)
|
||||
|
||||
fmt.Printf("Matches = %q, Error = %s\n", *letters, err)
|
||||
fmt.Printf("Matches = %q", *letters)
|
||||
// Output:
|
||||
// Matches = ["¡An" "y w" "ill" " dö" "!"], Error = <nil>
|
||||
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
|
||||
}
|
||||
|
|
|
@ -18,16 +18,17 @@ type ParseHandler func(*ParseAPI)
|
|||
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||
// ParseHandler methods to communicate with the parser.
|
||||
type ParseAPI struct {
|
||||
input string // the input that is being scanned by the parser
|
||||
inputPos int // current byte cursor position in the input
|
||||
cursorLine int // current rune cursor row number in the input
|
||||
cursorColumn int // current rune cursor column position in the input
|
||||
len int // the total length of the input in bytes
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
expecting string // a description of what the current state expects to find (see P.Expects())
|
||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
input string // the input that is being scanned by the parser
|
||||
inputPos int // current byte cursor position in the input
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
cursorLine int // current rune cursor row number in the input
|
||||
cursorColumn int // current rune cursor column position in the input
|
||||
len int // the total length of the input in bytes
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
expecting string // a description of what the current state expects to find (see P.Expects())
|
||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
|
||||
LastMatch string // a string representation of the last matched input data
|
||||
}
|
||||
|
@ -62,6 +63,16 @@ func (p *ParseAPI) isStoppedOrInError() bool {
|
|||
return p.stopped || p.err != nil
|
||||
}
|
||||
|
||||
func (p *ParseAPI) checkForLoops() {
|
||||
pc, file, line, _ := runtime.Caller(2)
|
||||
id := fmt.Sprintf("%s:%d", file, line)
|
||||
if _, ok := p.loopCheck[id]; ok {
|
||||
caller := runtime.FuncForPC(pc)
|
||||
panic(fmt.Sprintf("Loop detected in parser in %s at %s, line %d", caller.Name(), file, line))
|
||||
}
|
||||
p.loopCheck[id] = true
|
||||
}
|
||||
|
||||
// peek returns but does not advance the cursor to the next rune in the input.
|
||||
// Returns the rune, its width in bytes and a boolean.
|
||||
//
|
||||
|
|
|
@ -38,6 +38,7 @@ package parsekit
|
|||
// }
|
||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.checkForLoops()
|
||||
|
||||
// Perform the matching operation.
|
||||
m := &TokenAPI{p: p}
|
||||
|
@ -109,6 +110,10 @@ func (a *ParseAPIOnAction) Stay() bool {
|
|||
// While doing so, it keeps track of newlines that are encountered, so we
|
||||
// can report on line + column positions on error.
|
||||
func (a *ParseAPIOnAction) advanceCursor() {
|
||||
if a.p.inputPos == a.inputPos {
|
||||
return
|
||||
}
|
||||
a.p.loopCheck = map[string]bool{}
|
||||
a.p.inputPos = a.inputPos
|
||||
for _, r := range a.input {
|
||||
if a.p.newline {
|
||||
|
|
|
@ -73,3 +73,36 @@ func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *te
|
|||
t.Fatalf("Got unexpected error: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
type parserWithLoop struct { // test fixture: its handlers first -> second -> third -> first hand over to each other in a cycle
|
||||
loopCounter int // incremented on every call of third; once above 100, third reports that the loop was not detected
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) first(p *parsekit.ParseAPI) { // first is the start handler of the looping fixture
|
||||
p.On(parsekit.A.ASCII).Accept() // presumably accepts one ASCII rune when available; the result is ignored on purpose
|
||||
p.Handle(l.second) // always hands over to second, whether or not input was accepted
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) second(p *parsekit.ParseAPI) { // second is the middle handler of the looping fixture
|
||||
p.On(parsekit.A.ASCII).Accept() // NOTE(review): the loop test expects the panic to name this call site; keep its line position stable
|
||||
p.Handle(l.third) // always hands over to third, whether or not input was accepted
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
|
||||
if l.loopCounter++; l.loopCounter > 100 {
|
||||
p.Error("Loop not detected by parsekit")
|
||||
return
|
||||
}
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.first)
|
||||
}
|
||||
|
||||
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||
looper := &parserWithLoop{}
|
||||
parser := parsekit.NewParser(looper.first)
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("Het houdt niet op, niet vanzelf") },
|
||||
"Loop detected in parser in git.makaay.nl/mauricem/go-parsekit_test." +
|
||||
"(*parserWithLoop).second at /home/ubuntu/Projects/Parsekit/go-parsekit" +
|
||||
"/parsehandler_test.go, line 87"})
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ func (p *Parser) Execute(input string) *Error {
|
|||
len: len(input),
|
||||
cursorLine: 1,
|
||||
cursorColumn: 1,
|
||||
loopCheck: map[string]bool{},
|
||||
}
|
||||
api.Handle(p.startHandler)
|
||||
if !api.stopped && api.err == nil {
|
||||
|
|
|
@ -46,6 +46,9 @@ func TestCombinators(t *testing.T) {
|
|||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
|
|
Loading…
Reference in New Issue