Added crude loop protection to the parser, which should prevent parsers from running in circles (this happened to me a few times too).
This commit is contained in:
parent
7aff3fc43e
commit
d31d09abf0
|
@ -1,6 +1,7 @@
|
||||||
// In this example, a Parser is created that can parse and normalize Dutch postcodes
|
// In this example, a Parser is created that can parse and normalize Dutch postcodes
|
||||||
// The implementation uses only TokenHandler functions and does not implement a
|
// The implementation uses only TokenHandler functions and does not implement a
|
||||||
// full-fledged state-based Parser for it.
|
// full-fledged state-based Parser for it.
|
||||||
|
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|
|
@ -15,20 +15,22 @@ import (
|
||||||
|
|
||||||
type letterCollection []string
|
type letterCollection []string
|
||||||
|
|
||||||
func (l *letterCollection) parseStart(p *parsekit.ParseAPI) {
|
func (l *letterCollection) AddChopped(s string, chunkSize int) *parsekit.Error {
|
||||||
for p.On(parsekit.C.MinMax(1, 3, parsekit.A.AnyRune)).Accept() {
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
*l = append(*l, p.BufLiteral())
|
for p.On(parsekit.C.MinMax(1, chunkSize, parsekit.A.AnyRune)).Accept() {
|
||||||
p.BufClear()
|
*l = append(*l, p.BufLiteral())
|
||||||
}
|
p.BufClear()
|
||||||
p.ExpectEndOfFile()
|
}
|
||||||
|
})
|
||||||
|
return parser.Execute(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Example_usingSliceAsParserState() {
|
func Example_usingSliceAsParserState() {
|
||||||
letters := &letterCollection{}
|
letters := &letterCollection{}
|
||||||
parser := parsekit.NewParser(letters.parseStart)
|
letters.AddChopped("This string will", 4)
|
||||||
err := parser.Execute("¡Any will dö!")
|
letters.AddChopped("be cut to bits!!!!!!", 8)
|
||||||
|
|
||||||
fmt.Printf("Matches = %q, Error = %s\n", *letters, err)
|
fmt.Printf("Matches = %q", *letters)
|
||||||
// Output:
|
// Output:
|
||||||
// Matches = ["¡An" "y w" "ill" " dö" "!"], Error = <nil>
|
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,16 +18,17 @@ type ParseHandler func(*ParseAPI)
|
||||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||||
// ParseHandler methods to communicate with the parser.
|
// ParseHandler methods to communicate with the parser.
|
||||||
type ParseAPI struct {
|
type ParseAPI struct {
|
||||||
input string // the input that is being scanned by the parser
|
input string // the input that is being scanned by the parser
|
||||||
inputPos int // current byte cursor position in the input
|
inputPos int // current byte cursor position in the input
|
||||||
cursorLine int // current rune cursor row number in the input
|
loopCheck map[string]bool // used for parser loop detection
|
||||||
cursorColumn int // current rune cursor column position in the input
|
cursorLine int // current rune cursor row number in the input
|
||||||
len int // the total length of the input in bytes
|
cursorColumn int // current rune cursor column position in the input
|
||||||
newline bool // keep track of when we have scanned a newline
|
len int // the total length of the input in bytes
|
||||||
expecting string // a description of what the current state expects to find (see P.Expects())
|
newline bool // keep track of when we have scanned a newline
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
expecting string // a description of what the current state expects to find (see P.Expects())
|
||||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||||
|
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||||
|
|
||||||
LastMatch string // a string representation of the last matched input data
|
LastMatch string // a string representation of the last matched input data
|
||||||
}
|
}
|
||||||
|
@ -62,6 +63,16 @@ func (p *ParseAPI) isStoppedOrInError() bool {
|
||||||
return p.stopped || p.err != nil
|
return p.stopped || p.err != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *ParseAPI) checkForLoops() {
|
||||||
|
pc, file, line, _ := runtime.Caller(2)
|
||||||
|
id := fmt.Sprintf("%s:%d", file, line)
|
||||||
|
if _, ok := p.loopCheck[id]; ok {
|
||||||
|
caller := runtime.FuncForPC(pc)
|
||||||
|
panic(fmt.Sprintf("Loop detected in parser in %s at %s, line %d", caller.Name(), file, line))
|
||||||
|
}
|
||||||
|
p.loopCheck[id] = true
|
||||||
|
}
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune in the input.
|
// peek returns but does not advance the cursor to the next rune in the input.
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
//
|
//
|
||||||
|
|
|
@ -38,6 +38,7 @@ package parsekit
|
||||||
// }
|
// }
|
||||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||||
p.panicWhenStoppedOrInError()
|
p.panicWhenStoppedOrInError()
|
||||||
|
p.checkForLoops()
|
||||||
|
|
||||||
// Perform the matching operation.
|
// Perform the matching operation.
|
||||||
m := &TokenAPI{p: p}
|
m := &TokenAPI{p: p}
|
||||||
|
@ -109,6 +110,10 @@ func (a *ParseAPIOnAction) Stay() bool {
|
||||||
// While doing so, it keeps track of newlines that are encountered, so we
|
// While doing so, it keeps track of newlines that are encountered, so we
|
||||||
// can report on line + column positions on error.
|
// can report on line + column positions on error.
|
||||||
func (a *ParseAPIOnAction) advanceCursor() {
|
func (a *ParseAPIOnAction) advanceCursor() {
|
||||||
|
if a.p.inputPos == a.inputPos {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
a.p.loopCheck = map[string]bool{}
|
||||||
a.p.inputPos = a.inputPos
|
a.p.inputPos = a.inputPos
|
||||||
for _, r := range a.input {
|
for _, r := range a.input {
|
||||||
if a.p.newline {
|
if a.p.newline {
|
||||||
|
|
|
@ -73,3 +73,36 @@ func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *te
|
||||||
t.Fatalf("Got unexpected error: %s", err.Error())
|
t.Fatalf("Got unexpected error: %s", err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type parserWithLoop struct { // test fixture whose handlers first, second and third hand over to each other in a cycle
|
||||||
|
loopCounter int // incremented on every call of third, which reports a parse error once it exceeds 100
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *parserWithLoop) first(p *parsekit.ParseAPI) { // entry handler of the first -> second -> third -> first cycle
|
||||||
|
p.On(parsekit.A.ASCII).Accept() // accept whatever A.ASCII matches at the cursor; the result of Accept is ignored
|
||||||
|
p.Handle(l.second) // always continue with second, whether or not input was accepted
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *parserWithLoop) second(p *parsekit.ParseAPI) { // middle handler of the cycle; the loop-detection test expects the panic to be reported for this function
|
||||||
|
p.On(parsekit.A.ASCII).Accept() // accept whatever A.ASCII matches at the cursor; the result of Accept is ignored
|
||||||
|
p.Handle(l.third) // always continue with third, whether or not input was accepted
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
|
||||||
|
if l.loopCounter++; l.loopCounter > 100 {
|
||||||
|
p.Error("Loop not detected by parsekit")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
p.On(parsekit.A.ASCII).Accept()
|
||||||
|
p.Handle(l.first)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||||
|
looper := &parserWithLoop{}
|
||||||
|
parser := parsekit.NewParser(looper.first)
|
||||||
|
RunPanicTest(t, PanicTest{
|
||||||
|
func() { parser.Execute("Het houdt niet op, niet vanzelf") },
|
||||||
|
"Loop detected in parser in git.makaay.nl/mauricem/go-parsekit_test." +
|
||||||
|
"(*parserWithLoop).second at /home/ubuntu/Projects/Parsekit/go-parsekit" +
|
||||||
|
"/parsehandler_test.go, line 87"})
|
||||||
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ func (p *Parser) Execute(input string) *Error {
|
||||||
len: len(input),
|
len: len(input),
|
||||||
cursorLine: 1,
|
cursorLine: 1,
|
||||||
cursorColumn: 1,
|
cursorColumn: 1,
|
||||||
|
loopCheck: map[string]bool{},
|
||||||
}
|
}
|
||||||
api.Handle(p.startHandler)
|
api.Handle(p.startHandler)
|
||||||
if !api.stopped && api.err == nil {
|
if !api.stopped && api.err == nil {
|
||||||
|
|
|
@ -46,6 +46,9 @@ func TestCombinators(t *testing.T) {
|
||||||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
||||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||||
|
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
|
||||||
|
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||||
|
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
||||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||||
|
|
Loading…
Reference in New Issue