// REVIEW NOTES - commentary placed ahead of the first "diff --git" header. Everything from
// that header onward is the original patch text, unchanged.
//
// Purpose: this patch adds loop detection to the parsekit ParseAPI and adjusts examples and
// tests around it.
//
// What the hunks below do:
//   - example_dutchpostcode_test.go: inserts an empty line between the leading comment and
//     the "package parsekit_test" clause.
//   - examples_state_test.go: replaces letterCollection.parseStart with
//     AddChopped(s, chunkSize). AddChopped builds a parser around an inline handler that
//     repeatedly accepts parsekit.C.MinMax(1, chunkSize, parsekit.A.AnyRune), appends each
//     accepted literal to the slice, clears the buffer, and finally returns
//     parser.Execute(s). The example now calls AddChopped twice (chunk sizes 4 and 8) and
//     prints only the collected matches.
//   - parsehandler.go: adds the field ParseAPI.loopCheck (map[string]bool) and the method
//     checkForLoops(). checkForLoops takes runtime.Caller(2), builds the key "file:line",
//     panics with "Loop detected in parser in <func> at <file>, line <n>" when that key is
//     already present in the map, and otherwise stores the key.
//   - parsehandler_on.go: On() calls checkForLoops() right after
//     panicWhenStoppedOrInError(). advanceCursor() now returns early when the cursor
//     position is unchanged, and otherwise replaces loopCheck with a fresh empty map. As
//     far as this patch shows, the panic therefore fires when On() is reached twice from
//     the same file:line without the cursor having moved in between.
//   - parsekit.go: Parser.Execute initialises loopCheck with an empty map.
//   - parsehandler_test.go: adds parserWithLoop (handlers first -> second -> third -> first)
//     and TestGivenLoopingParserDefinition_ParserPanics, which expects the loop panic to be
//     reported for (*parserWithLoop).second.
//   - tokenhandlers_builtin_test.go: adds three c.MinMax(0, 5, c.Rune('c')) cases for the
//     inputs "cccc", "ccccc" and "cccccc".
//
// NOTE(review): the expected panic text in the new test embeds an absolute checkout path
//   (/home/ubuntu/Projects/Parsekit/go-parsekit/parsehandler_test.go) and the line number
//   87. RunPanicTest is not visible here; if it compares the message exactly, the test is
//   tied to one machine and to the current line layout of the test file - confirm.
// NOTE(review): checkForLoops writes to p.loopCheck, and the only initialisation shown is in
//   Parser.Execute. Writing to a nil map panics in Go, so confirm that no other code path
//   constructs a ParseAPI.
// NOTE(review): checkForLoops discards the ok result of runtime.Caller(2).
// NOTE(review): no hunk touches the imports of parsehandler.go, although checkForLoops uses
//   fmt and runtime - confirm both are already imported there.
// NOTE(review): Example_usingSliceAsParserState now ignores the *parsekit.Error returned by
//   both AddChopped calls, and the new handler no longer calls p.ExpectEndOfFile().
// NOTE(review): checkForLoops builds its map key with fmt.Sprintf on every On() call;
//   presumably acceptable, but worth measuring if On() turns out to be a hot path.
//
diff --git a/example_dutchpostcode_test.go b/example_dutchpostcode_test.go index cd7c549..0905446 100644 --- a/example_dutchpostcode_test.go +++ b/example_dutchpostcode_test.go @@ -1,6 +1,7 @@ // In this example, a Parser is created that can parse and normalize Dutch postcodes // The implementation uses only TokenHandler functions and does not implement a // full-fledged state-based Parser for it. + package parsekit_test import ( diff --git a/examples_state_test.go b/examples_state_test.go index de4eb4e..6366bb2 100644 --- a/examples_state_test.go +++ b/examples_state_test.go @@ -15,20 +15,22 @@ import ( type letterCollection []string -func (l *letterCollection) parseStart(p *parsekit.ParseAPI) { - for p.On(parsekit.C.MinMax(1, 3, parsekit.A.AnyRune)).Accept() { - *l = append(*l, p.BufLiteral()) - p.BufClear() - } - p.ExpectEndOfFile() +func (l *letterCollection) AddChopped(s string, chunkSize int) *parsekit.Error { + parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + for p.On(parsekit.C.MinMax(1, chunkSize, parsekit.A.AnyRune)).Accept() { + *l = append(*l, p.BufLiteral()) + p.BufClear() + } + }) + return parser.Execute(s) } func Example_usingSliceAsParserState() { letters := &letterCollection{} - parser := parsekit.NewParser(letters.parseStart) - err := parser.Execute("¡Any will dö!") + letters.AddChopped("This string will", 4) + letters.AddChopped("be cut to bits!!!!!!", 8) - fmt.Printf("Matches = %q, Error = %s\n", *letters, err) + fmt.Printf("Matches = %q", *letters) // Output: - // Matches = ["¡An" "y w" "ill" " dö" "!"], Error = + // Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"] } diff --git a/parsehandler.go b/parsehandler.go index c9dd150..0020844 100644 --- a/parsehandler.go +++ b/parsehandler.go @@ -18,16 +18,17 @@ type ParseHandler func(*ParseAPI) // ParseAPI holds the internal state of a parse run and provides an API to // ParseHandler methods to communicate with the parser. 
type ParseAPI struct { - input string // the input that is being scanned by the parser - inputPos int // current byte cursor position in the input - cursorLine int // current rune cursor row number in the input - cursorColumn int // current rune cursor column position in the input - len int // the total length of the input in bytes - newline bool // keep track of when we have scanned a newline - expecting string // a description of what the current state expects to find (see P.Expects()) - buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) - err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored - stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored + input string // the input that is being scanned by the parser + inputPos int // current byte cursor position in the input + loopCheck map[string]bool // used for parser loop detection + cursorLine int // current rune cursor row number in the input + cursorColumn int // current rune cursor column position in the input + len int // the total length of the input in bytes + newline bool // keep track of when we have scanned a newline + expecting string // a description of what the current state expects to find (see P.Expects()) + buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept()) + err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored + stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored LastMatch string // a string representation of the last matched input data } @@ -62,6 +63,16 @@ func (p *ParseAPI) isStoppedOrInError() bool { return p.stopped || p.err != nil } +func (p *ParseAPI) checkForLoops() { + pc, file, line, _ := runtime.Caller(2) + id := fmt.Sprintf("%s:%d", file, line) + if _, ok := p.loopCheck[id]; ok { + caller := runtime.FuncForPC(pc) + panic(fmt.Sprintf("Loop detected in parser in %s at %s, line 
%d", caller.Name(), file, line)) + } + p.loopCheck[id] = true +} + // peek returns but does not advance the cursor to the next rune in the input. // Returns the rune, its width in bytes and a boolean. // diff --git a/parsehandler_on.go b/parsehandler_on.go index 0c4feb2..16e9b1d 100644 --- a/parsehandler_on.go +++ b/parsehandler_on.go @@ -38,6 +38,7 @@ package parsekit // } func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction { p.panicWhenStoppedOrInError() + p.checkForLoops() // Perform the matching operation. m := &TokenAPI{p: p} @@ -109,6 +110,10 @@ func (a *ParseAPIOnAction) Stay() bool { // While doing so, it keeps tracks of newlines that are encountered, so we // can report on line + column positions on error. func (a *ParseAPIOnAction) advanceCursor() { + if a.p.inputPos == a.inputPos { + return + } + a.p.loopCheck = map[string]bool{} a.p.inputPos = a.inputPos for _, r := range a.input { if a.p.newline { diff --git a/parsehandler_test.go b/parsehandler_test.go index f516926..b5c7ec5 100644 --- a/parsehandler_test.go +++ b/parsehandler_test.go @@ -73,3 +73,36 @@ func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *te t.Fatalf("Got unexpected error: %s", err.Error()) } } + +type parserWithLoop struct { + loopCounter int +} + +func (l *parserWithLoop) first(p *parsekit.ParseAPI) { + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.second) +} + +func (l *parserWithLoop) second(p *parsekit.ParseAPI) { + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.third) +} + +func (l *parserWithLoop) third(p *parsekit.ParseAPI) { + if l.loopCounter++; l.loopCounter > 100 { + p.Error("Loop not detected by parsekit") + return + } + p.On(parsekit.A.ASCII).Accept() + p.Handle(l.first) +} + +func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { + looper := &parserWithLoop{} + parser := parsekit.NewParser(looper.first) + RunPanicTest(t, PanicTest{ + func() { parser.Execute("Het houdt niet op, niet vanzelf") }, + "Loop detected in parser 
in git.makaay.nl/mauricem/go-parsekit_test." + + "(*parserWithLoop).second at /home/ubuntu/Projects/Parsekit/go-parsekit" + + "/parsehandler_test.go, line 87"}) +} diff --git a/parsekit.go b/parsekit.go index 6ed61c8..3dcf977 100644 --- a/parsekit.go +++ b/parsekit.go @@ -26,6 +26,7 @@ func (p *Parser) Execute(input string) *Error { len: len(input), cursorLine: 1, cursorColumn: 1, + loopCheck: map[string]bool{}, } api.Handle(p.startHandler) if !api.stopped && api.err == nil { diff --git a/tokenhandlers_builtin_test.go b/tokenhandlers_builtin_test.go index c5afbdd..e716df1 100644 --- a/tokenhandlers_builtin_test.go +++ b/tokenhandlers_builtin_test.go @@ -46,6 +46,9 @@ func TestCombinators(t *testing.T) { {"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"}, {"", c.MinMax(0, 0, c.Rune('c')), true, ""}, {"X", c.MinMax(0, 0, c.Rune('c')), true, ""}, + {"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"}, + {"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"}, + {"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"}, {"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""}, {"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"}, {"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},