Implemented an efficient M.DropUntilEndOfLine handler, which is now used in the TOML parser for a dramatic speed increase on comment parsing.

This commit is contained in:
Maurice Makaay 2019-07-17 23:51:37 +00:00
parent 64f92696b2
commit e659380a5f
1 changed file with 50 additions and 19 deletions

View File

@ -267,6 +267,7 @@ var A = struct {
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
Drop func(Handler) Handler
DropUntilEndOfLine Handler
Trim func(handler Handler, cutset string) Handler
TrimLeft func(handler Handler, cutset string) Handler
TrimRight func(handler Handler, cutset string) Handler
@ -277,6 +278,7 @@ var M = struct {
ByCallback func(Handler, func(string) string) Handler
}{
Drop: ModifyDrop,
DropUntilEndOfLine: ModifyDropUntilEndOfLine(),
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
@ -698,7 +700,14 @@ func MatchStrNoCase(expected string) Handler {
// no output is generated but still a successful match is reported (but the
// result will be empty).
func MatchOptional(handler Handler) Handler {
// NOTE(review): the function returns on the next line, so the closure
// that follows it is never reached as written. The enclosing hunk header
// (7 old lines, 14 new lines) suggests that this return is the removed
// side of a diff whose +/- markers were lost, and that the closure is the
// replacement implementation. Confirm against the repository.
return matchMinMax(0, 1, handler, "MatchOptional")
// The closure reports a match unconditionally: it returns true whether or
// not the wrapped handler matched.
return func(t *API) bool {
// Create a fork before running the handler; child identifies that fork.
child := t.Fork()
// Merge is only called when the handler reports success (presumably
// this is what keeps the handler's result; verify against API.Merge).
if handler(t) {
t.Merge(child)
}
// Dispose is called for the fork on both the success and failure path.
t.Dispose(child)
return true
}
}
// MatchSeq creates a Handler that checks if the provided Handlers can be
@ -1542,6 +1551,28 @@ func ModifyDrop(handler Handler) Handler {
}
}
// ModifyDropUntilEndOfLine creates a Handler that skips over input, one byte
// at a time, until it reaches a newline ('\n') or the end of the input.
//
// The Handler reports a match when it stops at a newline or at io.EOF. The
// newline itself is only peeked at, not skipped. Any read error other than
// io.EOF makes the Handler report a failed match.
//
// This handler is typically used for ignoring the remainder of a line after
// a comment start like '#' or '//' when parsing code or configuration data.
func ModifyDropUntilEndOfLine() Handler {
	return func(tokenAPI *API) bool {
		for {
			next, peekErr := tokenAPI.PeekByte(0)
			switch {
			case peekErr != nil:
				// Running out of input still counts as a match;
				// every other read error is a failure.
				return peekErr == io.EOF
			case next == '\n':
				// Stop in front of the line ending.
				return true
			}
			tokenAPI.skipBytes(next)
		}
	}
}
// ModifyTrim creates a Handler that checks if the provided Handler applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from both the left and the right of the output.