From f70bf8d07403f48ccbe3cd49635be26dc4d583ce Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 29 Jul 2019 23:51:09 +0000 Subject: [PATCH] Speed improvements --- tokenize/handlers_builtin.go | 65 ++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 42a76c9..5978e79 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -77,6 +77,7 @@ var A = struct { AnyByte Handler AnyRune Handler ValidRune Handler + ValidRunes Handler InvalidRune Handler Str func(string) Handler StrNoCase func(string) Handler @@ -166,6 +167,7 @@ var A = struct { AnyByte: MatchAnyByte(), AnyRune: MatchAnyRune(), ValidRune: MatchValidRune(), + ValidRunes: MatchValidRunes(), InvalidRune: MatchInvalidRune(), Str: MatchStr, StrNoCase: MatchStrNoCase, @@ -990,36 +992,21 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { } func MatchInOptionalBlanks(handler Handler) Handler { + blanks := MatchBlanks() return func(tokenAPI *API) bool { - skipBlanks(tokenAPI) + tokenAPI.Output.Suspend() + blanks(tokenAPI) + tokenAPI.Output.Resume() if !handler(tokenAPI) { return false } - skipBlanks(tokenAPI) + tokenAPI.Output.Suspend() + blanks(tokenAPI) + tokenAPI.Output.Resume() return true } } -func skipBlanks(tokenAPI *API) { - for { - bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128) - for i, b := range bs { - if b != ' ' && b != '\t' { - if i > 0 { - tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...) - } - return - } - } - if err != nil { - if len(bs) > 0 { - tokenAPI.Input.Byte.MoveCursorMulti(bs...) - } - return - } - } -} - // MakeInputFlusher creates a Handler that will flush the input buffer when the // provided handler matches. // @@ -1182,6 +1169,40 @@ func MatchValidRune() Handler { } } +func MatchValidRunes() Handler { + return func(tokenAPI *API) bool { + rs := make([]rune, 256, 256) + ok := false + for { + bs, err := tokenAPI.Input.Byte.PeekMulti(0, 256) + end := 0 + offset := 0 + for offset <= 256-utf8.UTFMax { + r, w := utf8.DecodeRune(bs[offset:]) + if r == utf8.RuneError { + if end > 0 { + tokenAPI.Input.Rune.AcceptMulti(rs[:end]...) + } + return ok + } + ok = true + offset += w + rs[end] = r + end++ + } + if end > 0 { + tokenAPI.Input.Rune.AcceptMulti(rs[:end]...) + } + if err != nil { + if err == io.EOF { + return ok + } + return false + } + } + } +} + // MatchInvalidRune creates a Handler function that checks if an invalid // UTF8 rune can be read from the input. func MatchInvalidRune() Handler {