From 7116aa47dfab29d7a810911c56a3465417464e9d Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Fri, 12 Jul 2019 00:21:02 +0000 Subject: [PATCH] Squishing out more performance. --- tokenize/api.go | 24 ++- tokenize/api_test.go | 14 ++ tokenize/handlers_builtin.go | 279 +++++++++++++++++++++++++++++++---- 3 files changed, 284 insertions(+), 33 deletions(-) diff --git a/tokenize/api.go b/tokenize/api.go index 6a9a01a..37b7512 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -134,6 +134,13 @@ func (i *API) NextRune() (rune, error) { return readRune, err } +// PeekRune returns the rune at the provided offset. +// +// The read cursor and current read offset are not updated by this operation. +func (i *API) PeekRune(offset int) (rune, error) { + return i.reader.RuneAt(i.stackFrame.offset + offset) +} + // Accept the last rune as read by NextRune() into the Result runes and move // the cursor forward. // @@ -148,7 +155,12 @@ func (i *API) Accept() { "but the prior call to NextRune() failed") } - newRuneEnd := i.stackFrame.runeEnd + 1 + i.accept(i.lastRune) +} + +func (i *API) accept(runes ...rune) { + curRuneEnd := i.stackFrame.runeEnd + newRuneEnd := curRuneEnd + len(runes) // Grow the runes capacity when needed. if cap(i.runes) < newRuneEnd { @@ -159,10 +171,12 @@ func (i *API) Accept() { i.runes = i.runes[0:newRuneEnd] } - i.runes[newRuneEnd-1] = i.lastRune - i.stackFrame.runeEnd++ - i.stackFrame.cursor.moveByRune(i.lastRune) - i.stackFrame.offset++ + for offset, r := range runes { + i.runes[curRuneEnd+offset] = r + i.stackFrame.cursor.moveByRune(r) + } + i.stackFrame.runeEnd = newRuneEnd + i.stackFrame.offset += len(runes) i.runeRead = false } diff --git a/tokenize/api_test.go b/tokenize/api_test.go index d77830d..4a25b41 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -26,6 +26,20 @@ func ExampleAPI_NextRune() { // API results: "" } +func ExampleAPI_PeekRune() { + api := tokenize.NewAPI("The input that the API will handle") + + r1, err := api.PeekRune(19) // 'A' + r2, err := api.PeekRune(20) // 'P' + r3, err := api.PeekRune(21) // 'I' + _, err = api.PeekRune(100) // EOF + + fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) + + // Output: + // API EOF +} + func ExampleAPI_Accept() { api := tokenize.NewAPI("The input that the API will handle") api.NextRune() // reads 'T' diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index 6bb1aff..8f4b735 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -335,14 +335,32 @@ var T = struct { // MatchRune creates a Handler function that matches against the provided rune. func MatchRune(expected rune) Handler { - return MatchRuneByCallback(func(r rune) bool { return r == expected }) + return func(t *API) bool { + r, err := t.PeekRune(0) + if err == nil && r == expected { + t.accept(r) + return true + } + return false + } } // MatchRunes creates a Handler function that checks if the input matches // one of the provided runes. The first match counts. func MatchRunes(expected ...rune) Handler { - s := string(expected) - return MatchRuneByCallback(func(r rune) bool { return strings.ContainsRune(s, r) }) + return func(t *API) bool { + r, err := t.PeekRune(0) + if err != nil { + return false + } + for _, e := range expected { + if r == e { + t.accept(r) + return true + } + } + return false + } } // MatchRuneRange creates a Handler function that checks if the input @@ -356,13 +374,37 @@ func MatchRuneRange(start rune, end rune) Handler { if end < start { callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) } - return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end }) + return func(t *API) bool { + r, err := t.PeekRune(0) + if err == nil && r >= start && r <= end { + t.accept(r) + return true + } + return false + } } // MatchNewline creates a handler that matches a newline, which is either // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). func MatchNewline() Handler { - return MatchAny(MatchStr("\r\n"), MatchRune('\n')) + return func(t *API) bool { + r1, err := t.PeekRune(0) + if err != nil { + return false + } + if r1 == '\n' { + t.accept(r1) + return true + } + if r1 == '\r' { + r2, err := t.PeekRune(1) + if err == nil && r2 == '\n' { + t.accept(r1, r2) + return true + } + } + return false + } } // MatchBlank creates a Handler that matches one rune from the input @@ -371,7 +413,14 @@ func MatchNewline() Handler { // When you need whitespace matching, which also includes characters like // newlines, then take a look at MatchWhitespace(). func MatchBlank() Handler { - return MatchRuneByCallback(func(r rune) bool { return r == ' ' || r == '\t' }) + return func(t *API) bool { + r, err := t.NextRune() + if err == nil && (r == ' ' || r == '\t') { + t.Accept() + return true + } + return false + } } // MatchBlanks creates a Handler that matches the input against one @@ -382,14 +431,63 @@ func MatchBlank() Handler { // When you need unicode whitespace matching, which also includes characters // like a vertical tab, then make use of MatchUnicodeSpace(). func MatchBlanks() Handler { - return MatchOneOrMore(MatchBlank()) + return func(t *API) bool { + // Match the first blank. + r, err := t.PeekRune(0) + if err != nil || (r != ' ' && r != '\t') { + return false + } + + // Now match any number of followup blanks. We've already got + // a successful match at this point, so we'll always return true at the end. + for { + r, err := t.PeekRune(0) + if err != nil || (r != ' ' && r != '\t') { + return true + } + t.accept(r) + } + } } // MatchWhitespace creates a Handler that matches the input against one or more // whitespace characters, defined as space ' ', tab, ' ', newline '\n' (LF) and // carriage return '\r' followed by a newline '\n' (CRLF). func MatchWhitespace() Handler { - return MatchOneOrMore(MatchBlank().Or(MatchNewline())) + return func(t *API) bool { + // Match the first whitespace. + r1, err := t.PeekRune(0) + if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') { + return false + } + if r1 == '\r' { + r2, err := t.PeekRune(1) + if err != nil || r2 != '\n' { + return false + } + t.accept(r1, r2) + } else { + t.accept(r1) + } + + // Now match any number of followup whitespace. We've already got + // a successful match at this point, so we'll always return true at the end. + for { + r1, err := t.PeekRune(0) + if err != nil || (r1 != ' ' && r1 != '\t' && r1 != '\n' && r1 != '\r') { + return true + } + if r1 == '\r' { + r2, err := t.PeekRune(1) + if err != nil || r2 != '\n' { + return true + } + t.accept(r1, r2) + } else { + t.accept(r1) + } + } + } } // MatchUnicodeSpace creates a Handler that matches the input against one or more @@ -406,9 +504,9 @@ func MatchUnicodeSpace() Handler { // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). func MatchRuneByCallback(callback func(rune) bool) Handler { return func(t *API) bool { - r, err := t.NextRune() + r, err := t.PeekRune(0) if err == nil && callback(r) { - t.Accept() + t.accept(r) return true } return false @@ -417,28 +515,56 @@ func MatchRuneByCallback(callback func(rune) bool) Handler { // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. func MatchEndOfLine() Handler { - return MatchAny(MatchNewline(), MatchEndOfFile()) + return func(t *API) bool { + r1, err := t.PeekRune(0) + if err != nil { + return err == io.EOF + } + if r1 == '\n' { + t.accept(r1) + return true + } + if r1 == '\r' { + r2, _ := t.PeekRune(1) + if r2 == '\n' { + t.accept(r1, r2) + return true + } + } + return false + } } // MatchStr creates a Handler that matches the input against the provided string. func MatchStr(expected string) Handler { - var handlers = make([]Handler, len(expected)) - for i, r := range expected { - handlers[i] = MatchRune(r) + return func(t *API) bool { + for i, e := range expected { + r, err := t.PeekRune(i) + if err != nil || e != r { + return false + } + } + t.accept([]rune(expected)...) + return true } - return MatchSeq(handlers...) } // MatchStrNoCase creates a Handler that matches the input against the // provided string in a case-insensitive manner. func MatchStrNoCase(expected string) Handler { - var handlers = []Handler{} - for _, r := range expected { - u := unicode.ToUpper(r) - l := unicode.ToLower(r) - handlers = append(handlers, MatchRunes(u, l)) + l := len([]rune(expected)) + matches := make([]rune, l) + return func(t *API) bool { + for i, e := range expected { + r, err := t.PeekRune(i) + if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) { + return false + } + matches[i] = r + } + t.accept(matches...) + return true } - return MatchSeq(handlers...) } // MatchOptional creates a Handler that makes the provided Handler optional. @@ -756,9 +882,9 @@ func MatchAnyRune() Handler { // UTF8 rune can be read from the input. func MatchValidRune() Handler { return func(t *API) bool { - r, err := t.NextRune() + r, err := t.PeekRune(0) if err == nil && r != utf8.RuneError { - t.Accept() + t.accept(r) return true } return false @@ -769,9 +895,9 @@ func MatchValidRune() Handler { // UTF8 rune can be read from the input. func MatchInvalidRune() Handler { return func(t *API) bool { - r, err := t.NextRune() + r, err := t.PeekRune(0) if err == nil && r == utf8.RuneError { - t.Accept() + t.accept(r) return true } return false @@ -822,9 +948,106 @@ func MatchFloat() Handler { // // False falues: false, FALSE, False, 0, f, F func MatchBoolean() Handler { - trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T')) - falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F')) - return MatchAny(trues, falses) + return func(t *API) bool { + r1, err := t.PeekRune(0) + if err != nil { + return false + } + if r1 == '1' || r1 == '0' { + t.accept(r1) + return true + } + if r1 == 't' { + r2, err := t.PeekRune(1) + if err == nil && r2 == 'r' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'u' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 'e' { + t.accept(r1, r2, r3, r4) + return true + } + } + } + t.accept(r1) + return true + } + if r1 == 'T' { + r2, err := t.PeekRune(1) + if err == nil && r2 == 'r' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'u' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 'e' { + t.accept(r1, r2, r3, r4) + return true + } + } + } + if err == nil && r2 == 'R' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'U' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 'E' { + t.accept(r1, r2, r3, r4) + return true + } + } + } + t.accept(r1) + return true + } + if r1 == 'f' { + r2, err := t.PeekRune(1) + if err == nil && r2 == 'a' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'l' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 's' { + r5, err := t.PeekRune(4) + if err == nil && r5 == 'e' { + t.accept(r1, r2, r3, r4, r5) + return true + } + } + } + } + t.accept(r1) + return true + } + if r1 == 'F' { + r2, err := t.PeekRune(1) + if err == nil && r2 == 'a' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'l' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 's' { + r5, err := t.PeekRune(4) + if err == nil && r5 == 'e' { + t.accept(r1, r2, r3, r4, r5) + return true + } + } + } + } + if err == nil && r2 == 'A' { + r3, err := t.PeekRune(2) + if err == nil && r3 == 'L' { + r4, err := t.PeekRune(3) + if err == nil && r4 == 'S' { + r5, err := t.PeekRune(4) + if err == nil && r5 == 'E' { + t.accept(r1, r2, r3, r4, r5) + return true + } + } + } + } + t.accept(r1) + return true + } + return false + } } // MatchASCII creates a Handler function that matches against any