diff --git a/tokenize/api.go b/tokenize/api.go
index 2503c1a..e68b7c9 100644
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -247,15 +247,8 @@ func (i *API) Fork() int {
 	i.stackLevel++
 	i.runeRead = false
 
-	// TODO do some good benchmarking on these two options. The explicit version might be
-	// the faster one, but I am not sure of that right now.
-	// A
-	// i.stackFrames[i.stackLevel] = *i.stackFrame
-	// i.stackFrame = &i.stackFrames[i.stackLevel]
-	// i.stackFrame.runeStart = i.stackFrame.runeEnd
-	// i.stackFrame.tokenStart = i.stackFrame.tokenEnd
-
-	// B
+	// This can be written in a shorter way, but this turned out to
+	// be the best way performance-wise.
 	parent := i.stackFrame
 	child := &i.stackFrames[i.stackLevel]
 	child.offset = parent.offset
diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go
index b21ac69..e73acd8 100644
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -67,6 +67,9 @@ var C = struct {
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var A = struct {
+	Byte func(byte) Handler
+	Bytes func(...byte) Handler
+	ByteRange func(byte, byte) Handler
 	Rune func(rune) Handler
 	Runes func(...rune) Handler
 	RuneRange func(rune, rune) Handler
@@ -75,6 +78,7 @@ var A = struct {
 	EndOfLine Handler
 	EndOfFile Handler
 	UntilEndOfLine Handler
+	AnyByte Handler
 	AnyRune Handler
 	ValidRune Handler
 	InvalidRune Handler
@@ -153,6 +157,9 @@ var A = struct {
 	IPv6CIDRMask Handler
 	IPv6Net Handler
 }{
+	Byte: MatchByte,
+	Bytes: MatchBytes,
+	ByteRange: MatchByteRange,
 	Rune: MatchRune,
 	Runes: MatchRunes,
 	RuneRange: MatchRuneRange,
@@ -161,6 +168,7 @@ var A = struct {
 	EndOfFile: MatchEndOfFile(),
 	EndOfLine: MatchEndOfLine(),
 	UntilEndOfLine: MatchUntilEndOfLine(),
+	AnyByte: MatchAnyByte(),
 	AnyRune: MatchAnyRune(),
 	ValidRune: MatchValidRune(),
 	InvalidRune: MatchInvalidRune(),
@@ -333,8 +341,24 @@ var T = struct {
 	Group: MakeTokenGroup,
 }
 
+// MatchByte creates a Handler function that matches against the provided byte.
+func MatchByte(expected byte) Handler {
+	return func(t *API) bool {
+		b, err := t.PeekByte(0)
+		if err == nil && b == expected {
+			t.acceptBytes(b)
+			return true
+		}
+		return false
+	}
+}
+
 // MatchRune creates a Handler function that matches against the provided rune.
 func MatchRune(expected rune) Handler {
+	// Only ASCII runes are a single byte in UTF-8; decode all others.
+	if expected <= 127 {
+		return MatchByte(byte(expected))
+	}
 	return func(t *API) bool {
 		r, w, err := t.PeekRune(0)
 		if err == nil && r == expected {
@@ -345,9 +369,40 @@ func MatchRune(expected rune) Handler {
 	}
 }
 
+// MatchBytes creates a Handler function that checks if the input matches
+// one of the provided bytes. The first match counts.
+func MatchBytes(expected ...byte) Handler {
+	return func(t *API) bool {
+		b, err := t.PeekByte(0)
+		if err != nil {
+			return false
+		}
+		for _, e := range expected {
+			if b == e {
+				t.acceptBytes(b)
+				return true
+			}
+		}
+		return false
+	}
+}
+
 // MatchRunes creates a Handler function that checks if the input matches
 // one of the provided runes. The first match counts.
 func MatchRunes(expected ...rune) Handler {
+	// Delegate to the byte matcher only when every rune is ASCII.
+	onlyBytes := true
+	expectedBytes := make([]byte, len(expected))
+	for i, r := range expected {
+		if r > 127 {
+			onlyBytes = false
+			break
+		}
+		expectedBytes[i] = byte(r)
+	}
+	if onlyBytes {
+		return MatchBytes(expectedBytes...)
+	}
 	return func(t *API) bool {
 		r, w, err := t.PeekRune(0)
 		if err != nil {
@@ -363,6 +418,27 @@ func MatchRunes(expected ...rune) Handler {
 	}
 }
 
+// MatchByteRange creates a Handler function that checks if the input
+// matches the provided byte range. The byte range is defined by a start and
+// an end byte, inclusive, so:
+//
+//     MatchByteRange('5', '9')
+//
+// creates a Handler that will match any of '5', '6', '7', '8' or '9'.
+func MatchByteRange(start byte, end byte) Handler {
+	if end < start {
+		callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
+	}
+	return func(t *API) bool {
+		b, err := t.PeekByte(0)
+		if err == nil && b >= start && b <= end {
+			t.acceptBytes(b)
+			return true
+		}
+		return false
+	}
+}
+
 // MatchRuneRange creates a Handler function that checks if the input
 // matches the provided rune range. The rune range is defined by a start and
 // an end rune, inclusive, so:
@@ -374,17 +450,8 @@ func MatchRuneRange(start rune, end rune) Handler {
 	if end < start {
 		callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
 	}
 	if end <= 127 {
-		byteStart := byte(start)
-		byteEnd := byte(end)
-		return func(t *API) bool {
-			r, err := t.PeekByte(0)
-			if err == nil && r >= byteStart && r <= byteEnd {
-				t.acceptBytes(r)
-				return true
-			}
-			return false
-		}
+		return MatchByteRange(byte(start), byte(end))
 	}
 	return func(t *API) bool {
 		r, w, err := t.PeekRune(0)
@@ -554,10 +621,21 @@ func MatchStr(expected string) Handler {
 	width := len(expected)
 
 	return func(t *API) bool {
-		for i, e := range expectedRunes {
-			r, _, err := t.PeekRune(i)
-			if err != nil || e != r {
-				return false
+		offset := 0
+		for _, e := range expectedRunes {
+			// Only ASCII runes are a single byte in UTF-8; decode all others.
+			if e <= 127 {
+				b, err := t.PeekByte(offset)
+				if err != nil || b != byte(e) {
+					return false
+				}
+				offset++
+			} else {
+				r, w, err := t.PeekRune(offset)
+				if err != nil || e != r {
+					return false
+				}
+				offset += w
 			}
 		}
 		t.acceptRunes(width, expectedRunes...)
@@ -569,16 +647,29 @@ func MatchStr(expected string) Handler {
 // provided string in a case-insensitive manner.
 func MatchStrNoCase(expected string) Handler {
-	l := len([]rune(expected))
+	// Index matches by rune position: ranging over the string itself yields
+	// byte offsets, which overrun matches when expected has multi-byte runes.
+	expectedRunes := []rune(expected)
+	l := len(expectedRunes)
+
 	return func(t *API) bool {
 		matches := make([]rune, l)
 		width := 0
-		for i, e := range expected {
-			r, w, err := t.PeekRune(i)
-			if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) {
-				return false
+		for i, e := range expectedRunes {
+			if e <= 127 {
+				b, err := t.PeekByte(width)
+				if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
+					return false
+				}
+				matches[i] = rune(b)
+				width++
+			} else {
+				r, w, err := t.PeekRune(width)
+				if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
+					return false
+				}
+				matches[i] = r
+				width += w
 			}
-			matches[i] = r
-			width += w
 		}
 		t.acceptRunes(width, matches...)
 		return true
@@ -882,14 +973,26 @@ func MatchUntilEndOfLine() Handler {
 	return MatchOneOrMore(MatchNot(MatchEndOfLine()))
 }
 
+// MatchAnyByte creates a Handler function that accepts any byte from the input.
+func MatchAnyByte() Handler {
+	return func(t *API) bool {
+		b, err := t.PeekByte(0)
+		if err == nil {
+			t.acceptBytes(b)
+			return true
+		}
+		return false
+	}
+}
+
 // MatchAnyRune creates a Handler function that checks if a rune can be
 // read from the input. Invalid runes on the input are replaced with the UTF8
 // replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
 func MatchAnyRune() Handler {
 	return func(t *API) bool {
-		_, err := t.NextRune()
+		r, w, err := t.PeekRune(0)
 		if err == nil {
-			t.Accept()
+			t.acceptRunes(w, r)
 			return true
 		}
 		return false