Further switching to byte-based input handling.

This commit is contained in:
Maurice Makaay 2019-07-16 07:05:10 +00:00
parent 0362763e83
commit 4cfdbafa6e
2 changed files with 121 additions and 31 deletions

View File

@ -247,15 +247,8 @@ func (i *API) Fork() int {
i.stackLevel++
i.runeRead = false
// TODO do some good benchmarking on these two options. The explicit version might be
// the faster one, but I am not sure of that right now.
// A
// i.stackFrames[i.stackLevel] = *i.stackFrame
// i.stackFrame = &i.stackFrames[i.stackLevel]
// i.stackFrame.runeStart = i.stackFrame.runeEnd
// i.stackFrame.tokenStart = i.stackFrame.tokenEnd
// B
// This can be written in a shorter way, but this turned out to
// be the best way performance-wise.
parent := i.stackFrame
child := &i.stackFrames[i.stackLevel]
child.offset = parent.offset

View File

@ -67,6 +67,9 @@ var C = struct {
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
Byte func(byte) Handler
Bytes func(...byte) Handler
ByteRange func(byte, byte) Handler
Rune func(rune) Handler
Runes func(...rune) Handler
RuneRange func(rune, rune) Handler
@ -75,6 +78,7 @@ var A = struct {
EndOfLine Handler
EndOfFile Handler
UntilEndOfLine Handler
AnyByte Handler
AnyRune Handler
ValidRune Handler
InvalidRune Handler
@ -153,6 +157,9 @@ var A = struct {
IPv6CIDRMask Handler
IPv6Net Handler
}{
Byte: MatchByte,
Bytes: MatchBytes,
ByteRange: MatchByteRange,
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
@ -161,6 +168,7 @@ var A = struct {
EndOfFile: MatchEndOfFile(),
EndOfLine: MatchEndOfLine(),
UntilEndOfLine: MatchUntilEndOfLine(),
AnyByte: MatchAnyByte(),
AnyRune: MatchAnyRune(),
ValidRune: MatchValidRune(),
InvalidRune: MatchInvalidRune(),
@ -333,8 +341,23 @@ var T = struct {
Group: MakeTokenGroup,
}
// MatchByte builds a Handler that succeeds only when the next input byte
// equals the expected byte. On success, the byte is accepted via acceptBytes.
func MatchByte(expected byte) Handler {
	return func(t *API) bool {
		next, err := t.PeekByte(0)
		// Either no byte could be peeked, or it is not the one we want.
		if err != nil || next != expected {
			return false
		}
		t.acceptBytes(next)
		return true
	}
}
// MatchRune creates a Handler function that matches against the provided rune.
func MatchRune(expected rune) Handler {
if expected <= 255 {
return MatchByte(byte(expected))
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
if err == nil && r == expected {
@ -345,9 +368,39 @@ func MatchRune(expected rune) Handler {
}
}
// MatchBytes builds a Handler that succeeds when the next input byte equals
// any one of the provided bytes. Candidates are checked in the order given
// and the first one that matches wins; the byte is then accepted via
// acceptBytes.
func MatchBytes(expected ...byte) Handler {
	return func(t *API) bool {
		next, err := t.PeekByte(0)
		if err != nil {
			return false
		}
		for idx := range expected {
			if expected[idx] != next {
				continue
			}
			t.acceptBytes(next)
			return true
		}
		return false
	}
}
// MatchRunes creates a Handler function that checks if the input matches
// one of the provided runes. The first match counts.
func MatchRunes(expected ...rune) Handler {
onlyBytes := true
expectedBytes := make([]byte, len(expected))
for i, r := range expected {
if r > 255 {
onlyBytes = false
break
}
expectedBytes[i] = byte(r)
}
if onlyBytes {
return MatchBytes(expectedBytes...)
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
if err != nil {
@ -363,6 +416,27 @@ func MatchRunes(expected ...rune) Handler {
}
}
// MatchByteRange creates a Handler function that checks if the input
// matches the provided byte range. The byte range is defined by a start and
// an end byte, inclusive, so:
//
// MatchByteRange('5', '9')
//
// creates a Handler that will match any of '5', '6', '7', '8' or '9'.
//
// When end is smaller than start, the range is empty and the definition is
// reported as an error through callerPanic.
func MatchByteRange(start byte, end byte) Handler {
	if end < start {
		// The message states the violated constraint: start may not exceed end.
		callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be > end %q", start, end)
	}
	return func(t *API) bool {
		b, err := t.PeekByte(0)
		// Reject when no byte is available or it falls outside [start, end].
		if err != nil || b < start || b > end {
			return false
		}
		t.acceptBytes(b)
		return true
	}
}
// MatchRuneRange creates a Handler function that checks if the input
// matches the provided rune range. The rune range is defined by a start and
// an end rune, inclusive, so:
@ -374,17 +448,8 @@ func MatchRuneRange(start rune, end rune) Handler {
if end < start {
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
}
if end <= 127 {
byteStart := byte(start)
byteEnd := byte(end)
return func(t *API) bool {
r, err := t.PeekByte(0)
if err == nil && r >= byteStart && r <= byteEnd {
t.acceptBytes(r)
return true
}
return false
}
if end <= 255 {
return MatchByteRange(byte(start), byte(end))
}
return func(t *API) bool {
r, w, err := t.PeekRune(0)
@ -554,10 +619,20 @@ func MatchStr(expected string) Handler {
width := len(expected)
return func(t *API) bool {
for i, e := range expectedRunes {
r, _, err := t.PeekRune(i)
if err != nil || e != r {
return false
offset := 0
for _, e := range expectedRunes {
if e <= 255 {
b, err := t.PeekByte(offset)
if err != nil || b != byte(e) {
return false
}
offset++
} else {
r, w, err := t.PeekRune(offset)
if err != nil || e != r {
return false
}
offset += w
}
}
t.acceptRunes(width, expectedRunes...)
@ -569,16 +644,26 @@ func MatchStr(expected string) Handler {
// provided string in a case-insensitive manner.
func MatchStrNoCase(expected string) Handler {
l := len([]rune(expected))
return func(t *API) bool {
matches := make([]rune, l)
width := 0
for i, e := range expected {
r, w, err := t.PeekRune(i)
if err != nil || unicode.ToUpper(e) != unicode.ToUpper(r) {
return false
if e <= 255 {
b, err := t.PeekByte(width)
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(rune(e))) {
return false
}
matches[i] = rune(b)
width++
} else {
r, w, err := t.PeekRune(width)
if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) {
return false
}
matches[i] = r
width += w
}
matches[i] = r
width += w
}
t.acceptRunes(width, matches...)
return true
@ -882,14 +967,26 @@ func MatchUntilEndOfLine() Handler {
return MatchOneOrMore(MatchNot(MatchEndOfLine()))
}
// MatchAnyByte builds a Handler that succeeds whenever a byte can be peeked
// from the input, regardless of its value. The peeked byte is accepted via
// acceptBytes.
func MatchAnyByte() Handler {
	return func(t *API) bool {
		next, err := t.PeekByte(0)
		if err != nil {
			return false
		}
		t.acceptBytes(next)
		return true
	}
}
// MatchAnyRune creates a Handler function that checks if a rune can be
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
func MatchAnyRune() Handler {
return func(t *API) bool {
_, err := t.NextRune()
r, w, err := t.PeekRune(0)
if err == nil {
t.Accept()
t.acceptRunes(w, r)
return true
}
return false