More speed improvements.

This commit is contained in:
Maurice Makaay 2019-07-29 22:52:38 +00:00
parent 8ef9aed096
commit b9cc91c0ae
3 changed files with 362 additions and 324 deletions

View File

@ -8,36 +8,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// BenchmarkMemclrOptimization times resetting every element of a small slice
// of structs to its zero value with a range loop, once per benchmark
// iteration.
//
// NOTE(review): going by the name, this presumably targets the compiler's
// handling of the "range over slice, assign zero value" loop form - confirm.
//
// TODO use or cleanup this one and the next. I'm playing around here.
func BenchmarkMemclrOptimization(b *testing.B) {
	type entry struct {
		num  int
		text string
	}
	entries := []entry{
		{num: 10, text: "hoi"},
		{num: 20, text: "doei"},
		{num: 30, text: "jadag"},
	}
	for iter := 0; iter < b.N; iter++ {
		// Keep the plain range-and-zero loop: this loop form is the thing
		// being measured, so it must not be replaced by another construct.
		for idx := range entries {
			entries[idx] = entry{}
		}
	}
}
// BenchmarkCodedClear times resetting a three-element slice of structs to
// zero values by assigning each index explicitly, once per benchmark
// iteration. It is the hand-unrolled counterpart of a range loop that zeroes
// the same slice.
func BenchmarkCodedClear(b *testing.B) {
	type entry struct {
		num  int
		text string
	}
	entries := []entry{
		{num: 10, text: "hoi"},
		{num: 20, text: "doei"},
		{num: 30, text: "jadag"},
	}
	var zero entry
	for iter := 0; iter < b.N; iter++ {
		entries[0] = zero
		entries[1] = zero
		entries[2] = zero
	}
}
// ExampleNewAPI shows the most basic use of the package: handing a piece of
// input text to tokenize.NewAPI.
//
// The example has no "Output:" comment, so it is compiled by go test but
// not executed.
func ExampleNewAPI() {
	const input = "The input that the API will handle"
	tokenize.NewAPI(input)
}

View File

@ -39,6 +39,7 @@ var C = struct {
Except func(except Handler, handler Handler) Handler
FollowedBy func(lookAhead Handler, handler Handler) Handler
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
InOptionalBlanks func(handler Handler) Handler
FlushInput func(Handler) Handler
}{
Any: MatchAny,
@ -55,6 +56,7 @@ var C = struct {
Except: MatchExcept,
FollowedBy: MatchFollowedBy,
NotFollowedBy: MatchNotFollowedBy,
InOptionalBlanks: MatchInOptionalBlanks,
FlushInput: MakeInputFlusher,
}
@ -70,6 +72,7 @@ var A = struct {
Char func(...rune) Handler
CharRange func(...rune) Handler
ByteByCallback func(func(byte) bool) Handler
BytesByCallback func(func(byte) bool) Handler
RuneByCallback func(func(rune) bool) Handler
AnyByte Handler
AnyRune Handler
@ -158,6 +161,7 @@ var A = struct {
Char: MatchChar,
CharRange: MatchCharRange,
ByteByCallback: MatchByteByCallback,
BytesByCallback: MatchBytesByCallback,
RuneByCallback: MatchRuneByCallback,
AnyByte: MatchAnyByte(),
AnyRune: MatchAnyRune(),
@ -552,21 +556,32 @@ func MatchBlank() Handler {
// like a vertical tab, then make use of MatchUnicodeSpace().
func MatchBlanks() Handler {
return func(tokenAPI *API) bool {
// Match the first blank.
b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') {
f := tokenAPI.Input.Byte.AcceptMulti
if tokenAPI.Output.suspended > 0 {
f = tokenAPI.Input.Byte.MoveCursorMulti
}
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
for i, b := range chunk {
if b != ' ' && b != '\t' {
if i > 0 {
f(chunk[:i]...)
}
return ok
}
ok = true
}
if err != nil {
if err == io.EOF {
if len(chunk) > 0 {
f(chunk...)
}
return ok
}
return false
}
tokenAPI.Input.Byte.Accept(b)
// Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end.
for {
b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') {
return true
}
tokenAPI.Input.Byte.Accept(b)
f(chunk...)
}
}
}
@ -576,37 +591,32 @@ func MatchBlanks() Handler {
// carriage return '\r' followed by a newline '\n' (CRLF).
func MatchWhitespace() Handler {
return func(tokenAPI *API) bool {
// Match the first whitespace.
b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return false
f := tokenAPI.Input.Byte.AcceptMulti
if tokenAPI.Output.suspended > 0 {
f = tokenAPI.Input.Byte.MoveCursorMulti
}
if b1 == '\r' {
b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' {
return false
}
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
} else {
tokenAPI.Input.Byte.Accept(b1)
}
// Now match any number of followup whitespace. We've already got
// a successful match at this point, so we'll always return true at the end.
ok := false
for {
b1, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
return true
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
for i, b := range chunk {
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
if i > 0 {
f(chunk[:i]...)
}
if b1 == '\r' {
b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' {
return true
return ok
}
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
} else {
tokenAPI.Input.Byte.Accept(b1)
ok = true
}
if err != nil {
if err == io.EOF {
if len(chunk) > 0 {
f(chunk...)
}
return ok
}
return false
}
f(chunk...)
}
}
}
@ -620,9 +630,6 @@ func MatchUnicodeSpace() Handler {
// MatchByteByCallback creates a Handler that matches a single byte from the
// input against the provided callback function. When the callback returns true,
// it is considered a match.
//
// Note that the callback function matches the signature of the unicode.Is* functions,
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchByteByCallback(callback func(byte) bool) Handler {
return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0)
@ -634,6 +641,41 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
}
}
// MatchBytesByCallback creates a Handler that matches a run of one or more
// bytes from the input for which the provided callback function returns true.
// The run ends at the first byte that the callback rejects, or at the end of
// the input.
//
// The input is inspected in chunks of up to 128 bytes. Matched bytes are
// passed to AcceptMulti, or to MoveCursorMulti when the output is suspended
// (tokenAPI.Output.suspended > 0).
//
// The Handler returns true when at least one byte matched. When peeking
// fails with an error other than io.EOF, it returns false, even when earlier
// chunks did match.
func MatchBytesByCallback(callback func(byte) bool) Handler {
	return func(tokenAPI *API) bool {
		consume := tokenAPI.Input.Byte.AcceptMulti
		if tokenAPI.Output.suspended > 0 {
			consume = tokenAPI.Input.Byte.MoveCursorMulti
		}

		matched := false
		for {
			peeked, err := tokenAPI.Input.Byte.PeekMulti(0, 128)

			// Measure the prefix of this chunk that satisfies the callback.
			n := 0
			for n < len(peeked) && callback(peeked[n]) {
				n++
			}
			matched = matched || n > 0

			// A rejected byte inside the chunk ends the run right there.
			if n < len(peeked) {
				if n > 0 {
					consume(peeked[:n]...)
				}
				return matched
			}

			// The whole chunk matched; what happens next depends on whether
			// there is more input to look at.
			switch {
			case err == nil:
				consume(peeked...)
			case err == io.EOF:
				if len(peeked) > 0 {
					consume(peeked...)
				}
				return matched
			default:
				return false
			}
		}
	}
}
// MatchRuneByCallback creates a Handler that matches a single rune from the
// input against the provided callback function. When the callback returns true,
// it is considered a match.
@ -947,6 +989,37 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
}
}
// MatchInOptionalBlanks creates a Handler that applies the provided handler
// with optional blanks around it: skipBlanks is called before the handler
// runs and, when the handler matches, once more after it.
//
// The result is the result of the wrapped handler.
//
// NOTE(review): when the wrapped handler fails, the leading skipBlanks call
// is not undone in this function - confirm that callers do not depend on the
// cursor being untouched after a failed match.
func MatchInOptionalBlanks(handler Handler) Handler {
	return func(tokenAPI *API) bool {
		skipBlanks(tokenAPI)
		matched := handler(tokenAPI)
		if matched {
			skipBlanks(tokenAPI)
		}
		return matched
	}
}
// skipBlanks moves the input cursor past a run of blank bytes (spaces and
// tabs) at the current cursor position. The bytes are passed to
// MoveCursorMulti, not AcceptMulti. When the input does not start with a
// blank, nothing is moved.
//
// The input is inspected in chunks of up to 128 bytes. Any error from
// PeekMulti (end of input included) ends the scan after the blanks peeked so
// far have been skipped.
func skipBlanks(tokenAPI *API) {
	for {
		chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
		for i, b := range chunk {
			if b != ' ' && b != '\t' {
				// First non-blank byte: skip the blanks before it and stop.
				if i > 0 {
					tokenAPI.Input.Byte.MoveCursorMulti(chunk[:i]...)
				}
				return
			}
		}

		// The whole chunk consists of blanks. It must be consumed before
		// peeking again: PeekMulti reads relative to the cursor, so without
		// moving the cursor the next round would see the very same bytes and
		// the loop would never end on a run of 128 or more blanks.
		if len(chunk) > 0 {
			tokenAPI.Input.Byte.MoveCursorMulti(chunk...)
		}

		// Stop on any read error, and also when nothing was peeked at all,
		// since no further progress is possible in that case.
		if err != nil || len(chunk) == 0 {
			return
		}
	}
}
// MakeInputFlusher creates a Handler that will flush the input buffer when the
// provided handler matches.
//
@ -1037,31 +1110,35 @@ func MatchUntilEndOfLine() Handler {
f = tokenAPI.Input.Byte.MoveCursorMulti
}
for {
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
state := 0
for i, b := range bs {
ok := false
for i, b := range chunk {
if b == '\r' {
state = 1
continue
}
if b == '\n' {
if state == 1 {
f(bs[:i+1]...)
} else {
f(bs[:i]...)
f(chunk[:i+1]...)
} else if i > 0 {
f(chunk[:i]...)
}
return true
return ok
}
state = 0
ok = true
}
if err != nil {
if err == io.EOF {
f(bs...)
return true
if len(chunk) > 0 {
f(chunk...)
}
return ok
}
return false
}
f(bs...)
f(chunk...)
}
}
}
@ -1350,50 +1427,41 @@ func MatchHexDigit() Handler {
// stripped from the octet.
func MatchOctet(normalize bool) Handler {
return func(tokenAPI *API) bool {
// Digit 1
b0, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || b0 < '0' || b0 > '9' {
chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
value := 0
start := 0
end := 0
for i, b := range chunk {
if b < '0' || b > '9' {
if i == 0 {
return false
}
break
}
if b == '0' && value == 0 {
start++
} else {
value = value*10 + int(b-'0')
}
end++
}
if value > 255 {
return false
}
// Digit 2
b1, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b1 < '0' || b1 > '9' {
// Output 1-digit octet.
tokenAPI.Input.Byte.Accept(b0)
return true
if normalize {
if value == 0 {
start--
}
if start > 0 {
tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
}
tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
} else {
tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
}
// Digit 3
b2, err := tokenAPI.Input.Byte.Peek(2)
if err != nil || b2 < '0' || b2 > '9' {
// Output 2-digit octet.
if normalize && b0 == '0' {
tokenAPI.Input.Byte.MoveCursor(b0)
tokenAPI.Input.Byte.Accept(b1)
} else {
tokenAPI.Input.Byte.AcceptMulti(b0, b1)
}
return true
}
// The value of the octet must be between 0 - 255.
if b0 > '2' || (b0 == '2' && b1 > '5') || (b0 == '2' && b1 == '5' && b2 > '5') {
return false
}
// Output 3-digit octet.
if normalize && b0 == '0' {
tokenAPI.Input.Byte.MoveCursor(b0)
if b1 == '0' {
tokenAPI.Input.Byte.MoveCursor(b1)
} else {
tokenAPI.Input.Byte.Accept(b1)
}
tokenAPI.Input.Byte.Accept(b2)
} else {
tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
}
return true
}
}

View File

@ -267,7 +267,7 @@ func TestIPv4Atoms(t *testing.T) {
{"256123", tokenize.MatchOctet(false), false, ""},
{"300", tokenize.MatchOctet(false), false, ""},
// Octet.
// // Octet.
{"0", tokenize.MatchOctet(false), true, "0"},
{"02", tokenize.MatchOctet(false), true, "02"},
{"003", tokenize.MatchOctet(false), true, "003"},