More speed improvements.

This commit is contained in:
Maurice Makaay 2019-07-29 22:52:38 +00:00
parent 8ef9aed096
commit b9cc91c0ae
3 changed files with 362 additions and 324 deletions

View File

@ -8,36 +8,6 @@ import (
"git.makaay.nl/mauricem/go-parsekit/tokenize" "git.makaay.nl/mauricem/go-parsekit/tokenize"
) )
// BenchmarkMemclrOptimization measures clearing a small slice of structs by
// ranging over it and assigning the zero value to every element.
//
// TODO use or cleanup this one and the next. I'm playing around here.
func BenchmarkMemclrOptimization(b *testing.B) {
	type item struct {
		a int
		b string
	}
	items := []item{
		{a: 10, b: "hoi"},
		{a: 20, b: "doei"},
		{a: 30, b: "jadag"},
	}
	for round := 0; round < b.N; round++ {
		// Distinct index name: the original reused the outer loop's name here.
		for idx := range items {
			items[idx] = item{}
		}
	}
}
// BenchmarkCodedClear measures clearing a three-element slice of structs with
// one explicit zero-value assignment per element instead of a loop over the
// slice.
func BenchmarkCodedClear(b *testing.B) {
	type item struct {
		a int
		b string
	}
	items := []item{
		{a: 10, b: "hoi"},
		{a: 20, b: "doei"},
		{a: 30, b: "jadag"},
	}
	for round := 0; round < b.N; round++ {
		items[0], items[1], items[2] = item{}, item{}, item{}
	}
}
func ExampleNewAPI() { func ExampleNewAPI() {
tokenize.NewAPI("The input that the API will handle") tokenize.NewAPI("The input that the API will handle")
} }

View File

@ -39,6 +39,7 @@ var C = struct {
Except func(except Handler, handler Handler) Handler Except func(except Handler, handler Handler) Handler
FollowedBy func(lookAhead Handler, handler Handler) Handler FollowedBy func(lookAhead Handler, handler Handler) Handler
NotFollowedBy func(lookAhead Handler, handler Handler) Handler NotFollowedBy func(lookAhead Handler, handler Handler) Handler
InOptionalBlanks func(handler Handler) Handler
FlushInput func(Handler) Handler FlushInput func(Handler) Handler
}{ }{
Any: MatchAny, Any: MatchAny,
@ -55,6 +56,7 @@ var C = struct {
Except: MatchExcept, Except: MatchExcept,
FollowedBy: MatchFollowedBy, FollowedBy: MatchFollowedBy,
NotFollowedBy: MatchNotFollowedBy, NotFollowedBy: MatchNotFollowedBy,
InOptionalBlanks: MatchInOptionalBlanks,
FlushInput: MakeInputFlusher, FlushInput: MakeInputFlusher,
} }
@ -70,6 +72,7 @@ var A = struct {
Char func(...rune) Handler Char func(...rune) Handler
CharRange func(...rune) Handler CharRange func(...rune) Handler
ByteByCallback func(func(byte) bool) Handler ByteByCallback func(func(byte) bool) Handler
BytesByCallback func(func(byte) bool) Handler
RuneByCallback func(func(rune) bool) Handler RuneByCallback func(func(rune) bool) Handler
AnyByte Handler AnyByte Handler
AnyRune Handler AnyRune Handler
@ -158,6 +161,7 @@ var A = struct {
Char: MatchChar, Char: MatchChar,
CharRange: MatchCharRange, CharRange: MatchCharRange,
ByteByCallback: MatchByteByCallback, ByteByCallback: MatchByteByCallback,
BytesByCallback: MatchBytesByCallback,
RuneByCallback: MatchRuneByCallback, RuneByCallback: MatchRuneByCallback,
AnyByte: MatchAnyByte(), AnyByte: MatchAnyByte(),
AnyRune: MatchAnyRune(), AnyRune: MatchAnyRune(),
@ -552,21 +556,32 @@ func MatchBlank() Handler {
// like a vertical tab, then make use of MatchUnicodeSpace(). // like a vertical tab, then make use of MatchUnicodeSpace().
func MatchBlanks() Handler { func MatchBlanks() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Match the first blank. f := tokenAPI.Input.Byte.AcceptMulti
b, err := tokenAPI.Input.Byte.Peek(0) if tokenAPI.Output.suspended > 0 {
if err != nil || (b != ' ' && b != '\t') { f = tokenAPI.Input.Byte.MoveCursorMulti
}
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
for i, b := range chunk {
if b != ' ' && b != '\t' {
if i > 0 {
f(chunk[:i]...)
}
return ok
}
ok = true
}
if err != nil {
if err == io.EOF {
if len(chunk) > 0 {
f(chunk...)
}
return ok
}
return false return false
} }
tokenAPI.Input.Byte.Accept(b) f(chunk...)
// Now match any number of followup blanks. We've already got
// a successful match at this point, so we'll always return true at the end.
for {
b, err := tokenAPI.Input.Byte.Peek(0)
if err != nil || (b != ' ' && b != '\t') {
return true
}
tokenAPI.Input.Byte.Accept(b)
} }
} }
} }
@ -576,37 +591,32 @@ func MatchBlanks() Handler {
// carriage return '\r' followed by a newline '\n' (CRLF). // carriage return '\r' followed by a newline '\n' (CRLF).
func MatchWhitespace() Handler { func MatchWhitespace() Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Match the first whitespace. f := tokenAPI.Input.Byte.AcceptMulti
b1, err := tokenAPI.Input.Byte.Peek(0) if tokenAPI.Output.suspended > 0 {
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { f = tokenAPI.Input.Byte.MoveCursorMulti
return false
} }
if b1 == '\r' { ok := false
b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' {
return false
}
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
} else {
tokenAPI.Input.Byte.Accept(b1)
}
// Now match any number of followup whitespace. We've already got
// a successful match at this point, so we'll always return true at the end.
for { for {
b1, err := tokenAPI.Input.Byte.Peek(0) chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { for i, b := range chunk {
return true if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
if i > 0 {
f(chunk[:i]...)
} }
if b1 == '\r' { return ok
b2, err := tokenAPI.Input.Byte.Peek(1)
if err != nil || b2 != '\n' {
return true
} }
tokenAPI.Input.Byte.AcceptMulti(b1, b2) ok = true
} else {
tokenAPI.Input.Byte.Accept(b1)
} }
if err != nil {
if err == io.EOF {
if len(chunk) > 0 {
f(chunk...)
}
return ok
}
return false
}
f(chunk...)
} }
} }
} }
@ -620,9 +630,6 @@ func MatchUnicodeSpace() Handler {
// MatchByteByCallback creates a Handler that matches a single byte from the // MatchByteByCallback creates a Handler that matches a single byte from the
// input against the provided callback function. When the callback returns true, // input against the provided callback function. When the callback returns true,
// it is considered a match. // it is considered a match.
//
// Note that the callback function matches the signature of the unicode.Is* functions,
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchByteByCallback(callback func(byte) bool) Handler { func MatchByteByCallback(callback func(byte) bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
b, err := tokenAPI.Input.Byte.Peek(0) b, err := tokenAPI.Input.Byte.Peek(0)
@ -634,6 +641,41 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
} }
} }
// MatchBytesByCallback creates a Handler that matches a run of one or more
// bytes from the input, for as long as the provided callback function returns
// true for each successive byte.
//
// The input is inspected in chunks requested through PeekMulti(0, 128).
// Matching bytes are handed to AcceptMulti, or to MoveCursorMulti when
// Output.suspended is greater than zero.
//
// The Handler returns true when at least one byte matched before either a
// non-matching byte or io.EOF was encountered. It returns false when no byte
// matched, and also when PeekMulti reports an error other than io.EOF.
func MatchBytesByCallback(callback func(byte) bool) Handler {
	return func(tokenAPI *API) bool {
		consume := tokenAPI.Input.Byte.AcceptMulti
		if tokenAPI.Output.suspended > 0 {
			consume = tokenAPI.Input.Byte.MoveCursorMulti
		}
		matched := false
		for {
			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)

			// Count the leading bytes of this chunk that the callback accepts.
			n := 0
			for n < len(chunk) && callback(chunk[n]) {
				n++
			}
			if n > 0 {
				matched = true
			}

			// A rejected byte inside the chunk ends the run.
			if n < len(chunk) {
				if n > 0 {
					consume(chunk[:n]...)
				}
				return matched
			}

			// The whole chunk matched; the error decides whether to go on.
			switch {
			case err == nil:
				consume(chunk...)
			case err == io.EOF:
				if len(chunk) > 0 {
					consume(chunk...)
				}
				return matched
			default:
				return false
			}
		}
	}
}
// MatchRuneByCallback creates a Handler that matches a single rune from the // MatchRuneByCallback creates a Handler that matches a single rune from the
// input against the provided callback function. When the callback returns true, // input against the provided callback function. When the callback returns true,
// it is considered a match. // it is considered a match.
@ -947,6 +989,37 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
} }
} }
// MatchInOptionalBlanks creates a Handler that calls skipBlanks on the input,
// then runs the provided handler, and calls skipBlanks once more when that
// handler matched.
//
// The returned Handler reports the result of the provided handler. When the
// provided handler does not match, the trailing skipBlanks call is not made;
// the leading one has already run by then and is not undone here.
func MatchInOptionalBlanks(handler Handler) Handler {
	return func(tokenAPI *API) bool {
		skipBlanks(tokenAPI)
		matched := handler(tokenAPI)
		if matched {
			skipBlanks(tokenAPI)
		}
		return matched
	}
}
// skipBlanks moves the input cursor past the run of blanks (spaces and tabs)
// found at the current input position. The blanks are passed to
// MoveCursorMulti, not to AcceptMulti.
//
// The input is inspected in chunks requested through PeekMulti(0, 128).
// Skipping stops at the first non-blank byte, when PeekMulti reports an error
// (after skipping any blanks it returned alongside that error), or when
// PeekMulti returns no bytes at all.
func skipBlanks(tokenAPI *API) {
	for {
		bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
		for i, b := range bs {
			if b != ' ' && b != '\t' {
				if i > 0 {
					tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...)
				}
				return
			}
		}
		// The whole chunk is blank. Move past it before peeking again;
		// otherwise the next PeekMulti(0, ...) returns the very same bytes
		// and the loop never ends for runs of a full chunk or more.
		if len(bs) > 0 {
			tokenAPI.Input.Byte.MoveCursorMulti(bs...)
		}
		// Nothing more can be read on an error, and an empty chunk means no
		// progress is possible.
		if err != nil || len(bs) == 0 {
			return
		}
	}
}
// MakeInputFlusher creates a Handler that will flush the input buffer when the // MakeInputFlusher creates a Handler that will flush the input buffer when the
// provided handler matches. // provided handler matches.
// //
@ -1037,31 +1110,35 @@ func MatchUntilEndOfLine() Handler {
f = tokenAPI.Input.Byte.MoveCursorMulti f = tokenAPI.Input.Byte.MoveCursorMulti
} }
for { for {
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128) chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
state := 0 state := 0
for i, b := range bs { ok := false
for i, b := range chunk {
if b == '\r' { if b == '\r' {
state = 1 state = 1
continue continue
} }
if b == '\n' { if b == '\n' {
if state == 1 { if state == 1 {
f(bs[:i+1]...) f(chunk[:i+1]...)
} else { } else if i > 0 {
f(bs[:i]...) f(chunk[:i]...)
} }
return true return ok
} }
state = 0 state = 0
ok = true
} }
if err != nil { if err != nil {
if err == io.EOF { if err == io.EOF {
f(bs...) if len(chunk) > 0 {
return true f(chunk...)
}
return ok
} }
return false return false
} }
f(bs...) f(chunk...)
} }
} }
} }
@ -1350,50 +1427,41 @@ func MatchHexDigit() Handler {
// stripped from the octet. // stripped from the octet.
func MatchOctet(normalize bool) Handler { func MatchOctet(normalize bool) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
// Digit 1 chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
b0, err := tokenAPI.Input.Byte.Peek(0) value := 0
if err != nil || b0 < '0' || b0 > '9' { start := 0
end := 0
for i, b := range chunk {
if b < '0' || b > '9' {
if i == 0 {
return false
}
break
}
if b == '0' && value == 0 {
start++
} else {
value = value*10 + int(b-'0')
}
end++
}
if value > 255 {
return false return false
} }
// Digit 2 if normalize {
b1, err := tokenAPI.Input.Byte.Peek(1) if value == 0 {
if err != nil || b1 < '0' || b1 > '9' { start--
// Output 1-digit octet. }
tokenAPI.Input.Byte.Accept(b0) if start > 0 {
return true tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
}
tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
} else {
tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
} }
// Digit 3
b2, err := tokenAPI.Input.Byte.Peek(2)
if err != nil || b2 < '0' || b2 > '9' {
// Output 2-digit octet.
if normalize && b0 == '0' {
tokenAPI.Input.Byte.MoveCursor(b0)
tokenAPI.Input.Byte.Accept(b1)
} else {
tokenAPI.Input.Byte.AcceptMulti(b0, b1)
}
return true
}
// The value of the octet must be between 0 - 255.
if b0 > '2' || (b0 == '2' && b1 > '5') || (b0 == '2' && b1 == '5' && b2 > '5') {
return false
}
// Output 3-digit octet.
if normalize && b0 == '0' {
tokenAPI.Input.Byte.MoveCursor(b0)
if b1 == '0' {
tokenAPI.Input.Byte.MoveCursor(b1)
} else {
tokenAPI.Input.Byte.Accept(b1)
}
tokenAPI.Input.Byte.Accept(b2)
} else {
tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
}
return true return true
} }
} }

View File

@ -267,7 +267,7 @@ func TestIPv4Atoms(t *testing.T) {
{"256123", tokenize.MatchOctet(false), false, ""}, {"256123", tokenize.MatchOctet(false), false, ""},
{"300", tokenize.MatchOctet(false), false, ""}, {"300", tokenize.MatchOctet(false), false, ""},
// Octet. // // Octet.
{"0", tokenize.MatchOctet(false), true, "0"}, {"0", tokenize.MatchOctet(false), true, "0"},
{"02", tokenize.MatchOctet(false), true, "02"}, {"02", tokenize.MatchOctet(false), true, "02"},
{"003", tokenize.MatchOctet(false), true, "003"}, {"003", tokenize.MatchOctet(false), true, "003"},