More speed improvements.

2019-07-29 22:52:38 +00:00 · 2019-07-29 22:52:38 +00:00 · b9cc91c0ae
parent 8ef9aed096
commit b9cc91c0ae
3 changed files with 362 additions and 324 deletions
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@ -8,36 +8,6 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )

-func BenchmarkMemclrOptimization(b *testing.B) {
-	// TODO use or cleanup this one and the next. I'm playing around here.
-	type s struct {
-		a int
-		b string
-	}
-	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
-
-	for i := 0; i < b.N; i++ {
-		for i := range x {
-			x[i] = s{}
-		}
-	}
-}
-
-func BenchmarkCodedClear(b *testing.B) {
-	type s struct {
-		a int
-		b string
-	}
-
-	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
-
-	for i := 0; i < b.N; i++ {
-		x[0] = s{}
-		x[1] = s{}
-		x[2] = s{}
-	}
-}
-
 func ExampleNewAPI() {
 	tokenize.NewAPI("The input that the API will handle")
 }
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@ -25,37 +25,39 @@ import (
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var C = struct {
-	Any           func(...Handler) Handler
-	Not           func(Handler) Handler
-	Seq           func(...Handler) Handler
-	Min           func(min int, handler Handler) Handler
-	Max           func(max int, handler Handler) Handler
-	Repeated      func(times int, handler Handler) Handler
-	Optional      func(Handler) Handler
-	ZeroOrMore    func(Handler) Handler
-	OneOrMore     func(Handler) Handler
-	MinMax        func(min int, max int, handler Handler) Handler
-	Separated     func(separator Handler, separated Handler) Handler
-	Except        func(except Handler, handler Handler) Handler
-	FollowedBy    func(lookAhead Handler, handler Handler) Handler
-	NotFollowedBy func(lookAhead Handler, handler Handler) Handler
-	FlushInput    func(Handler) Handler
+	Any              func(...Handler) Handler
+	Not              func(Handler) Handler
+	Seq              func(...Handler) Handler
+	Min              func(min int, handler Handler) Handler
+	Max              func(max int, handler Handler) Handler
+	Repeated         func(times int, handler Handler) Handler
+	Optional         func(Handler) Handler
+	ZeroOrMore       func(Handler) Handler
+	OneOrMore        func(Handler) Handler
+	MinMax           func(min int, max int, handler Handler) Handler
+	Separated        func(separator Handler, separated Handler) Handler
+	Except           func(except Handler, handler Handler) Handler
+	FollowedBy       func(lookAhead Handler, handler Handler) Handler
+	NotFollowedBy    func(lookAhead Handler, handler Handler) Handler
+	InOptionalBlanks func(handler Handler) Handler
+	FlushInput       func(Handler) Handler
 }{
-	Any:           MatchAny,
-	Not:           MatchNot,
-	Seq:           MatchSeq,
-	Min:           MatchMin,
-	Max:           MatchMax,
-	Repeated:      MatchRep,
-	Optional:      MatchOptional,
-	ZeroOrMore:    MatchZeroOrMore,
-	OneOrMore:     MatchOneOrMore,
-	MinMax:        MatchMinMax,
-	Separated:     MatchSeparated,
-	Except:        MatchExcept,
-	FollowedBy:    MatchFollowedBy,
-	NotFollowedBy: MatchNotFollowedBy,
-	FlushInput:    MakeInputFlusher,
+	Any:              MatchAny,
+	Not:              MatchNot,
+	Seq:              MatchSeq,
+	Min:              MatchMin,
+	Max:              MatchMax,
+	Repeated:         MatchRep,
+	Optional:         MatchOptional,
+	ZeroOrMore:       MatchZeroOrMore,
+	OneOrMore:        MatchOneOrMore,
+	MinMax:           MatchMinMax,
+	Separated:        MatchSeparated,
+	Except:           MatchExcept,
+	FollowedBy:       MatchFollowedBy,
+	NotFollowedBy:    MatchNotFollowedBy,
+	InOptionalBlanks: MatchInOptionalBlanks,
+	FlushInput:       MakeInputFlusher,
 }

 // A provides convenient access to a range of atoms or functions to build atoms.
@ -67,181 +69,183 @@ var C = struct {
 //
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var A = struct {
-	Char           func(...rune) Handler
-	CharRange      func(...rune) Handler
-	ByteByCallback func(func(byte) bool) Handler
-	RuneByCallback func(func(rune) bool) Handler
-	AnyByte        Handler
-	AnyRune        Handler
-	ValidRune      Handler
-	InvalidRune    Handler
-	Str            func(string) Handler
-	StrNoCase      func(string) Handler
-	EndOfLine      Handler
-	EndOfFile      Handler
-	UntilEndOfLine Handler
-	Space          Handler
-	Tab            Handler
-	CR             Handler
-	LF             Handler
-	CRLF           Handler
-	Excl           Handler
-	DoubleQuote    Handler
-	Hash           Handler
-	Dollar         Handler
-	Percent        Handler
-	Amp            Handler
-	SingleQuote    Handler
-	RoundOpen      Handler
-	LeftParen      Handler
-	RoundClose     Handler
-	RightParen     Handler
-	Asterisk       Handler
-	Multiply       Handler
-	Plus           Handler
-	Add            Handler
-	Comma          Handler
-	Minus          Handler
-	Subtract       Handler
-	Dot            Handler
-	Slash          Handler
-	Divide         Handler
-	Colon          Handler
-	Semicolon      Handler
-	AngleOpen      Handler
-	LessThan       Handler
-	Equal          Handler
-	AngleClose     Handler
-	GreaterThan    Handler
-	Question       Handler
-	At             Handler
-	SquareOpen     Handler
-	Backslash      Handler
-	SquareClose    Handler
-	Caret          Handler
-	Underscore     Handler
-	Backquote      Handler
-	CurlyOpen      Handler
-	Pipe           Handler
-	CurlyClose     Handler
-	Tilde          Handler
-	Newline        Handler
-	Blank          Handler
-	Blanks         Handler
-	Whitespace     Handler
-	UnicodeSpace   Handler
-	Digit          Handler
-	DigitNotZero   Handler
-	Digits         Handler
-	Zero           Handler
-	Boolean        Handler
-	Signed         func(Handler) Handler
-	Integer        Handler
-	IntegerBetween func(min int64, max int64) Handler
-	Decimal        Handler
-	ASCII          Handler
-	ASCIILower     Handler
-	ASCIIUpper     Handler
-	Letter         Handler
-	Lower          Handler
-	Upper          Handler
-	HexDigit       Handler
-	Octet          Handler
-	IPv4           Handler
-	IPv4CIDRMask   Handler
-	IPv4Netmask    Handler
-	IPv4Net        Handler
-	IPv6           Handler
-	IPv6CIDRMask   Handler
-	IPv6Net        Handler
+	Char            func(...rune) Handler
+	CharRange       func(...rune) Handler
+	ByteByCallback  func(func(byte) bool) Handler
+	BytesByCallback func(func(byte) bool) Handler
+	RuneByCallback  func(func(rune) bool) Handler
+	AnyByte         Handler
+	AnyRune         Handler
+	ValidRune       Handler
+	InvalidRune     Handler
+	Str             func(string) Handler
+	StrNoCase       func(string) Handler
+	EndOfLine       Handler
+	EndOfFile       Handler
+	UntilEndOfLine  Handler
+	Space           Handler
+	Tab             Handler
+	CR              Handler
+	LF              Handler
+	CRLF            Handler
+	Excl            Handler
+	DoubleQuote     Handler
+	Hash            Handler
+	Dollar          Handler
+	Percent         Handler
+	Amp             Handler
+	SingleQuote     Handler
+	RoundOpen       Handler
+	LeftParen       Handler
+	RoundClose      Handler
+	RightParen      Handler
+	Asterisk        Handler
+	Multiply        Handler
+	Plus            Handler
+	Add             Handler
+	Comma           Handler
+	Minus           Handler
+	Subtract        Handler
+	Dot             Handler
+	Slash           Handler
+	Divide          Handler
+	Colon           Handler
+	Semicolon       Handler
+	AngleOpen       Handler
+	LessThan        Handler
+	Equal           Handler
+	AngleClose      Handler
+	GreaterThan     Handler
+	Question        Handler
+	At              Handler
+	SquareOpen      Handler
+	Backslash       Handler
+	SquareClose     Handler
+	Caret           Handler
+	Underscore      Handler
+	Backquote       Handler
+	CurlyOpen       Handler
+	Pipe            Handler
+	CurlyClose      Handler
+	Tilde           Handler
+	Newline         Handler
+	Blank           Handler
+	Blanks          Handler
+	Whitespace      Handler
+	UnicodeSpace    Handler
+	Digit           Handler
+	DigitNotZero    Handler
+	Digits          Handler
+	Zero            Handler
+	Boolean         Handler
+	Signed          func(Handler) Handler
+	Integer         Handler
+	IntegerBetween  func(min int64, max int64) Handler
+	Decimal         Handler
+	ASCII           Handler
+	ASCIILower      Handler
+	ASCIIUpper      Handler
+	Letter          Handler
+	Lower           Handler
+	Upper           Handler
+	HexDigit        Handler
+	Octet           Handler
+	IPv4            Handler
+	IPv4CIDRMask    Handler
+	IPv4Netmask     Handler
+	IPv4Net         Handler
+	IPv6            Handler
+	IPv6CIDRMask    Handler
+	IPv6Net         Handler
 }{
-	Char:           MatchChar,
-	CharRange:      MatchCharRange,
-	ByteByCallback: MatchByteByCallback,
-	RuneByCallback: MatchRuneByCallback,
-	AnyByte:        MatchAnyByte(),
-	AnyRune:        MatchAnyRune(),
-	ValidRune:      MatchValidRune(),
-	InvalidRune:    MatchInvalidRune(),
-	Str:            MatchStr,
-	StrNoCase:      MatchStrNoCase,
-	EndOfFile:      MatchEndOfFile(),
-	EndOfLine:      MatchEndOfLine(),
-	UntilEndOfLine: MatchUntilEndOfLine(),
-	Space:          MatchChar(' '),
-	Tab:            MatchChar('\t'),
-	CR:             MatchChar('\r'),
-	LF:             MatchChar('\n'),
-	CRLF:           MatchStr("\r\n"),
-	Excl:           MatchChar('!'),
-	DoubleQuote:    MatchChar('"'),
-	Hash:           MatchChar('#'),
-	Dollar:         MatchChar('$'),
-	Percent:        MatchChar('%'),
-	Amp:            MatchChar('&'),
-	SingleQuote:    MatchChar('\''),
-	RoundOpen:      MatchChar('('),
-	LeftParen:      MatchChar('('),
-	RoundClose:     MatchChar(')'),
-	RightParen:     MatchChar(')'),
-	Asterisk:       MatchChar('*'),
-	Multiply:       MatchChar('*'),
-	Plus:           MatchChar('+'),
-	Add:            MatchChar('+'),
-	Comma:          MatchChar(','),
-	Minus:          MatchChar('-'),
-	Subtract:       MatchChar('-'),
-	Dot:            MatchChar('.'),
-	Slash:          MatchChar('/'),
-	Divide:         MatchChar('/'),
-	Colon:          MatchChar(':'),
-	Semicolon:      MatchChar(';'),
-	AngleOpen:      MatchChar('<'),
-	LessThan:       MatchChar('<'),
-	Equal:          MatchChar('='),
-	AngleClose:     MatchChar('>'),
-	GreaterThan:    MatchChar('>'),
-	Question:       MatchChar('?'),
-	At:             MatchChar('@'),
-	SquareOpen:     MatchChar('['),
-	Backslash:      MatchChar('\\'),
-	SquareClose:    MatchChar(']'),
-	Caret:          MatchChar('^'),
-	Underscore:     MatchChar('_'),
-	Backquote:      MatchChar('`'),
-	CurlyOpen:      MatchChar('{'),
-	Pipe:           MatchChar('|'),
-	CurlyClose:     MatchChar('}'),
-	Tilde:          MatchChar('~'),
-	Newline:        MatchNewline(),
-	Blank:          MatchBlank(),
-	Blanks:         MatchBlanks(),
-	Whitespace:     MatchWhitespace(),
-	UnicodeSpace:   MatchUnicodeSpace(),
-	Digit:          MatchDigit(),
-	DigitNotZero:   MatchDigitNotZero(),
-	Digits:         MatchDigits(),
-	Zero:           MatchChar('0'),
-	Signed:         MatchSigned,
-	Integer:        MatchInteger(true),
-	IntegerBetween: MatchIntegerBetween,
-	Decimal:        MatchDecimal(true),
-	Boolean:        MatchBoolean(),
-	ASCII:          MatchASCII(),
-	ASCIILower:     MatchASCIILower(),
-	ASCIIUpper:     MatchASCIIUpper(),
-	Letter:         MatchUnicodeLetter(),
-	Lower:          MatchUnicodeLower(),
-	Upper:          MatchUnicodeUpper(),
-	HexDigit:       MatchHexDigit(),
-	Octet:          MatchOctet(true),
-	IPv4:           MatchIPv4(true),
-	IPv4CIDRMask:   MatchIPv4CIDRMask(true),
-	IPv4Netmask:    MatchIPv4Netmask(true),
-	IPv4Net:        MatchIPv4Net(true),
-	IPv6:           MatchIPv6(true),
-	IPv6CIDRMask:   MatchIPv6CIDRMask(true),
-	IPv6Net:        MatchIPv6Net(true),
+	Char:            MatchChar,
+	CharRange:       MatchCharRange,
+	ByteByCallback:  MatchByteByCallback,
+	BytesByCallback: MatchBytesByCallback,
+	RuneByCallback:  MatchRuneByCallback,
+	AnyByte:         MatchAnyByte(),
+	AnyRune:         MatchAnyRune(),
+	ValidRune:       MatchValidRune(),
+	InvalidRune:     MatchInvalidRune(),
+	Str:             MatchStr,
+	StrNoCase:       MatchStrNoCase,
+	EndOfFile:       MatchEndOfFile(),
+	EndOfLine:       MatchEndOfLine(),
+	UntilEndOfLine:  MatchUntilEndOfLine(),
+	Space:           MatchChar(' '),
+	Tab:             MatchChar('\t'),
+	CR:              MatchChar('\r'),
+	LF:              MatchChar('\n'),
+	CRLF:            MatchStr("\r\n"),
+	Excl:            MatchChar('!'),
+	DoubleQuote:     MatchChar('"'),
+	Hash:            MatchChar('#'),
+	Dollar:          MatchChar('$'),
+	Percent:         MatchChar('%'),
+	Amp:             MatchChar('&'),
+	SingleQuote:     MatchChar('\''),
+	RoundOpen:       MatchChar('('),
+	LeftParen:       MatchChar('('),
+	RoundClose:      MatchChar(')'),
+	RightParen:      MatchChar(')'),
+	Asterisk:        MatchChar('*'),
+	Multiply:        MatchChar('*'),
+	Plus:            MatchChar('+'),
+	Add:             MatchChar('+'),
+	Comma:           MatchChar(','),
+	Minus:           MatchChar('-'),
+	Subtract:        MatchChar('-'),
+	Dot:             MatchChar('.'),
+	Slash:           MatchChar('/'),
+	Divide:          MatchChar('/'),
+	Colon:           MatchChar(':'),
+	Semicolon:       MatchChar(';'),
+	AngleOpen:       MatchChar('<'),
+	LessThan:        MatchChar('<'),
+	Equal:           MatchChar('='),
+	AngleClose:      MatchChar('>'),
+	GreaterThan:     MatchChar('>'),
+	Question:        MatchChar('?'),
+	At:              MatchChar('@'),
+	SquareOpen:      MatchChar('['),
+	Backslash:       MatchChar('\\'),
+	SquareClose:     MatchChar(']'),
+	Caret:           MatchChar('^'),
+	Underscore:      MatchChar('_'),
+	Backquote:       MatchChar('`'),
+	CurlyOpen:       MatchChar('{'),
+	Pipe:            MatchChar('|'),
+	CurlyClose:      MatchChar('}'),
+	Tilde:           MatchChar('~'),
+	Newline:         MatchNewline(),
+	Blank:           MatchBlank(),
+	Blanks:          MatchBlanks(),
+	Whitespace:      MatchWhitespace(),
+	UnicodeSpace:    MatchUnicodeSpace(),
+	Digit:           MatchDigit(),
+	DigitNotZero:    MatchDigitNotZero(),
+	Digits:          MatchDigits(),
+	Zero:            MatchChar('0'),
+	Signed:          MatchSigned,
+	Integer:         MatchInteger(true),
+	IntegerBetween:  MatchIntegerBetween,
+	Decimal:         MatchDecimal(true),
+	Boolean:         MatchBoolean(),
+	ASCII:           MatchASCII(),
+	ASCIILower:      MatchASCIILower(),
+	ASCIIUpper:      MatchASCIIUpper(),
+	Letter:          MatchUnicodeLetter(),
+	Lower:           MatchUnicodeLower(),
+	Upper:           MatchUnicodeUpper(),
+	HexDigit:        MatchHexDigit(),
+	Octet:           MatchOctet(true),
+	IPv4:            MatchIPv4(true),
+	IPv4CIDRMask:    MatchIPv4CIDRMask(true),
+	IPv4Netmask:     MatchIPv4Netmask(true),
+	IPv4Net:         MatchIPv4Net(true),
+	IPv6:            MatchIPv6(true),
+	IPv6CIDRMask:    MatchIPv6CIDRMask(true),
+	IPv6Net:         MatchIPv6Net(true),
 }

 // M provides convenient access to a range of modifiers (which in their nature are
@ -552,21 +556,32 @@ func MatchBlank() Handler {
 // like a vertical tab, then make use of MatchUnicodeSpace().
 func MatchBlanks() Handler {
 	return func(tokenAPI *API) bool {
-		// Match the first blank.
-		b, err := tokenAPI.Input.Byte.Peek(0)
-		if err != nil || (b != ' ' && b != '\t') {
-			return false
+		// Matched bytes are consumed through AcceptMulti, or through
+		// MoveCursorMulti while Output.suspended is non-zero.
+		f := tokenAPI.Input.Byte.AcceptMulti
+		if tokenAPI.Output.suspended > 0 {
+			f = tokenAPI.Input.Byte.MoveCursorMulti
 		}
-		tokenAPI.Input.Byte.Accept(b)
-
-		// Now match any number of followup blanks. We've already got
-		// a successful match at this point, so we'll always return true at the end.
+		// ok turns true as soon as at least one blank has been seen.
+		ok := false
 		for {
-			b, err := tokenAPI.Input.Byte.Peek(0)
-			if err != nil || (b != ' ' && b != '\t') {
-				return true
+			// Inspect the input one chunk at a time instead of one byte at a
+			// time (presumably: offset 0, at most 128 bytes per peek).
+			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
+			for i, b := range chunk {
+				if b != ' ' && b != '\t' {
+					// First non-blank: consume the blanks before it and stop.
+					if i > 0 {
+						f(chunk[:i]...)
+					}
+					return ok
+				}
+				ok = true
 			}
-			tokenAPI.Input.Byte.Accept(b)
+			if err != nil {
+				if err == io.EOF {
+					// The input ends in blanks: consume what is left.
+					if len(chunk) > 0 {
+						f(chunk...)
+					}
+					return ok
+				}
+				// NOTE(review): any other error yields false, even when blanks
+				// from earlier chunks were already consumed through f.
+				return false
+			}
+			// The whole chunk is blank: consume it and look at the next one.
+			f(chunk...)
 		}
 	}
 }
@ -576,37 +591,32 @@ func MatchBlanks() Handler {
 // carriage return '\r' followed by a newline '\n' (CRLF).
 func MatchWhitespace() Handler {
 	return func(tokenAPI *API) bool {
-		// Match the first whitespace.
-		b1, err := tokenAPI.Input.Byte.Peek(0)
-		if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
-			return false
+		// Matched bytes are consumed through AcceptMulti, or through
+		// MoveCursorMulti while Output.suspended is non-zero.
+		f := tokenAPI.Input.Byte.AcceptMulti
+		if tokenAPI.Output.suspended > 0 {
+			f = tokenAPI.Input.Byte.MoveCursorMulti
 		}
-		if b1 == '\r' {
-			b2, err := tokenAPI.Input.Byte.Peek(1)
-			if err != nil || b2 != '\n' {
+		// ok turns true as soon as at least one whitespace byte has been seen.
+		ok := false
+		for {
+			// Inspect the input one chunk at a time instead of one byte at a
+			// time (presumably: offset 0, at most 128 bytes per peek).
+			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
+			for i, b := range chunk {
+				// NOTE(review): every '\r' is accepted here, also when it is not
+				// followed by '\n'. The removed implementation (and the doc
+				// comment above) only accepted '\r' as part of CRLF - confirm
+				// that this change is intended.
+				if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
+					// First non-whitespace byte: consume what precedes it and stop.
+					if i > 0 {
+						f(chunk[:i]...)
+					}
+					return ok
+				}
+				ok = true
+			}
+			if err != nil {
+				if err == io.EOF {
+					// The input ends in whitespace: consume what is left.
+					if len(chunk) > 0 {
+						f(chunk...)
+					}
+					return ok
+				}
+				// NOTE(review): any other error yields false, even when bytes
+				// from earlier chunks were already consumed through f.
 				return false
 			}
-			tokenAPI.Input.Byte.AcceptMulti(b1, b2)
-		} else {
-			tokenAPI.Input.Byte.Accept(b1)
-		}
-
-		// Now match any number of followup whitespace. We've already got
-		// a successful match at this point, so we'll always return true at the end.
-		for {
-			b1, err := tokenAPI.Input.Byte.Peek(0)
-			if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
-				return true
-			}
-			if b1 == '\r' {
-				b2, err := tokenAPI.Input.Byte.Peek(1)
-				if err != nil || b2 != '\n' {
-					return true
-				}
-				tokenAPI.Input.Byte.AcceptMulti(b1, b2)
-			} else {
-				tokenAPI.Input.Byte.Accept(b1)
-			}
+			// The whole chunk is whitespace: consume it and look at the next one.
+			f(chunk...)
 		}
 	}
 }
@ -620,9 +630,6 @@ func MatchUnicodeSpace() Handler {
 // MatchByteByCallback creates a Handler that matches a single byte from the
 // input against the provided callback function. When the callback returns true,
 // it is considered a match.
-//
-// Note that the callback function matches the signature of the unicode.Is* functions,
-// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
 func MatchByteByCallback(callback func(byte) bool) Handler {
 	return func(tokenAPI *API) bool {
 		b, err := tokenAPI.Input.Byte.Peek(0)
@ -634,6 +641,41 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
 	}
 }

+// MatchBytesByCallback creates a Handler that matches a run of one or more
+// consecutive bytes from the input. A byte belongs to the run for as long as
+// the provided callback returns true for it; the Handler reports a match when
+// the run contains at least one byte.
+//
+// The matched bytes are consumed with AcceptMulti, or with MoveCursorMulti
+// while the output is suspended.
+func MatchBytesByCallback(callback func(byte) bool) Handler {
+	return func(tokenAPI *API) bool {
+		consume := tokenAPI.Input.Byte.AcceptMulti
+		if tokenAPI.Output.suspended > 0 {
+			consume = tokenAPI.Input.Byte.MoveCursorMulti
+		}
+		matched := false
+		for {
+			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
+
+			// Determine how many leading bytes of this chunk satisfy the callback.
+			n := 0
+			for n < len(chunk) && callback(chunk[n]) {
+				n++
+			}
+			if n > 0 {
+				matched = true
+			}
+
+			switch {
+			case n < len(chunk):
+				// The run ends inside this chunk.
+				if n > 0 {
+					consume(chunk[:n]...)
+				}
+				return matched
+			case err == nil:
+				// The whole chunk matched and more input may follow.
+				consume(chunk...)
+			case err == io.EOF:
+				// The whole chunk matched and the input is exhausted.
+				if len(chunk) > 0 {
+					consume(chunk...)
+				}
+				return matched
+			default:
+				// Any other read error is reported as "no match".
+				return false
+			}
+		}
+	}
+}
+
 // MatchRuneByCallback creates a Handler that matches a single rune from the
 // input against the provided callback function. When the callback returns true,
 // it is considered a match.
@ -947,6 +989,37 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
 	}
 }

+// MatchInOptionalBlanks creates a Handler that applies the provided handler,
+// allowing it to be surrounded by optional blanks (spaces and tabs).
+//
+// Blanks in front of the handler's match are always skipped. Blanks following
+// it are only skipped when the handler matched. The blanks are skipped using
+// skipBlanks, which moves the input cursor over them (presumably without
+// adding them to the output - confirm against MoveCursorMulti).
+func MatchInOptionalBlanks(handler Handler) Handler {
+	return func(tokenAPI *API) bool {
+		skipBlanks(tokenAPI)
+		matched := handler(tokenAPI)
+		if matched {
+			skipBlanks(tokenAPI)
+		}
+		return matched
+	}
+}
+
+// skipBlanks moves the input cursor past a run of zero or more blanks (spaces
+// and tabs) at the current input position, using MoveCursorMulti.
+//
+// The input is inspected in chunks of up to 128 bytes. Scanning stops at the
+// first non-blank byte, on any read error (including io.EOF), or when no more
+// bytes are available.
+func skipBlanks(tokenAPI *API) {
+	for {
+		bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
+		for i, b := range bs {
+			if b != ' ' && b != '\t' {
+				// First non-blank: skip the blanks before it and stop.
+				if i > 0 {
+					tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...)
+				}
+				return
+			}
+		}
+		// The whole chunk consists of blanks. It must be skipped before the
+		// next peek, otherwise the same chunk would be peeked over and over.
+		if len(bs) > 0 {
+			tokenAPI.Input.Byte.MoveCursorMulti(bs...)
+		}
+		// Stop on a read error, or when no progress can be made.
+		if err != nil || len(bs) == 0 {
+			return
+		}
+	}
+}
+
 // MakeInputFlusher creates a Handler that will flush the input buffer when the
 // provided handler matches.
 //
@ -1037,31 +1110,35 @@ func MatchUntilEndOfLine() Handler {
 			f = tokenAPI.Input.Byte.MoveCursorMulti
 		}
 		for {
-			bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
+			chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
 			state := 0
-			for i, b := range bs {
+			ok := false
+			for i, b := range chunk {
 				if b == '\r' {
 					state = 1
 					continue
 				}
 				if b == '\n' {
 					if state == 1 {
-						f(bs[:i+1]...)
-					} else {
-						f(bs[:i]...)
+						f(chunk[:i+1]...)
+					} else if i > 0 {
+						f(chunk[:i]...)
 					}
-					return true
+					return ok
 				}
 				state = 0
+				ok = true
 			}
 			if err != nil {
 				if err == io.EOF {
-					f(bs...)
-					return true
+					if len(chunk) > 0 {
+						f(chunk...)
+					}
+					return ok
 				}
 				return false
 			}
-			f(bs...)
+			f(chunk...)
 		}
 	}
 }
@ -1350,50 +1427,41 @@ func MatchHexDigit() Handler {
 // stripped from the octet.
 func MatchOctet(normalize bool) Handler {
 	return func(tokenAPI *API) bool {
-		// Digit 1
-		b0, err := tokenAPI.Input.Byte.Peek(0)
-		if err != nil || b0 < '0' || b0 > '9' {
+		// An octet has at most three digits. The error is deliberately ignored:
+		// a short (or empty) chunk is handled by the digit scan below.
+		chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
+		value := 0 // numeric value of the digits scanned so far
+		start := 0 // number of leading zeroes
+		end := 0   // number of digits
+		for _, b := range chunk {
+			if b < '0' || b > '9' {
+				break
+			}
+			if b == '0' && value == 0 {
+				start++
+			} else {
+				value = value*10 + int(b-'0')
+			}
+			end++
+		}
+
+		// No match when there is no leading digit at all (which includes empty
+		// input), or when the value does not fit in an octet.
+		if end == 0 || value > 255 {
 			return false
 		}

-		// Digit 2
-		b1, err := tokenAPI.Input.Byte.Peek(1)
-		if err != nil || b1 < '0' || b1 > '9' {
-			// Output 1-digit octet.
-			tokenAPI.Input.Byte.Accept(b0)
-			return true
-		}
-
-		// Digit 3
-		b2, err := tokenAPI.Input.Byte.Peek(2)
-		if err != nil || b2 < '0' || b2 > '9' {
-			// Output 2-digit octet.
-			if normalize && b0 == '0' {
-				tokenAPI.Input.Byte.MoveCursor(b0)
-				tokenAPI.Input.Byte.Accept(b1)
-			} else {
-				tokenAPI.Input.Byte.AcceptMulti(b0, b1)
+		if normalize {
+			// For an all-zero octet, keep the last '0' so that "0" is accepted.
+			// end >= 1 at this point, so start cannot drop below zero.
+			if value == 0 {
+				start--
 			}
-			return true
-		}
-
-		// The value of the octet must be between 0 - 255.
-		if b0 > '2' || (b0 == '2' && b1 > '5') || (b0 == '2' && b1 == '5' && b2 > '5') {
-			return false
-		}
-
-		// Output 3-digit octet.
-		if normalize && b0 == '0' {
-			tokenAPI.Input.Byte.MoveCursor(b0)
-			if b1 == '0' {
-				tokenAPI.Input.Byte.MoveCursor(b1)
-			} else {
-				tokenAPI.Input.Byte.Accept(b1)
+			// Move over the stripped leading zeroes, then accept the rest.
+			if start > 0 {
+				tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
 			}
-			tokenAPI.Input.Byte.Accept(b2)
+			tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
 		} else {
-			tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
+			tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
 		}
+
 		return true
 	}
 }
--- a/tokenize/handlers_builtin_test.go
+++ b/tokenize/handlers_builtin_test.go
@ -267,7 +267,7 @@ func TestIPv4Atoms(t *testing.T) {
 		{"256123", tokenize.MatchOctet(false), false, ""},
 		{"300", tokenize.MatchOctet(false), false, ""},

-		// Octet.
+		// Octet.
 		{"0", tokenize.MatchOctet(false), true, "0"},
 		{"02", tokenize.MatchOctet(false), true, "02"},
 		{"003", tokenize.MatchOctet(false), true, "003"},