More speed improvements.
This commit is contained in:
parent
8ef9aed096
commit
b9cc91c0ae
|
@ -8,36 +8,6 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
// BenchmarkMemclrOptimization measures resetting every element of a small
// slice of structs to its zero value by means of a range loop.
func BenchmarkMemclrOptimization(b *testing.B) {
	// TODO use or cleanup this one and the next. I'm playing around here.
	type item struct {
		a int
		b string
	}
	items := []item{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}

	for round := b.N; round > 0; round-- {
		// Zero the elements one by one through a range loop over the slice.
		for idx := range items {
			items[idx] = item{}
		}
	}
}
|
||||
|
||||
// BenchmarkCodedClear measures resetting the three elements of a small slice
// of structs to their zero value using one explicit assignment per element.
func BenchmarkCodedClear(b *testing.B) {
	type item struct {
		a int
		b string
	}
	items := []item{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}

	for round := b.N; round > 0; round-- {
		// Hand-written clear: every index is spelled out, no inner loop.
		items[0] = item{}
		items[1] = item{}
		items[2] = item{}
	}
}
|
||||
|
||||
// ExampleNewAPI demonstrates calling tokenize.NewAPI with a string as its
// input argument. The example declares no expected output.
func ExampleNewAPI() {
|
||||
tokenize.NewAPI("The input that the API will handle")
|
||||
}
|
||||
|
|
|
@ -25,37 +25,39 @@ import (
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
Any func(...Handler) Handler
|
||||
Not func(Handler) Handler
|
||||
Seq func(...Handler) Handler
|
||||
Min func(min int, handler Handler) Handler
|
||||
Max func(max int, handler Handler) Handler
|
||||
Repeated func(times int, handler Handler) Handler
|
||||
Optional func(Handler) Handler
|
||||
ZeroOrMore func(Handler) Handler
|
||||
OneOrMore func(Handler) Handler
|
||||
MinMax func(min int, max int, handler Handler) Handler
|
||||
Separated func(separator Handler, separated Handler) Handler
|
||||
Except func(except Handler, handler Handler) Handler
|
||||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
FlushInput func(Handler) Handler
|
||||
Any func(...Handler) Handler
|
||||
Not func(Handler) Handler
|
||||
Seq func(...Handler) Handler
|
||||
Min func(min int, handler Handler) Handler
|
||||
Max func(max int, handler Handler) Handler
|
||||
Repeated func(times int, handler Handler) Handler
|
||||
Optional func(Handler) Handler
|
||||
ZeroOrMore func(Handler) Handler
|
||||
OneOrMore func(Handler) Handler
|
||||
MinMax func(min int, max int, handler Handler) Handler
|
||||
Separated func(separator Handler, separated Handler) Handler
|
||||
Except func(except Handler, handler Handler) Handler
|
||||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||
InOptionalBlanks func(handler Handler) Handler
|
||||
FlushInput func(Handler) Handler
|
||||
}{
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
Seq: MatchSeq,
|
||||
Min: MatchMin,
|
||||
Max: MatchMax,
|
||||
Repeated: MatchRep,
|
||||
Optional: MatchOptional,
|
||||
ZeroOrMore: MatchZeroOrMore,
|
||||
OneOrMore: MatchOneOrMore,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
FollowedBy: MatchFollowedBy,
|
||||
NotFollowedBy: MatchNotFollowedBy,
|
||||
FlushInput: MakeInputFlusher,
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
Seq: MatchSeq,
|
||||
Min: MatchMin,
|
||||
Max: MatchMax,
|
||||
Repeated: MatchRep,
|
||||
Optional: MatchOptional,
|
||||
ZeroOrMore: MatchZeroOrMore,
|
||||
OneOrMore: MatchOneOrMore,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
FollowedBy: MatchFollowedBy,
|
||||
NotFollowedBy: MatchNotFollowedBy,
|
||||
InOptionalBlanks: MatchInOptionalBlanks,
|
||||
FlushInput: MakeInputFlusher,
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||
|
@ -67,181 +69,183 @@ var C = struct {
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
Char func(...rune) Handler
|
||||
CharRange func(...rune) Handler
|
||||
ByteByCallback func(func(byte) bool) Handler
|
||||
RuneByCallback func(func(rune) bool) Handler
|
||||
AnyByte Handler
|
||||
AnyRune Handler
|
||||
ValidRune Handler
|
||||
InvalidRune Handler
|
||||
Str func(string) Handler
|
||||
StrNoCase func(string) Handler
|
||||
EndOfLine Handler
|
||||
EndOfFile Handler
|
||||
UntilEndOfLine Handler
|
||||
Space Handler
|
||||
Tab Handler
|
||||
CR Handler
|
||||
LF Handler
|
||||
CRLF Handler
|
||||
Excl Handler
|
||||
DoubleQuote Handler
|
||||
Hash Handler
|
||||
Dollar Handler
|
||||
Percent Handler
|
||||
Amp Handler
|
||||
SingleQuote Handler
|
||||
RoundOpen Handler
|
||||
LeftParen Handler
|
||||
RoundClose Handler
|
||||
RightParen Handler
|
||||
Asterisk Handler
|
||||
Multiply Handler
|
||||
Plus Handler
|
||||
Add Handler
|
||||
Comma Handler
|
||||
Minus Handler
|
||||
Subtract Handler
|
||||
Dot Handler
|
||||
Slash Handler
|
||||
Divide Handler
|
||||
Colon Handler
|
||||
Semicolon Handler
|
||||
AngleOpen Handler
|
||||
LessThan Handler
|
||||
Equal Handler
|
||||
AngleClose Handler
|
||||
GreaterThan Handler
|
||||
Question Handler
|
||||
At Handler
|
||||
SquareOpen Handler
|
||||
Backslash Handler
|
||||
SquareClose Handler
|
||||
Caret Handler
|
||||
Underscore Handler
|
||||
Backquote Handler
|
||||
CurlyOpen Handler
|
||||
Pipe Handler
|
||||
CurlyClose Handler
|
||||
Tilde Handler
|
||||
Newline Handler
|
||||
Blank Handler
|
||||
Blanks Handler
|
||||
Whitespace Handler
|
||||
UnicodeSpace Handler
|
||||
Digit Handler
|
||||
DigitNotZero Handler
|
||||
Digits Handler
|
||||
Zero Handler
|
||||
Boolean Handler
|
||||
Signed func(Handler) Handler
|
||||
Integer Handler
|
||||
IntegerBetween func(min int64, max int64) Handler
|
||||
Decimal Handler
|
||||
ASCII Handler
|
||||
ASCIILower Handler
|
||||
ASCIIUpper Handler
|
||||
Letter Handler
|
||||
Lower Handler
|
||||
Upper Handler
|
||||
HexDigit Handler
|
||||
Octet Handler
|
||||
IPv4 Handler
|
||||
IPv4CIDRMask Handler
|
||||
IPv4Netmask Handler
|
||||
IPv4Net Handler
|
||||
IPv6 Handler
|
||||
IPv6CIDRMask Handler
|
||||
IPv6Net Handler
|
||||
Char func(...rune) Handler
|
||||
CharRange func(...rune) Handler
|
||||
ByteByCallback func(func(byte) bool) Handler
|
||||
BytesByCallback func(func(byte) bool) Handler
|
||||
RuneByCallback func(func(rune) bool) Handler
|
||||
AnyByte Handler
|
||||
AnyRune Handler
|
||||
ValidRune Handler
|
||||
InvalidRune Handler
|
||||
Str func(string) Handler
|
||||
StrNoCase func(string) Handler
|
||||
EndOfLine Handler
|
||||
EndOfFile Handler
|
||||
UntilEndOfLine Handler
|
||||
Space Handler
|
||||
Tab Handler
|
||||
CR Handler
|
||||
LF Handler
|
||||
CRLF Handler
|
||||
Excl Handler
|
||||
DoubleQuote Handler
|
||||
Hash Handler
|
||||
Dollar Handler
|
||||
Percent Handler
|
||||
Amp Handler
|
||||
SingleQuote Handler
|
||||
RoundOpen Handler
|
||||
LeftParen Handler
|
||||
RoundClose Handler
|
||||
RightParen Handler
|
||||
Asterisk Handler
|
||||
Multiply Handler
|
||||
Plus Handler
|
||||
Add Handler
|
||||
Comma Handler
|
||||
Minus Handler
|
||||
Subtract Handler
|
||||
Dot Handler
|
||||
Slash Handler
|
||||
Divide Handler
|
||||
Colon Handler
|
||||
Semicolon Handler
|
||||
AngleOpen Handler
|
||||
LessThan Handler
|
||||
Equal Handler
|
||||
AngleClose Handler
|
||||
GreaterThan Handler
|
||||
Question Handler
|
||||
At Handler
|
||||
SquareOpen Handler
|
||||
Backslash Handler
|
||||
SquareClose Handler
|
||||
Caret Handler
|
||||
Underscore Handler
|
||||
Backquote Handler
|
||||
CurlyOpen Handler
|
||||
Pipe Handler
|
||||
CurlyClose Handler
|
||||
Tilde Handler
|
||||
Newline Handler
|
||||
Blank Handler
|
||||
Blanks Handler
|
||||
Whitespace Handler
|
||||
UnicodeSpace Handler
|
||||
Digit Handler
|
||||
DigitNotZero Handler
|
||||
Digits Handler
|
||||
Zero Handler
|
||||
Boolean Handler
|
||||
Signed func(Handler) Handler
|
||||
Integer Handler
|
||||
IntegerBetween func(min int64, max int64) Handler
|
||||
Decimal Handler
|
||||
ASCII Handler
|
||||
ASCIILower Handler
|
||||
ASCIIUpper Handler
|
||||
Letter Handler
|
||||
Lower Handler
|
||||
Upper Handler
|
||||
HexDigit Handler
|
||||
Octet Handler
|
||||
IPv4 Handler
|
||||
IPv4CIDRMask Handler
|
||||
IPv4Netmask Handler
|
||||
IPv4Net Handler
|
||||
IPv6 Handler
|
||||
IPv6CIDRMask Handler
|
||||
IPv6Net Handler
|
||||
}{
|
||||
Char: MatchChar,
|
||||
CharRange: MatchCharRange,
|
||||
ByteByCallback: MatchByteByCallback,
|
||||
RuneByCallback: MatchRuneByCallback,
|
||||
AnyByte: MatchAnyByte(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
ValidRune: MatchValidRune(),
|
||||
InvalidRune: MatchInvalidRune(),
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
EndOfLine: MatchEndOfLine(),
|
||||
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||
Space: MatchChar(' '),
|
||||
Tab: MatchChar('\t'),
|
||||
CR: MatchChar('\r'),
|
||||
LF: MatchChar('\n'),
|
||||
CRLF: MatchStr("\r\n"),
|
||||
Excl: MatchChar('!'),
|
||||
DoubleQuote: MatchChar('"'),
|
||||
Hash: MatchChar('#'),
|
||||
Dollar: MatchChar('$'),
|
||||
Percent: MatchChar('%'),
|
||||
Amp: MatchChar('&'),
|
||||
SingleQuote: MatchChar('\''),
|
||||
RoundOpen: MatchChar('('),
|
||||
LeftParen: MatchChar('('),
|
||||
RoundClose: MatchChar(')'),
|
||||
RightParen: MatchChar(')'),
|
||||
Asterisk: MatchChar('*'),
|
||||
Multiply: MatchChar('*'),
|
||||
Plus: MatchChar('+'),
|
||||
Add: MatchChar('+'),
|
||||
Comma: MatchChar(','),
|
||||
Minus: MatchChar('-'),
|
||||
Subtract: MatchChar('-'),
|
||||
Dot: MatchChar('.'),
|
||||
Slash: MatchChar('/'),
|
||||
Divide: MatchChar('/'),
|
||||
Colon: MatchChar(':'),
|
||||
Semicolon: MatchChar(';'),
|
||||
AngleOpen: MatchChar('<'),
|
||||
LessThan: MatchChar('<'),
|
||||
Equal: MatchChar('='),
|
||||
AngleClose: MatchChar('>'),
|
||||
GreaterThan: MatchChar('>'),
|
||||
Question: MatchChar('?'),
|
||||
At: MatchChar('@'),
|
||||
SquareOpen: MatchChar('['),
|
||||
Backslash: MatchChar('\\'),
|
||||
SquareClose: MatchChar(']'),
|
||||
Caret: MatchChar('^'),
|
||||
Underscore: MatchChar('_'),
|
||||
Backquote: MatchChar('`'),
|
||||
CurlyOpen: MatchChar('{'),
|
||||
Pipe: MatchChar('|'),
|
||||
CurlyClose: MatchChar('}'),
|
||||
Tilde: MatchChar('~'),
|
||||
Newline: MatchNewline(),
|
||||
Blank: MatchBlank(),
|
||||
Blanks: MatchBlanks(),
|
||||
Whitespace: MatchWhitespace(),
|
||||
UnicodeSpace: MatchUnicodeSpace(),
|
||||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
Zero: MatchChar('0'),
|
||||
Signed: MatchSigned,
|
||||
Integer: MatchInteger(true),
|
||||
IntegerBetween: MatchIntegerBetween,
|
||||
Decimal: MatchDecimal(true),
|
||||
Boolean: MatchBoolean(),
|
||||
ASCII: MatchASCII(),
|
||||
ASCIILower: MatchASCIILower(),
|
||||
ASCIIUpper: MatchASCIIUpper(),
|
||||
Letter: MatchUnicodeLetter(),
|
||||
Lower: MatchUnicodeLower(),
|
||||
Upper: MatchUnicodeUpper(),
|
||||
HexDigit: MatchHexDigit(),
|
||||
Octet: MatchOctet(true),
|
||||
IPv4: MatchIPv4(true),
|
||||
IPv4CIDRMask: MatchIPv4CIDRMask(true),
|
||||
IPv4Netmask: MatchIPv4Netmask(true),
|
||||
IPv4Net: MatchIPv4Net(true),
|
||||
IPv6: MatchIPv6(true),
|
||||
IPv6CIDRMask: MatchIPv6CIDRMask(true),
|
||||
IPv6Net: MatchIPv6Net(true),
|
||||
Char: MatchChar,
|
||||
CharRange: MatchCharRange,
|
||||
ByteByCallback: MatchByteByCallback,
|
||||
BytesByCallback: MatchBytesByCallback,
|
||||
RuneByCallback: MatchRuneByCallback,
|
||||
AnyByte: MatchAnyByte(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
ValidRune: MatchValidRune(),
|
||||
InvalidRune: MatchInvalidRune(),
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
EndOfLine: MatchEndOfLine(),
|
||||
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||
Space: MatchChar(' '),
|
||||
Tab: MatchChar('\t'),
|
||||
CR: MatchChar('\r'),
|
||||
LF: MatchChar('\n'),
|
||||
CRLF: MatchStr("\r\n"),
|
||||
Excl: MatchChar('!'),
|
||||
DoubleQuote: MatchChar('"'),
|
||||
Hash: MatchChar('#'),
|
||||
Dollar: MatchChar('$'),
|
||||
Percent: MatchChar('%'),
|
||||
Amp: MatchChar('&'),
|
||||
SingleQuote: MatchChar('\''),
|
||||
RoundOpen: MatchChar('('),
|
||||
LeftParen: MatchChar('('),
|
||||
RoundClose: MatchChar(')'),
|
||||
RightParen: MatchChar(')'),
|
||||
Asterisk: MatchChar('*'),
|
||||
Multiply: MatchChar('*'),
|
||||
Plus: MatchChar('+'),
|
||||
Add: MatchChar('+'),
|
||||
Comma: MatchChar(','),
|
||||
Minus: MatchChar('-'),
|
||||
Subtract: MatchChar('-'),
|
||||
Dot: MatchChar('.'),
|
||||
Slash: MatchChar('/'),
|
||||
Divide: MatchChar('/'),
|
||||
Colon: MatchChar(':'),
|
||||
Semicolon: MatchChar(';'),
|
||||
AngleOpen: MatchChar('<'),
|
||||
LessThan: MatchChar('<'),
|
||||
Equal: MatchChar('='),
|
||||
AngleClose: MatchChar('>'),
|
||||
GreaterThan: MatchChar('>'),
|
||||
Question: MatchChar('?'),
|
||||
At: MatchChar('@'),
|
||||
SquareOpen: MatchChar('['),
|
||||
Backslash: MatchChar('\\'),
|
||||
SquareClose: MatchChar(']'),
|
||||
Caret: MatchChar('^'),
|
||||
Underscore: MatchChar('_'),
|
||||
Backquote: MatchChar('`'),
|
||||
CurlyOpen: MatchChar('{'),
|
||||
Pipe: MatchChar('|'),
|
||||
CurlyClose: MatchChar('}'),
|
||||
Tilde: MatchChar('~'),
|
||||
Newline: MatchNewline(),
|
||||
Blank: MatchBlank(),
|
||||
Blanks: MatchBlanks(),
|
||||
Whitespace: MatchWhitespace(),
|
||||
UnicodeSpace: MatchUnicodeSpace(),
|
||||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
Zero: MatchChar('0'),
|
||||
Signed: MatchSigned,
|
||||
Integer: MatchInteger(true),
|
||||
IntegerBetween: MatchIntegerBetween,
|
||||
Decimal: MatchDecimal(true),
|
||||
Boolean: MatchBoolean(),
|
||||
ASCII: MatchASCII(),
|
||||
ASCIILower: MatchASCIILower(),
|
||||
ASCIIUpper: MatchASCIIUpper(),
|
||||
Letter: MatchUnicodeLetter(),
|
||||
Lower: MatchUnicodeLower(),
|
||||
Upper: MatchUnicodeUpper(),
|
||||
HexDigit: MatchHexDigit(),
|
||||
Octet: MatchOctet(true),
|
||||
IPv4: MatchIPv4(true),
|
||||
IPv4CIDRMask: MatchIPv4CIDRMask(true),
|
||||
IPv4Netmask: MatchIPv4Netmask(true),
|
||||
IPv4Net: MatchIPv4Net(true),
|
||||
IPv6: MatchIPv6(true),
|
||||
IPv6CIDRMask: MatchIPv6CIDRMask(true),
|
||||
IPv6Net: MatchIPv6Net(true),
|
||||
}
|
||||
|
||||
// M provides convenient access to a range of modifiers (which in their nature are
|
||||
|
@ -552,21 +556,32 @@ func MatchBlank() Handler {
|
|||
// like a vertical tab, then make use of MatchUnicodeSpace().
|
||||
func MatchBlanks() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Match the first blank.
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b != ' ' && b != '\t') {
|
||||
return false
|
||||
f := tokenAPI.Input.Byte.AcceptMulti
|
||||
if tokenAPI.Output.suspended > 0 {
|
||||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||
}
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
|
||||
// Now match any number of followup blanks. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
ok := false
|
||||
for {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b != ' ' && b != '\t') {
|
||||
return true
|
||||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
for i, b := range chunk {
|
||||
if b != ' ' && b != '\t' {
|
||||
if i > 0 {
|
||||
f(chunk[:i]...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
ok = true
|
||||
}
|
||||
tokenAPI.Input.Byte.Accept(b)
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
if len(chunk) > 0 {
|
||||
f(chunk...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
f(chunk...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -576,37 +591,32 @@ func MatchBlanks() Handler {
|
|||
// carriage return '\r' followed by a newline '\n' (CRLF).
|
||||
func MatchWhitespace() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Match the first whitespace.
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||
return false
|
||||
f := tokenAPI.Input.Byte.AcceptMulti
|
||||
if tokenAPI.Output.suspended > 0 {
|
||||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b2 != '\n' {
|
||||
ok := false
|
||||
for {
|
||||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
for i, b := range chunk {
|
||||
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
||||
if i > 0 {
|
||||
f(chunk[:i]...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
ok = true
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
if len(chunk) > 0 {
|
||||
f(chunk...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
} else {
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
}
|
||||
|
||||
// Now match any number of followup whitespace. We've already got
|
||||
// a successful match at this point, so we'll always return true at the end.
|
||||
for {
|
||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
||||
return true
|
||||
}
|
||||
if b1 == '\r' {
|
||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b2 != '\n' {
|
||||
return true
|
||||
}
|
||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
||||
} else {
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
}
|
||||
f(chunk...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -620,9 +630,6 @@ func MatchUnicodeSpace() Handler {
|
|||
// MatchByteByCallback creates a Handler that matches a single byte from the
|
||||
// input against the provided callback function. When the callback returns true,
|
||||
// it is considered a match.
|
||||
//
|
||||
// Note that the callback function matches the signature of the unicode.Is* functions,
|
||||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
||||
func MatchByteByCallback(callback func(byte) bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||
|
@ -634,6 +641,41 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
// MatchBytesByCallback creates a Handler that matches one or more bytes from the
|
||||
// input against the provided callback function. As long as the callback returns true,
|
||||
// it is considered a match.
|
||||
func MatchBytesByCallback(callback func(byte) bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
f := tokenAPI.Input.Byte.AcceptMulti
|
||||
if tokenAPI.Output.suspended > 0 {
|
||||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||
}
|
||||
ok := false
|
||||
for {
|
||||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
for i, b := range chunk {
|
||||
if !callback(b) {
|
||||
if i > 0 {
|
||||
f(chunk[:i]...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
ok = true
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
if len(chunk) > 0 {
|
||||
f(chunk...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
f(chunk...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
||||
// input against the provided callback function. When the callback returns true,
|
||||
// it is considered a match.
|
||||
|
@ -947,6 +989,37 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
|
|||
}
|
||||
}
|
||||
|
||||
func MatchInOptionalBlanks(handler Handler) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
skipBlanks(tokenAPI)
|
||||
if !handler(tokenAPI) {
|
||||
return false
|
||||
}
|
||||
skipBlanks(tokenAPI)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func skipBlanks(tokenAPI *API) {
|
||||
for {
|
||||
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
for i, b := range bs {
|
||||
if b != ' ' && b != '\t' {
|
||||
if i > 0 {
|
||||
tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
if len(bs) > 0 {
|
||||
tokenAPI.Input.Byte.MoveCursorMulti(bs...)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
||||
// provided handler matches.
|
||||
//
|
||||
|
@ -1037,31 +1110,35 @@ func MatchUntilEndOfLine() Handler {
|
|||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||
}
|
||||
for {
|
||||
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||
state := 0
|
||||
for i, b := range bs {
|
||||
ok := false
|
||||
for i, b := range chunk {
|
||||
if b == '\r' {
|
||||
state = 1
|
||||
continue
|
||||
}
|
||||
if b == '\n' {
|
||||
if state == 1 {
|
||||
f(bs[:i+1]...)
|
||||
} else {
|
||||
f(bs[:i]...)
|
||||
f(chunk[:i+1]...)
|
||||
} else if i > 0 {
|
||||
f(chunk[:i]...)
|
||||
}
|
||||
return true
|
||||
return ok
|
||||
}
|
||||
state = 0
|
||||
ok = true
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
f(bs...)
|
||||
return true
|
||||
if len(chunk) > 0 {
|
||||
f(chunk...)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
f(bs...)
|
||||
f(chunk...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1350,50 +1427,41 @@ func MatchHexDigit() Handler {
|
|||
// stripped from the octet.
|
||||
func MatchOctet(normalize bool) Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// Digit 1
|
||||
b0, err := tokenAPI.Input.Byte.Peek(0)
|
||||
if err != nil || b0 < '0' || b0 > '9' {
|
||||
chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
|
||||
value := 0
|
||||
start := 0
|
||||
end := 0
|
||||
for i, b := range chunk {
|
||||
if b < '0' || b > '9' {
|
||||
if i == 0 {
|
||||
return false
|
||||
}
|
||||
break
|
||||
}
|
||||
if b == '0' && value == 0 {
|
||||
start++
|
||||
} else {
|
||||
value = value*10 + int(b-'0')
|
||||
}
|
||||
end++
|
||||
}
|
||||
|
||||
if value > 255 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Digit 2
|
||||
b1, err := tokenAPI.Input.Byte.Peek(1)
|
||||
if err != nil || b1 < '0' || b1 > '9' {
|
||||
// Output 1-digit octet.
|
||||
tokenAPI.Input.Byte.Accept(b0)
|
||||
return true
|
||||
}
|
||||
|
||||
// Digit 3
|
||||
b2, err := tokenAPI.Input.Byte.Peek(2)
|
||||
if err != nil || b2 < '0' || b2 > '9' {
|
||||
// Output 2-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
} else {
|
||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1)
|
||||
if normalize {
|
||||
if value == 0 {
|
||||
start--
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// The value of the octet must be between 0 - 255.
|
||||
if b0 > '2' || (b0 == '2' && b1 > '5') || (b0 == '2' && b1 == '5' && b2 > '5') {
|
||||
return false
|
||||
}
|
||||
|
||||
// Output 3-digit octet.
|
||||
if normalize && b0 == '0' {
|
||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
||||
if b1 == '0' {
|
||||
tokenAPI.Input.Byte.MoveCursor(b1)
|
||||
} else {
|
||||
tokenAPI.Input.Byte.Accept(b1)
|
||||
if start > 0 {
|
||||
tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
|
||||
}
|
||||
tokenAPI.Input.Byte.Accept(b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
|
||||
} else {
|
||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
|
||||
tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -267,7 +267,7 @@ func TestIPv4Atoms(t *testing.T) {
|
|||
{"256123", tokenize.MatchOctet(false), false, ""},
|
||||
{"300", tokenize.MatchOctet(false), false, ""},
|
||||
|
||||
// Octet.
|
||||
// // Octet.
|
||||
{"0", tokenize.MatchOctet(false), true, "0"},
|
||||
{"02", tokenize.MatchOctet(false), true, "02"},
|
||||
{"003", tokenize.MatchOctet(false), true, "003"},
|
||||
|
|
Loading…
Reference in New Issue