More speed improvements.
This commit is contained in:
parent
8ef9aed096
commit
b9cc91c0ae
|
@ -8,36 +8,6 @@ import (
|
||||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||||
)
|
)
|
||||||
|
|
||||||
func BenchmarkMemclrOptimization(b *testing.B) {
|
|
||||||
// TODO use or cleanup this one and the next. I'm playing around here.
|
|
||||||
type s struct {
|
|
||||||
a int
|
|
||||||
b string
|
|
||||||
}
|
|
||||||
x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
|
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
for i := range x {
|
|
||||||
x[i] = s{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func BenchmarkCodedClear(b *testing.B) {
|
|
||||||
type s struct {
|
|
||||||
a int
|
|
||||||
b string
|
|
||||||
}
|
|
||||||
|
|
||||||
x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
|
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
|
||||||
x[0] = s{}
|
|
||||||
x[1] = s{}
|
|
||||||
x[2] = s{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleNewAPI() {
|
func ExampleNewAPI() {
|
||||||
tokenize.NewAPI("The input that the API will handle")
|
tokenize.NewAPI("The input that the API will handle")
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,37 +25,39 @@ import (
|
||||||
//
|
//
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
var C = struct {
|
var C = struct {
|
||||||
Any func(...Handler) Handler
|
Any func(...Handler) Handler
|
||||||
Not func(Handler) Handler
|
Not func(Handler) Handler
|
||||||
Seq func(...Handler) Handler
|
Seq func(...Handler) Handler
|
||||||
Min func(min int, handler Handler) Handler
|
Min func(min int, handler Handler) Handler
|
||||||
Max func(max int, handler Handler) Handler
|
Max func(max int, handler Handler) Handler
|
||||||
Repeated func(times int, handler Handler) Handler
|
Repeated func(times int, handler Handler) Handler
|
||||||
Optional func(Handler) Handler
|
Optional func(Handler) Handler
|
||||||
ZeroOrMore func(Handler) Handler
|
ZeroOrMore func(Handler) Handler
|
||||||
OneOrMore func(Handler) Handler
|
OneOrMore func(Handler) Handler
|
||||||
MinMax func(min int, max int, handler Handler) Handler
|
MinMax func(min int, max int, handler Handler) Handler
|
||||||
Separated func(separator Handler, separated Handler) Handler
|
Separated func(separator Handler, separated Handler) Handler
|
||||||
Except func(except Handler, handler Handler) Handler
|
Except func(except Handler, handler Handler) Handler
|
||||||
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
FollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||||
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
NotFollowedBy func(lookAhead Handler, handler Handler) Handler
|
||||||
FlushInput func(Handler) Handler
|
InOptionalBlanks func(handler Handler) Handler
|
||||||
|
FlushInput func(Handler) Handler
|
||||||
}{
|
}{
|
||||||
Any: MatchAny,
|
Any: MatchAny,
|
||||||
Not: MatchNot,
|
Not: MatchNot,
|
||||||
Seq: MatchSeq,
|
Seq: MatchSeq,
|
||||||
Min: MatchMin,
|
Min: MatchMin,
|
||||||
Max: MatchMax,
|
Max: MatchMax,
|
||||||
Repeated: MatchRep,
|
Repeated: MatchRep,
|
||||||
Optional: MatchOptional,
|
Optional: MatchOptional,
|
||||||
ZeroOrMore: MatchZeroOrMore,
|
ZeroOrMore: MatchZeroOrMore,
|
||||||
OneOrMore: MatchOneOrMore,
|
OneOrMore: MatchOneOrMore,
|
||||||
MinMax: MatchMinMax,
|
MinMax: MatchMinMax,
|
||||||
Separated: MatchSeparated,
|
Separated: MatchSeparated,
|
||||||
Except: MatchExcept,
|
Except: MatchExcept,
|
||||||
FollowedBy: MatchFollowedBy,
|
FollowedBy: MatchFollowedBy,
|
||||||
NotFollowedBy: MatchNotFollowedBy,
|
NotFollowedBy: MatchNotFollowedBy,
|
||||||
FlushInput: MakeInputFlusher,
|
InOptionalBlanks: MatchInOptionalBlanks,
|
||||||
|
FlushInput: MakeInputFlusher,
|
||||||
}
|
}
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms or functions to build atoms.
|
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||||
|
@ -67,181 +69,183 @@ var C = struct {
|
||||||
//
|
//
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
var A = struct {
|
var A = struct {
|
||||||
Char func(...rune) Handler
|
Char func(...rune) Handler
|
||||||
CharRange func(...rune) Handler
|
CharRange func(...rune) Handler
|
||||||
ByteByCallback func(func(byte) bool) Handler
|
ByteByCallback func(func(byte) bool) Handler
|
||||||
RuneByCallback func(func(rune) bool) Handler
|
BytesByCallback func(func(byte) bool) Handler
|
||||||
AnyByte Handler
|
RuneByCallback func(func(rune) bool) Handler
|
||||||
AnyRune Handler
|
AnyByte Handler
|
||||||
ValidRune Handler
|
AnyRune Handler
|
||||||
InvalidRune Handler
|
ValidRune Handler
|
||||||
Str func(string) Handler
|
InvalidRune Handler
|
||||||
StrNoCase func(string) Handler
|
Str func(string) Handler
|
||||||
EndOfLine Handler
|
StrNoCase func(string) Handler
|
||||||
EndOfFile Handler
|
EndOfLine Handler
|
||||||
UntilEndOfLine Handler
|
EndOfFile Handler
|
||||||
Space Handler
|
UntilEndOfLine Handler
|
||||||
Tab Handler
|
Space Handler
|
||||||
CR Handler
|
Tab Handler
|
||||||
LF Handler
|
CR Handler
|
||||||
CRLF Handler
|
LF Handler
|
||||||
Excl Handler
|
CRLF Handler
|
||||||
DoubleQuote Handler
|
Excl Handler
|
||||||
Hash Handler
|
DoubleQuote Handler
|
||||||
Dollar Handler
|
Hash Handler
|
||||||
Percent Handler
|
Dollar Handler
|
||||||
Amp Handler
|
Percent Handler
|
||||||
SingleQuote Handler
|
Amp Handler
|
||||||
RoundOpen Handler
|
SingleQuote Handler
|
||||||
LeftParen Handler
|
RoundOpen Handler
|
||||||
RoundClose Handler
|
LeftParen Handler
|
||||||
RightParen Handler
|
RoundClose Handler
|
||||||
Asterisk Handler
|
RightParen Handler
|
||||||
Multiply Handler
|
Asterisk Handler
|
||||||
Plus Handler
|
Multiply Handler
|
||||||
Add Handler
|
Plus Handler
|
||||||
Comma Handler
|
Add Handler
|
||||||
Minus Handler
|
Comma Handler
|
||||||
Subtract Handler
|
Minus Handler
|
||||||
Dot Handler
|
Subtract Handler
|
||||||
Slash Handler
|
Dot Handler
|
||||||
Divide Handler
|
Slash Handler
|
||||||
Colon Handler
|
Divide Handler
|
||||||
Semicolon Handler
|
Colon Handler
|
||||||
AngleOpen Handler
|
Semicolon Handler
|
||||||
LessThan Handler
|
AngleOpen Handler
|
||||||
Equal Handler
|
LessThan Handler
|
||||||
AngleClose Handler
|
Equal Handler
|
||||||
GreaterThan Handler
|
AngleClose Handler
|
||||||
Question Handler
|
GreaterThan Handler
|
||||||
At Handler
|
Question Handler
|
||||||
SquareOpen Handler
|
At Handler
|
||||||
Backslash Handler
|
SquareOpen Handler
|
||||||
SquareClose Handler
|
Backslash Handler
|
||||||
Caret Handler
|
SquareClose Handler
|
||||||
Underscore Handler
|
Caret Handler
|
||||||
Backquote Handler
|
Underscore Handler
|
||||||
CurlyOpen Handler
|
Backquote Handler
|
||||||
Pipe Handler
|
CurlyOpen Handler
|
||||||
CurlyClose Handler
|
Pipe Handler
|
||||||
Tilde Handler
|
CurlyClose Handler
|
||||||
Newline Handler
|
Tilde Handler
|
||||||
Blank Handler
|
Newline Handler
|
||||||
Blanks Handler
|
Blank Handler
|
||||||
Whitespace Handler
|
Blanks Handler
|
||||||
UnicodeSpace Handler
|
Whitespace Handler
|
||||||
Digit Handler
|
UnicodeSpace Handler
|
||||||
DigitNotZero Handler
|
Digit Handler
|
||||||
Digits Handler
|
DigitNotZero Handler
|
||||||
Zero Handler
|
Digits Handler
|
||||||
Boolean Handler
|
Zero Handler
|
||||||
Signed func(Handler) Handler
|
Boolean Handler
|
||||||
Integer Handler
|
Signed func(Handler) Handler
|
||||||
IntegerBetween func(min int64, max int64) Handler
|
Integer Handler
|
||||||
Decimal Handler
|
IntegerBetween func(min int64, max int64) Handler
|
||||||
ASCII Handler
|
Decimal Handler
|
||||||
ASCIILower Handler
|
ASCII Handler
|
||||||
ASCIIUpper Handler
|
ASCIILower Handler
|
||||||
Letter Handler
|
ASCIIUpper Handler
|
||||||
Lower Handler
|
Letter Handler
|
||||||
Upper Handler
|
Lower Handler
|
||||||
HexDigit Handler
|
Upper Handler
|
||||||
Octet Handler
|
HexDigit Handler
|
||||||
IPv4 Handler
|
Octet Handler
|
||||||
IPv4CIDRMask Handler
|
IPv4 Handler
|
||||||
IPv4Netmask Handler
|
IPv4CIDRMask Handler
|
||||||
IPv4Net Handler
|
IPv4Netmask Handler
|
||||||
IPv6 Handler
|
IPv4Net Handler
|
||||||
IPv6CIDRMask Handler
|
IPv6 Handler
|
||||||
IPv6Net Handler
|
IPv6CIDRMask Handler
|
||||||
|
IPv6Net Handler
|
||||||
}{
|
}{
|
||||||
Char: MatchChar,
|
Char: MatchChar,
|
||||||
CharRange: MatchCharRange,
|
CharRange: MatchCharRange,
|
||||||
ByteByCallback: MatchByteByCallback,
|
ByteByCallback: MatchByteByCallback,
|
||||||
RuneByCallback: MatchRuneByCallback,
|
BytesByCallback: MatchBytesByCallback,
|
||||||
AnyByte: MatchAnyByte(),
|
RuneByCallback: MatchRuneByCallback,
|
||||||
AnyRune: MatchAnyRune(),
|
AnyByte: MatchAnyByte(),
|
||||||
ValidRune: MatchValidRune(),
|
AnyRune: MatchAnyRune(),
|
||||||
InvalidRune: MatchInvalidRune(),
|
ValidRune: MatchValidRune(),
|
||||||
Str: MatchStr,
|
InvalidRune: MatchInvalidRune(),
|
||||||
StrNoCase: MatchStrNoCase,
|
Str: MatchStr,
|
||||||
EndOfFile: MatchEndOfFile(),
|
StrNoCase: MatchStrNoCase,
|
||||||
EndOfLine: MatchEndOfLine(),
|
EndOfFile: MatchEndOfFile(),
|
||||||
UntilEndOfLine: MatchUntilEndOfLine(),
|
EndOfLine: MatchEndOfLine(),
|
||||||
Space: MatchChar(' '),
|
UntilEndOfLine: MatchUntilEndOfLine(),
|
||||||
Tab: MatchChar('\t'),
|
Space: MatchChar(' '),
|
||||||
CR: MatchChar('\r'),
|
Tab: MatchChar('\t'),
|
||||||
LF: MatchChar('\n'),
|
CR: MatchChar('\r'),
|
||||||
CRLF: MatchStr("\r\n"),
|
LF: MatchChar('\n'),
|
||||||
Excl: MatchChar('!'),
|
CRLF: MatchStr("\r\n"),
|
||||||
DoubleQuote: MatchChar('"'),
|
Excl: MatchChar('!'),
|
||||||
Hash: MatchChar('#'),
|
DoubleQuote: MatchChar('"'),
|
||||||
Dollar: MatchChar('$'),
|
Hash: MatchChar('#'),
|
||||||
Percent: MatchChar('%'),
|
Dollar: MatchChar('$'),
|
||||||
Amp: MatchChar('&'),
|
Percent: MatchChar('%'),
|
||||||
SingleQuote: MatchChar('\''),
|
Amp: MatchChar('&'),
|
||||||
RoundOpen: MatchChar('('),
|
SingleQuote: MatchChar('\''),
|
||||||
LeftParen: MatchChar('('),
|
RoundOpen: MatchChar('('),
|
||||||
RoundClose: MatchChar(')'),
|
LeftParen: MatchChar('('),
|
||||||
RightParen: MatchChar(')'),
|
RoundClose: MatchChar(')'),
|
||||||
Asterisk: MatchChar('*'),
|
RightParen: MatchChar(')'),
|
||||||
Multiply: MatchChar('*'),
|
Asterisk: MatchChar('*'),
|
||||||
Plus: MatchChar('+'),
|
Multiply: MatchChar('*'),
|
||||||
Add: MatchChar('+'),
|
Plus: MatchChar('+'),
|
||||||
Comma: MatchChar(','),
|
Add: MatchChar('+'),
|
||||||
Minus: MatchChar('-'),
|
Comma: MatchChar(','),
|
||||||
Subtract: MatchChar('-'),
|
Minus: MatchChar('-'),
|
||||||
Dot: MatchChar('.'),
|
Subtract: MatchChar('-'),
|
||||||
Slash: MatchChar('/'),
|
Dot: MatchChar('.'),
|
||||||
Divide: MatchChar('/'),
|
Slash: MatchChar('/'),
|
||||||
Colon: MatchChar(':'),
|
Divide: MatchChar('/'),
|
||||||
Semicolon: MatchChar(';'),
|
Colon: MatchChar(':'),
|
||||||
AngleOpen: MatchChar('<'),
|
Semicolon: MatchChar(';'),
|
||||||
LessThan: MatchChar('<'),
|
AngleOpen: MatchChar('<'),
|
||||||
Equal: MatchChar('='),
|
LessThan: MatchChar('<'),
|
||||||
AngleClose: MatchChar('>'),
|
Equal: MatchChar('='),
|
||||||
GreaterThan: MatchChar('>'),
|
AngleClose: MatchChar('>'),
|
||||||
Question: MatchChar('?'),
|
GreaterThan: MatchChar('>'),
|
||||||
At: MatchChar('@'),
|
Question: MatchChar('?'),
|
||||||
SquareOpen: MatchChar('['),
|
At: MatchChar('@'),
|
||||||
Backslash: MatchChar('\\'),
|
SquareOpen: MatchChar('['),
|
||||||
SquareClose: MatchChar(']'),
|
Backslash: MatchChar('\\'),
|
||||||
Caret: MatchChar('^'),
|
SquareClose: MatchChar(']'),
|
||||||
Underscore: MatchChar('_'),
|
Caret: MatchChar('^'),
|
||||||
Backquote: MatchChar('`'),
|
Underscore: MatchChar('_'),
|
||||||
CurlyOpen: MatchChar('{'),
|
Backquote: MatchChar('`'),
|
||||||
Pipe: MatchChar('|'),
|
CurlyOpen: MatchChar('{'),
|
||||||
CurlyClose: MatchChar('}'),
|
Pipe: MatchChar('|'),
|
||||||
Tilde: MatchChar('~'),
|
CurlyClose: MatchChar('}'),
|
||||||
Newline: MatchNewline(),
|
Tilde: MatchChar('~'),
|
||||||
Blank: MatchBlank(),
|
Newline: MatchNewline(),
|
||||||
Blanks: MatchBlanks(),
|
Blank: MatchBlank(),
|
||||||
Whitespace: MatchWhitespace(),
|
Blanks: MatchBlanks(),
|
||||||
UnicodeSpace: MatchUnicodeSpace(),
|
Whitespace: MatchWhitespace(),
|
||||||
Digit: MatchDigit(),
|
UnicodeSpace: MatchUnicodeSpace(),
|
||||||
DigitNotZero: MatchDigitNotZero(),
|
Digit: MatchDigit(),
|
||||||
Digits: MatchDigits(),
|
DigitNotZero: MatchDigitNotZero(),
|
||||||
Zero: MatchChar('0'),
|
Digits: MatchDigits(),
|
||||||
Signed: MatchSigned,
|
Zero: MatchChar('0'),
|
||||||
Integer: MatchInteger(true),
|
Signed: MatchSigned,
|
||||||
IntegerBetween: MatchIntegerBetween,
|
Integer: MatchInteger(true),
|
||||||
Decimal: MatchDecimal(true),
|
IntegerBetween: MatchIntegerBetween,
|
||||||
Boolean: MatchBoolean(),
|
Decimal: MatchDecimal(true),
|
||||||
ASCII: MatchASCII(),
|
Boolean: MatchBoolean(),
|
||||||
ASCIILower: MatchASCIILower(),
|
ASCII: MatchASCII(),
|
||||||
ASCIIUpper: MatchASCIIUpper(),
|
ASCIILower: MatchASCIILower(),
|
||||||
Letter: MatchUnicodeLetter(),
|
ASCIIUpper: MatchASCIIUpper(),
|
||||||
Lower: MatchUnicodeLower(),
|
Letter: MatchUnicodeLetter(),
|
||||||
Upper: MatchUnicodeUpper(),
|
Lower: MatchUnicodeLower(),
|
||||||
HexDigit: MatchHexDigit(),
|
Upper: MatchUnicodeUpper(),
|
||||||
Octet: MatchOctet(true),
|
HexDigit: MatchHexDigit(),
|
||||||
IPv4: MatchIPv4(true),
|
Octet: MatchOctet(true),
|
||||||
IPv4CIDRMask: MatchIPv4CIDRMask(true),
|
IPv4: MatchIPv4(true),
|
||||||
IPv4Netmask: MatchIPv4Netmask(true),
|
IPv4CIDRMask: MatchIPv4CIDRMask(true),
|
||||||
IPv4Net: MatchIPv4Net(true),
|
IPv4Netmask: MatchIPv4Netmask(true),
|
||||||
IPv6: MatchIPv6(true),
|
IPv4Net: MatchIPv4Net(true),
|
||||||
IPv6CIDRMask: MatchIPv6CIDRMask(true),
|
IPv6: MatchIPv6(true),
|
||||||
IPv6Net: MatchIPv6Net(true),
|
IPv6CIDRMask: MatchIPv6CIDRMask(true),
|
||||||
|
IPv6Net: MatchIPv6Net(true),
|
||||||
}
|
}
|
||||||
|
|
||||||
// M provides convenient access to a range of modifiers (which in their nature are
|
// M provides convenient access to a range of modifiers (which in their nature are
|
||||||
|
@ -552,21 +556,32 @@ func MatchBlank() Handler {
|
||||||
// like a vertical tab, then make use of MatchUnicodeSpace().
|
// like a vertical tab, then make use of MatchUnicodeSpace().
|
||||||
func MatchBlanks() Handler {
|
func MatchBlanks() Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
// Match the first blank.
|
f := tokenAPI.Input.Byte.AcceptMulti
|
||||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
if tokenAPI.Output.suspended > 0 {
|
||||||
if err != nil || (b != ' ' && b != '\t') {
|
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
tokenAPI.Input.Byte.Accept(b)
|
ok := false
|
||||||
|
|
||||||
// Now match any number of followup blanks. We've already got
|
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
|
||||||
for {
|
for {
|
||||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||||
if err != nil || (b != ' ' && b != '\t') {
|
for i, b := range chunk {
|
||||||
return true
|
if b != ' ' && b != '\t' {
|
||||||
|
if i > 0 {
|
||||||
|
f(chunk[:i]...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
ok = true
|
||||||
}
|
}
|
||||||
tokenAPI.Input.Byte.Accept(b)
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
if len(chunk) > 0 {
|
||||||
|
f(chunk...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
f(chunk...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -576,37 +591,32 @@ func MatchBlanks() Handler {
|
||||||
// carriage return '\r' followed by a newline '\n' (CRLF).
|
// carriage return '\r' followed by a newline '\n' (CRLF).
|
||||||
func MatchWhitespace() Handler {
|
func MatchWhitespace() Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
// Match the first whitespace.
|
f := tokenAPI.Input.Byte.AcceptMulti
|
||||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
if tokenAPI.Output.suspended > 0 {
|
||||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
if b1 == '\r' {
|
ok := false
|
||||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
for {
|
||||||
if err != nil || b2 != '\n' {
|
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||||
|
for i, b := range chunk {
|
||||||
|
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
|
||||||
|
if i > 0 {
|
||||||
|
f(chunk[:i]...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
ok = true
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
if len(chunk) > 0 {
|
||||||
|
f(chunk...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
f(chunk...)
|
||||||
} else {
|
|
||||||
tokenAPI.Input.Byte.Accept(b1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now match any number of followup whitespace. We've already got
|
|
||||||
// a successful match at this point, so we'll always return true at the end.
|
|
||||||
for {
|
|
||||||
b1, err := tokenAPI.Input.Byte.Peek(0)
|
|
||||||
if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if b1 == '\r' {
|
|
||||||
b2, err := tokenAPI.Input.Byte.Peek(1)
|
|
||||||
if err != nil || b2 != '\n' {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
tokenAPI.Input.Byte.AcceptMulti(b1, b2)
|
|
||||||
} else {
|
|
||||||
tokenAPI.Input.Byte.Accept(b1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -620,9 +630,6 @@ func MatchUnicodeSpace() Handler {
|
||||||
// MatchByteByCallback creates a Handler that matches a single byte from the
|
// MatchByteByCallback creates a Handler that matches a single byte from the
|
||||||
// input against the provided callback function. When the callback returns true,
|
// input against the provided callback function. When the callback returns true,
|
||||||
// it is considered a match.
|
// it is considered a match.
|
||||||
//
|
|
||||||
// Note that the callback function matches the signature of the unicode.Is* functions,
|
|
||||||
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
|
|
||||||
func MatchByteByCallback(callback func(byte) bool) Handler {
|
func MatchByteByCallback(callback func(byte) bool) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
b, err := tokenAPI.Input.Byte.Peek(0)
|
b, err := tokenAPI.Input.Byte.Peek(0)
|
||||||
|
@ -634,6 +641,41 @@ func MatchByteByCallback(callback func(byte) bool) Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchBytesByCallback creates a Handler that matches one or more bytes from the
|
||||||
|
// input against the provided callback function. As long as the callback returns true,
|
||||||
|
// it is considered a match.
|
||||||
|
func MatchBytesByCallback(callback func(byte) bool) Handler {
|
||||||
|
return func(tokenAPI *API) bool {
|
||||||
|
f := tokenAPI.Input.Byte.AcceptMulti
|
||||||
|
if tokenAPI.Output.suspended > 0 {
|
||||||
|
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||||
|
}
|
||||||
|
ok := false
|
||||||
|
for {
|
||||||
|
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||||
|
for i, b := range chunk {
|
||||||
|
if !callback(b) {
|
||||||
|
if i > 0 {
|
||||||
|
f(chunk[:i]...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
ok = true
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
if len(chunk) > 0 {
|
||||||
|
f(chunk...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
f(chunk...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
||||||
// input against the provided callback function. When the callback returns true,
|
// input against the provided callback function. When the callback returns true,
|
||||||
// it is considered a match.
|
// it is considered a match.
|
||||||
|
@ -947,6 +989,37 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func MatchInOptionalBlanks(handler Handler) Handler {
|
||||||
|
return func(tokenAPI *API) bool {
|
||||||
|
skipBlanks(tokenAPI)
|
||||||
|
if !handler(tokenAPI) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
skipBlanks(tokenAPI)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func skipBlanks(tokenAPI *API) {
|
||||||
|
for {
|
||||||
|
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||||
|
for i, b := range bs {
|
||||||
|
if b != ' ' && b != '\t' {
|
||||||
|
if i > 0 {
|
||||||
|
tokenAPI.Input.Byte.MoveCursorMulti(bs[:i]...)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
if len(bs) > 0 {
|
||||||
|
tokenAPI.Input.Byte.MoveCursorMulti(bs...)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
// MakeInputFlusher creates a Handler that will flush the input buffer when the
|
||||||
// provided handler matches.
|
// provided handler matches.
|
||||||
//
|
//
|
||||||
|
@ -1037,31 +1110,35 @@ func MatchUntilEndOfLine() Handler {
|
||||||
f = tokenAPI.Input.Byte.MoveCursorMulti
|
f = tokenAPI.Input.Byte.MoveCursorMulti
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
|
||||||
state := 0
|
state := 0
|
||||||
for i, b := range bs {
|
ok := false
|
||||||
|
for i, b := range chunk {
|
||||||
if b == '\r' {
|
if b == '\r' {
|
||||||
state = 1
|
state = 1
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if b == '\n' {
|
if b == '\n' {
|
||||||
if state == 1 {
|
if state == 1 {
|
||||||
f(bs[:i+1]...)
|
f(chunk[:i+1]...)
|
||||||
} else {
|
} else if i > 0 {
|
||||||
f(bs[:i]...)
|
f(chunk[:i]...)
|
||||||
}
|
}
|
||||||
return true
|
return ok
|
||||||
}
|
}
|
||||||
state = 0
|
state = 0
|
||||||
|
ok = true
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
f(bs...)
|
if len(chunk) > 0 {
|
||||||
return true
|
f(chunk...)
|
||||||
|
}
|
||||||
|
return ok
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
f(bs...)
|
f(chunk...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1350,50 +1427,41 @@ func MatchHexDigit() Handler {
|
||||||
// stripped from the octet.
|
// stripped from the octet.
|
||||||
func MatchOctet(normalize bool) Handler {
|
func MatchOctet(normalize bool) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
// Digit 1
|
chunk, _ := tokenAPI.Input.Byte.PeekMulti(0, 3)
|
||||||
b0, err := tokenAPI.Input.Byte.Peek(0)
|
value := 0
|
||||||
if err != nil || b0 < '0' || b0 > '9' {
|
start := 0
|
||||||
|
end := 0
|
||||||
|
for i, b := range chunk {
|
||||||
|
if b < '0' || b > '9' {
|
||||||
|
if i == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if b == '0' && value == 0 {
|
||||||
|
start++
|
||||||
|
} else {
|
||||||
|
value = value*10 + int(b-'0')
|
||||||
|
}
|
||||||
|
end++
|
||||||
|
}
|
||||||
|
|
||||||
|
if value > 255 {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Digit 2
|
if normalize {
|
||||||
b1, err := tokenAPI.Input.Byte.Peek(1)
|
if value == 0 {
|
||||||
if err != nil || b1 < '0' || b1 > '9' {
|
start--
|
||||||
// Output 1-digit octet.
|
|
||||||
tokenAPI.Input.Byte.Accept(b0)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Digit 3
|
|
||||||
b2, err := tokenAPI.Input.Byte.Peek(2)
|
|
||||||
if err != nil || b2 < '0' || b2 > '9' {
|
|
||||||
// Output 2-digit octet.
|
|
||||||
if normalize && b0 == '0' {
|
|
||||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
|
||||||
tokenAPI.Input.Byte.Accept(b1)
|
|
||||||
} else {
|
|
||||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1)
|
|
||||||
}
|
}
|
||||||
return true
|
if start > 0 {
|
||||||
}
|
tokenAPI.Input.Byte.MoveCursorMulti(chunk[0:start]...)
|
||||||
|
|
||||||
// The value of the octet must be between 0 - 255.
|
|
||||||
if b0 > '2' || (b0 == '2' && b1 > '5') || (b0 == '2' && b1 == '5' && b2 > '5') {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Output 3-digit octet.
|
|
||||||
if normalize && b0 == '0' {
|
|
||||||
tokenAPI.Input.Byte.MoveCursor(b0)
|
|
||||||
if b1 == '0' {
|
|
||||||
tokenAPI.Input.Byte.MoveCursor(b1)
|
|
||||||
} else {
|
|
||||||
tokenAPI.Input.Byte.Accept(b1)
|
|
||||||
}
|
}
|
||||||
tokenAPI.Input.Byte.Accept(b2)
|
tokenAPI.Input.Byte.AcceptMulti(chunk[start:end]...)
|
||||||
} else {
|
} else {
|
||||||
tokenAPI.Input.Byte.AcceptMulti(b0, b1, b2)
|
tokenAPI.Input.Byte.AcceptMulti(chunk[0:end]...)
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -267,7 +267,7 @@ func TestIPv4Atoms(t *testing.T) {
|
||||||
{"256123", tokenize.MatchOctet(false), false, ""},
|
{"256123", tokenize.MatchOctet(false), false, ""},
|
||||||
{"300", tokenize.MatchOctet(false), false, ""},
|
{"300", tokenize.MatchOctet(false), false, ""},
|
||||||
|
|
||||||
// Octet.
|
// // Octet.
|
||||||
{"0", tokenize.MatchOctet(false), true, "0"},
|
{"0", tokenize.MatchOctet(false), true, "0"},
|
||||||
{"02", tokenize.MatchOctet(false), true, "02"},
|
{"02", tokenize.MatchOctet(false), true, "02"},
|
||||||
{"003", tokenize.MatchOctet(false), true, "003"},
|
{"003", tokenize.MatchOctet(false), true, "003"},
|
||||||
|
|
Loading…
Reference in New Issue