Yay! First version for which parsing long.toml drops below 100ms! Got an outcome of 93ms. Almost down to BurntSushi's speed level, but still with a generic parser backing. Looking good!!
This commit is contained in:
parent
ddd0ed49f6
commit
5e3e4b0f0a
|
@ -133,7 +133,7 @@ func (calc *calculator) factor(p *parse.API) {
|
||||||
var A, T = tokenize.A, tokenize.T
|
var A, T = tokenize.A, tokenize.T
|
||||||
p.Accept(A.Blanks)
|
p.Accept(A.Blanks)
|
||||||
switch {
|
switch {
|
||||||
case p.Accept(T.Float64(nil, A.Signed(A.Float))):
|
case p.Accept(T.Float64(nil, A.Signed(A.Decimal))):
|
||||||
value := p.Result.Tokens[0].Value.(float64)
|
value := p.Result.Tokens[0].Value.(float64)
|
||||||
calc.interpreter.pushValue(value)
|
calc.interpreter.pushValue(value)
|
||||||
case p.Accept(A.LeftParen):
|
case p.Accept(A.LeftParen):
|
||||||
|
|
|
@ -170,6 +170,14 @@ func (i *API) Accept() {
|
||||||
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
i.acceptRunes(i.lastRuneWidth, i.lastRune)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *API) skipBytes(bytes ...byte) {
|
||||||
|
for _, b := range bytes {
|
||||||
|
i.stackFrame.moveCursorByByte(b)
|
||||||
|
}
|
||||||
|
i.stackFrame.offset += len(bytes)
|
||||||
|
i.runeRead = false
|
||||||
|
}
|
||||||
|
|
||||||
func (i *API) acceptBytes(bytes ...byte) {
|
func (i *API) acceptBytes(bytes ...byte) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
curRuneEnd := i.stackFrame.runeEnd
|
||||||
newRuneEnd := curRuneEnd + len(bytes)
|
newRuneEnd := curRuneEnd + len(bytes)
|
||||||
|
@ -190,6 +198,14 @@ func (i *API) acceptBytes(bytes ...byte) {
|
||||||
i.runeRead = false
|
i.runeRead = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *API) skipRunes(width int, runes ...rune) {
|
||||||
|
for _, r := range runes {
|
||||||
|
i.stackFrame.moveCursorByRune(r)
|
||||||
|
}
|
||||||
|
i.stackFrame.offset += width
|
||||||
|
i.runeRead = false
|
||||||
|
}
|
||||||
|
|
||||||
func (i *API) acceptRunes(width int, runes ...rune) {
|
func (i *API) acceptRunes(width int, runes ...rune) {
|
||||||
curRuneEnd := i.stackFrame.runeEnd
|
curRuneEnd := i.stackFrame.runeEnd
|
||||||
newRuneEnd := curRuneEnd + len(runes)
|
newRuneEnd := curRuneEnd + len(runes)
|
||||||
|
|
|
@ -70,9 +70,11 @@ var A = struct {
|
||||||
Byte func(byte) Handler
|
Byte func(byte) Handler
|
||||||
Bytes func(...byte) Handler
|
Bytes func(...byte) Handler
|
||||||
ByteRange func(byte, byte) Handler
|
ByteRange func(byte, byte) Handler
|
||||||
|
ByteByCallback func(func(byte) bool) Handler
|
||||||
Rune func(rune) Handler
|
Rune func(rune) Handler
|
||||||
Runes func(...rune) Handler
|
Runes func(...rune) Handler
|
||||||
RuneRange func(rune, rune) Handler
|
RuneRange func(rune, rune) Handler
|
||||||
|
RuneByCallback func(func(rune) bool) Handler
|
||||||
Str func(string) Handler
|
Str func(string) Handler
|
||||||
StrNoCase func(string) Handler
|
StrNoCase func(string) Handler
|
||||||
EndOfLine Handler
|
EndOfLine Handler
|
||||||
|
@ -136,11 +138,11 @@ var A = struct {
|
||||||
DigitNotZero Handler
|
DigitNotZero Handler
|
||||||
Digits Handler
|
Digits Handler
|
||||||
Zero Handler
|
Zero Handler
|
||||||
Float Handler
|
|
||||||
Boolean Handler
|
Boolean Handler
|
||||||
Integer Handler
|
|
||||||
Signed func(Handler) Handler
|
Signed func(Handler) Handler
|
||||||
|
Integer Handler
|
||||||
IntegerBetween func(min int64, max int64) Handler
|
IntegerBetween func(min int64, max int64) Handler
|
||||||
|
Decimal Handler
|
||||||
ASCII Handler
|
ASCII Handler
|
||||||
ASCIILower Handler
|
ASCIILower Handler
|
||||||
ASCIIUpper Handler
|
ASCIIUpper Handler
|
||||||
|
@ -160,9 +162,11 @@ var A = struct {
|
||||||
Byte: MatchByte,
|
Byte: MatchByte,
|
||||||
Bytes: MatchBytes,
|
Bytes: MatchBytes,
|
||||||
ByteRange: MatchByteRange,
|
ByteRange: MatchByteRange,
|
||||||
|
ByteByCallback: MatchByteByCallback,
|
||||||
Rune: MatchRune,
|
Rune: MatchRune,
|
||||||
Runes: MatchRunes,
|
Runes: MatchRunes,
|
||||||
RuneRange: MatchRuneRange,
|
RuneRange: MatchRuneRange,
|
||||||
|
RuneByCallback: MatchRuneByCallback,
|
||||||
Str: MatchStr,
|
Str: MatchStr,
|
||||||
StrNoCase: MatchStrNoCase,
|
StrNoCase: MatchStrNoCase,
|
||||||
EndOfFile: MatchEndOfFile(),
|
EndOfFile: MatchEndOfFile(),
|
||||||
|
@ -172,51 +176,51 @@ var A = struct {
|
||||||
AnyRune: MatchAnyRune(),
|
AnyRune: MatchAnyRune(),
|
||||||
ValidRune: MatchValidRune(),
|
ValidRune: MatchValidRune(),
|
||||||
InvalidRune: MatchInvalidRune(),
|
InvalidRune: MatchInvalidRune(),
|
||||||
Space: MatchRune(' '),
|
Space: MatchByte(' '),
|
||||||
Tab: MatchRune('\t'),
|
Tab: MatchByte('\t'),
|
||||||
CR: MatchRune('\r'),
|
CR: MatchByte('\r'),
|
||||||
LF: MatchRune('\n'),
|
LF: MatchByte('\n'),
|
||||||
CRLF: MatchStr("\r\n"),
|
CRLF: MatchStr("\r\n"),
|
||||||
Excl: MatchRune('!'),
|
Excl: MatchByte('!'),
|
||||||
DoubleQuote: MatchRune('"'),
|
DoubleQuote: MatchByte('"'),
|
||||||
Hash: MatchRune('#'),
|
Hash: MatchByte('#'),
|
||||||
Dollar: MatchRune('$'),
|
Dollar: MatchByte('$'),
|
||||||
Percent: MatchRune('%'),
|
Percent: MatchByte('%'),
|
||||||
Amp: MatchRune('&'),
|
Amp: MatchByte('&'),
|
||||||
SingleQuote: MatchRune('\''),
|
SingleQuote: MatchByte('\''),
|
||||||
RoundOpen: MatchRune('('),
|
RoundOpen: MatchByte('('),
|
||||||
LeftParen: MatchRune('('),
|
LeftParen: MatchByte('('),
|
||||||
RoundClose: MatchRune(')'),
|
RoundClose: MatchByte(')'),
|
||||||
RightParen: MatchRune(')'),
|
RightParen: MatchByte(')'),
|
||||||
Asterisk: MatchRune('*'),
|
Asterisk: MatchByte('*'),
|
||||||
Multiply: MatchRune('*'),
|
Multiply: MatchByte('*'),
|
||||||
Plus: MatchRune('+'),
|
Plus: MatchByte('+'),
|
||||||
Add: MatchRune('+'),
|
Add: MatchByte('+'),
|
||||||
Comma: MatchRune(','),
|
Comma: MatchByte(','),
|
||||||
Minus: MatchRune('-'),
|
Minus: MatchByte('-'),
|
||||||
Subtract: MatchRune('-'),
|
Subtract: MatchByte('-'),
|
||||||
Dot: MatchRune('.'),
|
Dot: MatchByte('.'),
|
||||||
Slash: MatchRune('/'),
|
Slash: MatchByte('/'),
|
||||||
Divide: MatchRune('/'),
|
Divide: MatchByte('/'),
|
||||||
Colon: MatchRune(':'),
|
Colon: MatchByte(':'),
|
||||||
Semicolon: MatchRune(';'),
|
Semicolon: MatchByte(';'),
|
||||||
AngleOpen: MatchRune('<'),
|
AngleOpen: MatchByte('<'),
|
||||||
LessThan: MatchRune('<'),
|
LessThan: MatchByte('<'),
|
||||||
Equal: MatchRune('='),
|
Equal: MatchByte('='),
|
||||||
AngleClose: MatchRune('>'),
|
AngleClose: MatchByte('>'),
|
||||||
GreaterThan: MatchRune('>'),
|
GreaterThan: MatchByte('>'),
|
||||||
Question: MatchRune('?'),
|
Question: MatchByte('?'),
|
||||||
At: MatchRune('@'),
|
At: MatchByte('@'),
|
||||||
SquareOpen: MatchRune('['),
|
SquareOpen: MatchByte('['),
|
||||||
Backslash: MatchRune('\\'),
|
Backslash: MatchByte('\\'),
|
||||||
SquareClose: MatchRune(']'),
|
SquareClose: MatchByte(']'),
|
||||||
Caret: MatchRune('^'),
|
Caret: MatchByte('^'),
|
||||||
Underscore: MatchRune('_'),
|
Underscore: MatchByte('_'),
|
||||||
Backquote: MatchRune('`'),
|
Backquote: MatchByte('`'),
|
||||||
CurlyOpen: MatchRune('{'),
|
CurlyOpen: MatchByte('{'),
|
||||||
Pipe: MatchRune('|'),
|
Pipe: MatchByte('|'),
|
||||||
CurlyClose: MatchRune('}'),
|
CurlyClose: MatchByte('}'),
|
||||||
Tilde: MatchRune('~'),
|
Tilde: MatchByte('~'),
|
||||||
Newline: MatchNewline(),
|
Newline: MatchNewline(),
|
||||||
Blank: MatchBlank(),
|
Blank: MatchBlank(),
|
||||||
Blanks: MatchBlanks(),
|
Blanks: MatchBlanks(),
|
||||||
|
@ -225,11 +229,11 @@ var A = struct {
|
||||||
Digit: MatchDigit(),
|
Digit: MatchDigit(),
|
||||||
DigitNotZero: MatchDigitNotZero(),
|
DigitNotZero: MatchDigitNotZero(),
|
||||||
Digits: MatchDigits(),
|
Digits: MatchDigits(),
|
||||||
Zero: MatchRune('0'),
|
Zero: MatchByte('0'),
|
||||||
Integer: MatchInteger(),
|
|
||||||
Signed: MatchSigned,
|
Signed: MatchSigned,
|
||||||
|
Integer: MatchInteger(true),
|
||||||
IntegerBetween: MatchIntegerBetween,
|
IntegerBetween: MatchIntegerBetween,
|
||||||
Float: MatchFloat(),
|
Decimal: MatchDecimal(true),
|
||||||
Boolean: MatchBoolean(),
|
Boolean: MatchBoolean(),
|
||||||
ASCII: MatchASCII(),
|
ASCII: MatchASCII(),
|
||||||
ASCIILower: MatchASCIILower(),
|
ASCIILower: MatchASCIILower(),
|
||||||
|
@ -355,7 +359,7 @@ func MatchByte(expected byte) Handler {
|
||||||
|
|
||||||
// MatchRune creates a Handler function that matches against the provided rune.
|
// MatchRune creates a Handler function that matches against the provided rune.
|
||||||
func MatchRune(expected rune) Handler {
|
func MatchRune(expected rune) Handler {
|
||||||
if expected <= 127 {
|
if expected <= '\x7F' {
|
||||||
return MatchByte(byte(expected))
|
return MatchByte(byte(expected))
|
||||||
}
|
}
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
|
@ -392,7 +396,7 @@ func MatchRunes(expected ...rune) Handler {
|
||||||
onlyBytes := true
|
onlyBytes := true
|
||||||
expectedBytes := make([]byte, len(expected))
|
expectedBytes := make([]byte, len(expected))
|
||||||
for i, r := range expected {
|
for i, r := range expected {
|
||||||
if r > 255 {
|
if r > '\x7F' {
|
||||||
onlyBytes = false
|
onlyBytes = false
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -448,7 +452,7 @@ func MatchRuneRange(start rune, end rune) Handler {
|
||||||
if end < start {
|
if end < start {
|
||||||
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
callerPanic("MatchRuneRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end)
|
||||||
}
|
}
|
||||||
if end <= 127 {
|
if end <= '\x7F' {
|
||||||
return MatchByteRange(byte(start), byte(end))
|
return MatchByteRange(byte(start), byte(end))
|
||||||
}
|
}
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
|
@ -574,6 +578,23 @@ func MatchUnicodeSpace() Handler {
|
||||||
return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
|
return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchByteByCallback creates a Handler that matches a single byte from the
|
||||||
|
// input against the provided callback function. When the callback returns true,
|
||||||
|
// it is considered a match.
|
||||||
|
//
|
||||||
|
// Note that the callback receives a single byte rather than a rune, so the
|
||||||
|
// unicode.Is* functions cannot be passed directly; use MatchRuneByCallback(unicode.IsLower) for those.
|
||||||
|
func MatchByteByCallback(callback func(byte) bool) Handler {
|
||||||
|
return func(t *API) bool {
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err == nil && callback(b) {
|
||||||
|
t.acceptBytes(b)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
// MatchRuneByCallback creates a Handler that matches a single rune from the
|
||||||
// input against the provided callback function. When the callback returns true,
|
// input against the provided callback function. When the callback returns true,
|
||||||
// it is considered a match.
|
// it is considered a match.
|
||||||
|
@ -621,7 +642,7 @@ func MatchStr(expected string) Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
offset := 0
|
offset := 0
|
||||||
for _, e := range expectedRunes {
|
for _, e := range expectedRunes {
|
||||||
if e <= 127 {
|
if e <= '\x7F' {
|
||||||
b, err := t.PeekByte(offset)
|
b, err := t.PeekByte(offset)
|
||||||
if err != nil || b != byte(e) {
|
if err != nil || b != byte(e) {
|
||||||
return false
|
return false
|
||||||
|
@ -650,7 +671,7 @@ func MatchStrNoCase(expected string) Handler {
|
||||||
width := 0
|
width := 0
|
||||||
i := 0
|
i := 0
|
||||||
for _, e := range expected {
|
for _, e := range expected {
|
||||||
if e <= 127 {
|
if e <= '\x7F' {
|
||||||
b, err := t.PeekByte(width)
|
b, err := t.PeekByte(width)
|
||||||
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
|
if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) {
|
||||||
return false
|
return false
|
||||||
|
@ -732,9 +753,9 @@ func MatchNot(handler Handler) Handler {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
t.Dispose(child)
|
t.Dispose(child)
|
||||||
_, err := t.NextRune()
|
r, w, err := t.PeekRune(0)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Accept()
|
t.acceptRunes(w, r)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -924,8 +945,24 @@ func MakeInputFlusher(handler Handler) Handler {
|
||||||
//
|
//
|
||||||
// C.Signed(A.Integer)
|
// C.Signed(A.Integer)
|
||||||
func MatchSigned(handler Handler) Handler {
|
func MatchSigned(handler Handler) Handler {
|
||||||
sign := MatchOptional(MatchAny(MatchRune('+'), MatchRune('-')))
|
return func(t *API) bool {
|
||||||
return MatchSeq(sign, handler)
|
child := t.Fork()
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil {
|
||||||
|
t.Dispose(child)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if b == '-' || b == '+' {
|
||||||
|
t.acceptBytes(b)
|
||||||
|
}
|
||||||
|
if handler(t) {
|
||||||
|
t.Merge(child)
|
||||||
|
t.Dispose(child)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
t.Dispose(child)
|
||||||
|
return false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchIntegerBetween creates a Handler that checks for an integer
|
// MatchIntegerBetween creates a Handler that checks for an integer
|
||||||
|
@ -956,7 +993,7 @@ func MatchIntegerBetween(min int64, max int64) Handler {
|
||||||
func MatchEndOfFile() Handler {
|
func MatchEndOfFile() Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
_, err := t.NextRune()
|
_, err := t.PeekByte(0)
|
||||||
t.Dispose(child)
|
t.Dispose(child)
|
||||||
return err == io.EOF
|
return err == io.EOF
|
||||||
}
|
}
|
||||||
|
@ -1024,37 +1061,157 @@ func MatchInvalidRune() Handler {
|
||||||
// MatchDigit creates a Handler that checks if a single digit can be read
|
// MatchDigit creates a Handler that checks if a single digit can be read
|
||||||
// from the input.
|
// from the input.
|
||||||
func MatchDigit() Handler {
|
func MatchDigit() Handler {
|
||||||
return MatchRuneRange('0', '9')
|
return MatchByteRange('0', '9')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchDigits creates a Handler that checks if one or more digits can be read
|
// MatchDigits creates a Handler that checks if one or more digits can be read
|
||||||
// from the input.
|
// from the input.
|
||||||
func MatchDigits() Handler {
|
func MatchDigits() Handler {
|
||||||
return MatchOneOrMore(MatchDigit())
|
return func(t *API) bool {
|
||||||
|
// Check if the first character is a digit.
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
t.acceptBytes(b)
|
||||||
|
|
||||||
|
// Continue accepting bytes as long as they are digits.
|
||||||
|
for {
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
t.acceptBytes(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchDigitNotZero creates a Handler that checks if a single digit not equal
|
// MatchDigitNotZero creates a Handler that checks if a single digit not equal
|
||||||
// to zero '0' can be read from the input.
|
// to zero '0' can be read from the input.
|
||||||
func MatchDigitNotZero() Handler {
|
func MatchDigitNotZero() Handler {
|
||||||
return MatchRuneRange('1', '9')
|
return MatchByteRange('1', '9')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchInteger creates a Handler function that checks if a valid integer
|
// MatchInteger creates a Handler function that checks if a valid integer
|
||||||
// can be read from the input. In line with Go, an integer cannot start with
|
// can be read from the input.
|
||||||
// a zero. Starting with a zero is used to indicate other bases, like octal or
|
//
|
||||||
// hexadecimal.
|
// Leading zeroes are allowed. When the normalize parameter is true, these
|
||||||
func MatchInteger() Handler {
|
// will be stripped from the input.
|
||||||
justZero := MatchRune('0')
|
func MatchInteger(normalize bool) Handler {
|
||||||
integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
return func(t *API) bool {
|
||||||
return MatchAny(integer, justZero)
|
// Check if the first character is a digit.
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchFloat creates a Handler function that checks if a valid float value
|
// When normalization is requested, drop leading zeroes.
|
||||||
// can be read from the input. In case the fractional part is missing, this
|
if normalize && b == '0' {
|
||||||
// Handler will report a match, so both "123" and "123.123" will match.
|
for {
|
||||||
func MatchFloat() Handler {
|
b2, err := t.PeekByte(1)
|
||||||
digits := MatchDigits()
|
|
||||||
return MatchSeq(digits, MatchOptional(MatchSeq(MatchRune('.'), digits)))
|
// The next character is a zero, skip the leading zero and check again.
|
||||||
|
if err == nil && b2 == b {
|
||||||
|
t.skipBytes('0')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// The next character is not a zero, nor a digit at all.
|
||||||
|
// We're looking at a zero on its own here.
|
||||||
|
if err != nil || b2 < '1' || b2 > '9' {
|
||||||
|
t.acceptBytes('0')
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||||
|
t.skipBytes('0')
|
||||||
|
t.acceptBytes(b2)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue accepting bytes as long as they are digits.
|
||||||
|
for {
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
t.acceptBytes(b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchDecimal creates a Handler function that checks if a valid decimal value
|
||||||
|
// can be read from the input. In case the fractional part is missing (which is
|
||||||
|
// a valid decimal number), this Handler will report a match, so both "123" and
|
||||||
|
// "123.123" will match.
|
||||||
|
//
|
||||||
|
// Leading zeroes are allowed. When the normalize parameter is true, these
|
||||||
|
// will be stripped from the input.
|
||||||
|
func MatchDecimal(normalize bool) Handler {
|
||||||
|
return func(t *API) bool {
|
||||||
|
// Check if the first character is a digit.
|
||||||
|
b, err := t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// When normalization is requested, drop leading zeroes.
|
||||||
|
if normalize && b == '0' {
|
||||||
|
for {
|
||||||
|
b2, err := t.PeekByte(1)
|
||||||
|
|
||||||
|
// The next character is a zero, skip the leading zero and check again.
|
||||||
|
if err == nil && b2 == b {
|
||||||
|
t.skipBytes('0')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// The next character is a dot, go with the zero before the dot and
|
||||||
|
// let the upcoming code handle the dot.
|
||||||
|
if err == nil && b2 == '.' {
|
||||||
|
t.acceptBytes('0')
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// The next character is not a zero, nor a digit at all.
|
||||||
|
// We're looking at a zero on its own here.
|
||||||
|
if err != nil || b2 < '1' || b2 > '9' {
|
||||||
|
t.acceptBytes('0')
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// The next character is a digit. Skip the leading zero and go with the digit.
|
||||||
|
t.skipBytes('0')
|
||||||
|
t.acceptBytes(b2)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue accepting bytes as long as they are digits.
|
||||||
|
for {
|
||||||
|
b, err = t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
t.acceptBytes(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// No dot or no digit after a dot? Then we're done.
|
||||||
|
if b != '.' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
b, err = t.PeekByte(1)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue accepting bytes as long as they are digits.
|
||||||
|
t.acceptBytes('.', b)
|
||||||
|
for {
|
||||||
|
b, err = t.PeekByte(0)
|
||||||
|
if err != nil || b < '0' || b > '9' {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
t.acceptBytes(b)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchBoolean creates a Handler function that checks if a boolean
|
// MatchBoolean creates a Handler function that checks if a boolean
|
||||||
|
@ -1075,7 +1232,11 @@ func MatchBoolean() Handler {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if b1 == 't' || b1 == 'T' {
|
if b1 == 't' || b1 == 'T' {
|
||||||
b2, _ := t.PeekByte(1)
|
b2, err := t.PeekByte(1)
|
||||||
|
if err != nil || (b2 != 'R' && b2 != 'r') {
|
||||||
|
t.acceptBytes(b1)
|
||||||
|
return true
|
||||||
|
}
|
||||||
b3, _ := t.PeekByte(2)
|
b3, _ := t.PeekByte(2)
|
||||||
b4, err := t.PeekByte(3)
|
b4, err := t.PeekByte(3)
|
||||||
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
|
if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' {
|
||||||
|
@ -1091,11 +1252,14 @@ func MatchBoolean() Handler {
|
||||||
}
|
}
|
||||||
|
|
||||||
if b1 == 'f' || b1 == 'F' {
|
if b1 == 'f' || b1 == 'F' {
|
||||||
b2, _ := t.PeekByte(1)
|
b2, err := t.PeekByte(1)
|
||||||
|
if err != nil || (b2 != 'A' && b2 != 'a') {
|
||||||
|
t.acceptBytes(b1)
|
||||||
|
return true
|
||||||
|
}
|
||||||
b3, _ := t.PeekByte(2)
|
b3, _ := t.PeekByte(2)
|
||||||
b4, _ := t.PeekByte(3)
|
b4, _ := t.PeekByte(3)
|
||||||
b5, err := t.PeekByte(4)
|
b5, err := t.PeekByte(4)
|
||||||
|
|
||||||
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
|
if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' {
|
||||||
t.acceptBytes(b1, b2, b3, b4, b5)
|
t.acceptBytes(b1, b2, b3, b4, b5)
|
||||||
return true
|
return true
|
||||||
|
@ -1114,19 +1278,19 @@ func MatchBoolean() Handler {
|
||||||
// MatchASCII creates a Handler function that matches against any
|
// MatchASCII creates a Handler function that matches against any
|
||||||
// ASCII value on the input.
|
// ASCII value on the input.
|
||||||
func MatchASCII() Handler {
|
func MatchASCII() Handler {
|
||||||
return MatchRuneRange('\x00', '\x7F')
|
return MatchByteRange('\x00', '\x7F')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchASCIILower creates a Handler function that matches against any
|
// MatchASCIILower creates a Handler function that matches against any
|
||||||
// lower case ASCII letter on the input (a - z).
|
// lower case ASCII letter on the input (a - z).
|
||||||
func MatchASCIILower() Handler {
|
func MatchASCIILower() Handler {
|
||||||
return MatchRuneRange('a', 'z')
|
return MatchByteRange('a', 'z')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchASCIIUpper creates a Handler function that matches against any
|
// MatchASCIIUpper creates a Handler function that matches against any
|
||||||
// upper case ASCII letter on the input (A - Z).
|
// upper case ASCII letter on the input (A - Z).
|
||||||
func MatchASCIIUpper() Handler {
|
func MatchASCIIUpper() Handler {
|
||||||
return MatchRuneRange('A', 'Z')
|
return MatchByteRange('A', 'Z')
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchUnicodeLetter creates a Handler function that matches against any
|
// MatchUnicodeLetter creates a Handler function that matches against any
|
||||||
|
@ -1365,19 +1529,15 @@ func MatchIPv6Net(normalize bool) Handler {
|
||||||
// In both cases, it would match the first form.
|
// In both cases, it would match the first form.
|
||||||
func ModifyDrop(handler Handler) Handler {
|
func ModifyDrop(handler Handler) Handler {
|
||||||
return func(t *API) bool {
|
return func(t *API) bool {
|
||||||
child := t.Fork()
|
runeEnd := t.stackFrame.runeEnd
|
||||||
|
tokenEnd := t.stackFrame.tokenEnd
|
||||||
if handler(t) {
|
if handler(t) {
|
||||||
// Do a partial merge: only move the cursor and read offset forward.
|
// We keep offset and cursor updates, but rollback any runes / tokens
|
||||||
// Any produced runes and tokens are ignored and not merged to the parent
|
// that were added by the handler.
|
||||||
// (since we're dropping those here).
|
t.stackFrame.runeEnd = runeEnd
|
||||||
parent := &t.stackFrames[t.stackLevel-1]
|
t.stackFrame.tokenEnd = tokenEnd
|
||||||
parent.offset = t.stackFrame.offset
|
|
||||||
parent.line = t.stackFrame.line
|
|
||||||
parent.column = t.stackFrame.column
|
|
||||||
t.Dispose(child)
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
t.Dispose(child)
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -231,22 +231,27 @@ func TestAtoms(t *testing.T) {
|
||||||
{"F", a.HexDigit, true, "F"},
|
{"F", a.HexDigit, true, "F"},
|
||||||
{"g", a.HexDigit, false, "g"},
|
{"g", a.HexDigit, false, "g"},
|
||||||
{"G", a.HexDigit, false, "G"},
|
{"G", a.HexDigit, false, "G"},
|
||||||
|
{"09", a.Integer, true, "9"},
|
||||||
|
{"0000129", a.Integer, true, "129"},
|
||||||
{"0", a.Integer, true, "0"},
|
{"0", a.Integer, true, "0"},
|
||||||
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
|
{"00000", a.Integer, true, "0"},
|
||||||
{"1", a.Integer, true, "1"},
|
{"1", a.Integer, true, "1"},
|
||||||
{"-10X", a.Integer, false, ""},
|
{"-10X", a.Integer, false, ""},
|
||||||
{"+10X", a.Integer, false, ""},
|
{"+10X", a.Integer, false, ""},
|
||||||
{"-10X", a.Signed(a.Integer), true, "-10"},
|
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||||
{"+10X", a.Signed(a.Integer), true, "+10"},
|
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||||
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||||
{"0X", a.Float, true, "0"},
|
{"0X", a.Decimal, true, "0"},
|
||||||
{"0X", a.Float, true, "0"},
|
{"0000X", a.Decimal, true, "0"},
|
||||||
{"1X", a.Float, true, "1"},
|
{"1X", a.Decimal, true, "1"},
|
||||||
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
{"01X", a.Decimal, true, "1"},
|
||||||
{"123.321X", a.Float, true, "123.321"},
|
{"000001X", a.Decimal, true, "1"},
|
||||||
{"-3.14X", a.Float, false, ""},
|
{"1.", a.Decimal, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||||
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
{"123.321X", a.Decimal, true, "123.321"},
|
||||||
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
{"0.6X", a.Decimal, true, "0.6"},
|
||||||
|
{"-3.14X", a.Decimal, false, ""},
|
||||||
|
{"-3.14X", a.Signed(a.Decimal), true, "-3.14"},
|
||||||
|
{"-003.0014X", a.Signed(a.Decimal), true, "-3.0014"},
|
||||||
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||||
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||||
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||||
|
@ -430,8 +435,8 @@ func TestTokenMakers(t *testing.T) {
|
||||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
|
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
|
||||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
|
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
|
||||||
|
|
||||||
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
|
{`3.1415=PI`, tok.Float32("N", a.Decimal), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
|
||||||
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
|
{`24.19287=PI`, tok.Float64("O", a.Decimal), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
|
||||||
|
|
||||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||||
{Type: "P", Value: true},
|
{Type: "P", Value: true},
|
||||||
|
|
Loading…
Reference in New Issue