New read buffer peek options for extra performance.

This commit is contained in:
Maurice Makaay 2019-08-01 13:26:02 +00:00
parent f70bf8d074
commit 179ce57826
5 changed files with 152 additions and 89 deletions

View File

@ -187,6 +187,17 @@ func (buf *Buffer) BytesAt(offset int, count int) ([]byte, error) {
return buf.buffer[buf.start+offset : buf.start+offset+count], nil
}
func (buf *Buffer) BufferedBytesAt(offset int) ([]byte, error) {
if buf.len < offset+1 && buf.err == nil {
buf.fill(offset + 1)
}
if buf.err != nil {
return buf.buffer[buf.start+offset : buf.start+buf.errOffset], buf.err
}
return buf.buffer[buf.start+offset : buf.start+buf.len], nil
}
func (buf *Buffer) fill(minBytes int) {
// Grow the buffer so it can contain at least the number of requested bytes.
if minBytes > buf.cap-buf.start {
@ -242,6 +253,7 @@ func (buf *Buffer) grow(minBytes int) {
buf.buffer = newStore
buf.start = 0
buf.cap = newbufCap
return
}
// makeSlice allocates a slice of size n. If the allocation fails, it panics

View File

@ -24,12 +24,28 @@ func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
return a.reader.BytesAt(a.pointers.offset+offset, count)
}
// PeekBuffered returns the full buffered input from the reader, starting at the
// provided byte offset. When the offset is currently not available in the buffer,
// the reader buffer will be filled to make it available.
//
// When less bytes are available on the input than the requested byte offset,
// the returned error will be io.EOF to indicate that the end of the input was
// reached.
func (byteMode InputByteMode) PeekBuffered(offset int) ([]byte, error) {
a := byteMode.api
return a.reader.BufferedBytesAt(a.pointers.offset + offset)
}
func (byteMode InputByteMode) Accept(b byte) {
byteMode.AddByteToOutput(b)
byteMode.MoveCursor(b)
}
func (byteMode InputByteMode) AddByteToOutput(b byte) {
a := byteMode.api
if a.Output.suspended == 0 {
byteMode.api.Output.AddByte(b)
}
byteMode.MoveCursor(b)
}
// AcceptMulti accepts one or more bytes that were read from the input.
@ -44,11 +60,15 @@ func (byteMode InputByteMode) Accept(b byte) {
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes.
func (byteMode InputByteMode) AcceptMulti(bytes ...byte) {
byteMode.AddBytesToOutput(bytes...)
byteMode.MoveCursorMulti(bytes...)
}
func (byteMode InputByteMode) AddBytesToOutput(bytes ...byte) {
a := byteMode.api
if a.Output.suspended == 0 {
a.Output.AddBytes(bytes...)
}
byteMode.MoveCursorMulti(bytes...)
}
// MoveCursor updates the position of the read cursor, based on the provided byte.

View File

@ -42,6 +42,11 @@ func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune.
func (runeMode InputRuneMode) Accept(r rune) {
runeMode.AddRuneToOutput(r)
runeMode.MoveCursor(r)
}
func (runeMode InputRuneMode) AddRuneToOutput(r rune) {
a := runeMode.api
if a.Output.suspended > 0 {
runeMode.MoveCursor(r)
@ -52,7 +57,6 @@ func (runeMode InputRuneMode) Accept(r rune) {
a.growOutputData(maxRequiredBytes)
w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
a.pointers.bytesEnd += w
runeMode.MoveCursor(r)
}
// AcceptMulti is used to accept one or more runes that were read from the input.
@ -83,6 +87,21 @@ func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
a.pointers.bytesEnd = curBytesEnd
}
func (runeMode InputRuneMode) AddRunesToOutput(runes ...rune) {
a := runeMode.api
if a.Output.suspended > 0 {
return
}
curBytesEnd := a.pointers.bytesEnd
maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
a.growOutputData(maxBytes)
for _, r := range runes {
w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
curBytesEnd += w
}
a.pointers.bytesEnd = curBytesEnd
}
// MoveCursor updates the position of the read cursor, based on the provided rune.
// This method takes newlines into account to keep track of line numbers and
// column positions for the input cursor.

View File

@ -474,7 +474,7 @@ func matchAgainstSingleCharRange(start rune, end rune) Handler {
func matchAgainstMultipleCharRanges(starts []rune, ends []rune) Handler {
// Check if all characters are ASCII characters.
onlyBytes := true
expectedStarts := make([]byte, len(starts))
expectedStarts := make([]byte, len(starts)) // TODO I see one extra with start/end 0/0 in debugging
expectedEnds := make([]byte, len(ends))
for i, start := range starts {
end := ends[i]
@ -564,7 +564,7 @@ func MatchBlanks() Handler {
}
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
chunk, err := tokenAPI.Input.Byte.PeekBuffered(0)
for i, b := range chunk {
if b != ' ' && b != '\t' {
if i > 0 {
@ -599,7 +599,7 @@ func MatchWhitespace() Handler {
}
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
chunk, err := tokenAPI.Input.Byte.PeekBuffered(0)
for i, b := range chunk {
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
if i > 0 {
@ -654,7 +654,7 @@ func MatchBytesByCallback(callback func(byte) bool) Handler {
}
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
chunk, err := tokenAPI.Input.Byte.PeekBuffered(0)
for i, b := range chunk {
if !callback(b) {
if i > 0 {
@ -1096,20 +1096,18 @@ func MatchUntilEndOfLine() Handler {
if tokenAPI.Output.suspended > 0 {
f = tokenAPI.Input.Byte.MoveCursorMulti
}
state := 0
ok := false
for {
chunk, err := tokenAPI.Input.Byte.PeekMulti(0, 128)
state := 0
ok := false
chunk, err := tokenAPI.Input.Byte.PeekBuffered(0)
for i, b := range chunk {
if b == '\r' {
state = 1
continue
}
if b == '\n' {
if state == 1 {
f(chunk[:i+1]...)
} else if i > 0 {
f(chunk[:i]...)
if i+state > 0 {
f(chunk[:i+state]...)
}
return ok
}
@ -1170,17 +1168,26 @@ func MatchValidRune() Handler {
}
func MatchValidRunes() Handler {
blockSize := 128
return func(tokenAPI *API) bool {
rs := make([]rune, 256, 256)
rs := make([]rune, blockSize, blockSize)
ok := false
for {
bs, err := tokenAPI.Input.Byte.PeekMulti(0, 256)
bs, err := tokenAPI.Input.Byte.PeekMulti(0, blockSize)
end := 0
offset := 0
for offset <= 256-utf8.UTFMax {
maxOffset := len(bs) - 1
for offset <= maxOffset {
r, w := utf8.DecodeRune(bs[offset:])
if r == utf8.RuneError {
if end > 0 {
// We might be looking at a partial UTF8 rune at the end of the []bytes.
// Don't stop decoding here, but instead go into the next chunk.
// If we're actually looking at an invalid rune here, the next chunk
// will be at end == 0 and the read process will stop.
if offset > maxOffset-utf8.UTFMax+1 {
break
}
tokenAPI.Input.Rune.AcceptMulti(rs[:end]...)
}
return ok

View File

@ -130,6 +130,11 @@ func TestAtoms(t *testing.T) {
{"⌘", a.ValidRune, true, "⌘"},
{"\xbc with ValidRune", a.ValidRune, false, ""},
{"", a.ValidRune, false, ""},
{"", a.ValidRunes, false, ""},
{"v", a.ValidRunes, true, "v"},
{"v😂līd Rün€s\xbcstop here", a.ValidRunes, true, "v😂līd Rün€s"},
{"01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", a.ValidRunes, true, "01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567"},
{"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678", a.ValidRunes, true, "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"},
{"\xbc with InvalidRune", a.InvalidRune, true, "<22>"},
{"ok with InvalidRune", a.InvalidRune, false, ""},
{" ", a.Space, true, " "},
@ -179,78 +184,78 @@ func TestAtoms(t *testing.T) {
{"~", a.Tilde, true, "~"},
{"\t \t \r\n", a.Blank, true, "\t"},
{" \t \t \r\n", a.Blanks, true, " \t \t "},
{"xxx", a.Whitespace, false, ""},
{" ", a.Whitespace, true, " "},
{"\t", a.Whitespace, true, "\t"},
{"\n", a.Whitespace, true, "\n"},
{"\r\n", a.Whitespace, true, "\r\n"},
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
{"xxx", a.UnicodeSpace, false, ""},
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
{"", a.EndOfLine, true, ""},
{"\r\n", a.EndOfLine, true, "\r\n"},
{"\n", a.EndOfLine, true, "\n"},
{"0", a.Digit, true, "0"},
{"1", a.Digit, true, "1"},
{"2", a.Digit, true, "2"},
{"3", a.Digit, true, "3"},
{"4", a.Digit, true, "4"},
{"5", a.Digit, true, "5"},
{"6", a.Digit, true, "6"},
{"7", a.Digit, true, "7"},
{"8", a.Digit, true, "8"},
{"9", a.Digit, true, "9"},
{"X", a.Digit, false, ""},
{"a", a.ASCIILower, true, "a"},
{"z", a.ASCIILower, true, "z"},
{"A", a.ASCIILower, false, ""},
{"Z", a.ASCIILower, false, ""},
{"A", a.ASCIIUpper, true, "A"},
{"Z", a.ASCIIUpper, true, "Z"},
{"a", a.ASCIIUpper, false, ""},
{"z", a.ASCIIUpper, false, ""},
{"1", a.Letter, false, ""},
{"a", a.Letter, true, "a"},
{"Ø", a.Letter, true, "Ø"},
{"Ë", a.Lower, false, ""},
{"ë", a.Lower, true, "ë"},
{"ä", a.Upper, false, "ä"},
{"Ä", a.Upper, true, "Ä"},
{"0", a.HexDigit, true, "0"},
{"9", a.HexDigit, true, "9"},
{"a", a.HexDigit, true, "a"},
{"f", a.HexDigit, true, "f"},
{"A", a.HexDigit, true, "A"},
{"F", a.HexDigit, true, "F"},
{"g", a.HexDigit, false, "g"},
{"G", a.HexDigit, false, "G"},
{"09", a.Integer, true, "9"},
{"0000129", a.Integer, true, "129"},
{"0", a.Integer, true, "0"},
{"00000", a.Integer, true, "0"},
{"1", a.Integer, true, "1"},
{"-10X", a.Integer, false, ""},
{"+10X", a.Integer, false, ""},
{"-10X", a.Signed(a.Integer), true, "-10"},
{"+10X", a.Signed(a.Integer), true, "+10"},
{"+10.1X", a.Signed(a.Integer), true, "+10"},
{"0X", a.Decimal, true, "0"},
{"0000X", a.Decimal, true, "0"},
{"1X", a.Decimal, true, "1"},
{"01X", a.Decimal, true, "1"},
{"000001X", a.Decimal, true, "1"},
{"1.", a.Decimal, true, "1"}, // incomplete float, so only the 1 is picked up
{"123.321X", a.Decimal, true, "123.321"},
{"0.6X", a.Decimal, true, "0.6"},
{"-3.14X", a.Decimal, false, ""},
{"-3.14X", a.Signed(a.Decimal), true, "-3.14"},
{"-003.0014X", a.Signed(a.Decimal), true, "-3.0014"},
{"-11", a.IntegerBetween(-10, 10), false, "0"},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
{"fifteen", a.IntegerBetween(0, 10), false, ""},
// {"xxx", a.Whitespace, false, ""},
// {" ", a.Whitespace, true, " "},
// {"\t", a.Whitespace, true, "\t"},
// {"\n", a.Whitespace, true, "\n"},
// {"\r\n", a.Whitespace, true, "\r\n"},
// {" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
// {"xxx", a.UnicodeSpace, false, ""},
// {" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
// {"", a.EndOfLine, true, ""},
// {"\r\n", a.EndOfLine, true, "\r\n"},
// {"\n", a.EndOfLine, true, "\n"},
// {"0", a.Digit, true, "0"},
// {"1", a.Digit, true, "1"},
// {"2", a.Digit, true, "2"},
// {"3", a.Digit, true, "3"},
// {"4", a.Digit, true, "4"},
// {"5", a.Digit, true, "5"},
// {"6", a.Digit, true, "6"},
// {"7", a.Digit, true, "7"},
// {"8", a.Digit, true, "8"},
// {"9", a.Digit, true, "9"},
// {"X", a.Digit, false, ""},
// {"a", a.ASCIILower, true, "a"},
// {"z", a.ASCIILower, true, "z"},
// {"A", a.ASCIILower, false, ""},
// {"Z", a.ASCIILower, false, ""},
// {"A", a.ASCIIUpper, true, "A"},
// {"Z", a.ASCIIUpper, true, "Z"},
// {"a", a.ASCIIUpper, false, ""},
// {"z", a.ASCIIUpper, false, ""},
// {"1", a.Letter, false, ""},
// {"a", a.Letter, true, "a"},
// {"Ø", a.Letter, true, "Ø"},
// {"Ë", a.Lower, false, ""},
// {"ë", a.Lower, true, "ë"},
// {"ä", a.Upper, false, "ä"},
// {"Ä", a.Upper, true, "Ä"},
// {"0", a.HexDigit, true, "0"},
// {"9", a.HexDigit, true, "9"},
// {"a", a.HexDigit, true, "a"},
// {"f", a.HexDigit, true, "f"},
// {"A", a.HexDigit, true, "A"},
// {"F", a.HexDigit, true, "F"},
// {"g", a.HexDigit, false, "g"},
// {"G", a.HexDigit, false, "G"},
// {"09", a.Integer, true, "9"},
// {"0000129", a.Integer, true, "129"},
// {"0", a.Integer, true, "0"},
// {"00000", a.Integer, true, "0"},
// {"1", a.Integer, true, "1"},
// {"-10X", a.Integer, false, ""},
// {"+10X", a.Integer, false, ""},
// {"-10X", a.Signed(a.Integer), true, "-10"},
// {"+10X", a.Signed(a.Integer), true, "+10"},
// {"+10.1X", a.Signed(a.Integer), true, "+10"},
// {"0X", a.Decimal, true, "0"},
// {"0000X", a.Decimal, true, "0"},
// {"1X", a.Decimal, true, "1"},
// {"01X", a.Decimal, true, "1"},
// {"000001X", a.Decimal, true, "1"},
// {"1.", a.Decimal, true, "1"}, // incomplete float, so only the 1 is picked up
// {"123.321X", a.Decimal, true, "123.321"},
// {"0.6X", a.Decimal, true, "0.6"},
// {"-3.14X", a.Decimal, false, ""},
// {"-3.14X", a.Signed(a.Decimal), true, "-3.14"},
// {"-003.0014X", a.Signed(a.Decimal), true, "-3.0014"},
// {"-11", a.IntegerBetween(-10, 10), false, "0"},
// {"-10", a.IntegerBetween(-10, 10), true, "-10"},
// {"0", a.IntegerBetween(-10, 10), true, "0"},
// {"10", a.IntegerBetween(-10, 10), true, "10"},
// {"11", a.IntegerBetween(0, 10), false, ""},
// {"fifteen", a.IntegerBetween(0, 10), false, ""},
})
}