Use zero-indexed cursor positioning data inside stackframes. This simplifies some things.

Also a bit of code cleanup.
This commit is contained in:
Maurice Makaay 2019-07-24 10:34:24 +00:00
parent 802701ade5
commit 62cd84bb74
6 changed files with 99 additions and 98 deletions

View File

@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
newbufCap += defaultBufferSize
}
newStore := makeSlice(newbufCap)
copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
buf.buffer = newStore
buf.start = 0

View File

@ -71,7 +71,7 @@ import (
// can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type API struct {
stackFrames []stackFrame // the stack frames, containing stack level-specific dat
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
@ -87,8 +87,8 @@ type API struct {
type stackFrame struct {
offset int // the read offset (relative to the start of the reader buffer) for this stack frame
column int // the column at which the cursor is (0-indexed)
line int // the line at which the cursor is (0-indexed)
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
// Grow the stack frames capacity when needed.
frames := tokenAPI.stackFrames
if cap(frames) < (newStackLevel + 1) {
newFrames := make([]stackFrame, (newStackLevel+1)*2)
newFrames := make([]stackFrame, cap(frames)*2)
copy(newFrames, frames)
frames = newFrames
}
// This can be written in a shorter way, but this turned out to
// be the best way performance-wise.
parent := tokenAPI.stackFrame
child := &frames[newStackLevel]
child.offset = parent.offset
child.column = parent.column
child.line = parent.line
child.bytesStart = parent.bytesEnd
child.bytesEnd = parent.bytesEnd
child.tokenStart = parent.tokenEnd
child.tokenEnd = parent.tokenEnd
tokenAPI.stackFrame = child
frames[newStackLevel] = stackFrame{
offset: parent.offset,
bytesStart: parent.bytesEnd,
bytesEnd: parent.bytesEnd,
tokenStart: parent.tokenEnd,
tokenEnd: parent.tokenEnd,
}
tokenAPI.stackFrame = &frames[newStackLevel]
return newStackLevel
}
@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
f.tokenStart = f.tokenEnd
parent.offset = f.offset
parent.line = f.line
parent.column = f.column
if f.line > parent.line {
parent.line += f.line
parent.column = f.column
} else {
parent.column += f.column
}
f.line = 0
f.column = 0
f.err = nil
}
@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
f.offset = 0
} else {
parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
f.column = parent.column
f.line = parent.line
f.column = 0
f.line = 0
f.offset = parent.offset
}
f.bytesEnd = f.bytesStart

View File

@ -15,11 +15,19 @@ type Input struct {
// Cursor returns a string that describes the current read cursor position.
func (i Input) Cursor() string {
f := i.api.stackFrame
if f.line == 0 && f.column == 0 {
column, line := 0, 0
for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
if f.line > 0 {
column = f.column
line += f.line
} else {
column += f.column
}
}
if line == 0 && column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
return fmt.Sprintf("line %d, column %d", line+1, column+1)
}
// Flush flushes input data from the read buffer up to the current

View File

@ -8,6 +8,36 @@ import (
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// BenchmarkMemclrOptimization measures how fast a small struct slice can be
// cleared using an idiomatic range loop, which the Go compiler can lower to
// a single memclr operation.
// TODO(review): either keep this benchmark and the next one or remove them;
// they exist for ad-hoc comparison while experimenting.
func BenchmarkMemclrOptimization(b *testing.B) {
	type item struct {
		a int
		b string
	}
	data := []item{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for n := 0; n < b.N; n++ {
		// Zero every element via a range loop (memclr-optimizable form).
		for j := range data {
			data[j] = item{}
		}
	}
}
// BenchmarkCodedClear measures clearing the same three-element struct slice
// with three explicit assignments, for comparison against the range-loop
// (memclr-optimized) variant in BenchmarkMemclrOptimization.
func BenchmarkCodedClear(b *testing.B) {
	type item struct {
		a int
		b string
	}
	data := []item{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for n := 0; n < b.N; n++ {
		// Deliberately unrolled: each element is zeroed individually so the
		// compiler cannot collapse the writes into a single memclr call.
		data[0] = item{}
		data[1] = item{}
		data[2] = item{}
	}
}
// ExampleNewAPI demonstrates the minimal way to construct a tokenize API:
// pass NewAPI the input string that the API will operate on.
// The returned value is discarded here; this example only shows construction.
func ExampleNewAPI() {
	tokenize.NewAPI("The input that the API will handle")
}

View File

@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
// MatchStr creates a Handler that matches the input against the provided string.
func MatchStr(expected string) Handler {
expectedRunes := []rune(expected)
expectedBytes := []byte(expected)
expectedLength := len(expectedBytes)
@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
return false
}
}
tokenAPI.Rune.AcceptMulti(expectedRunes...)
tokenAPI.Byte.AcceptMulti(expectedBytes...)
return true
}
}
@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
//
// False values: false, FALSE, False, 0, f, F
func MatchBoolean() Handler {
return func(tokenAPI *API) bool {
// 5 bytes can hold all possible boolean values.
b, _ := tokenAPI.Byte.PeekMulti(0, 5)
l := len(b)
// No bytes read at all, so a definitive mismatch.
if l < 1 {
return false
}
// Boolean '0' or '1'.
if b[0] == '1' || b[0] == '0' {
tokenAPI.Byte.Accept(b[0])
return true
}
// Booleans 't', 'T', 'TRUE', True' or 'true'.
if b[0] == 't' || b[0] == 'T' {
tokenAPI.Byte.Accept(b[0])
if l < 4 {
return true
}
if b[0] == 't' {
if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
tokenAPI.Byte.AcceptMulti(b[1:4]...)
}
return true
}
if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
tokenAPI.Byte.AcceptMulti(b[1:4]...)
}
return true
}
// Booleans 'f', 'F', 'FALSE', False' or 'false'.
if b[0] == 'f' || b[0] == 'F' {
tokenAPI.Byte.Accept(b[0])
if l < 5 {
return true
}
if b[0] == 'f' {
if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
tokenAPI.Byte.AcceptMulti(b[1:5]...)
}
return true
}
return false
}
return MatchAny(
MatchStr("true"),
MatchStr("TRUE"),
MatchStr("True"),
MatchByte('t'),
MatchByte('T'),
MatchByte('1'),
MatchStr("false"),
MatchStr("FALSE"),
MatchStr("False"),
MatchByte('f'),
MatchByte('F'),
MatchByte('0'),
)
}
// MatchASCII creates a Handler function that matches against any

View File

@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
// 46709 ns/op
func BenchmarkBoolean(b *testing.B) {
tokenizer := tokenize.New(tokenize.A.Boolean)
for i := 0; i < b.N; i++ {
tokenize.A.Boolean.Match("0")
tokenize.A.Boolean.Match("1")
tokenize.A.Boolean.Match("t")
tokenize.A.Boolean.Match("f")
tokenize.A.Boolean.Match("T")
tokenize.A.Boolean.Match("F")
tokenize.A.Boolean.Match("0XX")
tokenize.A.Boolean.Match("1XX")
tokenize.A.Boolean.Match("tXX")
tokenize.A.Boolean.Match("fXX")
tokenize.A.Boolean.Match("TXX")
tokenize.A.Boolean.Match("FXX")
tokenize.A.Boolean.Match("true")
tokenize.A.Boolean.Match("TRUE")
tokenize.A.Boolean.Match("True")
tokenize.A.Boolean.Match("false")
tokenize.A.Boolean.Match("FALSE")
tokenize.A.Boolean.Match("False")
tokenizer("0")
tokenizer("1")
tokenizer("t")
tokenizer("f")
tokenizer("T")
tokenizer("F")
tokenizer("0XX")
tokenizer("1XX")
tokenizer("tXX")
tokenizer("fXX")
tokenizer("TXX")
tokenizer("FXX")
tokenizer("true")
tokenizer("TRUE")
tokenizer("True")
tokenizer("false")
tokenizer("FALSE")
tokenizer("False")
}
}