Use zero-indexed cursor positioning data inside stack frames. This simplifies the fork/merge/reset handling of the read cursor.

Also a bit of code cleanup.
Maurice Makaay 2019-07-24 10:34:24 +00:00
parent 802701ade5
commit 62cd84bb74
6 changed files with 99 additions and 98 deletions
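The idea in one self-contained sketch (illustrative Go, not parsekit code): every stack frame counts lines and columns from its own starting point, so a fork begins at zero and can be created or thrown away without ever touching the parent frame's cursor.

package main

import "fmt"

type frame struct{ line, column int }

func main() {
    parent := frame{line: 3, column: 7} // what the parent has read so far
    child := frame{}                    // a fork starts counting from zero
    child.column += 4                   // the fork reads 4 bytes on the same line

    // Throwing the fork away is free; the parent was never touched.
    fmt.Println(parent, child) // {3 7} {0 4}
}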

View File

@@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
         newbufCap += defaultBufferSize
     }
     newStore := makeSlice(newbufCap)
     copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
     buf.buffer = newStore
     buf.start = 0
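What the hunk above amounts to, as a standalone sketch (field semantics assumed from the diff: buffer is the backing store, start/len delimit the live window): growing copies only the live bytes to the front of a new store, which is why start can be reset to zero afterwards.

package main

import "fmt"

func main() {
    buffer := []byte("..abcdef") // two stale bytes, six live ones
    start, length := 2, 6

    newStore := make([]byte, 16) // the grown store
    copy(newStore, buffer[start:start+length])
    buffer, start = newStore, 0 // live data is re-anchored at offset 0

    fmt.Printf("%q %d\n", buffer[:length], start) // "abcdef" 0
}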

View File

@@ -71,7 +71,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-    stackFrames []stackFrame // the stack frames, containing stack level-specific dat
+    stackFrames []stackFrame // the stack frames, containing stack level-specific data
     stackLevel  int          // the current stack level
     stackFrame  *stackFrame  // the current stack frame
@@ -87,8 +87,8 @@ type API struct {
 type stackFrame struct {
     offset     int // the read offset (relative to the start of the reader buffer) for this stack frame
-    column     int // the column at which the cursor is (0-indexed)
-    line       int // the line at which the cursor is (0-indexed)
+    column     int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
+    line       int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
     bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
     bytesEnd   int // the end point in the API.bytes slice for runes produced by this stack level
     tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
@@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
     // Grow the stack frames capacity when needed.
     frames := tokenAPI.stackFrames
     if cap(frames) < (newStackLevel + 1) {
-        newFrames := make([]stackFrame, (newStackLevel+1)*2)
+        newFrames := make([]stackFrame, cap(frames)*2)
         copy(newFrames, frames)
         frames = newFrames
     }

-    // This can be written in a shorter way, but this turned out to
-    // be the best way performance-wise.
     parent := tokenAPI.stackFrame
-    child := &frames[newStackLevel]
-    child.offset = parent.offset
-    child.column = parent.column
-    child.line = parent.line
-    child.bytesStart = parent.bytesEnd
-    child.bytesEnd = parent.bytesEnd
-    child.tokenStart = parent.tokenEnd
-    child.tokenEnd = parent.tokenEnd
-    tokenAPI.stackFrame = child
+    frames[newStackLevel] = stackFrame{
+        offset:     parent.offset,
+        bytesStart: parent.bytesEnd,
+        bytesEnd:   parent.bytesEnd,
+        tokenStart: parent.tokenEnd,
+        tokenEnd:   parent.tokenEnd,
+    }
+    tokenAPI.stackFrame = &frames[newStackLevel]

     return newStackLevel
 }
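The composite literal above does double duty: struct fields that are not listed (column, line, err) are set to their zero values, which with zero-indexed frame-relative cursors is exactly the state a fresh fork needs, and assigning the literal also wipes any stale data left at that slot by an earlier fork. A reduced illustration (simplified frame type, not the real one):

package main

import "fmt"

type stackFrame struct {
    offset, column, line int
    bytesStart, bytesEnd int
}

func main() {
    frames := make([]stackFrame, 4)
    frames[1] = stackFrame{offset: 42, bytesStart: 7, bytesEnd: 7}
    fmt.Println(frames[1].line, frames[1].column) // 0 0: a fresh relative cursor
}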
@@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
     f.tokenStart = f.tokenEnd

     parent.offset = f.offset
-    parent.line = f.line
-    parent.column = f.column
+    if f.line > parent.line {
+        parent.line += f.line
+        parent.column = f.column
+    } else {
+        parent.column += f.column
+    }
+    f.line = 0
+    f.column = 0

     f.err = nil
 }
@@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
         f.offset = 0
     } else {
         parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
-        f.column = parent.column
-        f.line = parent.line
+        f.column = 0
+        f.line = 0
         f.offset = parent.offset
     }
     f.bytesEnd = f.bytesStart
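A worked example of the merge arithmetic above, with assumed values: the parent consumed "ab" (line 0, column 2) and a forked child then consumed "cd\nef" (line 1, column 2, counted from the fork point). Because the child crossed a newline, its column replaces the parent's; otherwise the columns would simply add up. Reset is the mirror image: a frame-relative cursor goes back to zero instead of being copied from the parent.

package main

import "fmt"

func main() {
    parentLine, parentColumn := 0, 2 // after reading "ab"
    childLine, childColumn := 1, 2   // after reading "cd\nef", relative to the fork

    if childLine > parentLine {
        parentLine += childLine
        parentColumn = childColumn // newline crossed: the column resets
    } else {
        parentColumn += childColumn // same line: columns add up
    }
    fmt.Println(parentLine, parentColumn) // 1 2, reported as "line 2, column 3"
}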

View File

@@ -15,11 +15,19 @@ type Input struct {
 // Cursor returns a string that describes the current read cursor position.
 func (i Input) Cursor() string {
-    f := i.api.stackFrame
-    if f.line == 0 && f.column == 0 {
+    column, line := 0, 0
+    for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
+        if f.line > 0 {
+            column = f.column
+            line += f.line
+        } else {
+            column += f.column
+        }
+    }
+    if line == 0 && column == 0 {
         return fmt.Sprintf("start of file")
     }
-    return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
+    return fmt.Sprintf("line %d, column %d", line+1, column+1)
 }

 // Flush flushes input data from the read buffer up to the current
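The aggregation loop, pulled out into a standalone function with hand-filled frames to show what it computes (illustrative only; the real code reads the frames from the API):

package main

import "fmt"

type stackFrame struct{ line, column int }

func cursorString(frames []stackFrame) string {
    column, line := 0, 0
    for _, f := range frames {
        if f.line > 0 {
            column = f.column // this frame crossed a newline: column restarts
            line += f.line
        } else {
            column += f.column // same line: columns accumulate
        }
    }
    if line == 0 && column == 0 {
        return "start of file"
    }
    return fmt.Sprintf("line %d, column %d", line+1, column+1)
}

func main() {
    fmt.Println(cursorString([]stackFrame{{0, 0}}))         // start of file
    fmt.Println(cursorString([]stackFrame{{1, 3}, {0, 2}})) // line 2, column 6
}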

View File

@@ -8,6 +8,36 @@ import (
     "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )

+func BenchmarkMemclrOptimization(b *testing.B) {
+    // TODO use or cleanup this one and the next. I'm playing around here.
+    type s struct {
+        a int
+        b string
+    }
+    x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+    for i := 0; i < b.N; i++ {
+        for i := range x {
+            x[i] = s{}
+        }
+    }
+}
+
+func BenchmarkCodedClear(b *testing.B) {
+    type s struct {
+        a int
+        b string
+    }
+    x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+    for i := 0; i < b.N; i++ {
+        x[0] = s{}
+        x[1] = s{}
+        x[2] = s{}
+    }
+}
+
 func ExampleNewAPI() {
     tokenize.NewAPI("The input that the API will handle")
 }
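For context on what the first benchmark measures: the Go compiler recognizes a range loop that assigns the zero value to every element and can lower it to a single memory-clearing (memclr) call, so it is expected to beat the hand-coded per-element assignments. A way to compare the two with the standard Go tooling:

// From the package directory:
//
//    go test -bench 'MemclrOptimization|CodedClear' -benchmem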

View File

@@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-    expectedRunes := []rune(expected)
     expectedBytes := []byte(expected)
     expectedLength := len(expectedBytes)
@@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
                 return false
             }
         }
-        tokenAPI.Rune.AcceptMulti(expectedRunes...)
+        tokenAPI.Byte.AcceptMulti(expectedBytes...)
         return true
     }
 }
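With the rune slice gone, MatchStr both compares and accepts the expected string's raw UTF-8 bytes, skipping a per-handler []rune conversion and the rune-level bookkeeping on every match. A hypothetical use, following the benchmark style later in this commit (the tokenizer's return values are assumed here, not shown in this diff):

package main

import (
    "fmt"

    "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func main() {
    tokenizer := tokenize.New(tokenize.MatchStr("true"))
    result, err := tokenizer("true story") // matches the leading "true"
    fmt.Println(result, err)
}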
@@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
 //
 // False falues: false, FALSE, False, 0, f, F
 func MatchBoolean() Handler {
-    return func(tokenAPI *API) bool {
-        // 5 bytes can hold all possible boolean values.
-        b, _ := tokenAPI.Byte.PeekMulti(0, 5)
-        l := len(b)
-
-        // No bytes read at all, so a definitive mismatch.
-        if l < 1 {
-            return false
-        }
-
-        // Boolean '0' or '1'.
-        if b[0] == '1' || b[0] == '0' {
-            tokenAPI.Byte.Accept(b[0])
-            return true
-        }
-
-        // Booleans 't', 'T', 'TRUE', True' or 'true'.
-        if b[0] == 't' || b[0] == 'T' {
-            tokenAPI.Byte.Accept(b[0])
-            if l < 4 {
-                return true
-            }
-            if b[0] == 't' {
-                if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
-                    tokenAPI.Byte.AcceptMulti(b[1:4]...)
-                }
-                return true
-            }
-            if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
-                (b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
-                tokenAPI.Byte.AcceptMulti(b[1:4]...)
-            }
-            return true
-        }
-
-        // Booleans 'f', 'F', 'FALSE', False' or 'false'.
-        if b[0] == 'f' || b[0] == 'F' {
-            tokenAPI.Byte.Accept(b[0])
-            if l < 5 {
-                return true
-            }
-            if b[0] == 'f' {
-                if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
-                    tokenAPI.Byte.AcceptMulti(b[1:5]...)
-                }
-                return true
-            }
-            if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
-                (b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
-                tokenAPI.Byte.AcceptMulti(b[1:5]...)
-            }
-            return true
-        }
-
-        return false
-    }
+    return MatchAny(
+        MatchStr("true"),
+        MatchStr("TRUE"),
+        MatchStr("True"),
+        MatchByte('t'),
+        MatchByte('T'),
+        MatchByte('1'),
+        MatchStr("false"),
+        MatchStr("FALSE"),
+        MatchStr("False"),
+        MatchByte('f'),
+        MatchByte('F'),
+        MatchByte('0'),
+    )
 }
// MatchASCII creates a Handler function that matches against any // MatchASCII creates a Handler function that matches against any
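One property the rewritten MatchBoolean relies on: MatchAny tries its handlers in order and the first successful match wins, so the full words must be listed before their single-byte fallbacks. With MatchByte('t') first, input like "true" would match only the 't' and leave "rue" unconsumed. Reduced to the essence (a sketch using the same package API as above):

package main

import "git.makaay.nl/mauricem/go-parsekit/tokenize"

// Order matters: the full word first, then the single-letter fallback.
var boolean = tokenize.MatchAny(
    tokenize.MatchStr("true"),
    tokenize.MatchByte('t'),
)

func main() {
    tokenizer := tokenize.New(boolean)
    tokenizer("true") // consumes all of "true", not just the 't'
}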

View File

@@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
 // 46709 ns/op
 func BenchmarkBoolean(b *testing.B) {
+    tokenizer := tokenize.New(tokenize.A.Boolean)
     for i := 0; i < b.N; i++ {
-        tokenize.A.Boolean.Match("0")
-        tokenize.A.Boolean.Match("1")
-        tokenize.A.Boolean.Match("t")
-        tokenize.A.Boolean.Match("f")
-        tokenize.A.Boolean.Match("T")
-        tokenize.A.Boolean.Match("F")
-        tokenize.A.Boolean.Match("0XX")
-        tokenize.A.Boolean.Match("1XX")
-        tokenize.A.Boolean.Match("tXX")
-        tokenize.A.Boolean.Match("fXX")
-        tokenize.A.Boolean.Match("TXX")
-        tokenize.A.Boolean.Match("FXX")
-        tokenize.A.Boolean.Match("true")
-        tokenize.A.Boolean.Match("TRUE")
-        tokenize.A.Boolean.Match("True")
-        tokenize.A.Boolean.Match("false")
-        tokenize.A.Boolean.Match("FALSE")
-        tokenize.A.Boolean.Match("False")
+        tokenizer("0")
+        tokenizer("1")
+        tokenizer("t")
+        tokenizer("f")
+        tokenizer("T")
+        tokenizer("F")
+        tokenizer("0XX")
+        tokenizer("1XX")
+        tokenizer("tXX")
+        tokenizer("fXX")
+        tokenizer("TXX")
+        tokenizer("FXX")
+        tokenizer("true")
+        tokenizer("TRUE")
+        tokenizer("True")
+        tokenizer("false")
+        tokenizer("FALSE")
+        tokenizer("False")
     }
 }
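The tokenizer construction was hoisted out of the loop, so b.N now measures matching only. A sketch of the equivalent using the standard testing API's timer control, had the setup needed to stay inside the measured function (this would live in the same _test.go file, with testing and tokenize imported):

func BenchmarkBooleanAlt(b *testing.B) {
    tokenizer := tokenize.New(tokenize.A.Boolean)
    b.ResetTimer() // exclude tokenizer construction from the measurement
    for i := 0; i < b.N; i++ {
        tokenizer("true")
    }
}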