Use zero-indexed cursor positioning data inside stackframes. This simplifies some things.
Also a bit of code cleanup.
parent 802701ade5
commit 62cd84bb74
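The idea in a nutshell: every stack frame keeps its cursor line/column zero-indexed and relative to the start of that frame, so a freshly forked frame starts out at the Go zero value (0, 0) and no cursor state needs to be copied from the parent. A minimal standalone sketch of that bookkeeping (illustrative types only, not part of this commit):

package main

import "fmt"

type frame struct{ line, column int }

// merge folds a child frame's relative cursor movement into its parent,
// using the same newline rule as the Cursor() aggregation further down:
// newlines accumulate, a newline restarts the column.
func merge(parent, child frame) frame {
	if child.line > 0 {
		return frame{parent.line + child.line, child.column}
	}
	// Same line: columns simply accumulate.
	return frame{parent.line, parent.column + child.column}
}

func main() {
	parent := frame{line: 2, column: 5} // parent cursor so far
	child := frame{line: 1, column: 3}  // child read a newline plus 3 chars
	fmt.Println(merge(parent, child))   // prints {3 3}
}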
@@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
 		newbufCap += defaultBufferSize
 	}
 	newStore := makeSlice(newbufCap)
+
 	copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
 	buf.buffer = newStore
 	buf.start = 0
@@ -71,7 +71,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	stackFrames []stackFrame // the stack frames, containing stack level-specific dat
+	stackFrames []stackFrame // the stack frames, containing stack level-specific data
 	stackLevel  int          // the current stack level
 	stackFrame  *stackFrame  // the current stack frame
 
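For context, the fork/merge flow that comment describes, using only the methods visible in this diff (NewAPI, Fork, Merge; the return type of NewAPI is assumed to be the *API above):

package main

import "git.makaay.nl/mauricem/go-parsekit/tokenize"

func main() {
	api := tokenize.NewAPI("some input to tokenize")
	child := api.Fork() // parse speculatively on a child stack frame
	// ... run token handlers against the child frame here ...
	api.Merge(child) // success: fold the child's progress into its parent
}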
@@ -87,8 +87,8 @@ type API struct {
 
 type stackFrame struct {
 	offset     int // the read offset (relative to the start of the reader buffer) for this stack frame
-	column     int // the column at which the cursor is (0-indexed)
-	line       int // the line at which the cursor is (0-indexed)
+	column     int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
+	line       int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
 	bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
 	bytesEnd   int // the end point in the API.bytes slice for runes produced by this stack level
 	tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
@@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
 	// Grow the stack frames capacity when needed.
 	frames := tokenAPI.stackFrames
 	if cap(frames) < (newStackLevel + 1) {
-		newFrames := make([]stackFrame, (newStackLevel+1)*2)
+		newFrames := make([]stackFrame, cap(frames)*2)
 		copy(newFrames, frames)
 		frames = newFrames
 	}
 
-	// This can be written in a shorter way, but this turned out to
-	// be the best way performance-wise.
 	parent := tokenAPI.stackFrame
-	child := &frames[newStackLevel]
-	child.offset = parent.offset
-	child.column = parent.column
-	child.line = parent.line
-	child.bytesStart = parent.bytesEnd
-	child.bytesEnd = parent.bytesEnd
-	child.tokenStart = parent.tokenEnd
-	child.tokenEnd = parent.tokenEnd
-	tokenAPI.stackFrame = child
+	frames[newStackLevel] = stackFrame{
+		offset:     parent.offset,
+		bytesStart: parent.bytesEnd,
+		bytesEnd:   parent.bytesEnd,
+		tokenStart: parent.tokenEnd,
+		tokenEnd:   parent.tokenEnd,
+	}
+	tokenAPI.stackFrame = &frames[newStackLevel]
 
 	return newStackLevel
 }
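The struct-literal assignment above is where zero-indexing pays off: fields omitted from a Go composite literal get their zero value, so the child's line and column no longer need to be copied from the parent; position (0, 0) is the start of the new frame by definition. A quick standalone illustration (hypothetical field values):

package main

import "fmt"

type stackFrame struct {
	offset, line, column int
	bytesStart, bytesEnd int
	tokenStart, tokenEnd int
}

func main() {
	frames := make([]stackFrame, 4)
	frames[1] = stackFrame{line: 9, column: 9} // stale data from an earlier fork
	frames[1] = stackFrame{offset: 42}         // re-fork: omitted fields reset to 0
	fmt.Printf("%+v\n", frames[1])
	// {offset:42 line:0 column:0 bytesStart:0 bytesEnd:0 tokenStart:0 tokenEnd:0}
}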
@@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
 	f.tokenStart = f.tokenEnd
 
 	parent.offset = f.offset
-	parent.line = f.line
-	parent.column = f.column
+	if f.line > parent.line {
+		parent.line += f.line
+		parent.column = f.column
+	} else {
+		parent.column += f.column
+	}
+	f.line = 0
+	f.column = 0
 
 	f.err = nil
 }
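Worked example of the new merge arithmetic: if the parent sits at relative (line 1, column 4) and the merged child consumed two newlines and then three columns (line 2, column 3), the parent ends up at (line 3, column 3); if instead the child stayed on its first line (line 0, column 7), the parent ends up at (line 1, column 11). Zeroing f.line and f.column afterwards returns the child frame to its start-of-frame position.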
@@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
 		f.offset = 0
 	} else {
 		parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
-		f.column = parent.column
-		f.line = parent.line
+		f.column = 0
+		f.line = 0
 		f.offset = parent.offset
 	}
 	f.bytesEnd = f.bytesStart
@@ -15,11 +15,19 @@ type Input struct {
 
 // Cursor returns a string that describes the current read cursor position.
 func (i Input) Cursor() string {
-	f := i.api.stackFrame
-	if f.line == 0 && f.column == 0 {
+	column, line := 0, 0
+	for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
+		if f.line > 0 {
+			column = f.column
+			line += f.line
+		} else {
+			column += f.column
+		}
+	}
+	if line == 0 && column == 0 {
 		return fmt.Sprintf("start of file")
 	}
-	return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
+	return fmt.Sprintf("line %d, column %d", line+1, column+1)
 }
 
 // Flush flushes input data from the read buffer up to the current
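With frame-relative positions, Cursor() has to aggregate over all active frames to report an absolute position. A standalone sketch of that summation (illustrative types, same logic as the loop above):

package main

import "fmt"

type frame struct{ line, column int }

func cursor(frames []frame) (line, column int) {
	for _, f := range frames {
		if f.line > 0 {
			line += f.line    // newlines accumulate across frames
			column = f.column // a newline restarts the column
		} else {
			column += f.column // same line: columns accumulate
		}
	}
	return line, column
}

func main() {
	// Root frame read "ab\ncd" (line 1, column 2); a forked frame then
	// read "ef" on the same line (line 0, column 2).
	frames := []frame{{line: 1, column: 2}, {line: 0, column: 2}}
	line, column := cursor(frames)
	fmt.Printf("line %d, column %d\n", line+1, column+1) // line 2, column 5
}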
@@ -8,6 +8,36 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
+func BenchmarkMemclrOptimization(b *testing.B) {
+	// TODO use or cleanup this one and the next. I'm playing around here.
+	type s struct {
+		a int
+		b string
+	}
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		for i := range x {
+			x[i] = s{}
+		}
+	}
+}
+
+func BenchmarkCodedClear(b *testing.B) {
+	type s struct {
+		a int
+		b string
+	}
+
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		x[0] = s{}
+		x[1] = s{}
+		x[2] = s{}
+	}
+}
+
 func ExampleNewAPI() {
 	tokenize.NewAPI("The input that the API will handle")
 }
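These two throwaway benchmarks pit Go's range-clear idiom against hand-unrolled clears, presumably to pick a strategy for wiping recycled stack frames: the compiler recognizes a range loop that assigns the zero value to every element and can lower the whole loop to a bulk memory-clear call. The recognized pattern in isolation:

package main

import "fmt"

func main() {
	type s struct {
		a int
		b string
	}
	x := make([]s, 1024)
	for i := range x {
		x[i] = s{} // the compiler can lower this loop to one bulk memory clear
	}
	fmt.Println(len(x))
}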
@@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
 
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-	expectedRunes := []rune(expected)
 	expectedBytes := []byte(expected)
 	expectedLength := len(expectedBytes)
 
@@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
 				return false
 			}
 		}
-		tokenAPI.Rune.AcceptMulti(expectedRunes...)
+		tokenAPI.Byte.AcceptMulti(expectedBytes...)
 		return true
 	}
 }
@@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
 //
 // False falues: false, FALSE, False, 0, f, F
 func MatchBoolean() Handler {
-	return func(tokenAPI *API) bool {
-		// 5 bytes can hold all possible boolean values.
-		b, _ := tokenAPI.Byte.PeekMulti(0, 5)
-		l := len(b)
-
-		// No bytes read at all, so a definitive mismatch.
-		if l < 1 {
-			return false
-		}
-
-		// Boolean '0' or '1'.
-		if b[0] == '1' || b[0] == '0' {
-			tokenAPI.Byte.Accept(b[0])
-			return true
-		}
-
-		// Booleans 't', 'T', 'TRUE', True' or 'true'.
-		if b[0] == 't' || b[0] == 'T' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 4 {
-				return true
-			}
-			if b[0] == 't' {
-				if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:4]...)
-				}
-				return true
-			}
-			if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
-				(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:4]...)
-			}
-			return true
-		}
-
-		// Booleans 'f', 'F', 'FALSE', False' or 'false'.
-		if b[0] == 'f' || b[0] == 'F' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 5 {
-				return true
-			}
-			if b[0] == 'f' {
-				if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:5]...)
-				}
-				return true
-			}
-			if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
-				(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:5]...)
-			}
-			return true
-		}
-
-		return false
-	}
+	return MatchAny(
+		MatchStr("true"),
+		MatchStr("TRUE"),
+		MatchStr("True"),
+		MatchByte('t'),
+		MatchByte('T'),
+		MatchByte('1'),
+		MatchStr("false"),
+		MatchStr("FALSE"),
+		MatchStr("False"),
+		MatchByte('f'),
+		MatchByte('F'),
+		MatchByte('0'),
+	)
 }
 
 // MatchASCII creates a Handler function that matches against any
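One thing to note about the rewrite: assuming MatchAny takes the first handler that matches, the full words have to be listed before their single-byte fallbacks, or "true" would only ever match its leading byte. In isolation:

package main

import "git.makaay.nl/mauricem/go-parsekit/tokenize"

func main() {
	// Order matters: "true" before the bare 't' fallback.
	boolean := tokenize.MatchAny(
		tokenize.MatchStr("true"),
		tokenize.MatchByte('t'),
	)
	tokenizer := tokenize.New(boolean)
	tokenizer("true") // consumes all four bytes, not just 't'
}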
@@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
 
 // 46709 ns/op
 func BenchmarkBoolean(b *testing.B) {
+	tokenizer := tokenize.New(tokenize.A.Boolean)
+
 	for i := 0; i < b.N; i++ {
-		tokenize.A.Boolean.Match("0")
-		tokenize.A.Boolean.Match("1")
-		tokenize.A.Boolean.Match("t")
-		tokenize.A.Boolean.Match("f")
-		tokenize.A.Boolean.Match("T")
-		tokenize.A.Boolean.Match("F")
-		tokenize.A.Boolean.Match("0XX")
-		tokenize.A.Boolean.Match("1XX")
-		tokenize.A.Boolean.Match("tXX")
-		tokenize.A.Boolean.Match("fXX")
-		tokenize.A.Boolean.Match("TXX")
-		tokenize.A.Boolean.Match("FXX")
-		tokenize.A.Boolean.Match("true")
-		tokenize.A.Boolean.Match("TRUE")
-		tokenize.A.Boolean.Match("True")
-		tokenize.A.Boolean.Match("false")
-		tokenize.A.Boolean.Match("FALSE")
-		tokenize.A.Boolean.Match("False")
+		tokenizer("0")
+		tokenizer("1")
+		tokenizer("t")
+		tokenizer("f")
+		tokenizer("T")
+		tokenizer("F")
+		tokenizer("0XX")
+		tokenizer("1XX")
+		tokenizer("tXX")
+		tokenizer("fXX")
+		tokenizer("TXX")
+		tokenizer("FXX")
+		tokenizer("true")
+		tokenizer("TRUE")
+		tokenizer("True")
+		tokenizer("false")
+		tokenizer("FALSE")
+		tokenizer("False")
 	}
 }
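The benchmark now builds the tokenizer once with tokenize.New and reuses it across iterations, instead of going through tokenize.A.Boolean.Match on every call, so the measured time reflects matching alone rather than per-call setup; the // 46709 ns/op note above presumably refers to this version.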