Use zero-indexed cursor positioning data inside stackframes. This simplifies some things.
Also a bit of code cleanup.
This commit is contained in:
parent
802701ade5
commit
62cd84bb74
|
@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
|
|||
newbufCap += defaultBufferSize
|
||||
}
|
||||
newStore := makeSlice(newbufCap)
|
||||
|
||||
copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
|
||||
buf.buffer = newStore
|
||||
buf.start = 0
|
||||
|
|
|
@ -71,7 +71,7 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific dat
|
||||
stackFrames []stackFrame // the stack frames, containing stack level-specific data
|
||||
stackLevel int // the current stack level
|
||||
stackFrame *stackFrame // the current stack frame
|
||||
|
||||
|
@ -87,8 +87,8 @@ type API struct {
|
|||
|
||||
type stackFrame struct {
|
||||
offset int // the read offset (relative to the start of the reader buffer) for this stack frame
|
||||
column int // the column at which the cursor is (0-indexed)
|
||||
line int // the line at which the cursor is (0-indexed)
|
||||
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||
bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
|
||||
bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level
|
||||
tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
|
||||
|
@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
|
|||
// Grow the stack frames capacity when needed.
|
||||
frames := tokenAPI.stackFrames
|
||||
if cap(frames) < (newStackLevel + 1) {
|
||||
newFrames := make([]stackFrame, (newStackLevel+1)*2)
|
||||
newFrames := make([]stackFrame, cap(frames)*2)
|
||||
copy(newFrames, frames)
|
||||
frames = newFrames
|
||||
}
|
||||
|
||||
// This can be written in a shorter way, but this turned out to
|
||||
// be the best way performance-wise.
|
||||
parent := tokenAPI.stackFrame
|
||||
child := &frames[newStackLevel]
|
||||
child.offset = parent.offset
|
||||
child.column = parent.column
|
||||
child.line = parent.line
|
||||
child.bytesStart = parent.bytesEnd
|
||||
child.bytesEnd = parent.bytesEnd
|
||||
child.tokenStart = parent.tokenEnd
|
||||
child.tokenEnd = parent.tokenEnd
|
||||
tokenAPI.stackFrame = child
|
||||
frames[newStackLevel] = stackFrame{
|
||||
offset: parent.offset,
|
||||
bytesStart: parent.bytesEnd,
|
||||
bytesEnd: parent.bytesEnd,
|
||||
tokenStart: parent.tokenEnd,
|
||||
tokenEnd: parent.tokenEnd,
|
||||
}
|
||||
tokenAPI.stackFrame = &frames[newStackLevel]
|
||||
|
||||
return newStackLevel
|
||||
}
|
||||
|
@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
|
|||
f.tokenStart = f.tokenEnd
|
||||
|
||||
parent.offset = f.offset
|
||||
parent.line = f.line
|
||||
parent.column = f.column
|
||||
if f.line > parent.line {
|
||||
parent.line += f.line
|
||||
parent.column = f.column
|
||||
} else {
|
||||
parent.column += f.column
|
||||
}
|
||||
f.line = 0
|
||||
f.column = 0
|
||||
|
||||
f.err = nil
|
||||
}
|
||||
|
@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
|
|||
f.offset = 0
|
||||
} else {
|
||||
parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
|
||||
f.column = parent.column
|
||||
f.line = parent.line
|
||||
f.column = 0
|
||||
f.line = 0
|
||||
f.offset = parent.offset
|
||||
}
|
||||
f.bytesEnd = f.bytesStart
|
||||
|
|
|
@ -15,11 +15,19 @@ type Input struct {
|
|||
|
||||
// Cursor returns a string that describes the current read cursor position.
|
||||
func (i Input) Cursor() string {
|
||||
f := i.api.stackFrame
|
||||
if f.line == 0 && f.column == 0 {
|
||||
column, line := 0, 0
|
||||
for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
|
||||
if f.line > 0 {
|
||||
column = f.column
|
||||
line += f.line
|
||||
} else {
|
||||
column += f.column
|
||||
}
|
||||
}
|
||||
if line == 0 && column == 0 {
|
||||
return fmt.Sprintf("start of file")
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
|
||||
return fmt.Sprintf("line %d, column %d", line+1, column+1)
|
||||
}
|
||||
|
||||
// Flush flushes input data from the read buffer up to the current
|
||||
|
|
|
@ -8,6 +8,36 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func BenchmarkMemclrOptimization(b *testing.B) {
|
||||
// TODO use or cleanup this one and the next. I'm playing around here.
|
||||
type s struct {
|
||||
a int
|
||||
b string
|
||||
}
|
||||
x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
for i := range x {
|
||||
x[i] = s{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCodedClear(b *testing.B) {
|
||||
type s struct {
|
||||
a int
|
||||
b string
|
||||
}
|
||||
|
||||
x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
x[0] = s{}
|
||||
x[1] = s{}
|
||||
x[2] = s{}
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleNewAPI() {
|
||||
tokenize.NewAPI("The input that the API will handle")
|
||||
}
|
||||
|
|
|
@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
|
|||
|
||||
// MatchStr creates a Handler that matches the input against the provided string.
|
||||
func MatchStr(expected string) Handler {
|
||||
expectedRunes := []rune(expected)
|
||||
expectedBytes := []byte(expected)
|
||||
expectedLength := len(expectedBytes)
|
||||
|
||||
|
@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
|
|||
return false
|
||||
}
|
||||
}
|
||||
tokenAPI.Rune.AcceptMulti(expectedRunes...)
|
||||
tokenAPI.Byte.AcceptMulti(expectedBytes...)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
|
|||
//
|
||||
// False values: false, FALSE, False, 0, f, F
|
||||
func MatchBoolean() Handler {
|
||||
return func(tokenAPI *API) bool {
|
||||
// 5 bytes can hold all possible boolean values.
|
||||
b, _ := tokenAPI.Byte.PeekMulti(0, 5)
|
||||
l := len(b)
|
||||
|
||||
// No bytes read at all, so a definitive mismatch.
|
||||
if l < 1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Boolean '0' or '1'.
|
||||
if b[0] == '1' || b[0] == '0' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
return true
|
||||
}
|
||||
|
||||
// Booleans 't', 'T', 'TRUE', True' or 'true'.
|
||||
if b[0] == 't' || b[0] == 'T' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
if l < 4 {
|
||||
return true
|
||||
}
|
||||
if b[0] == 't' {
|
||||
if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:4]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
|
||||
(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:4]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Booleans 'f', 'F', 'FALSE', False' or 'false'.
|
||||
if b[0] == 'f' || b[0] == 'F' {
|
||||
tokenAPI.Byte.Accept(b[0])
|
||||
if l < 5 {
|
||||
return true
|
||||
}
|
||||
if b[0] == 'f' {
|
||||
if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:5]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
|
||||
(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
|
||||
tokenAPI.Byte.AcceptMulti(b[1:5]...)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
return MatchAny(
|
||||
MatchStr("true"),
|
||||
MatchStr("TRUE"),
|
||||
MatchStr("True"),
|
||||
MatchByte('t'),
|
||||
MatchByte('T'),
|
||||
MatchByte('1'),
|
||||
MatchStr("false"),
|
||||
MatchStr("FALSE"),
|
||||
MatchStr("False"),
|
||||
MatchByte('f'),
|
||||
MatchByte('F'),
|
||||
MatchByte('0'),
|
||||
)
|
||||
}
|
||||
|
||||
// MatchASCII creates a Handler function that matches against any
|
||||
|
|
|
@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
|
|||
|
||||
// 46709 ns/op
|
||||
func BenchmarkBoolean(b *testing.B) {
|
||||
tokenizer := tokenize.New(tokenize.A.Boolean)
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
tokenize.A.Boolean.Match("0")
|
||||
tokenize.A.Boolean.Match("1")
|
||||
tokenize.A.Boolean.Match("t")
|
||||
tokenize.A.Boolean.Match("f")
|
||||
tokenize.A.Boolean.Match("T")
|
||||
tokenize.A.Boolean.Match("F")
|
||||
tokenize.A.Boolean.Match("0XX")
|
||||
tokenize.A.Boolean.Match("1XX")
|
||||
tokenize.A.Boolean.Match("tXX")
|
||||
tokenize.A.Boolean.Match("fXX")
|
||||
tokenize.A.Boolean.Match("TXX")
|
||||
tokenize.A.Boolean.Match("FXX")
|
||||
tokenize.A.Boolean.Match("true")
|
||||
tokenize.A.Boolean.Match("TRUE")
|
||||
tokenize.A.Boolean.Match("True")
|
||||
tokenize.A.Boolean.Match("false")
|
||||
tokenize.A.Boolean.Match("FALSE")
|
||||
tokenize.A.Boolean.Match("False")
|
||||
tokenizer("0")
|
||||
tokenizer("1")
|
||||
tokenizer("t")
|
||||
tokenizer("f")
|
||||
tokenizer("T")
|
||||
tokenizer("F")
|
||||
tokenizer("0XX")
|
||||
tokenizer("1XX")
|
||||
tokenizer("tXX")
|
||||
tokenizer("fXX")
|
||||
tokenizer("TXX")
|
||||
tokenizer("FXX")
|
||||
tokenizer("true")
|
||||
tokenizer("TRUE")
|
||||
tokenizer("True")
|
||||
tokenizer("false")
|
||||
tokenizer("FALSE")
|
||||
tokenizer("False")
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue