From 62cd84bb74aeaad6bee6624088e98d58475b9827 Mon Sep 17 00:00:00 2001
From: Maurice Makaay
Date: Wed, 24 Jul 2019 10:34:24 +0000
Subject: [PATCH] Use zero-indexed cursor positioning data inside stackframes.

Track line and column numbers relative to the start of their stack frame
instead of as absolute positions. This simplifies forking, merging and
resetting of stack frames. Also a bit of code cleanup.
---
 read/read.go                      |  1 +
 tokenize/api.go                   | 41 +++++++++--------
 tokenize/api_input.go             | 14 ++++--
 tokenize/api_test.go              | 30 +++++++++++++
 tokenize/handlers_builtin.go      | 73 +++++++------------------
 tokenize/handlers_builtin_test.go | 38 ++++++++--------
 6 files changed, 99 insertions(+), 98 deletions(-)

diff --git a/read/read.go b/read/read.go
index c002808..cc35e9a 100644
--- a/read/read.go
+++ b/read/read.go
@@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
 		newbufCap += defaultBufferSize
 	}
 	newStore := makeSlice(newbufCap)
+	copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
 	buf.buffer = newStore
 	buf.start = 0
diff --git a/tokenize/api.go b/tokenize/api.go
index ba08046..3de533a 100644
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -71,7 +71,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	stackFrames []stackFrame // the stack frames, containing stack level-specific dat
+	stackFrames []stackFrame // the stack frames, containing stack level-specific data
 	stackLevel  int          // the current stack level
 	stackFrame  *stackFrame  // the current stack frame
@@ -87,8 +87,8 @@ type API struct {
 
 type stackFrame struct {
 	offset     int // the read offset (relative to the start of the reader buffer) for this stack frame
-	column     int // the column at which the cursor is (0-indexed)
-	line       int // the line at which the cursor is (0-indexed)
+	column     int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
+	line       int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
 	bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
 	bytesEnd   int // the end point in the API.bytes slice for runes produced by this stack level
 	tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
@@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
 	// Grow the stack frames capacity when needed.
 	frames := tokenAPI.stackFrames
 	if cap(frames) < (newStackLevel + 1) {
-		newFrames := make([]stackFrame, (newStackLevel+1)*2)
+		newFrames := make([]stackFrame, cap(frames)*2)
 		copy(newFrames, frames)
 		frames = newFrames
 	}
 
-	// This can be written in a shorter way, but this turned out to
-	// be the best way performance-wise.
 	parent := tokenAPI.stackFrame
-	child := &frames[newStackLevel]
-	child.offset = parent.offset
-	child.column = parent.column
-	child.line = parent.line
-	child.bytesStart = parent.bytesEnd
-	child.bytesEnd = parent.bytesEnd
-	child.tokenStart = parent.tokenEnd
-	child.tokenEnd = parent.tokenEnd
-	tokenAPI.stackFrame = child
+	frames[newStackLevel] = stackFrame{
+		offset:     parent.offset,
+		bytesStart: parent.bytesEnd,
+		bytesEnd:   parent.bytesEnd,
+		tokenStart: parent.tokenEnd,
+		tokenEnd:   parent.tokenEnd,
+	}
+	tokenAPI.stackFrame = &frames[newStackLevel]
 
 	return newStackLevel
 }
@@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
 	f.tokenStart = f.tokenEnd
 
 	parent.offset = f.offset
-	parent.line = f.line
-	parent.column = f.column
+	if f.line > parent.line {
+		parent.line += f.line
+		parent.column = f.column
+	} else {
+		parent.column += f.column
+	}
+	f.line = 0
+	f.column = 0
 
 	f.err = nil
 }
@@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
 		f.offset = 0
 	} else {
 		parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
-		f.column = parent.column
-		f.line = parent.line
+		f.column = 0
+		f.line = 0
 		f.offset = parent.offset
 	}
 	f.bytesEnd = f.bytesStart
diff --git a/tokenize/api_input.go b/tokenize/api_input.go
index 469ec0d..49e3978 100644
--- a/tokenize/api_input.go
+++ b/tokenize/api_input.go
@@ -15,11 +15,19 @@ type Input struct {
 
 // Cursor returns a string that describes the current read cursor position.
 func (i Input) Cursor() string {
-	f := i.api.stackFrame
-	if f.line == 0 && f.column == 0 {
+	column, line := 0, 0
+	for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
+		if f.line > 0 {
+			column = f.column
+			line += f.line
+		} else {
+			column += f.column
+		}
+	}
+	if line == 0 && column == 0 {
 		return fmt.Sprintf("start of file")
 	}
-	return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
+	return fmt.Sprintf("line %d, column %d", line+1, column+1)
 }
 
 // Flush flushes input data from the read buffer up to the current
diff --git a/tokenize/api_test.go b/tokenize/api_test.go
index 72f6aaf..42893b0 100644
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@@ -8,6 +8,36 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
+func BenchmarkMemclrOptimization(b *testing.B) {
+	// TODO use or cleanup this one and the next. I'm playing around here.
+	type s struct {
+		a int
+		b string
+	}
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		for i := range x {
+			x[i] = s{}
+		}
+	}
+}
+
+func BenchmarkCodedClear(b *testing.B) {
+	type s struct {
+		a int
+		b string
+	}
+
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		x[0] = s{}
+		x[1] = s{}
+		x[2] = s{}
+	}
+}
+
 func ExampleNewAPI() {
 	tokenize.NewAPI("The input that the API will handle")
 }
diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go
index 77a5d13..5261c42 100644
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
 
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-	expectedRunes := []rune(expected)
 	expectedBytes := []byte(expected)
 	expectedLength := len(expectedBytes)
 
@@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
 				return false
 			}
 		}
-		tokenAPI.Rune.AcceptMulti(expectedRunes...)
+		tokenAPI.Byte.AcceptMulti(expectedBytes...)
 		return true
 	}
 }
@@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
 //
 // False falues: false, FALSE, False, 0, f, F
 func MatchBoolean() Handler {
-	return func(tokenAPI *API) bool {
-		// 5 bytes can hold all possible boolean values.
-		b, _ := tokenAPI.Byte.PeekMulti(0, 5)
-		l := len(b)
-
-		// No bytes read at all, so a definitive mismatch.
-		if l < 1 {
-			return false
-		}
-
-		// Boolean '0' or '1'.
-		if b[0] == '1' || b[0] == '0' {
-			tokenAPI.Byte.Accept(b[0])
-			return true
-		}
-
-		// Booleans 't', 'T', 'TRUE', True' or 'true'.
-		if b[0] == 't' || b[0] == 'T' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 4 {
-				return true
-			}
-			if b[0] == 't' {
-				if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:4]...)
-				}
-				return true
-			}
-			if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
-				(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:4]...)
-			}
-			return true
-		}
-
-		// Booleans 'f', 'F', 'FALSE', False' or 'false'.
-		if b[0] == 'f' || b[0] == 'F' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 5 {
-				return true
-			}
-			if b[0] == 'f' {
-				if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:5]...)
-				}
-				return true
-			}
-			if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
-				(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:5]...)
-			}
-			return true
-		}
-
-		return false
-	}
+	return MatchAny(
+		MatchStr("true"),
+		MatchStr("TRUE"),
+		MatchStr("True"),
+		MatchByte('t'),
+		MatchByte('T'),
+		MatchByte('1'),
+		MatchStr("false"),
+		MatchStr("FALSE"),
+		MatchStr("False"),
+		MatchByte('f'),
+		MatchByte('F'),
+		MatchByte('0'),
+	)
 }
diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go
index 866c902..21d1886 100644
--- a/tokenize/handlers_builtin_test.go
+++ b/tokenize/handlers_builtin_test.go
@@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
 
 // 46709 ns/op
 func BenchmarkBoolean(b *testing.B) {
+	tokenizer := tokenize.New(tokenize.A.Boolean)
+
 	for i := 0; i < b.N; i++ {
-		tokenize.A.Boolean.Match("0")
-		tokenize.A.Boolean.Match("1")
-		tokenize.A.Boolean.Match("t")
-		tokenize.A.Boolean.Match("f")
-		tokenize.A.Boolean.Match("T")
-		tokenize.A.Boolean.Match("F")
-		tokenize.A.Boolean.Match("0XX")
-		tokenize.A.Boolean.Match("1XX")
-		tokenize.A.Boolean.Match("tXX")
-		tokenize.A.Boolean.Match("fXX")
-		tokenize.A.Boolean.Match("TXX")
-		tokenize.A.Boolean.Match("FXX")
-		tokenize.A.Boolean.Match("true")
-		tokenize.A.Boolean.Match("TRUE")
-		tokenize.A.Boolean.Match("True")
-		tokenize.A.Boolean.Match("false")
-		tokenize.A.Boolean.Match("FALSE")
-		tokenize.A.Boolean.Match("False")
+		tokenizer("0")
+		tokenizer("1")
+		tokenizer("t")
+		tokenizer("f")
+		tokenizer("T")
+		tokenizer("F")
+		tokenizer("0XX")
+		tokenizer("1XX")
+		tokenizer("tXX")
+		tokenizer("fXX")
+		tokenizer("TXX")
+		tokenizer("FXX")
+		tokenizer("true")
+		tokenizer("TRUE")
+		tokenizer("True")
+		tokenizer("false")
+		tokenizer("FALSE")
+		tokenizer("False")
 	}
 }
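
Notes:

The one-line read/read.go change is a correctness fix rather than cleanup: grow() swaps in a freshly allocated backing slice, and without copying the live region first, any bytes still buffered would silently disappear. A minimal sketch of the fixed behavior (simplified signature and names, not the library's actual Buffer type):

package main

import "fmt"

// growCopy mimics the fixed Buffer.grow: allocate a larger store and
// carry the live region over before swapping the backing slice.
func growCopy(buf []byte, start, length, newCap int) []byte {
	newStore := make([]byte, newCap)
	copy(newStore, buf[start:start+length]) // the line this patch adds
	return newStore
}

func main() {
	old := []byte("..hello...")
	grown := growCopy(old, 2, 5, 32)
	fmt.Printf("%q\n", grown[:5]) // "hello": buffered data survives the grow
}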
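Since line/column are now stored relative to the start of each stack frame, no single frame knows the absolute cursor position anymore; Input.Cursor() derives it by folding over all active frames. The rule: a frame that crossed at least one newline adds its line count and resets the absolute column to its own relative column, while a frame that stayed on one line only shifts the column. A standalone sketch of that fold, with hypothetical frame values (not code from this patch):

package main

import "fmt"

// frame mirrors the relative line/column pair kept per stack frame.
type frame struct{ line, column int }

// cursor folds relative frames into an absolute position, the same
// way the new Input.Cursor() walks api.stackFrames.
func cursor(frames []frame) (line, column int) {
	for _, f := range frames {
		if f.line > 0 {
			line += f.line    // newline counts stack up
			column = f.column // a newline resets the column
		} else {
			column += f.column // same line: columns accumulate
		}
	}
	return line, column
}

func main() {
	// Hypothetical: the root frame read "foo\nbar" (1 newline, then 3
	// columns), and a fork then read "baz" without leaving that line.
	line, column := cursor([]frame{{line: 1, column: 3}, {line: 0, column: 3}})
	fmt.Printf("line %d, column %d\n", line+1, column+1) // line 2, column 7
}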
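The two scratch benchmarks added to api_test.go (flagged TODO in the patch itself) compare slice-zeroing styles: assigning the zero value to every element in a bare range loop is a pattern the Go compiler can recognize and lower to a single bulk memory clear ("memclr"), while the unrolled x[0]/x[1]/x[2] assignments remain individual stores. A self-contained illustration of the recognizable pattern:

package main

import "fmt"

func main() {
	buf := make([]int, 8)
	for i := range buf {
		buf[i] = i + 1
	}

	// Setting every element to the type's zero value in a bare range
	// loop is the idiom the compiler can turn into one bulk clear.
	for i := range buf {
		buf[i] = 0
	}
	fmt.Println(buf) // [0 0 0 0 0 0 0 0]
}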
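The table-driven MatchBoolean only works if MatchAny commits to the first alternative that matches, which the ordering in this patch implies: every full word ("true", "false" and their case variants) is listed before its one-byte fallback, otherwise input like "true" would be consumed only up to "t". A self-contained sketch of first-match alternation (simplified stand-ins, not the library's Handler API):

package main

import (
	"fmt"
	"strings"
)

// matcher reports how many bytes of s it consumes (0 = no match),
// a simplified stand-in for the library's Handler type.
type matcher func(s string) int

func matchStr(expected string) matcher {
	return func(s string) int {
		if strings.HasPrefix(s, expected) {
			return len(expected)
		}
		return 0
	}
}

// matchAny returns the result of the first matcher that succeeds,
// which is why longer literals must be listed before their prefixes.
func matchAny(ms ...matcher) matcher {
	return func(s string) int {
		for _, m := range ms {
			if n := m(s); n > 0 {
				return n
			}
		}
		return 0
	}
}

func main() {
	good := matchAny(matchStr("true"), matchStr("t"))
	bad := matchAny(matchStr("t"), matchStr("true"))
	fmt.Println(good("true")) // 4: the full word is consumed
	fmt.Println(bad("true"))  // 1: stops after the prefix 't'
}

The same reasoning explains why the single-byte alternatives ('t', 'T', 'f', 'F') still follow their word forms in the new MatchBoolean: they act as fallbacks when no full word is present.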