From 62cd84bb74aeaad6bee6624088e98d58475b9827 Mon Sep 17 00:00:00 2001
From: Maurice Makaay
Date: Wed, 24 Jul 2019 10:34:24 +0000
Subject: [PATCH] Use zero-indexed cursor positioning data inside stackframes.

Track line and column numbers relative to the start of their stack frame
instead of as absolute positions. This simplifies forking, merging and
resetting of stack frames. Also a bit of code cleanup.
---
 read/read.go                      |  1 +
 tokenize/api.go                   | 41 +++++++++--------
 tokenize/api_input.go             | 14 ++++--
 tokenize/api_test.go              | 30 +++++++++++++
 tokenize/handlers_builtin.go      | 73 +++++++------------------
 tokenize/handlers_builtin_test.go | 38 ++++++++--------
 6 files changed, 99 insertions(+), 98 deletions(-)

diff --git a/read/read.go b/read/read.go
index c002808..cc35e9a 100644
--- a/read/read.go
+++ b/read/read.go
@@ -236,6 +236,7 @@ func (buf *Buffer) grow(minBytes int) {
 		newbufCap += defaultBufferSize
 	}
 	newStore := makeSlice(newbufCap)
+	copy(newStore, buf.buffer[buf.start:buf.start+buf.len])
 	buf.buffer = newStore
 	buf.start = 0
diff --git a/tokenize/api.go b/tokenize/api.go
index ba08046..3de533a 100644
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -71,7 +71,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	stackFrames []stackFrame // the stack frames, containing stack level-specific dat
+	stackFrames []stackFrame // the stack frames, containing stack level-specific data
 	stackLevel  int          // the current stack level
 	stackFrame  *stackFrame  // the current stack frame
@@ -87,8 +87,8 @@ type API struct {
 
 type stackFrame struct {
 	offset     int // the read offset (relative to the start of the reader buffer) for this stack frame
-	column     int // the column at which the cursor is (0-indexed)
-	line       int // the line at which the cursor is (0-indexed)
+	column     int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
+	line       int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
 	bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level
 	bytesEnd   int // the end point in the API.bytes slice for runes produced by this stack level
 	tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level
@@ -147,23 +147,20 @@ func (tokenAPI *API) Fork() int {
 	// Grow the stack frames capacity when needed.
 	frames := tokenAPI.stackFrames
 	if cap(frames) < (newStackLevel + 1) {
-		newFrames := make([]stackFrame, (newStackLevel+1)*2)
+		newFrames := make([]stackFrame, cap(frames)*2)
 		copy(newFrames, frames)
 		frames = newFrames
 	}
 
-	// This can be written in a shorter way, but this turned out to
-	// be the best way performance-wise.
 	parent := tokenAPI.stackFrame
-	child := &frames[newStackLevel]
-	child.offset = parent.offset
-	child.column = parent.column
-	child.line = parent.line
-	child.bytesStart = parent.bytesEnd
-	child.bytesEnd = parent.bytesEnd
-	child.tokenStart = parent.tokenEnd
-	child.tokenEnd = parent.tokenEnd
-	tokenAPI.stackFrame = child
+	frames[newStackLevel] = stackFrame{
+		offset:     parent.offset,
+		bytesStart: parent.bytesEnd,
+		bytesEnd:   parent.bytesEnd,
+		tokenStart: parent.tokenEnd,
+		tokenEnd:   parent.tokenEnd,
+	}
+	tokenAPI.stackFrame = &frames[newStackLevel]
 
 	return newStackLevel
 }
@@ -209,8 +206,14 @@ func (tokenAPI *API) Merge(stackLevel int) {
 	f.tokenStart = f.tokenEnd
 
 	parent.offset = f.offset
-	parent.line = f.line
-	parent.column = f.column
+	if f.line > parent.line {
+		parent.line += f.line
+		parent.column = f.column
+	} else {
+		parent.column += f.column
+	}
+	f.line = 0
+	f.column = 0
 
 	f.err = nil
 }
@@ -226,8 +229,8 @@ func (tokenAPI *API) Reset() {
 		f.offset = 0
 	} else {
 		parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1]
-		f.column = parent.column
-		f.line = parent.line
+		f.column = 0
+		f.line = 0
 		f.offset = parent.offset
 	}
 	f.bytesEnd = f.bytesStart
diff --git a/tokenize/api_input.go b/tokenize/api_input.go
index 469ec0d..49e3978 100644
--- a/tokenize/api_input.go
+++ b/tokenize/api_input.go
@@ -15,11 +15,19 @@ type Input struct {
 
 // Cursor returns a string that describes the current read cursor position.
 func (i Input) Cursor() string {
-	f := i.api.stackFrame
-	if f.line == 0 && f.column == 0 {
+	column, line := 0, 0
+	for _, f := range i.api.stackFrames[:i.api.stackLevel+1] {
+		if f.line > 0 {
+			column = f.column
+			line += f.line
+		} else {
+			column += f.column
+		}
+	}
+	if line == 0 && column == 0 {
 		return fmt.Sprintf("start of file")
 	}
-	return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1)
+	return fmt.Sprintf("line %d, column %d", line+1, column+1)
 }
 
 // Flush flushes input data from the read buffer up to the current
diff --git a/tokenize/api_test.go b/tokenize/api_test.go
index 72f6aaf..42893b0 100644
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@@ -8,6 +8,36 @@ import (
 	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
+func BenchmarkMemclrOptimization(b *testing.B) {
+	// TODO use or cleanup this one and the next. I'm playing around here.
+	type s struct {
+		a int
+		b string
+	}
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		for i := range x {
+			x[i] = s{}
+		}
+	}
+}
+
+func BenchmarkCodedClear(b *testing.B) {
+	type s struct {
+		a int
+		b string
+	}
+
+	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
+
+	for i := 0; i < b.N; i++ {
+		x[0] = s{}
+		x[1] = s{}
+		x[2] = s{}
+	}
+}
+
 func ExampleNewAPI() {
 	tokenize.NewAPI("The input that the API will handle")
 }
diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go
index 77a5d13..5261c42 100644
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -638,7 +638,6 @@ func MatchEndOfLine() Handler {
 
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-	expectedRunes := []rune(expected)
 	expectedBytes := []byte(expected)
 	expectedLength := len(expectedBytes)
 
@@ -652,7 +651,7 @@ func MatchStr(expected string) Handler {
 				return false
 			}
 		}
-		tokenAPI.Rune.AcceptMulti(expectedRunes...)
+		tokenAPI.Byte.AcceptMulti(expectedBytes...)
 		return true
 	}
 }
@@ -1226,62 +1225,20 @@ func MatchDecimal(normalize bool) Handler {
 //
 // False falues: false, FALSE, False, 0, f, F
 func MatchBoolean() Handler {
-	return func(tokenAPI *API) bool {
-		// 5 bytes can hold all possible boolean values.
-		b, _ := tokenAPI.Byte.PeekMulti(0, 5)
-		l := len(b)
-
-		// No bytes read at all, so a definitive mismatch.
-		if l < 1 {
-			return false
-		}
-
-		// Boolean '0' or '1'.
-		if b[0] == '1' || b[0] == '0' {
-			tokenAPI.Byte.Accept(b[0])
-			return true
-		}
-
-		// Booleans 't', 'T', 'TRUE', True' or 'true'.
-		if b[0] == 't' || b[0] == 'T' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 4 {
-				return true
-			}
-			if b[0] == 't' {
-				if b[1] == 'r' && b[2] == 'u' && b[3] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:4]...)
-				}
-				return true
-			}
-			if (b[1] == 'R' && b[2] == 'U' && b[3] == 'E') ||
-				(b[1] == 'r' && b[2] == 'u' && b[3] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:4]...)
-			}
-			return true
-		}
-
-		// Booleans 'f', 'F', 'FALSE', False' or 'false'.
-		if b[0] == 'f' || b[0] == 'F' {
-			tokenAPI.Byte.Accept(b[0])
-			if l < 5 {
-				return true
-			}
-			if b[0] == 'f' {
-				if b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e' {
-					tokenAPI.Byte.AcceptMulti(b[1:5]...)
-				}
-				return true
-			}
-			if (b[1] == 'A' && b[2] == 'L' && b[3] == 'S' && b[4] == 'E') ||
-				(b[1] == 'a' && b[2] == 'l' && b[3] == 's' && b[4] == 'e') {
-				tokenAPI.Byte.AcceptMulti(b[1:5]...)
-			}
-			return true
-		}
-
-		return false
-	}
+	return MatchAny(
+		MatchStr("true"),
+		MatchStr("TRUE"),
+		MatchStr("True"),
+		MatchByte('t'),
+		MatchByte('T'),
+		MatchByte('1'),
+		MatchStr("false"),
+		MatchStr("FALSE"),
+		MatchStr("False"),
+		MatchByte('f'),
+		MatchByte('F'),
+		MatchByte('0'),
+	)
 }
diff --git a/tokenize/handlers_builtin_test.go b/tokenize/handlers_builtin_test.go
index 866c902..21d1886 100644
--- a/tokenize/handlers_builtin_test.go
+++ b/tokenize/handlers_builtin_test.go
@@ -512,24 +512,26 @@ func TestCombination(t *testing.T) {
 
 // 46709 ns/op
 func BenchmarkBoolean(b *testing.B) {
+	tokenizer := tokenize.New(tokenize.A.Boolean)
+
 	for i := 0; i < b.N; i++ {
-		tokenize.A.Boolean.Match("0")
-		tokenize.A.Boolean.Match("1")
-		tokenize.A.Boolean.Match("t")
-		tokenize.A.Boolean.Match("f")
-		tokenize.A.Boolean.Match("T")
-		tokenize.A.Boolean.Match("F")
-		tokenize.A.Boolean.Match("0XX")
-		tokenize.A.Boolean.Match("1XX")
-		tokenize.A.Boolean.Match("tXX")
-		tokenize.A.Boolean.Match("fXX")
-		tokenize.A.Boolean.Match("TXX")
-		tokenize.A.Boolean.Match("FXX")
-		tokenize.A.Boolean.Match("true")
-		tokenize.A.Boolean.Match("TRUE")
-		tokenize.A.Boolean.Match("True")
-		tokenize.A.Boolean.Match("false")
-		tokenize.A.Boolean.Match("FALSE")
-		tokenize.A.Boolean.Match("False")
+		tokenizer("0")
+		tokenizer("1")
+		tokenizer("t")
+		tokenizer("f")
+		tokenizer("T")
+		tokenizer("F")
+		tokenizer("0XX")
+		tokenizer("1XX")
+		tokenizer("tXX")
+		tokenizer("fXX")
+		tokenizer("TXX")
+		tokenizer("FXX")
+		tokenizer("true")
+		tokenizer("TRUE")
+		tokenizer("True")
+		tokenizer("false")
+		tokenizer("FALSE")
+		tokenizer("False")
 	}
 }
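
Notes:

The one-line read/read.go change is a correctness fix rather than cleanup: grow() swaps in a freshly allocated backing slice, and without copying the live region first, any bytes still buffered would silently disappear. A minimal sketch of the fixed behavior (simplified signature and names, not the library's actual Buffer type):

package main

import "fmt"

// growCopy mimics the fixed Buffer.grow: allocate a larger store and
// carry the live region over before swapping the backing slice.
func growCopy(buf []byte, start, length, newCap int) []byte {
	newStore := make([]byte, newCap)
	copy(newStore, buf[start:start+length]) // the line this patch adds
	return newStore
}

func main() {
	old := []byte("..hello...")
	grown := growCopy(old, 2, 5, 32)
	fmt.Printf("%q\n", grown[:5]) // "hello": buffered data survives the grow
}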
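Since line/column are now stored relative to the start of each stack frame, no single frame knows the absolute cursor position anymore; Input.Cursor() derives it by folding over all active frames. The rule: a frame that crossed at least one newline adds its line count and resets the absolute column to its own relative column, while a frame that stayed on one line only shifts the column. A standalone sketch of that fold, with hypothetical frame values (not code from this patch):

package main

import "fmt"

// frame mirrors the relative line/column pair kept per stack frame.
type frame struct{ line, column int }

// cursor folds relative frames into an absolute position, the same
// way the new Input.Cursor() walks api.stackFrames.
func cursor(frames []frame) (line, column int) {
	for _, f := range frames {
		if f.line > 0 {
			line += f.line    // newline counts stack up
			column = f.column // a newline resets the column
		} else {
			column += f.column // same line: columns accumulate
		}
	}
	return line, column
}

func main() {
	// Hypothetical: the root frame read "foo\nbar" (1 newline, then 3
	// columns), and a fork then read "baz" without leaving that line.
	line, column := cursor([]frame{{line: 1, column: 3}, {line: 0, column: 3}})
	fmt.Printf("line %d, column %d\n", line+1, column+1) // line 2, column 7
}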
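The two scratch benchmarks added to api_test.go (flagged TODO in the patch itself) compare slice-zeroing styles: assigning the zero value to every element in a bare range loop is a pattern the Go compiler can recognize and lower to a single bulk memory clear ("memclr"), while the unrolled x[0]/x[1]/x[2] assignments remain individual stores. A self-contained illustration of the recognizable pattern:

package main

import "fmt"

func main() {
	buf := make([]int, 8)
	for i := range buf {
		buf[i] = i + 1
	}

	// Setting every element to the type's zero value in a bare range
	// loop is the idiom the compiler can turn into one bulk clear.
	for i := range buf {
		buf[i] = 0
	}
	fmt.Println(buf) // [0 0 0 0 0 0 0 0]
}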
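The table-driven MatchBoolean only works if MatchAny commits to the first alternative that matches, which the ordering in this patch implies: every full word ("true", "false" and their case variants) is listed before its one-byte fallback, otherwise input like "true" would be consumed only up to "t". A self-contained sketch of first-match alternation (simplified stand-ins, not the library's Handler API):

package main

import (
	"fmt"
	"strings"
)

// matcher reports how many bytes of s it consumes (0 = no match),
// a simplified stand-in for the library's Handler type.
type matcher func(s string) int

func matchStr(expected string) matcher {
	return func(s string) int {
		if strings.HasPrefix(s, expected) {
			return len(expected)
		}
		return 0
	}
}

// matchAny returns the result of the first matcher that succeeds,
// which is why longer literals must be listed before their prefixes.
func matchAny(ms ...matcher) matcher {
	return func(s string) int {
		for _, m := range ms {
			if n := m(s); n > 0 {
				return n
			}
		}
		return 0
	}
}

func main() {
	good := matchAny(matchStr("true"), matchStr("t"))
	bad := matchAny(matchStr("t"), matchStr("true"))
	fmt.Println(good("true")) // 4: the full word is consumed
	fmt.Println(bad("true"))  // 1: stops after the prefix 't'
}

The same reasoning explains why the single-byte alternatives ('t', 'T', 'f', 'F') still follow their word forms in the new MatchBoolean: they act as fallbacks when no full word is present.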