Code cleanup, making the byte and rune inputs look as much the same as possible and getting rid of some unneeded functionality.

2019-07-23 08:03:16 +00:00 · 2019-07-23 08:03:16 +00:00 · a968f22d45
parent 93d2cfa6f1
commit a968f22d45
7 changed files with 121 additions and 140 deletions
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -73,7 +73,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	stackFrames  []stackFrame // the stack frames, containing stack level-specific data
+	stackFrames  []stackFrame // the stack frames, containing stack level-specific data
 	stackLevel   int          // the current stack level
 	stackFrame   *stackFrame  // the current stack frame
 	reader       *read.Buffer // the buffered input reader
@@ -105,12 +105,13 @@ const initialByteStoreLength = 1024
 // For an overview of allowed inputs, take a look at the documentation
 // for parsekit.read.New().
 func NewAPI(input interface{}) *API {
+	reader := read.New(input)
 	api := &API{
-		reader:      read.New(input),
 		stackFrames: make([]stackFrame, initialStackDepth),
+		reader:      reader,
 	}
-	api.Byte = ByteMode{api: api}
-	api.Rune = RuneMode{api: api}
+	api.Byte = ByteMode{api: api, reader: reader}
+	api.Rune = RuneMode{api: api, reader: reader}
 	api.Output = Output{api: api}
 	api.stackFrame = &api.stackFrames[0]

@@ -210,8 +211,8 @@ func (tokenAPI *API) Merge(stackLevel int) {
 	f.err = nil
 }

-// Reset moves the input cursor back to the beginning for the currently active API child.
-// Aditionally, any output (bytes and tokens) that was emitted from the API child are
+// Reset moves the read cursor back to the beginning for the currently active API child.
+// Additionally, all output (bytes and tokens) that was emitted from the API child is
 // cleared as well.
 func (api *API) Reset() {
 	f := api.stackFrame
--- a/tokenize/api_bytemode.go
+++ b/tokenize/api_bytemode.go
@@ -1,8 +1,11 @@
 package tokenize

+import "git.makaay.nl/mauricem/go-parsekit/read"
+
 // ByteMode provides byte-driven input/output functionality for the tokenize API.
 type ByteMode struct {
-	api *API
+	api    *API
+	reader *read.Buffer // the buffered input reader
 }

 // Peek returns the byte at the provided byte offset.
@@ -11,37 +14,12 @@ type ByteMode struct {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (byteMode ByteMode) Peek(offset int) (byte, error) {
-	a := byteMode.api
-	return a.reader.ByteAt(a.stackFrame.offset + offset)
-}
-
-// Skip is used to skip over one or more bytes that were read from the input.
-// This tells the tokenizer: "I've seen these bytes. They are of no interest.
-// I will now continue reading after these bytes."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The bytes are not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped bytes.
-func (byteMode ByteMode) Skip(b byte) {
-	f := byteMode.api.stackFrame
-	f.moveCursorByByte(b)
-	f.offset++
-}
-
-func (byteMode ByteMode) SkipMulti(bytes ...byte) {
-	f := byteMode.api.stackFrame
-	for _, b := range bytes {
-		f.moveCursorByByte(b)
-		f.offset++
-	}
+	return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
 }

 func (byteMode ByteMode) Accept(b byte) {
 	byteMode.api.Output.AddByte(b)
-	byteMode.Skip(b)
+	byteMode.MoveCursor(b)
 }

 // AcceptMulti is used to accept one or more bytes that were read from the input.
@@ -57,5 +35,35 @@ func (byteMode ByteMode) Accept(b byte) {
 // the first byte after the accepted bytes.
 func (byteMode ByteMode) AcceptMulti(bytes ...byte) {
 	byteMode.api.Output.AddBytes(bytes...)
-	byteMode.SkipMulti(bytes...)
+	byteMode.MoveCursorMulti(bytes...)
+}
+
+// MoveCursor updates the position of the read cursor, based on the provided byte.
+// This method takes newlines into account to keep track of line numbers and
+// column positions for the input cursor.
+//
+// After the call, byte offset 0 for Peek() and PeekMulti() will point at
+// the first byte at the new cursor position.
+func (byteMode ByteMode) MoveCursor(b byte) {
+	f := byteMode.api.stackFrame
+	if b == '\n' {
+		f.column = 0
+		f.line++
+	} else {
+		f.column++
+	}
+
+	f.offset++
+}
+
+// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
+// This method takes newlines into account to keep track of line numbers and
+// column positions for the input cursor.
+//
+// After the call, byte offset 0 for Peek() and PeekMulti() will point at
+// the first byte at the new cursor position.
+func (byteMode ByteMode) MoveCursorMulti(bytes ...byte) {
+	for _, b := range bytes {
+		byteMode.MoveCursor(b)
+	}
 }
--- a/tokenize/api_runemode.go
+++ b/tokenize/api_runemode.go
@@ -2,11 +2,14 @@ package tokenize

 import (
 	"unicode/utf8"
+
+	"git.makaay.nl/mauricem/go-parsekit/read"
 )

 // RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
 type RuneMode struct {
-	api *API
+	api    *API
+	reader *read.Buffer // the buffered input reader
 }

 // Peek returns the UTF8 rune at the provided byte offset, including its byte width.
@@ -23,42 +26,7 @@ type RuneMode struct {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
-	a := runeMode.api
-	return a.reader.RuneAt(a.stackFrame.offset + offset)
-}
-
-// Skip is used to skip over a single rune that was read from the input.
-// This tells the tokenizer: "I've seen this rune. It is of no interest.
-// I will now continue reading after this rune."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in APIthe input data). The rune is not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped rune.
-func (runeMode RuneMode) Skip(r rune) {
-	f := runeMode.api.stackFrame
-	f.moveCursorByRune(r)
-	f.offset += utf8.RuneLen(r)
-}
-
-// SkipMulti is used to skip over one or more runes that were read from the input.
-// This tells the tokenizer: "I've seen these runes. They are of no interest.
-// I will now continue reading after these runes."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The runes are not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped runes.
-func (runeMode RuneMode) SkipMulti(runes ...rune) {
-	f := runeMode.api.stackFrame
-	for _, r := range runes {
-		f.moveCursorByRune(r)
-		f.offset += utf8.RuneLen(r)
-	}
+	return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
 }

 // Accept is used to accept a single rune that was read from the input.
@@ -67,7 +35,7 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
 // reading after this rune."
 //
 // This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the rune to the tokenizer
+// and column we are on in the input data) and add the rune to the tokenizer
 // output.
 //
 // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
@@ -75,15 +43,14 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
 func (runeMode RuneMode) Accept(r rune) {
 	a := runeMode.api
 	f := a.stackFrame
+
 	curBytesEnd := f.bytesEnd
 	maxRequiredBytes := curBytesEnd + utf8.UTFMax
-
 	a.growOutputData(maxRequiredBytes)
 	w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
 	f.bytesEnd += w
-	f.offset += w

-	f.moveCursorByRune(r)
+	runeMode.MoveCursor(r)
 }

 // AcceptMulti is used to accept one or more runes that were read from the input.
@@ -100,17 +67,47 @@ func (runeMode RuneMode) Accept(r rune) {
 func (runeMode RuneMode) AcceptMulti(runes ...rune) {
 	a := runeMode.api
 	f := a.stackFrame
-	runesAsString := string(runes)
-	byteLen := len(runesAsString)
-	curBytesEnd := f.bytesEnd
-	newBytesEnd := curBytesEnd + byteLen

-	a.growOutputData(newBytesEnd)
-	copy(a.outputData[curBytesEnd:], runesAsString)
-	f.bytesEnd = newBytesEnd
-	f.offset += byteLen
+	curBytesEnd := f.bytesEnd
+	maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
+	a.growOutputData(maxBytes)

 	for _, r := range runes {
-		f.moveCursorByRune(r)
+		w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
+		curBytesEnd += w
+		runeMode.MoveCursor(r)
+	}
+	f.bytesEnd = curBytesEnd
+}
+
+// MoveCursor updates the position of the read cursor, based on the provided rune.
+// This method takes newlines into account to keep track of line numbers and
+// column positions for the input cursor.
+//
+// After the call, byte offset 0 for Peek() and PeekMulti() will point at
+// the first rune at the new cursor position.
+func (runeMode RuneMode) MoveCursor(r rune) int {
+	f := runeMode.api.stackFrame
+	if r == '\n' {
+		f.column = 0
+		f.line++
+	} else {
+		f.column++
+	}
+
+	width := utf8.RuneLen(r)
+	f.offset += width
+	return width
+}
+
+// MoveCursorMulti updates the position of the read cursor, based on the provided runes.
+// This method takes newlines into account to keep track of line numbers and
+// column positions for the input cursor.
+//
+// After the call, byte offset 0 for Peek() and PeekMulti() will point at
+// the first rune at the new cursor position.
+func (runeMode RuneMode) MoveCursorMulti(runes ...rune) {
+	for _, r := range runes {
+		runeMode.MoveCursor(r)
 	}
 }
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@@ -94,7 +94,7 @@ func ExampleAPI_SkipRune() {
 		if strings.ContainsRune("aeiouAEIOU", r) {
 			api.Rune.Accept(r)
 		} else {
-			api.Rune.Skip(r)
+			api.Rune.MoveCursor(r)
 		}
 	}

--- a/tokenize/cursor.go
+++ b/tokenize/cursor.go
@@ -1,30 +0,0 @@
-package tokenize
-
-// move updates the position of the cursor, based on the provided input string.
-// The input string represents the runes that the cursor must be moved over.
-// This method will take newlines into account to keep track of line numbers and
-// column positions automatically.
-func (f *stackFrame) moveCursor(input string) *stackFrame {
-	for _, r := range input {
-		f.moveCursorByRune(r)
-	}
-	return f
-}
-
-func (f *stackFrame) moveCursorByRune(r rune) {
-	if r == '\n' {
-		f.column = 0
-		f.line++
-	} else {
-		f.column++
-	}
-}
-
-func (f *stackFrame) moveCursorByByte(b byte) {
-	if b == '\n' {
-		f.column = 0
-		f.line++
-	} else {
-		f.column++
-	}
-}
--- a/tokenize/cursor_test.go
+++ b/tokenize/cursor_test.go
@@ -6,27 +6,29 @@ import (

 func TestMoveCursorByBytes(t *testing.T) {
 	api := NewAPI("")
-	api.stackFrame.moveCursorByByte('a')
-	api.stackFrame.moveCursorByByte('b')
-	api.stackFrame.moveCursorByByte('c')
-	api.stackFrame.moveCursorByByte('\r')
-	api.stackFrame.moveCursorByByte('\n')
-	api.stackFrame.moveCursorByByte('a')
-	api.stackFrame.moveCursorByByte('b')
+	api.Byte.MoveCursor('a')
+	api.Byte.MoveCursor('b')
+	api.Byte.MoveCursor('c')
+	api.Byte.MoveCursor('\r')
+	api.Byte.MoveCursor('\n')
+	api.Byte.MoveCursor('a')
+	api.Byte.MoveCursor('b')

 	AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
+	AssertEqual(t, 7, api.stackFrame.offset, "Offset after moving by byte")
 }

 func TestMoveCursorByRunes(t *testing.T) {
 	api := NewAPI("")
-	api.stackFrame.moveCursorByRune('ɹ')
-	api.stackFrame.moveCursorByRune('n')
-	api.stackFrame.moveCursorByRune('u')
-	api.stackFrame.moveCursorByRune('\r')
-	api.stackFrame.moveCursorByRune('\n')
-	api.stackFrame.moveCursorByRune('ǝ')
+	api.Rune.MoveCursor('ɹ')
+	api.Rune.MoveCursor('n')
+	api.Rune.MoveCursor('u')
+	api.Rune.MoveCursor('\r')
+	api.Rune.MoveCursor('\n')
+	api.Rune.MoveCursor('ǝ')

 	AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
+	AssertEqual(t, 8, api.stackFrame.offset, "Offset after moving by rune")
 }

 func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
@@ -49,7 +51,9 @@ func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
 	} {
 		api := NewAPI("")
 		for _, s := range test.input {
-			api.stackFrame.moveCursor(s)
+			for _, r := range s {
+				api.Rune.MoveCursor(r)
+			}
 		}
 		if api.stackFrame.line != test.line {
 			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@@ -476,7 +476,7 @@ func MatchNewline() Handler {
 			return false
 		}
 		if b1 == '\n' {
-			t.Byte.AcceptMulti(b1)
+			t.Byte.Accept(b1)
 			return true
 		}
 		if b1 == '\r' {
@@ -1121,7 +1121,7 @@ func MatchInteger(normalize bool) Handler {

 				// The next character is a zero, skip the leading zero and check again.
 				if err == nil && b2 == b {
-					t.Byte.Skip('0')
+					t.Byte.MoveCursor('0')
 					continue
 				}
 				// The next character is not a zero, nor a digit at all.
@@ -1131,7 +1131,7 @@ func MatchInteger(normalize bool) Handler {
 					return true
 				}
 				// The next character is a digit. SKip the leading zero and go with the digit.
-				t.Byte.Skip('0')
+				t.Byte.MoveCursor('0')
 				t.Byte.Accept(b2)
 				break
 			}
@@ -1170,7 +1170,7 @@ func MatchDecimal(normalize bool) Handler {

 				// The next character is a zero, skip the leading zero and check again.
 				if err == nil && b2 == b {
-					t.Byte.Skip('0')
+					t.Byte.MoveCursor('0')
 					continue
 				}
 				// The next character is a dot, go with the zero before the dot and
@@ -1186,7 +1186,7 @@ func MatchDecimal(normalize bool) Handler {
 					return true
 				}
 				// The next character is a digit. SKip the leading zero and go with the digit.
-				t.Byte.Skip('0')
+				t.Byte.MoveCursor('0')
 				t.Byte.Accept(b2)
 				break
 			}
@@ -1198,7 +1198,7 @@ func MatchDecimal(normalize bool) Handler {
 			if err != nil || b < '0' || b > '9' {
 				break
 			}
-			t.Byte.AcceptMulti(b)
+			t.Byte.Accept(b)
 		}

 		// No dot or no digit after a dot? Then we're done.
@@ -1266,6 +1266,7 @@ func MatchBoolean() Handler {
 				t.Byte.Accept(b1)
 				return true
 			}
+			// TODO Multibyte peeks (also useful for strings)
 			b3, _ := t.Byte.Peek(2)
 			b4, _ := t.Byte.Peek(3)
 			b5, err := t.Byte.Peek(4)
@@ -1362,7 +1363,7 @@ func MatchOctet(normalize bool) Handler {
 		if err != nil || b2 < '0' || b2 > '9' {
 			// Output 2-digit octet.
 			if normalize && b0 == '0' {
-				t.Byte.Skip(b0)
+				t.Byte.MoveCursor(b0)
 				t.Byte.Accept(b1)
 			} else {
 				t.Byte.AcceptMulti(b0, b1)
@@ -1377,9 +1378,9 @@ func MatchOctet(normalize bool) Handler {

 		// Output 3-digit octet.
 		if normalize && b0 == '0' {
-			t.Byte.Skip(b0)
+			t.Byte.MoveCursor(b0)
 			if b1 == '0' {
-				t.Byte.Skip(b1)
+				t.Byte.MoveCursor(b1)
 			} else {
 				t.Byte.Accept(b1)
 			}
@@ -1598,7 +1599,7 @@ func ModifyDropUntilEndOfLine() Handler {
 			if b == '\n' {
 				return true
 			}
-			t.Byte.Skip(b)
+			t.Byte.MoveCursor(b)
 		}
 	}
 }