Code cleanup: make the byte and rune inputs look as much alike as possible and get rid of some unneeded functionality.

2019-07-23 08:03:16 +00:00 · 2019-07-23 08:03:16 +00:00 · a968f22d45
parent 93d2cfa6f1
commit a968f22d45
7 changed files with 121 additions and 140 deletions
--- a/tokenize/api.go
+++ b/tokenize/api.go
@ -73,7 +73,7 @@ import (
 // can lead to hard to track bugs. I much prefer this forking method, since
 // no bookkeeping has to be implemented when implementing a parser.
 type API struct {
-	stackFrames  []stackFrame // the stack frames, containing stack level-specific data
+	stackFrames  []stackFrame // the stack frames, containing stack level-specific dat
 	stackLevel   int          // the current stack level
 	stackFrame   *stackFrame  // the current stack frame
 	reader       *read.Buffer // the buffered input reader
@ -105,12 +105,13 @@ const initialByteStoreLength = 1024
 // For an overview of allowed inputs, take a look at the documentation
 // for parsekit.read.New().
 func NewAPI(input interface{}) *API {
 	reader := read.New(input)
 	api := &API{
 		reader:      read.New(input),
 		stackFrames: make([]stackFrame, initialStackDepth),
 		reader:      reader,
 	}
-	api.Byte = ByteMode{api: api}
+	api.Byte = ByteMode{api: api, reader: reader}
-	api.Rune = RuneMode{api: api}
+	api.Rune = RuneMode{api: api, reader: reader}
 	api.Output = Output{api: api}
 	api.stackFrame = &api.stackFrames[0]
@ -210,8 +211,8 @@ func (tokenAPI *API) Merge(stackLevel int) {
 	f.err = nil
 }
-// Reset moves the input cursor back to the beginning for the currently active API child.
+// Reset moves the read cursor back to the beginning for the currently active API child.
-// Additionally, any output (bytes and tokens) that was emitted from the API child are
+// Additionally, all output (bytes and tokens) that was emitted from the API child is
 // cleared as well.
 func (api *API) Reset() {
 	f := api.stackFrame
--- a/tokenize/api_bytemode.go
+++ b/tokenize/api_bytemode.go
@ -1,8 +1,11 @@
 package tokenize
 import "git.makaay.nl/mauricem/go-parsekit/read"
 // ByteMode provides byte-driven input/output functionality for the tokenize API.
 type ByteMode struct {
 	api    *API
 	reader *read.Buffer // the buffered input reader
 }
 // Peek returns the byte at the provided byte offset.
@ -11,37 +14,12 @@ type ByteMode struct {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (byteMode ByteMode) Peek(offset int) (byte, error) {
-	a := byteMode.api
+	return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset)
 	return a.reader.ByteAt(a.stackFrame.offset + offset)
 }
 // Skip is used to skip over one or more bytes that were read from the input.
 // This tells the tokenizer: "I've seen these bytes. They are of no interest.
 // I will now continue reading after these bytes."
 //
 // This will merely update the position of the cursor (which keeps track of what
 // line and column we are on in the input data). The bytes are not added to
 // the output.
 //
 // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
 // the first byte after the skipped bytes.
 func (byteMode ByteMode) Skip(b byte) {
 	f := byteMode.api.stackFrame
 	f.moveCursorByByte(b)
 	f.offset++
 }
 func (byteMode ByteMode) SkipMulti(bytes ...byte) {
 	f := byteMode.api.stackFrame
 	for _, b := range bytes {
 		f.moveCursorByByte(b)
 		f.offset++
 	}
 }
 func (byteMode ByteMode) Accept(b byte) {
 	byteMode.api.Output.AddByte(b)
-	byteMode.Skip(b)
+	byteMode.MoveCursor(b)
 }
 // AcceptMulti is used to accept one or more bytes that were read from the input.
@ -57,5 +35,35 @@ func (byteMode ByteMode) Accept(b byte) {
 // the first byte after the accepted bytes.
 func (byteMode ByteMode) AcceptMulti(bytes ...byte) {
 	byteMode.api.Output.AddBytes(bytes...)
-	byteMode.SkipMulti(bytes...)
+	byteMode.MoveCursorMulti(bytes...)
 }
 // MoveCursor updates the position of the read cursor, based on the provided byte.
 // This method takes newlines into account to keep track of line numbers and
 // column positions for the input cursor.
 //
 // After the call, byte offset 0 for Peek() and PeekMulti() will point at
 // the first byte at the new cursor position.
 func (byteMode ByteMode) MoveCursor(b byte) {
 	f := byteMode.api.stackFrame
 	if b == '\n' {
 		f.column = 0
 		f.line++
 	} else {
 		f.column++
 	}
 	f.offset++
 }
 // MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
 // This method takes newlines into account to keep track of line numbers and
 // column positions for the input cursor.
 //
 // After the call, byte offset 0 for Peek() and PeekMulti() will point at
 // the first byte at the new cursor position.
 func (byteMode ByteMode) MoveCursorMulti(bytes ...byte) {
 	for _, b := range bytes {
 		byteMode.MoveCursor(b)
 	}
 }
--- a/tokenize/api_runemode.go
+++ b/tokenize/api_runemode.go
@ -2,11 +2,14 @@ package tokenize
 import (
 	"unicode/utf8"
 	"git.makaay.nl/mauricem/go-parsekit/read"
 )
 // RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
 type RuneMode struct {
 	api    *API
 	reader *read.Buffer // the buffered input reader
 }
 // Peek returns the UTF8 rune at the provided byte offset, including its byte width.
@ -23,42 +26,7 @@ type RuneMode struct {
 // When an offset is requested that is beyond the length of the available input
 // data, then the error will be io.EOF.
 func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
-	a := runeMode.api
+	return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset)
 	return a.reader.RuneAt(a.stackFrame.offset + offset)
 }
 // Skip is used to skip over a single rune that was read from the input.
 // This tells the tokenizer: "I've seen this rune. It is of no interest.
 // I will now continue reading after this rune."
 //
 // This will merely update the position of the cursor (which keeps track of what
 // line and column we are on in the input data). The rune is not added to
 // the output.
 //
 // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
 // the first byte after the skipped rune.
 func (runeMode RuneMode) Skip(r rune) {
 	f := runeMode.api.stackFrame
 	f.moveCursorByRune(r)
 	f.offset += utf8.RuneLen(r)
 }
 // SkipMulti is used to skip over one or more runes that were read from the input.
 // This tells the tokenizer: "I've seen these runes. They are of no interest.
 // I will now continue reading after these runes."
 //
 // This will merely update the position of the cursor (which keeps track of what
 // line and column we are on in the input data). The runes are not added to
 // the output.
 //
 // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
 // the first byte after the skipped runes.
 func (runeMode RuneMode) SkipMulti(runes ...rune) {
 	f := runeMode.api.stackFrame
 	for _, r := range runes {
 		f.moveCursorByRune(r)
 		f.offset += utf8.RuneLen(r)
 	}
 }
 // Accept is used to accept a single rune that was read from the input.
@ -67,7 +35,7 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
 // reading after this rune."
 //
 // This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the rune to the tokenizer
+// and column we are on intin the input data) and add the rune to the tokenizer
 // output.
 //
 // After the call, byte offset 0 for PeekByte() and PeekRune() will point at
@ -75,15 +43,14 @@ func (runeMode RuneMode) SkipMulti(runes ...rune) {
 func (runeMode RuneMode) Accept(r rune) {
 	a := runeMode.api
 	f := a.stackFrame
 	curBytesEnd := f.bytesEnd
 	maxRequiredBytes := curBytesEnd + utf8.UTFMax
 	a.growOutputData(maxRequiredBytes)
 	w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
 	f.bytesEnd += w
 	f.offset += w
-	f.moveCursorByRune(r)
+	runeMode.MoveCursor(r)
 }
 // AcceptMulti is used to accept one or more runes that were read from the input.
@ -100,17 +67,47 @@ func (runeMode RuneMode) Accept(r rune) {
 func (runeMode RuneMode) AcceptMulti(runes ...rune) {
 	a := runeMode.api
 	f := a.stackFrame
 	runesAsString := string(runes)
 	byteLen := len(runesAsString)
 	curBytesEnd := f.bytesEnd
 	newBytesEnd := curBytesEnd + byteLen
-	a.growOutputData(newBytesEnd)
+	curBytesEnd := f.bytesEnd
-	copy(a.outputData[curBytesEnd:], runesAsString)
+	maxBytes := curBytesEnd + len(runes)*utf8.UTFMax
-	f.bytesEnd = newBytesEnd
+	a.growOutputData(maxBytes)
 	f.offset += byteLen
 	for _, r := range runes {
-		f.moveCursorByRune(r)
+		w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
 		curBytesEnd += w
 		runeMode.MoveCursor(r)
 	}
 	f.bytesEnd = curBytesEnd
 }
 // MoveCursor updates the position of the read cursor, based on the provided rune.
 // This method takes newlines into account to keep track of line numbers and
 // column positions for the input cursor.
 //
 // After the call, byte offset 0 for Peek() and PeekMulti() will point at
 // the first rune at the new cursor position.
 func (runeMode RuneMode) MoveCursor(r rune) int {
 	f := runeMode.api.stackFrame
 	if r == '\n' {
 		f.column = 0
 		f.line++
 	} else {
 		f.column++
 	}
 	width := utf8.RuneLen(r)
 	f.offset += width
 	return width
 }
 // MoveCursorMulti updates the position of the read cursor, based on the provided runes.
 // This method takes newlines into account to keep track of line numbers and
 // column positions for the input cursor.
 //
 // After the call, byte offset 0 for Peek() and PeekMulti() will point at
 // the first rune at the new cursor position.
 func (runeMode RuneMode) MoveCursorMulti(runes ...rune) {
 	for _, r := range runes {
 		runeMode.MoveCursor(r)
 	}
 }
--- a/tokenize/api_test.go
+++ b/tokenize/api_test.go
@ -94,7 +94,7 @@ func ExampleAPI_SkipRune() {
 		if strings.ContainsRune("aeiouAEIOU", r) {
 			api.Rune.Accept(r)
 		} else {
-			api.Rune.Skip(r)
+			api.Rune.MoveCursor(r)
 		}
 	}
--- a/tokenize/cursor.go
+++ b/tokenize/cursor.go
@ -1,30 +0,0 @@
 package tokenize
 // move updates the position of the cursor, based on the provided input string.
 // The input string represents the runes that the cursor must be moved over.
 // This method will take newlines into account to keep track of line numbers and
 // column positions automatically.
 func (f *stackFrame) moveCursor(input string) *stackFrame {
 	for _, r := range input {
 		f.moveCursorByRune(r)
 	}
 	return f
 }
 func (f *stackFrame) moveCursorByRune(r rune) {
 	if r == '\n' {
 		f.column = 0
 		f.line++
 	} else {
 		f.column++
 	}
 }
 func (f *stackFrame) moveCursorByByte(b byte) {
 	if b == '\n' {
 		f.column = 0
 		f.line++
 	} else {
 		f.column++
 	}
 }
--- a/tokenize/cursor_test.go
+++ b/tokenize/cursor_test.go
@ -6,27 +6,29 @@ import (
 func TestMoveCursorByBytes(t *testing.T) {
 	api := NewAPI("")
-	api.stackFrame.moveCursorByByte('a')
+	api.Byte.MoveCursor('a')
-	api.stackFrame.moveCursorByByte('b')
+	api.Byte.MoveCursor('b')
-	api.stackFrame.moveCursorByByte('c')
+	api.Byte.MoveCursor('c')
-	api.stackFrame.moveCursorByByte('\r')
+	api.Byte.MoveCursor('\r')
-	api.stackFrame.moveCursorByByte('\n')
+	api.Byte.MoveCursor('\n')
-	api.stackFrame.moveCursorByByte('a')
+	api.Byte.MoveCursor('a')
-	api.stackFrame.moveCursorByByte('b')
+	api.Byte.MoveCursor('b')
 	AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte")
 	AssertEqual(t, 7, api.stackFrame.offset, "Offset after moving by byte")
 }
 func TestMoveCursorByRunes(t *testing.T) {
 	api := NewAPI("")
-	api.stackFrame.moveCursorByRune('ɹ')
+	api.Rune.MoveCursor('ɹ')
-	api.stackFrame.moveCursorByRune('n')
+	api.Rune.MoveCursor('n')
-	api.stackFrame.moveCursorByRune('u')
+	api.Rune.MoveCursor('u')
-	api.stackFrame.moveCursorByRune('\r')
+	api.Rune.MoveCursor('\r')
-	api.stackFrame.moveCursorByRune('\n')
+	api.Rune.MoveCursor('\n')
-	api.stackFrame.moveCursorByRune('ǝ')
+	api.Rune.MoveCursor('ǝ')
 	AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune")
 	AssertEqual(t, 8, api.stackFrame.offset, "Offset after moving by rune")
 }
 func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
@ -49,7 +51,9 @@ func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) {
 	} {
 		api := NewAPI("")
 		for _, s := range test.input {
-			api.stackFrame.moveCursor(s)
+			for _, r := range s {
 				api.Rune.MoveCursor(r)
 			}
 		}
 		if api.stackFrame.line != test.line {
 			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line)
--- a/tokenize/handlers_builtin.go
+++ b/tokenize/handlers_builtin.go
@ -476,7 +476,7 @@ func MatchNewline() Handler {
 			return false
 		}
 		if b1 == '\n' {
-			t.Byte.AcceptMulti(b1)
+			t.Byte.Accept(b1)
 			return true
 		}
 		if b1 == '\r' {
@ -1121,7 +1121,7 @@ func MatchInteger(normalize bool) Handler {
 				// The next character is a zero, skip the leading zero and check again.
 				if err == nil && b2 == b {
-					t.Byte.Skip('0')
+					t.Byte.MoveCursor('0')
 					continue
 				}
 				// The next character is not a zero, nor a digit at all.
@ -1131,7 +1131,7 @@ func MatchInteger(normalize bool) Handler {
 					return true
 				}
 				// The next character is a digit. Skip the leading zero and go with the digit.
-				t.Byte.Skip('0')
+				t.Byte.MoveCursor('0')
 				t.Byte.Accept(b2)
 				break
 			}
@ -1170,7 +1170,7 @@ func MatchDecimal(normalize bool) Handler {
 				// The next character is a zero, skip the leading zero and check again.
 				if err == nil && b2 == b {
-					t.Byte.Skip('0')
+					t.Byte.MoveCursor('0')
 					continue
 				}
 				// The next character is a dot, go with the zero before the dot and
@ -1186,7 +1186,7 @@ func MatchDecimal(normalize bool) Handler {
 					return true
 				}
 				// The next character is a digit. Skip the leading zero and go with the digit.
-				t.Byte.Skip('0')
+				t.Byte.MoveCursor('0')
 				t.Byte.Accept(b2)
 				break
 			}
@ -1198,7 +1198,7 @@ func MatchDecimal(normalize bool) Handler {
 			if err != nil || b < '0' || b > '9' {
 				break
 			}
-			t.Byte.AcceptMulti(b)
+			t.Byte.Accept(b)
 		}
 		// No dot or no digit after a dot? Then we're done.
@ -1266,6 +1266,7 @@ func MatchBoolean() Handler {
 				t.Byte.Accept(b1)
 				return true
 			}
 			// TODO Multibyte peeks (also useful for strings)
 			b3, _ := t.Byte.Peek(2)
 			b4, _ := t.Byte.Peek(3)
 			b5, err := t.Byte.Peek(4)
@ -1362,7 +1363,7 @@ func MatchOctet(normalize bool) Handler {
 		if err != nil || b2 < '0' || b2 > '9' {
 			// Output 2-digit octet.
 			if normalize && b0 == '0' {
-				t.Byte.Skip(b0)
+				t.Byte.MoveCursor(b0)
 				t.Byte.Accept(b1)
 			} else {
 				t.Byte.AcceptMulti(b0, b1)
@ -1377,9 +1378,9 @@ func MatchOctet(normalize bool) Handler {
 		// Output 3-digit octet.
 		if normalize && b0 == '0' {
-			t.Byte.Skip(b0)
+			t.Byte.MoveCursor(b0)
 			if b1 == '0' {
-				t.Byte.Skip(b1)
+				t.Byte.MoveCursor(b1)
 			} else {
 				t.Byte.Accept(b1)
 			}
@ -1598,7 +1599,7 @@ func ModifyDropUntilEndOfLine() Handler {
 			if b == '\n' {
 				return true
 			}
-			t.Byte.Skip(b)
+			t.Byte.MoveCursor(b)
 		}
 	}
 }