From 92e6eec7f39833ba0ea4f205c29a9cdb12633a53 Mon Sep 17 00:00:00 2001
From: Maurice Makaay
Date: Sun, 30 Jun 2019 10:16:46 +0000
Subject: [PATCH] implemented Cursor.moveByRune(), to get rid of some useless rune->string conversion for updating cursor positions.

---
 tokenize/api.go    |  4 +---
 tokenize/cursor.go | 26 +++++++++++++++++---------
 tokenize/result.go |  5 +++--
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/tokenize/api.go b/tokenize/api.go
index 3ff37a3..e64873e 100644
--- a/tokenize/api.go
+++ b/tokenize/api.go
@@ -1,8 +1,6 @@
 package tokenize
 
 import (
-	"fmt"
-
 	"git.makaay.nl/mauricem/go-parsekit/read"
 )
 
@@ -120,7 +118,7 @@ func (i *API) Accept() {
 		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
 	}
 	i.result.runes = append(i.result.runes, i.result.lastRune.r)
-	i.result.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
+	i.result.cursor.moveByRune(i.result.lastRune.r)
 	i.result.offset++
 	i.result.lastRune = nil
 }
diff --git a/tokenize/cursor.go b/tokenize/cursor.go
index 5c1ea9f..db8af3b 100644
--- a/tokenize/cursor.go
+++ b/tokenize/cursor.go
@@ -1,6 +1,9 @@
 package tokenize
 
-import "fmt"
+import (
+	"fmt"
+	"unicode/utf8"
+)
 
 // Cursor represents the position of a cursor in various ways.
 type Cursor struct {
@@ -23,15 +26,20 @@ func (c Cursor) String() string {
 // This method will take newlines into account to keep track of line numbers and
 // column positions automatically.
 func (c *Cursor) move(input string) *Cursor {
-	c.Byte += len(input)
 	for _, r := range input {
-		c.Rune++
-		if r == '\n' {
-			c.Column = 0
-			c.Line++
-		} else {
-			c.Column++
-		}
+		c.moveByRune(r)
+	}
+	return c
+}
+
+func (c *Cursor) moveByRune(r rune) *Cursor {
+	c.Byte += utf8.RuneLen(r)
+	c.Rune++
+	if r == '\n' {
+		c.Column = 0
+		c.Line++
+	} else {
+		c.Column++
 	}
 	return c
 }
diff --git a/tokenize/result.go b/tokenize/result.go
index a91ee0c..47d6cfd 100644
--- a/tokenize/result.go
+++ b/tokenize/result.go
@@ -17,8 +17,9 @@ type Result struct {
 }
 
 type runeInfo struct {
-	r   rune
-	err error
+	r     rune
+	width int8
+	err   error
 }
 
 // Token defines a lexical token as produced by tokenize.Handlers.