Fixed cursor position tracking (to report row + column on error). All tests are green again :-)

2019-05-17 00:11:12 +00:00 · 2019-05-17 00:11:12 +00:00 · 453a625a38
parent 29a13834dd
commit 453a625a38
3 changed files with 56 additions and 63 deletions
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@ -168,12 +168,7 @@ func (l *Lexer) backup() {
 // no upcoming rune can be peeked (end of data or invalid UTF8 character).
 func (l *Lexer) peek() (rune, int, bool) {
 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
-	switch {
-	case r == utf8.RuneError:
-		return utf8.RuneError, w, false
-	default:
-		return r, w, true
-	}
+	return r, w, r != utf8.RuneError
 }

 // peekMulti takes a peek at multiple upcoming runes in the input.
@ -187,7 +182,7 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
 		r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
 		switch {
 		case r == utf8.RuneError:
-			return peeked, 0, false
+			return peeked, width, false
 		default:
 			width += w
 			peeked = append(peeked, r)
@ -209,23 +204,10 @@ func (l *Lexer) acceptNext(count int) bool {
 	return true
 }

-// acceptFrom adds the next rune from the input to the string buffer
-// when it matches in the provided runes. If the next rune does
-// not match, false is returned.
-// func (l *Lexer) acceptFrom(runes string) bool {
-// 	r, ok := l.next()
-// 	if strings.IndexRune(runes, r) >= 0 {
-// 		l.buffer.WriteRune(r)
-// 		return true
-// 	}
-// 	l.backup()
-// 	return false
-// }
-
-// acceptRun adds consecutive runes from the input to the string
-// buffer when they match the provided runes. If no runes were added
-// at all, false it returned.
-func (l *Lexer) acceptRun(match string) bool {
+// acceptConsecutive adds consecutive runes from the input to the string
+// buffer for as long as they satisfy the provided match.
+// If any runes were added then true is returned, false otherwise.
+func (l *Lexer) acceptConsecutive(match string) bool {
 	accepted := false
 	for l.accept(match) {
 		accepted = true
@ -233,42 +215,45 @@ func (l *Lexer) acceptRun(match string) bool {
 	return accepted
 }

-// TODO meh... ugly rune.
-var endOfFile rune = -1
-
 // next returns the next rune from the input and a boolean indicating if
 // reading the input was successful.
 // When the end of input is reached, or an invalid UTF8 character is
 // read, then false is returned.
 func (l *Lexer) next() (rune, bool) {
+	r, w, ok := l.peek()
+	if ok {
+		l.width = w
+		l.pos += w
+		l.advanceCursor(r)
+		return r, true
+	}
+	l.width = 0
+	if r == utf8.RuneError && w == 0 {
+		l.emitError("unexpected end of file")
+	} else {
+		l.emitError("invalid UTF8 character")
+	}
+	return r, false
+}
+
+func (l *Lexer) advanceCursor(r rune) {
 	if l.newline {
 		l.linepos = 0
 		l.linenr++
 	} else {
 		l.linepos++
 	}
-	l.width = 0
-	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
-	switch {
-	case r == utf8.RuneError && w == 0:
-		l.emitError("unexpected end of file")
-		return utf8.RuneError, false
-	case r == utf8.RuneError:
-		l.emitError("invalid UTF8 character")
-		return utf8.RuneError, false
-	default:
-		l.width = w
-		l.pos += w
 	l.newline = r == '\n'
-		return r, true
-	}
 }

-// skip skips runes when all provided matches are satisfied.
+// skipMatching skips runes, but only when all provided matches are satisfied.
 // Returns true when one or more runes were skipped.
 func (l *Lexer) skipMatching(matches ...string) bool {
-	if _, w, ok := l.match(matches...); ok {
+	if runes, w, ok := l.match(matches...); ok {
 		l.pos += w
+		for _, r := range runes {
+			l.advanceCursor(r)
+		}
 		return true
 	}
 	return false
@ -297,11 +282,18 @@ func (l *Lexer) accept(match string) bool {
 	return false
 }

-func (l *Lexer) upcoming(runes ...string) bool {
-	_, _, ok := l.match(runes...)
+// upcoming checks if the upcoming runes satisfy the provided rune matches.
+// This is a lot like the match method, with the difference that
+// this one only returns the boolean value.
+func (l *Lexer) upcoming(matches ...string) bool {
+	_, _, ok := l.match(matches...)
 	return ok
 }

+// match checks if the upcoming runes satisfy the provided rune matches.
+// It returns a slice of runes that were found, their total byte width
+// and a boolean indicating whether or not all provided matches matched
+// the input data.
 func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
 	peeked, width, ok := l.peekMulti(len(matches))
 	if ok {
--- a/lexer/states.go
+++ b/lexer/states.go
@ -67,7 +67,7 @@ func stateKey(l *Lexer) stateFn {
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
 func statebareKeyChars(l *Lexer) stateFn {
-	l.acceptRun(bareKeyChars)
+	l.acceptConsecutive(bareKeyChars)
 	l.emitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }
--- a/lexer/states_test.go
+++ b/lexer/states_test.go
@ -9,12 +9,12 @@ import (
 )

 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := lexer.Lex("# 12345\n# 67890\r\n# 12345\xbc").ToArray()
+	_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
-	if err.LineNr != 2 {
-		t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 2)
+	if err.LineNr != 4 {
+		t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
 	}
-	if err.LinePos != 2 {
+	if err.LinePos != 6 {
 		t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
 	}
 }
@ -62,13 +62,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"bare with whitespace", " a ", "[a]", err},
 		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
-		// {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
-		// {"bare numbers", "0123456789", "[0123456789]", err},
-		// {"bare underscore", "_", "[_]", err},
-		// {"bare dash", "-", "[-]", err},
-		// {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
-		// {"bare dotted", "a._.c", "[a].[_].[c]", err},
-		// {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
+		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
+		{"bare numbers", "0123456789", "", err},
+		{"bare underscore", "_", "", err},
+		{"bare dash", "-", "", err},
+		{"bare big mix", "-hey_good_Lookin123-", "", err},
+		{"bare dotted", "a._.c", "[a].[_].", err},
+		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
 	})
 }

@ -123,12 +123,13 @@ func TestBasicString(t *testing.T) {
 }

 func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
+	err := "Invalid escape sequence in basic string"
 	runStatesTs(t, []statesT{
-		{"invalid escape sequence", `a="\x"`, "[a]=", "Invalid escape sequence in basic string"},
-		{"too short \\u UTF8", `a="\u123"`, "[a]=", "Invalid escape sequence in basic string"},
-		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", "Invalid escape sequence in basic string"},
-		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", "Invalid escape sequence in basic string"},
-		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"invalid escape sequence", `a="\x"`, "[a]=", err},
+		{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
+		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
+		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
+		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
 	})
 }