Fixed cursor position tracking (to report row + column on error). All tests are green again :-)

2019-05-17 00:11:12 +00:00 · 2019-05-17 00:11:12 +00:00 · 453a625a38
parent 29a13834dd
commit 453a625a38
3 changed files with 56 additions and 63 deletions
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -168,12 +168,7 @@ func (l *Lexer) backup() {
 // no upcoming rune can be peeked (end of data or invalid UTF8 character).
 func (l *Lexer) peek() (rune, int, bool) {
 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
-	switch {
+	return r, w, r != utf8.RuneError
 	case r == utf8.RuneError:
 		return utf8.RuneError, w, false
 	default:
 		return r, w, true
 	}
 }
 // peekMulti takes a peek at multiple upcoming runes in the input.
@@ -187,7 +182,7 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
 		r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
 		switch {
 		case r == utf8.RuneError:
-			return peeked, 0, false
+			return peeked, width, false
 		default:
 			width += w
 			peeked = append(peeked, r)
@@ -209,23 +204,10 @@ func (l *Lexer) acceptNext(count int) bool {
 	return true
 }
-// acceptFrom adds the next rune from the input to the string buffer
+// acceptConsecutive adds consecutive runes from the input to the string
-// when it matches in the provided runes. If the next rune does
+// buffer when they match the provided runes.
-// not match, false is returned.
+// If any runes were added then true is returned, false otherwise.
-// func (l *Lexer) acceptFrom(runes string) bool {
+func (l *Lexer) acceptConsecutive(match string) bool {
 // 	r, ok := l.next()
 // 	if strings.IndexRune(runes, r) >= 0 {
 // 		l.buffer.WriteRune(r)
 // 		return true
 // 	}
 // 	l.backup()
 // 	return false
 // }
 // acceptRun adds consecutive runes from the input to the string
 // buffer when they match the provided runes. If no runes were added
 // at all, false is returned.
 func (l *Lexer) acceptRun(match string) bool {
 	accepted := false
 	for l.accept(match) {
 		accepted = true
@@ -233,42 +215,45 @@ func (l *Lexer) acceptRun(match string) bool {
 	return accepted
 }
 // TODO meh... ugly rune.
 var endOfFile rune = -1
 // next returns the next rune from the input and a boolean indicating if
 // reading the input was successful.
 // When the end of input is reached, or an invalid UTF8 character is
 // read, then false is returned.
 func (l *Lexer) next() (rune, bool) {
 	r, w, ok := l.peek()
 	if ok {
 		l.width = w
 		l.pos += w
 		l.advanceCursor(r)
 		return r, true
 	}
 	l.width = 0
 	if r == utf8.RuneError && w == 0 {
 		l.emitError("unexpected end of file")
 	} else {
 		l.emitError("invalid UTF8 character")
 	}
 	return r, false
 }
 func (l *Lexer) advanceCursor(r rune) {
 	if l.newline {
 		l.linepos = 0
 		l.linenr++
 	} else {
 		l.linepos++
 	}
-	l.width = 0
+	l.newline = r == '\n'
 	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
 	switch {
 	case r == utf8.RuneError && w == 0:
 		l.emitError("unexpected end of file")
 		return utf8.RuneError, false
 	case r == utf8.RuneError:
 		l.emitError("invalid UTF8 character")
 		return utf8.RuneError, false
 	default:
 		l.width = w
 		l.pos += w
 		l.newline = r == '\n'
 		return r, true
 	}
 }
-// skip skips runes when all provided matches are satisfied.
+// skipMatching skips runes, but only when all provided matches are satisfied.
 // Returns true when one or more runes were skipped.
 func (l *Lexer) skipMatching(matches ...string) bool {
-	if _, w, ok := l.match(matches...); ok {
+	if runes, w, ok := l.match(matches...); ok {
 		l.pos += w
 		for _, r := range runes {
 			l.advanceCursor(r)
 		}
 		return true
 	}
 	return false
@@ -297,11 +282,18 @@ func (l *Lexer) accept(match string) bool {
 	return false
 }
-func (l *Lexer) upcoming(runes ...string) bool {
+// upcoming checks if the upcoming runes satisfy the provided rune matches.
-	_, _, ok := l.match(runes...)
+// This is a lot like the match method, with the difference that
 // this one only returns the boolean value.
 func (l *Lexer) upcoming(matches ...string) bool {
 	_, _, ok := l.match(matches...)
 	return ok
 }
 // match checks if the upcoming runes satisfy the provided rune matches.
 // It returns a slice of runes that were found, their total byte width
 // and a boolean indicating whether or not all provided matches matched
 // the input data.
 func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
 	peeked, width, ok := l.peekMulti(len(matches))
 	if ok {
--- a/lexer/states.go
+++ b/lexer/states.go
@@ -67,7 +67,7 @@ func stateKey(l *Lexer) stateFn {
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
 func statebareKeyChars(l *Lexer) stateFn {
-	l.acceptRun(bareKeyChars)
+	l.acceptConsecutive(bareKeyChars)
 	l.emitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }
--- a/lexer/states_test.go
+++ b/lexer/states_test.go
@@ -9,12 +9,12 @@ import (
 )
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := lexer.Lex("# 12345\n# 67890\r\n# 12345\xbc").ToArray()
+	_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
-	if err.LineNr != 2 {
+	if err.LineNr != 4 {
-		t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 2)
+		t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
 	}
-	if err.LinePos != 2 {
+	if err.LinePos != 6 {
 		t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
 	}
 }
@@ -62,13 +62,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
 	runStatesTs(t, []statesT{
 		{"bare with whitespace", " a ", "[a]", err},
 		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
-		// {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
+		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
-		// {"bare numbers", "0123456789", "[0123456789]", err},
+		{"bare numbers", "0123456789", "", err},
-		// {"bare underscore", "_", "[_]", err},
+		{"bare underscore", "_", "", err},
-		// {"bare dash", "-", "[-]", err},
+		{"bare dash", "-", "", err},
-		// {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
+		{"bare big mix", "-hey_good_Lookin123-", "", err},
-		// {"bare dotted", "a._.c", "[a].[_].[c]", err},
+		{"bare dotted", "a._.c", "[a].[_].", err},
-		// {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
+		{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
 	})
 }
@@ -123,12 +123,13 @@ func TestBasicString(t *testing.T) {
 }
 func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
 	err := "Invalid escape sequence in basic string"
 	runStatesTs(t, []statesT{
-		{"invalid escape sequence", `a="\x"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"invalid escape sequence", `a="\x"`, "[a]=", err},
-		{"too short \\u UTF8", `a="\u123"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
-		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
-		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
-		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", "Invalid escape sequence in basic string"},
+		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
 	})
 }