Fixed cursor position tracking (to report row + column on error). All tests are green again :-)

This commit is contained in:
Maurice Makaay 2019-05-17 00:11:12 +00:00
parent 29a13834dd
commit 453a625a38
3 changed files with 56 additions and 63 deletions

View File

@ -168,12 +168,7 @@ func (l *Lexer) backup() {
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
func (l *Lexer) peek() (rune, int, bool) {
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
switch {
case r == utf8.RuneError:
return utf8.RuneError, w, false
default:
return r, w, true
}
return r, w, r != utf8.RuneError
}
// peekMulti takes a peek at multiple upcoming runes in the input.
@ -187,7 +182,7 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
switch {
case r == utf8.RuneError:
return peeked, 0, false
return peeked, width, false
default:
width += w
peeked = append(peeked, r)
@ -209,23 +204,10 @@ func (l *Lexer) acceptNext(count int) bool {
return true
}
// acceptFrom adds the next rune from the input to the string buffer
// when it matches in the provided runes. If the next rune does
// not match, false is returned.
// func (l *Lexer) acceptFrom(runes string) bool {
// r, ok := l.next()
// if strings.IndexRune(runes, r) >= 0 {
// l.buffer.WriteRune(r)
// return true
// }
// l.backup()
// return false
// }
// acceptRun adds consecutive runes from the input to the string
// buffer when they match the provided runes. If no runes were added
// at all, false is returned.
func (l *Lexer) acceptRun(match string) bool {
// acceptConsecutive adds consecutive runes from the input to the string
// buffer for as long as they satisfy the provided rune match.
// If any runes were added then true is returned, false otherwise.
func (l *Lexer) acceptConsecutive(match string) bool {
accepted := false
for l.accept(match) {
accepted = true
@ -233,42 +215,45 @@ func (l *Lexer) acceptRun(match string) bool {
return accepted
}
// TODO meh... ugly rune.
var endOfFile rune = -1
// next returns the next rune from the input and a boolean indicating if
// reading the input was successful.
// When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned.
func (l *Lexer) next() (rune, bool) {
r, w, ok := l.peek()
if ok {
l.width = w
l.pos += w
l.advanceCursor(r)
return r, true
}
l.width = 0
if r == utf8.RuneError && w == 0 {
l.emitError("unexpected end of file")
} else {
l.emitError("invalid UTF8 character")
}
return r, false
}
func (l *Lexer) advanceCursor(r rune) {
if l.newline {
l.linepos = 0
l.linenr++
} else {
l.linepos++
}
l.width = 0
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
switch {
case r == utf8.RuneError && w == 0:
l.emitError("unexpected end of file")
return utf8.RuneError, false
case r == utf8.RuneError:
l.emitError("invalid UTF8 character")
return utf8.RuneError, false
default:
l.width = w
l.pos += w
l.newline = r == '\n'
return r, true
}
}
// skip skips runes when all provided matches are satisfied.
// skipMatching skips runes, but only when all provided matches are satisfied.
// Returns true when one or more runes were skipped.
func (l *Lexer) skipMatching(matches ...string) bool {
if _, w, ok := l.match(matches...); ok {
if runes, w, ok := l.match(matches...); ok {
l.pos += w
for _, r := range runes {
l.advanceCursor(r)
}
return true
}
return false
@ -297,11 +282,18 @@ func (l *Lexer) accept(match string) bool {
return false
}
func (l *Lexer) upcoming(runes ...string) bool {
_, _, ok := l.match(runes...)
// upcoming checks if the upcoming runes satisfy the provided rune matches.
// This is a lot like the match method, with the difference that
// this one only returns the boolean value.
func (l *Lexer) upcoming(matches ...string) bool {
_, _, ok := l.match(matches...)
return ok
}
// match checks if the upcoming runes satisfy the provided rune matches.
// It returns a slice of runes that were found, their total byte width
// and a boolean indicating whether or not all provided matches matched
// the input data.
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
peeked, width, ok := l.peekMulti(len(matches))
if ok {

View File

@ -67,7 +67,7 @@ func stateKey(l *Lexer) stateFn {
// keys are allowed to be composed of only ASCII digits,
// e.g. 1234, but are always interpreted as strings.
func statebareKeyChars(l *Lexer) stateFn {
l.acceptRun(bareKeyChars)
l.acceptConsecutive(bareKeyChars)
l.emitLiteral(ItemKey)
return stateEndOfKeyOrKeyDot
}

View File

@ -9,12 +9,12 @@ import (
)
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
_, err := lexer.Lex("# 12345\n# 67890\r\n# 12345\xbc").ToArray()
_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
t.Logf("Got error: %s", err.Error())
if err.LineNr != 2 {
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 2)
if err.LineNr != 4 {
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
}
if err.LinePos != 2 {
if err.LinePos != 6 {
t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
}
}
@ -62,13 +62,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
runStatesTs(t, []statesT{
{"bare with whitespace", " a ", "[a]", err},
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
// {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
// {"bare numbers", "0123456789", "[0123456789]", err},
// {"bare underscore", "_", "[_]", err},
// {"bare dash", "-", "[-]", err},
// {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
// {"bare dotted", "a._.c", "[a].[_].[c]", err},
// {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
{"bare numbers", "0123456789", "", err},
{"bare underscore", "_", "", err},
{"bare dash", "-", "", err},
{"bare big mix", "-hey_good_Lookin123-", "", err},
{"bare dotted", "a._.c", "[a].[_].", err},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
})
}
@ -123,12 +123,13 @@ func TestBasicString(t *testing.T) {
}
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
err := "Invalid escape sequence in basic string"
runStatesTs(t, []statesT{
{"invalid escape sequence", `a="\x"`, "[a]=", "Invalid escape sequence in basic string"},
{"too short \\u UTF8", `a="\u123"`, "[a]=", "Invalid escape sequence in basic string"},
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", "Invalid escape sequence in basic string"},
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", "Invalid escape sequence in basic string"},
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", "Invalid escape sequence in basic string"},
{"invalid escape sequence", `a="\x"`, "[a]=", err},
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
})
}