Fixed cursor position tracking (to report row + column on error). All tests are green again :-)
This commit is contained in:
parent
29a13834dd
commit
453a625a38
|
@ -168,12 +168,7 @@ func (l *Lexer) backup() {
|
|||
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
||||
func (l *Lexer) peek() (rune, int, bool) {
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
return utf8.RuneError, w, false
|
||||
default:
|
||||
return r, w, true
|
||||
}
|
||||
return r, w, r != utf8.RuneError
|
||||
}
|
||||
|
||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||
|
@ -187,7 +182,7 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
|
|||
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
return peeked, 0, false
|
||||
return peeked, width, false
|
||||
default:
|
||||
width += w
|
||||
peeked = append(peeked, r)
|
||||
|
@ -209,23 +204,10 @@ func (l *Lexer) acceptNext(count int) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// acceptFrom adds the next rune from the input to the string buffer
|
||||
// when it matches in the provided runes. If the next rune does
|
||||
// not match, false is returned.
|
||||
// func (l *Lexer) acceptFrom(runes string) bool {
|
||||
// r, ok := l.next()
|
||||
// if strings.IndexRune(runes, r) >= 0 {
|
||||
// l.buffer.WriteRune(r)
|
||||
// return true
|
||||
// }
|
||||
// l.backup()
|
||||
// return false
|
||||
// }
|
||||
|
||||
// acceptRun adds consecutive runes from the input to the string
|
||||
// buffer when they match the provided runes. If no runes were added
|
||||
// at all, false is returned.
|
||||
func (l *Lexer) acceptRun(match string) bool {
|
||||
// acceptConsecutive adds consecutive runes from the input to the string
|
||||
// buffer as long as they satisfy the provided match string.
|
||||
// If any runes were added then true is returned, false otherwise.
|
||||
func (l *Lexer) acceptConsecutive(match string) bool {
|
||||
accepted := false
|
||||
for l.accept(match) {
|
||||
accepted = true
|
||||
|
@ -233,42 +215,45 @@ func (l *Lexer) acceptRun(match string) bool {
|
|||
return accepted
|
||||
}
|
||||
|
||||
// TODO: using -1 as a sentinel rune for end of file is ugly; find a cleaner representation.
|
||||
var endOfFile rune = -1
|
||||
|
||||
// next returns the next rune from the input and a boolean indicating if
|
||||
// reading the input was successful.
|
||||
// When the end of input is reached, or an invalid UTF8 character is
|
||||
// read, then false is returned.
|
||||
func (l *Lexer) next() (rune, bool) {
|
||||
r, w, ok := l.peek()
|
||||
if ok {
|
||||
l.width = w
|
||||
l.pos += w
|
||||
l.advanceCursor(r)
|
||||
return r, true
|
||||
}
|
||||
l.width = 0
|
||||
if r == utf8.RuneError && w == 0 {
|
||||
l.emitError("unexpected end of file")
|
||||
} else {
|
||||
l.emitError("invalid UTF8 character")
|
||||
}
|
||||
return r, false
|
||||
}
|
||||
|
||||
func (l *Lexer) advanceCursor(r rune) {
|
||||
if l.newline {
|
||||
l.linepos = 0
|
||||
l.linenr++
|
||||
} else {
|
||||
l.linepos++
|
||||
}
|
||||
l.width = 0
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
switch {
|
||||
case r == utf8.RuneError && w == 0:
|
||||
l.emitError("unexpected end of file")
|
||||
return utf8.RuneError, false
|
||||
case r == utf8.RuneError:
|
||||
l.emitError("invalid UTF8 character")
|
||||
return utf8.RuneError, false
|
||||
default:
|
||||
l.width = w
|
||||
l.pos += w
|
||||
l.newline = r == '\n'
|
||||
return r, true
|
||||
}
|
||||
l.newline = r == '\n'
|
||||
}
|
||||
|
||||
// skip skips runes when all provided matches are satisfied.
|
||||
// skipMatching skips runes, but only when all provided matches are satisfied.
|
||||
// Returns true when one or more runes were skipped.
|
||||
func (l *Lexer) skipMatching(matches ...string) bool {
|
||||
if _, w, ok := l.match(matches...); ok {
|
||||
if runes, w, ok := l.match(matches...); ok {
|
||||
l.pos += w
|
||||
for _, r := range runes {
|
||||
l.advanceCursor(r)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
@ -297,11 +282,18 @@ func (l *Lexer) accept(match string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (l *Lexer) upcoming(runes ...string) bool {
|
||||
_, _, ok := l.match(runes...)
|
||||
// upcoming checks if the upcoming runes satisfy the provided rune matches.
|
||||
// This is a lot like the match method, with the difference that
|
||||
// this one only returns the boolean value.
|
||||
func (l *Lexer) upcoming(matches ...string) bool {
|
||||
_, _, ok := l.match(matches...)
|
||||
return ok
|
||||
}
|
||||
|
||||
// match checks if the upcoming runes satisfy the provided rune matches.
|
||||
// It returns a slice of runes that were found, their total byte width
|
||||
// and a boolean indicating whether or not all provided matches matched
|
||||
// the input data.
|
||||
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
|
||||
peeked, width, ok := l.peekMulti(len(matches))
|
||||
if ok {
|
||||
|
|
|
@ -67,7 +67,7 @@ func stateKey(l *Lexer) stateFn {
|
|||
// keys are allowed to be composed of only ASCII digits,
|
||||
// e.g. 1234, but are always interpreted as strings.
|
||||
func statebareKeyChars(l *Lexer) stateFn {
|
||||
l.acceptRun(bareKeyChars)
|
||||
l.acceptConsecutive(bareKeyChars)
|
||||
l.emitLiteral(ItemKey)
|
||||
return stateEndOfKeyOrKeyDot
|
||||
}
|
||||
|
|
|
@ -9,12 +9,12 @@ import (
|
|||
)
|
||||
|
||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||
_, err := lexer.Lex("# 12345\n# 67890\r\n# 12345\xbc").ToArray()
|
||||
_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
||||
t.Logf("Got error: %s", err.Error())
|
||||
if err.LineNr != 2 {
|
||||
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 2)
|
||||
if err.LineNr != 4 {
|
||||
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
|
||||
}
|
||||
if err.LinePos != 2 {
|
||||
if err.LinePos != 6 {
|
||||
t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
|
||||
}
|
||||
}
|
||||
|
@ -62,13 +62,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
|
|||
runStatesTs(t, []statesT{
|
||||
{"bare with whitespace", " a ", "[a]", err},
|
||||
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
|
||||
// {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
|
||||
// {"bare numbers", "0123456789", "[0123456789]", err},
|
||||
// {"bare underscore", "_", "[_]", err},
|
||||
// {"bare dash", "-", "[-]", err},
|
||||
// {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
|
||||
// {"bare dotted", "a._.c", "[a].[_].[c]", err},
|
||||
// {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
||||
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
|
||||
{"bare numbers", "0123456789", "", err},
|
||||
{"bare underscore", "_", "", err},
|
||||
{"bare dash", "-", "", err},
|
||||
{"bare big mix", "-hey_good_Lookin123-", "", err},
|
||||
{"bare dotted", "a._.c", "[a].[_].", err},
|
||||
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -123,12 +123,13 @@ func TestBasicString(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
||||
err := "Invalid escape sequence in basic string"
|
||||
runStatesTs(t, []statesT{
|
||||
{"invalid escape sequence", `a="\x"`, "[a]=", "Invalid escape sequence in basic string"},
|
||||
{"too short \\u UTF8", `a="\u123"`, "[a]=", "Invalid escape sequence in basic string"},
|
||||
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", "Invalid escape sequence in basic string"},
|
||||
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", "Invalid escape sequence in basic string"},
|
||||
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", "Invalid escape sequence in basic string"},
|
||||
{"invalid escape sequence", `a="\x"`, "[a]=", err},
|
||||
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
|
||||
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
|
||||
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
|
||||
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue