Fixed cursor position tracking (to report row + column on error). All tests are green again :-)
This commit is contained in:
parent
29a13834dd
commit
453a625a38
|
@ -168,12 +168,7 @@ func (l *Lexer) backup() {
|
||||||
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
||||||
func (l *Lexer) peek() (rune, int, bool) {
|
func (l *Lexer) peek() (rune, int, bool) {
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||||
switch {
|
return r, w, r != utf8.RuneError
|
||||||
case r == utf8.RuneError:
|
|
||||||
return utf8.RuneError, w, false
|
|
||||||
default:
|
|
||||||
return r, w, true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||||
|
@ -187,7 +182,7 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
||||||
switch {
|
switch {
|
||||||
case r == utf8.RuneError:
|
case r == utf8.RuneError:
|
||||||
return peeked, 0, false
|
return peeked, width, false
|
||||||
default:
|
default:
|
||||||
width += w
|
width += w
|
||||||
peeked = append(peeked, r)
|
peeked = append(peeked, r)
|
||||||
|
@ -209,23 +204,10 @@ func (l *Lexer) acceptNext(count int) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// acceptFrom adds the next rune from the input to the string buffer
|
// acceptConsecutive adds consecutive runes from the input to the string
|
||||||
// when it matches in the provided runes. If the next rune does
|
// buffer for as long as they satisfy the provided match.
|
||||||
// not match, false is returned.
|
// If any runes were added then true is returned, false otherwise.
|
||||||
// func (l *Lexer) acceptFrom(runes string) bool {
|
func (l *Lexer) acceptConsecutive(match string) bool {
|
||||||
// r, ok := l.next()
|
|
||||||
// if strings.IndexRune(runes, r) >= 0 {
|
|
||||||
// l.buffer.WriteRune(r)
|
|
||||||
// return true
|
|
||||||
// }
|
|
||||||
// l.backup()
|
|
||||||
// return false
|
|
||||||
// }
|
|
||||||
|
|
||||||
// acceptRun adds consecutive runes from the input to the string
|
|
||||||
// buffer when they match the provided runes. If no runes were added
|
|
||||||
// at all, false is returned.
|
|
||||||
func (l *Lexer) acceptRun(match string) bool {
|
|
||||||
accepted := false
|
accepted := false
|
||||||
for l.accept(match) {
|
for l.accept(match) {
|
||||||
accepted = true
|
accepted = true
|
||||||
|
@ -233,42 +215,45 @@ func (l *Lexer) acceptRun(match string) bool {
|
||||||
return accepted
|
return accepted
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO meh... ugly rune.
|
|
||||||
var endOfFile rune = -1
|
|
||||||
|
|
||||||
// next returns the next rune from the input and a boolean indicating if
|
// next returns the next rune from the input and a boolean indicating if
|
||||||
// reading the input was successful.
|
// reading the input was successful.
|
||||||
// When the end of input is reached, or an invalid UTF8 character is
|
// When the end of input is reached, or an invalid UTF8 character is
|
||||||
// read, then false is returned.
|
// read, then false is returned.
|
||||||
func (l *Lexer) next() (rune, bool) {
|
func (l *Lexer) next() (rune, bool) {
|
||||||
|
r, w, ok := l.peek()
|
||||||
|
if ok {
|
||||||
|
l.width = w
|
||||||
|
l.pos += w
|
||||||
|
l.advanceCursor(r)
|
||||||
|
return r, true
|
||||||
|
}
|
||||||
|
l.width = 0
|
||||||
|
if r == utf8.RuneError && w == 0 {
|
||||||
|
l.emitError("unexpected end of file")
|
||||||
|
} else {
|
||||||
|
l.emitError("invalid UTF8 character")
|
||||||
|
}
|
||||||
|
return r, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Lexer) advanceCursor(r rune) {
|
||||||
if l.newline {
|
if l.newline {
|
||||||
l.linepos = 0
|
l.linepos = 0
|
||||||
l.linenr++
|
l.linenr++
|
||||||
} else {
|
} else {
|
||||||
l.linepos++
|
l.linepos++
|
||||||
}
|
}
|
||||||
l.width = 0
|
l.newline = r == '\n'
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
||||||
switch {
|
|
||||||
case r == utf8.RuneError && w == 0:
|
|
||||||
l.emitError("unexpected end of file")
|
|
||||||
return utf8.RuneError, false
|
|
||||||
case r == utf8.RuneError:
|
|
||||||
l.emitError("invalid UTF8 character")
|
|
||||||
return utf8.RuneError, false
|
|
||||||
default:
|
|
||||||
l.width = w
|
|
||||||
l.pos += w
|
|
||||||
l.newline = r == '\n'
|
|
||||||
return r, true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// skip skips runes when all provided matches are satisfied.
|
// skipMatching skips runes, but only when all provided matches are satisfied.
|
||||||
// Returns true when one or more runes were skipped.
|
// Returns true when one or more runes were skipped.
|
||||||
func (l *Lexer) skipMatching(matches ...string) bool {
|
func (l *Lexer) skipMatching(matches ...string) bool {
|
||||||
if _, w, ok := l.match(matches...); ok {
|
if runes, w, ok := l.match(matches...); ok {
|
||||||
l.pos += w
|
l.pos += w
|
||||||
|
for _, r := range runes {
|
||||||
|
l.advanceCursor(r)
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -297,11 +282,18 @@ func (l *Lexer) accept(match string) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *Lexer) upcoming(runes ...string) bool {
|
// upcoming checks if the upcoming runes satisfy the provided rune matches.
|
||||||
_, _, ok := l.match(runes...)
|
// This is a lot like the match method, with the difference that
|
||||||
|
// this one only returns the boolean value.
|
||||||
|
func (l *Lexer) upcoming(matches ...string) bool {
|
||||||
|
_, _, ok := l.match(matches...)
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// match checks if the upcoming runes satisfy the provided rune matches.
|
||||||
|
// It returns a slice of runes that were found, their total byte width
|
||||||
|
// and a boolean indicating whether or not all provided matches matched
|
||||||
|
// the input data.
|
||||||
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
|
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
|
||||||
peeked, width, ok := l.peekMulti(len(matches))
|
peeked, width, ok := l.peekMulti(len(matches))
|
||||||
if ok {
|
if ok {
|
||||||
|
|
|
@ -67,7 +67,7 @@ func stateKey(l *Lexer) stateFn {
|
||||||
// keys are allowed to be composed of only ASCII digits,
|
// keys are allowed to be composed of only ASCII digits,
|
||||||
// e.g. 1234, but are always interpreted as strings.
|
// e.g. 1234, but are always interpreted as strings.
|
||||||
func statebareKeyChars(l *Lexer) stateFn {
|
func statebareKeyChars(l *Lexer) stateFn {
|
||||||
l.acceptRun(bareKeyChars)
|
l.acceptConsecutive(bareKeyChars)
|
||||||
l.emitLiteral(ItemKey)
|
l.emitLiteral(ItemKey)
|
||||||
return stateEndOfKeyOrKeyDot
|
return stateEndOfKeyOrKeyDot
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,12 +9,12 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||||
_, err := lexer.Lex("# 12345\n# 67890\r\n# 12345\xbc").ToArray()
|
_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
||||||
t.Logf("Got error: %s", err.Error())
|
t.Logf("Got error: %s", err.Error())
|
||||||
if err.LineNr != 2 {
|
if err.LineNr != 4 {
|
||||||
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 2)
|
t.Errorf("Unexpected line number: %d (expected %d)", err.LineNr, 4)
|
||||||
}
|
}
|
||||||
if err.LinePos != 2 {
|
if err.LinePos != 6 {
|
||||||
t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
|
t.Errorf("Unexpected line position: %d (expected %d)", err.LinePos, 6)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -62,13 +62,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"bare with whitespace", " a ", "[a]", err},
|
{"bare with whitespace", " a ", "[a]", err},
|
||||||
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
|
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err},
|
||||||
// {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
|
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err},
|
||||||
// {"bare numbers", "0123456789", "[0123456789]", err},
|
{"bare numbers", "0123456789", "", err},
|
||||||
// {"bare underscore", "_", "[_]", err},
|
{"bare underscore", "_", "", err},
|
||||||
// {"bare dash", "-", "[-]", err},
|
{"bare dash", "-", "", err},
|
||||||
// {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
|
{"bare big mix", "-hey_good_Lookin123-", "", err},
|
||||||
// {"bare dotted", "a._.c", "[a].[_].[c]", err},
|
{"bare dotted", "a._.c", "[a].[_].", err},
|
||||||
// {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,12 +123,13 @@ func TestBasicString(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
||||||
|
err := "Invalid escape sequence in basic string"
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"invalid escape sequence", `a="\x"`, "[a]=", "Invalid escape sequence in basic string"},
|
{"invalid escape sequence", `a="\x"`, "[a]=", err},
|
||||||
{"too short \\u UTF8", `a="\u123"`, "[a]=", "Invalid escape sequence in basic string"},
|
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
|
||||||
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", "Invalid escape sequence in basic string"},
|
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
|
||||||
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", "Invalid escape sequence in basic string"},
|
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
|
||||||
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", "Invalid escape sequence in basic string"},
|
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue