Code cleanup and refactoring run, both functional code and the tests.
This commit is contained in:
parent
6636a7a672
commit
cbc4f04179
|
@ -7,12 +7,13 @@ type itemType int
|
||||||
|
|
||||||
// Definition of all the lexer item types for the TOML lexer.
|
// Definition of all the lexer item types for the TOML lexer.
|
||||||
const (
|
const (
|
||||||
ItemError itemType = iota // An error occurred
|
ItemError itemType = iota // An error occurred
|
||||||
ItemEOF // End of input reached
|
ItemEOF // End of input reached
|
||||||
ItemComment // Comment string, starts with # till end of line
|
ItemComment // Comment string, starts with # till end of line
|
||||||
ItemKey // Key of a key/value pair
|
ItemKey // Key of a key/value pair
|
||||||
ItemKeyDot // Dot for a dotted key
|
ItemKeyDot // Dot for a dotted key
|
||||||
ItemString // A value of type string
|
ItemAssignment // Value assignment coming up (=)
|
||||||
|
ItemString // A value of type string
|
||||||
)
|
)
|
||||||
|
|
||||||
// Item represents a lexer item returned from the scanner.
|
// Item represents a lexer item returned from the scanner.
|
||||||
|
@ -26,26 +27,26 @@ func (i Item) String() string {
|
||||||
switch i.Type {
|
switch i.Type {
|
||||||
case ItemEOF:
|
case ItemEOF:
|
||||||
return "EOF"
|
return "EOF"
|
||||||
case ItemError:
|
case ItemKey:
|
||||||
return "Error: " + i.Value
|
return fmt.Sprintf("[%s]", i.Value)
|
||||||
|
case ItemKeyDot:
|
||||||
|
return "."
|
||||||
|
case ItemAssignment:
|
||||||
|
return "="
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("%s(%q)", i.Type, i.Value)
|
return fmt.Sprintf("%s(%s)", i.Type, i.Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// String returns a string representation of the lexer item type.
|
// String returns a string representation of the lexer item type.
|
||||||
func (i itemType) String() string {
|
func (i itemType) String() string {
|
||||||
switch i {
|
switch i {
|
||||||
case ItemError:
|
case ItemError:
|
||||||
return "Error"
|
return "ERR"
|
||||||
case ItemComment:
|
case ItemComment:
|
||||||
return "Comment"
|
return "#"
|
||||||
case ItemKey:
|
|
||||||
return "Key"
|
|
||||||
case ItemKeyDot:
|
|
||||||
return "KeyDot"
|
|
||||||
case ItemString:
|
case ItemString:
|
||||||
return "String"
|
return "STR"
|
||||||
default:
|
default:
|
||||||
return fmt.Sprintf("<type id %d>", i)
|
panic(fmt.Sprintf("No translation available for type id %d", i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
206
lexer/lexer.go
206
lexer/lexer.go
|
@ -12,7 +12,6 @@ type Lexer struct {
|
||||||
input string // the scanned input string
|
input string // the scanned input string
|
||||||
state stateFn // a function that handles the current state
|
state stateFn // a function that handles the current state
|
||||||
stack []stateFn // state function stack, for nested parsing
|
stack []stateFn // state function stack, for nested parsing
|
||||||
start int // start position of the currently scanned item
|
|
||||||
pos int // current scanning position in the input
|
pos int // current scanning position in the input
|
||||||
width int // width of the last rune read, for supporting backup()
|
width int // width of the last rune read, for supporting backup()
|
||||||
buffer StringBuffer // an efficient buffer, used to build string values
|
buffer StringBuffer // an efficient buffer, used to build string values
|
||||||
|
@ -99,29 +98,44 @@ func (l *Lexer) popState() stateFn {
|
||||||
return tail
|
return tail
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO no longer needed?
|
// atEndOfFile returns true when there is no more data available in the input.
|
||||||
// getAcceptedString returns the string as accepted by the
|
|
||||||
// accept* methods so far.
|
|
||||||
func (l *Lexer) getAcceptedString() string {
|
|
||||||
return l.input[l.start:l.pos]
|
|
||||||
}
|
|
||||||
|
|
||||||
// emit passes a scanned item back to the client.
|
|
||||||
func (l *Lexer) emit(t itemType, v string) {
|
|
||||||
l.items <- Item{t, v}
|
|
||||||
l.start = l.pos
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO no longer needed with the string builder?
|
|
||||||
// ignore skips over the pending input before the current position.
|
|
||||||
func (l *Lexer) ignore() {
|
|
||||||
l.start = l.pos
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Lexer) atEndOfFile() bool {
|
func (l *Lexer) atEndOfFile() bool {
|
||||||
return l.pos >= len(l.input)
|
return l.pos >= len(l.input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// emit passes a lexer item back to the client, including the provided string.
|
||||||
|
func (l *Lexer) emit(t itemType, s string) {
|
||||||
|
l.items <- Item{t, s}
|
||||||
|
l.buffer.Reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitLiteral passes a lexer item back to the client, including the accumulated
|
||||||
|
// string buffer data as a literal string.
|
||||||
|
func (l *Lexer) emitLiteral(t itemType) {
|
||||||
|
l.emit(t, l.buffer.AsLiteralString())
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitTrimmedLiteral passes a lexer item back to the client, including the
|
||||||
|
// accumulated string buffer data as a literal string with whitespace
|
||||||
|
// trimmed from it.
|
||||||
|
func (l *Lexer) emitTrimmedLiteral(t itemType) {
|
||||||
|
l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitInterpreted passes a lexer item back to the client, including the
|
||||||
|
// accumulated string buffer data as an interpreted string (handling escape
|
||||||
|
// codes like \n, \t, \uXXXX, etc.)
|
||||||
|
// This method might return an error, in case there is data in the
|
||||||
|
// string buffer that is not valid for string interpretation.
|
||||||
|
func (l *Lexer) emitInterpreted(t itemType) error {
|
||||||
|
s, err := l.buffer.AsInterpretedString()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
l.emit(t, s)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// backup steps back one rune
|
// backup steps back one rune
|
||||||
// Can be called only once per call of next.
|
// Can be called only once per call of next.
|
||||||
func (l *Lexer) backup() {
|
func (l *Lexer) backup() {
|
||||||
|
@ -129,16 +143,119 @@ func (l *Lexer) backup() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// peek returns but does not advance to the next rune(s) in the input.
|
// peek returns but does not advance to the next rune(s) in the input.
|
||||||
func (l *Lexer) peek() rune {
|
// Returns the rune, its width and a boolean. The boolean will be false in case
|
||||||
r := l.next()
|
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
||||||
l.backup()
|
func (l *Lexer) peek() (rune, int, bool) {
|
||||||
return r
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||||
|
switch {
|
||||||
|
case r == utf8.RuneError:
|
||||||
|
return utf8.RuneError, w, false
|
||||||
|
default:
|
||||||
|
return r, w, true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO still needed with the string buffer?
|
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||||
// accept consumes the next rune if it's from the valid set of runes.
|
// Returns a slice of runes and a boolean. The boolean will be false in case
|
||||||
|
// fewer upcoming runes can be peeked than the requested amount
|
||||||
|
// (end of data or invalid UTF8 character).
|
||||||
|
func (l *Lexer) peekMulti(amount int) ([]rune, bool) {
|
||||||
|
offset := 0
|
||||||
|
var peeked []rune
|
||||||
|
for i := 0; i < amount; i++ {
|
||||||
|
r, w := utf8.DecodeRuneInString(l.input[l.pos+offset:])
|
||||||
|
switch {
|
||||||
|
case r == utf8.RuneError:
|
||||||
|
return peeked, false
|
||||||
|
default:
|
||||||
|
offset += w
|
||||||
|
peeked = append(peeked, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return peeked, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptNext adds the next rune from the input to the string buffer.
|
||||||
|
// If no rune could be read (end of file or invalid UTF8 data),
|
||||||
|
// then false is returned.
|
||||||
|
func (l *Lexer) acceptNext() bool {
|
||||||
|
r := l.next()
|
||||||
|
if r == endOfFile || r == utf8.RuneError {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
l.buffer.WriteRune(r)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptFrom adds the next rune from the input to the string buffer
|
||||||
|
// when it matches in the provided runes. If the next rune does
|
||||||
|
// not match, false is returned.
|
||||||
|
func (l *Lexer) acceptFrom(runes string) bool {
|
||||||
|
r := l.next()
|
||||||
|
if strings.IndexRune(runes, r) >= 0 {
|
||||||
|
l.buffer.WriteRune(r)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
l.backup()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptRun adds consecutive runes from the input to the string
|
||||||
|
// buffer when they match the provided runes. If no runes were added
|
||||||
|
// at all, false is returned.
|
||||||
|
func (l *Lexer) acceptRun(runes string) bool {
|
||||||
|
accepted := false
|
||||||
|
for l.acceptFrom(runes) {
|
||||||
|
accepted = true
|
||||||
|
}
|
||||||
|
return accepted
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO meh... ugly rune.
|
||||||
|
var endOfFile rune = -1
|
||||||
|
|
||||||
|
// next returns the next rune from the input.
|
||||||
|
func (l *Lexer) next() rune {
|
||||||
|
l.width = 0
|
||||||
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||||
|
switch {
|
||||||
|
case r == utf8.RuneError && w == 0:
|
||||||
|
return endOfFile
|
||||||
|
case r == utf8.RuneError:
|
||||||
|
return utf8.RuneError
|
||||||
|
default:
|
||||||
|
l.width = w
|
||||||
|
l.pos += w
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip skips a rune from the set of accepted runes.
|
||||||
|
// Returns true when a rune was skipped.
|
||||||
|
func (l *Lexer) skip(runes string) bool {
|
||||||
|
r, w, _ := l.peek()
|
||||||
|
if strings.IndexRune(runes, r) >= 0 {
|
||||||
|
l.pos += w
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// skipRun skips a run of runes from the set of accepted runes.
|
||||||
|
// Returns true when one or more runes were skipped.
|
||||||
|
func (l *Lexer) skipRun(runes string) bool {
|
||||||
|
didSkip := false
|
||||||
|
for l.skip(runes) {
|
||||||
|
didSkip = true
|
||||||
|
}
|
||||||
|
return didSkip
|
||||||
|
}
|
||||||
|
|
||||||
|
// accept adds the next rune to the string buffer and returns true if it's
|
||||||
|
// from the valid set of runes. Otherwise false is returned.
|
||||||
func (l *Lexer) accept(runes string) bool {
|
func (l *Lexer) accept(runes string) bool {
|
||||||
if strings.IndexRune(runes, l.next()) >= 0 {
|
r := l.next()
|
||||||
|
if strings.IndexRune(runes, r) >= 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
l.backup()
|
l.backup()
|
||||||
|
@ -187,34 +304,10 @@ func (l *Lexer) acceptWhile(runes string) bool {
|
||||||
return accepted
|
return accepted
|
||||||
}
|
}
|
||||||
|
|
||||||
// skip skips a run of runes from the set of accepted runes.
|
|
||||||
func (l *Lexer) skip(runes string) {
|
|
||||||
if l.acceptWhile(runes) {
|
|
||||||
l.ignore()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// skipUntil skips a run of runes, until a rune from the set of
|
// skipUntil skips a run of runes, until a rune from the set of
|
||||||
// runes or EOF is reached.
|
// runes or EOF is reached.
|
||||||
func (l *Lexer) skipUntil(runes string) {
|
func (l *Lexer) skipUntil(runes string) {
|
||||||
if l.acceptUntil(runes) {
|
l.acceptUntil(runes)
|
||||||
l.ignore()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO meh... ugly rune.
|
|
||||||
var endOfFile rune = -1
|
|
||||||
|
|
||||||
// next returns the next rune in the input.
|
|
||||||
func (l *Lexer) next() rune {
|
|
||||||
if l.atEndOfFile() {
|
|
||||||
l.width = 0
|
|
||||||
return endOfFile // TODO phase out this bizarro rune?
|
|
||||||
}
|
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
||||||
l.width = w
|
|
||||||
l.pos += w
|
|
||||||
return r
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// error returns an error token and terminates the scan
|
// error returns an error token and terminates the scan
|
||||||
|
@ -227,15 +320,16 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *Lexer) unexpectedTokenError(expected string) stateFn {
|
func (l *Lexer) unexpectedInputError(expected string) stateFn {
|
||||||
var actual string
|
var actual string
|
||||||
switch {
|
switch {
|
||||||
case l.peek() == endOfFile:
|
case l.atEndOfFile(): // TODO maybe not hit anymore after refactoring?
|
||||||
actual = "end of file"
|
actual = "end of file"
|
||||||
case !utf8.ValidString(l.input[l.start:]):
|
case !utf8.ValidString(l.input[l.pos:]):
|
||||||
actual = "non-UTF8 data"
|
actual = "non-UTF8 data"
|
||||||
default:
|
default:
|
||||||
actual = fmt.Sprintf("token '%c'", l.peek())
|
r, _, _ := l.peek()
|
||||||
|
actual = fmt.Sprintf("token '%c'", r)
|
||||||
}
|
}
|
||||||
return l.errorf("Unexpected %s (expected %s)", actual, expected)
|
return l.errorf("Unexpected %s (expected %s)", actual, expected)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,175 +0,0 @@
|
||||||
package lexer_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/mmakaay/toml/lexer"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestInvalidUtf8Data(t *testing.T) {
|
|
||||||
assertFailureAndCheck(t, "\xbc", []string{}, "Unexpected non-UTF8 data (expected end of file)")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestEmptyInput(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, "", []string{})
|
|
||||||
}
|
|
||||||
func TestWhiteSpace(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, " ", []string{})
|
|
||||||
assertSuccessAndCheck(t, "\t", []string{})
|
|
||||||
assertSuccessAndCheck(t, " \t \t ", []string{})
|
|
||||||
}
|
|
||||||
func TestWhiteSpaceAndNewlines(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, "\n", []string{})
|
|
||||||
assertSuccessAndCheck(t, "\n \t\r\n", []string{})
|
|
||||||
}
|
|
||||||
func TestComments(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, "#", []string{`Comment("#")`})
|
|
||||||
assertSuccessAndCheck(t, " \t \t #", []string{`Comment("#")`})
|
|
||||||
assertSuccessAndCheck(t, " \t \t # not empty", []string{`Comment("# not empty")`})
|
|
||||||
assertSuccessAndCheck(t, " \t \t # not empty\r\r\r\n", []string{`Comment("# not empty")`})
|
|
||||||
assertSuccessAndCheck(t, "\n \t\r\n# AAP\r\n", []string{`Comment("# AAP")`})
|
|
||||||
assertSuccessAndCheck(t,
|
|
||||||
"# two lines\n# of comments\n",
|
|
||||||
[]string{`Comment("# two lines")`, `Comment("# of comments")`})
|
|
||||||
assertSuccessAndCheck(t,
|
|
||||||
`# \tcomment\nwith escape-y chars`,
|
|
||||||
[]string{`Comment("# \\tcomment\\nwith escape-y chars")`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestBareKeyWithoutValue(t *testing.T) {
|
|
||||||
err := "Unexpected end of file (expected an '=' value assignment)"
|
|
||||||
assertFailureAndCheck(t, "a", []string{`Key("a")`}, err)
|
|
||||||
assertFailureAndCheck(t, "_", []string{`Key("_")`}, err)
|
|
||||||
assertFailureAndCheck(t, " a", []string{`Key("a")`}, err)
|
|
||||||
assertFailureAndCheck(t, " a ", []string{`Key("a")`}, err)
|
|
||||||
assertFailureAndCheck(t, "ab", []string{`Key("ab")`}, err)
|
|
||||||
assertFailureAndCheck(t, "Ab", []string{`Key("Ab")`}, err)
|
|
||||||
assertFailureAndCheck(t, "Ab1", []string{`Key("Ab1")`}, err)
|
|
||||||
assertFailureAndCheck(t, "_Ab1", []string{`Key("_Ab1")`}, err)
|
|
||||||
assertFailureAndCheck(t, "_-Ab1", []string{`Key("_-Ab1")`}, err)
|
|
||||||
assertFailureAndCheck(t, "_-Ab1_this-is_GOOD987", []string{`Key("_-Ab1_this-is_GOOD987")`}, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDottedKey(t *testing.T) {
|
|
||||||
err := "Unexpected end of file (expected an '=' value assignment)"
|
|
||||||
assertFailureAndCheck(t, "a.b", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
|
|
||||||
assertFailureAndCheck(t, " a .\t\t b\t ", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
|
||||||
err := "Unexpected end of file (expected a value)"
|
|
||||||
assertFailureAndCheck(t, " some_cool_key = ", []string{`Key("some_cool_key")`}, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestUnterminatedBasicString(t *testing.T) {
|
|
||||||
assertFailure(t, `key="value`, "Unexpected end of file (expected basic string token)")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestBasicStringWithNewline(t *testing.T) {
|
|
||||||
assertFailure(t, "key=\"value\nwith\nnewlines\"", "ohoh")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestEmptyBasicString(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, `a=""`, []string{`Key("a")`, `String("")`})
|
|
||||||
assertSuccessAndCheck(t, `a=""#hi`, []string{`Key("a")`, `String("")`, `Comment("#hi")`})
|
|
||||||
assertSuccessAndCheck(t, `a = ""`, []string{`Key("a")`, `String("")`})
|
|
||||||
assertSuccessAndCheck(t, `a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `String("")`})
|
|
||||||
assertSuccessAndCheck(t, `a=""b=""`, []string{`Key("a")`, `String("")`, `Key("b")`, `String("")`})
|
|
||||||
}
|
|
||||||
func TestBasicString(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, `_ = "b"`,
|
|
||||||
[]string{
|
|
||||||
`Key("_")`,
|
|
||||||
`String("b")`})
|
|
||||||
assertSuccessAndCheck(t, `thing = "A cool ʎǝʞ" # huh, it's up-side down!!`,
|
|
||||||
[]string{
|
|
||||||
`Key("thing")`,
|
|
||||||
`String("A cool ʎǝʞ")`,
|
|
||||||
`Comment("# huh, it's up-side down!!")`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInvalidEscapeSequence(t *testing.T) {
|
|
||||||
assertFailure(t, `a="\x"`, `Invalid escape sequence \x in string value`)
|
|
||||||
}
|
|
||||||
func TestBasicStringEscapes(t *testing.T) {
|
|
||||||
for in, out := range map[string]string{
|
|
||||||
`\b`: "\b",
|
|
||||||
`\t`: "\t",
|
|
||||||
`\n`: "\n",
|
|
||||||
`\f`: "\f",
|
|
||||||
`\r`: "\r",
|
|
||||||
`\"`: "\"",
|
|
||||||
`\b\t\nhuh\f\r\"`: "\b\t\nhuh\f\r\"",
|
|
||||||
`\u2318`: "⌘",
|
|
||||||
`\U0001014D`: "𐅍",
|
|
||||||
} {
|
|
||||||
l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
|
|
||||||
if out != l[1].Value {
|
|
||||||
t.Fatalf("Unexpected result when parsing '%s'\nexpected: %q\nactual: %q", in, out, l[1].Value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// func TestBasicStringUnicodeEscapes(t *testing.T) {
|
|
||||||
// for in, out := range map[string]string{
|
|
||||||
// `\u`: "\b",
|
|
||||||
// } {
|
|
||||||
// l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
|
|
||||||
// s := l[2]
|
|
||||||
// if out != s.Value {
|
|
||||||
// t.Fatalf("Unexpected result when parsing '%s'", in)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
func TestTwoKeyValuePairs(t *testing.T) {
|
|
||||||
assertSuccessAndCheck(t, "a=\"Hello\" #comment1\nb=\"World!\"#comment2\r\n",
|
|
||||||
[]string{
|
|
||||||
`Key("a")`,
|
|
||||||
`String("Hello")`,
|
|
||||||
`Comment("#comment1")`,
|
|
||||||
`Key("b")`,
|
|
||||||
`String("World!")`,
|
|
||||||
`Comment("#comment2")`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertSuccessAndCheck(t *testing.T, input string, expected []string) {
|
|
||||||
l := assertSuccess(t, input)
|
|
||||||
assertItems(t, l, expected)
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertFailureAndCheck(t *testing.T, input string, expected []string, expectedErr string) {
|
|
||||||
l := assertFailure(t, input, expectedErr)
|
|
||||||
assertItems(t, l, expected)
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertFailure(t *testing.T, input string, expectedErr string) []lexer.Item {
|
|
||||||
l, err := lexer.Lex(input).ToArray()
|
|
||||||
if err == nil {
|
|
||||||
t.Fatalf("Expected lexer error '%s', but no error occurred", expectedErr)
|
|
||||||
}
|
|
||||||
if err.Error() != expectedErr {
|
|
||||||
t.Fatalf("Mismatch between expected and actual error:\nExpected: %s\nActual: %s\n", expectedErr, err)
|
|
||||||
}
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertSuccess(t *testing.T, input string) []lexer.Item {
|
|
||||||
l, err := lexer.Lex(input).ToArray()
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Unexpected lexer error: %s", err)
|
|
||||||
}
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
func assertItems(t *testing.T, l []lexer.Item, expected []string) {
|
|
||||||
if len(expected) != len(l) {
|
|
||||||
t.Fatalf("Unexpected number of lexer items: %d (expected: %d)", len(l), len(expected))
|
|
||||||
}
|
|
||||||
for i, e := range expected {
|
|
||||||
if l[i].String() != e {
|
|
||||||
t.Fatalf("Unexpected lexer item at index %d: %s (expected: %s)", i, l[i], e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
171
lexer/states.go
171
lexer/states.go
|
@ -1,33 +1,35 @@
|
||||||
package lexer
|
package lexer
|
||||||
|
|
||||||
// stateFn represents the state of the scanner as a function
|
// stateFn represents the state of the lexer as a function
|
||||||
// that returns the next state.
|
// that returns the next state.
|
||||||
type stateFn func(*Lexer) stateFn
|
type stateFn func(*Lexer) stateFn
|
||||||
|
|
||||||
const (
|
const (
|
||||||
whitespace string = " \t"
|
whitespace string = " \t"
|
||||||
carriageReturn string = "\r"
|
carriageReturn string = "\r"
|
||||||
newline string = "\n"
|
newline string = "\n"
|
||||||
hash string = "#"
|
hash string = "#"
|
||||||
equal string = "="
|
equal string = "="
|
||||||
lower string = "abcdefghijklmnopqrstuvwxyz"
|
lower string = "abcdefghijklmnopqrstuvwxyz"
|
||||||
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||||
digits string = "0123456789"
|
digits string = "0123456789"
|
||||||
dot string = "."
|
dot string = "."
|
||||||
underscore string = "_"
|
underscore string = "_"
|
||||||
dash string = "-"
|
dash string = "-"
|
||||||
singleQuote string = "'"
|
singleQuote string = "'"
|
||||||
doubleQuote string = "\""
|
doubleQuote string = "\""
|
||||||
backslash string = "\\"
|
backslash string = "\\"
|
||||||
someQuote string = singleQuote + doubleQuote
|
quoteChars string = singleQuote + doubleQuote
|
||||||
bareKey string = lower + upper + digits + underscore + dash
|
bareKeyChars string = lower + upper + digits + underscore + dash
|
||||||
startOfKey string = bareKey + someQuote
|
startOfKey string = bareKeyChars + quoteChars
|
||||||
quotable string = `btnfr\"`
|
escapeChars string = `btnfr"\`
|
||||||
|
shortUtf8Escape string = "u"
|
||||||
|
longUtf8Escape string = "U"
|
||||||
)
|
)
|
||||||
|
|
||||||
func stateKeyValuePair(l *Lexer) stateFn {
|
func stateKeyValuePair(l *Lexer) stateFn {
|
||||||
l.skip(whitespace + carriageReturn + newline)
|
l.skipRun(whitespace + carriageReturn + newline)
|
||||||
if l.upcoming(hash) {
|
if l.skip(hash) {
|
||||||
return stateComment
|
return stateComment
|
||||||
}
|
}
|
||||||
if l.upcoming(startOfKey) {
|
if l.upcoming(startOfKey) {
|
||||||
|
@ -38,36 +40,34 @@ func stateKeyValuePair(l *Lexer) stateFn {
|
||||||
|
|
||||||
// A '#' hash symbol marks the rest of the line as a comment.
|
// A '#' hash symbol marks the rest of the line as a comment.
|
||||||
func stateComment(l *Lexer) stateFn {
|
func stateComment(l *Lexer) stateFn {
|
||||||
l.buffer.Reset()
|
|
||||||
for {
|
for {
|
||||||
switch {
|
switch {
|
||||||
case l.atEndOfFile() || l.accept(newline):
|
case l.atEndOfFile() || l.skip(newline):
|
||||||
s := l.buffer.AsLiteralString()
|
l.emitTrimmedLiteral(ItemComment)
|
||||||
l.emit(ItemComment, s)
|
|
||||||
return stateKeyValuePair
|
return stateKeyValuePair
|
||||||
case l.accept(carriageReturn):
|
|
||||||
l.ignore()
|
|
||||||
default:
|
default:
|
||||||
l.buffer.WriteRune(l.next())
|
if !l.acceptNext() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// A key may be either bare, quoted or dotted.
|
// A key may be either bare, quoted or dotted.
|
||||||
func stateKey(l *Lexer) stateFn {
|
func stateKey(l *Lexer) stateFn {
|
||||||
if l.upcoming(bareKey) {
|
if l.acceptFrom(bareKeyChars) {
|
||||||
return stateBareKey
|
return statebareKeyChars
|
||||||
}
|
}
|
||||||
return l.unexpectedTokenError("a valid key name")
|
return l.unexpectedInputError("a valid key name")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||||
// keys are allowed to be composed of only ASCII digits,
|
// keys are allowed to be composed of only ASCII digits,
|
||||||
// e.g. 1234, but are always interpreted as strings.
|
// e.g. 1234, but are always interpreted as strings.
|
||||||
func stateBareKey(l *Lexer) stateFn {
|
func statebareKeyChars(l *Lexer) stateFn {
|
||||||
l.acceptWhile(bareKey)
|
l.acceptRun(bareKeyChars)
|
||||||
l.emit(ItemKey, l.getAcceptedString())
|
l.emitLiteral(ItemKey)
|
||||||
return stateEndOfKeyOrKeyDot
|
return stateEndOfKeyOrKeyDot
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,10 +76,10 @@ func stateBareKey(l *Lexer) stateFn {
|
||||||
func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
|
func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
|
||||||
// Whitespace around dot-separated parts is ignored, however,
|
// Whitespace around dot-separated parts is ignored, however,
|
||||||
// best practice is to not use any extraneous whitespace.
|
// best practice is to not use any extraneous whitespace.
|
||||||
l.skip(whitespace)
|
l.skipRun(whitespace)
|
||||||
if l.accept(dot) {
|
if l.skip(dot) {
|
||||||
l.emit(ItemKeyDot, ".")
|
l.emit(ItemKeyDot, "")
|
||||||
l.skip(whitespace)
|
l.skipRun(whitespace)
|
||||||
return stateKey
|
return stateKey
|
||||||
}
|
}
|
||||||
return stateKeyAssignment
|
return stateKeyAssignment
|
||||||
|
@ -90,70 +90,57 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
|
||||||
// sign, and value must be on the same line (though some values can
|
// sign, and value must be on the same line (though some values can
|
||||||
// be broken over multiple lines).
|
// be broken over multiple lines).
|
||||||
func stateKeyAssignment(l *Lexer) stateFn {
|
func stateKeyAssignment(l *Lexer) stateFn {
|
||||||
l.skip(whitespace)
|
l.skipRun(whitespace)
|
||||||
if l.accept(equal) {
|
if l.skip(equal) {
|
||||||
l.skip(whitespace)
|
l.emit(ItemAssignment, "")
|
||||||
|
l.skipRun(whitespace)
|
||||||
return stateValue
|
return stateValue
|
||||||
}
|
}
|
||||||
return l.unexpectedTokenError("an '=' value assignment")
|
return l.unexpectedInputError("a value assignment")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||||
|
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||||
func stateValue(l *Lexer) stateFn {
|
func stateValue(l *Lexer) stateFn {
|
||||||
l.skip(whitespace)
|
l.skipRun(whitespace)
|
||||||
if l.upcoming(someQuote) {
|
if l.upcoming(quoteChars) {
|
||||||
return stateStringValue
|
return stateStringValue
|
||||||
}
|
}
|
||||||
return l.unexpectedTokenError("a value")
|
return l.unexpectedInputError("a value")
|
||||||
}
|
}
|
||||||
|
|
||||||
// There are four ways to express strings: basic, multi-line basic, literal,
|
// There are four ways to express strings: basic, multi-line basic, literal,
|
||||||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||||
func stateStringValue(l *Lexer) stateFn {
|
func stateStringValue(l *Lexer) stateFn {
|
||||||
if l.accept(doubleQuote) {
|
// Basic strings are surrounded by quotation marks.
|
||||||
|
if l.skip(doubleQuote) {
|
||||||
return stateBasicStringValue
|
return stateBasicStringValue
|
||||||
}
|
}
|
||||||
return l.unexpectedTokenError("a string value")
|
return l.unexpectedInputError("a string value")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Basic strings are surrounded by quotation marks. Any Unicode character
|
|
||||||
// may be used except those that must be escaped: quotation mark, backslash,
|
|
||||||
// and the control characters (U+0000 to U+001F, U+007F).
|
|
||||||
//
|
|
||||||
// For convenience, some popular characters have a compact escape sequence.
|
|
||||||
//
|
|
||||||
// \b - backspace (U+0008)
|
|
||||||
// \t - tab (U+0009)
|
|
||||||
// \n - linefeed (U+000A)
|
|
||||||
// \f - form feed (U+000C)
|
|
||||||
// \r - carriage return (U+000D)
|
|
||||||
// \" - quote (U+0022)
|
|
||||||
// \\ - backslash (U+005C)
|
|
||||||
// \uXXXX - unicode (U+XXXX)
|
|
||||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
|
||||||
//
|
|
||||||
// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
|
|
||||||
// The escape codes must be valid Unicode scalar values.
|
|
||||||
//
|
|
||||||
// All other escape sequences not listed above are reserved and,
|
|
||||||
// if used, TOML should produce an error.
|
|
||||||
func stateBasicStringValue(l *Lexer) stateFn {
|
func stateBasicStringValue(l *Lexer) stateFn {
|
||||||
// Possibly a """ multi-line string start,
|
// Possibly a """ multi-line string start,
|
||||||
// possibly the end of an "" empty string.
|
// possibly the end of an "" empty string.
|
||||||
if l.accept(doubleQuote) {
|
if l.skip(doubleQuote) {
|
||||||
// It's a """ multi-line string.
|
// It's a """ multi-line string.
|
||||||
if l.accept(doubleQuote) {
|
if l.skip(doubleQuote) {
|
||||||
l.ignore()
|
|
||||||
return stateMultiLineBasicString
|
return stateMultiLineBasicString
|
||||||
}
|
}
|
||||||
// It's an "" empty string.
|
// It's an "" empty string.
|
||||||
l.ignore()
|
|
||||||
l.emit(ItemString, "")
|
l.emit(ItemString, "")
|
||||||
return stateKeyValuePair
|
return stateKeyValuePair
|
||||||
}
|
}
|
||||||
l.ignore()
|
|
||||||
return stateBasicString
|
return stateBasicString
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const invalidBasicStringCharacters string = "" +
|
||||||
|
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
||||||
|
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
||||||
|
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
||||||
|
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
||||||
|
"\u007F"
|
||||||
|
|
||||||
func stateParseBasicString(l *Lexer) stateFn {
|
func stateParseBasicString(l *Lexer) stateFn {
|
||||||
for {
|
for {
|
||||||
switch {
|
switch {
|
||||||
|
@ -162,26 +149,47 @@ func stateParseBasicString(l *Lexer) stateFn {
|
||||||
case l.accept(doubleQuote):
|
case l.accept(doubleQuote):
|
||||||
return l.popState()
|
return l.popState()
|
||||||
case l.accept(backslash):
|
case l.accept(backslash):
|
||||||
if l.upcoming(quotable) {
|
// For convenience, some popular characters have a compact escape sequence.
|
||||||
|
// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
|
||||||
|
// The escape codes must be valid Unicode scalar values.
|
||||||
|
switch {
|
||||||
|
case l.upcoming(escapeChars):
|
||||||
|
// \b - backspace (U+0008)
|
||||||
|
// \t - tab (U+0009)
|
||||||
|
// \n - linefeed (U+000A)
|
||||||
|
// \f - form feed (U+000C)
|
||||||
|
// \r - carriage return (U+000D)
|
||||||
|
// \" - quote (U+0022)
|
||||||
|
// \\ - backslash (U+005C)
|
||||||
l.buffer.WriteRune('\\')
|
l.buffer.WriteRune('\\')
|
||||||
l.buffer.WriteRune(l.next())
|
l.buffer.WriteRune(l.next())
|
||||||
} else {
|
case l.upcoming(shortUtf8Escape):
|
||||||
|
// \uXXXX - unicode (U+XXXX)
|
||||||
|
return l.errorf("Not yet implemented: short utf8")
|
||||||
|
case l.upcoming(longUtf8Escape):
|
||||||
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||||
|
return l.errorf("Not yet implemented: long utf8")
|
||||||
|
default:
|
||||||
|
// All other escape sequences not listed above are reserved and,
|
||||||
|
// if used, TOML should produce an error.
|
||||||
return l.errorf("Invalid escape sequence \\%c in string value", l.next())
|
return l.errorf("Invalid escape sequence \\%c in string value", l.next())
|
||||||
}
|
}
|
||||||
|
case l.upcoming(invalidBasicStringCharacters):
|
||||||
|
// Any Unicode character may be used except those that must be escaped:
|
||||||
|
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||||
|
return l.errorf("Invalid character in basic string: %q", l.next())
|
||||||
default:
|
default:
|
||||||
l.buffer.WriteRune(l.next())
|
l.acceptNext()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateBasicString(l *Lexer) stateFn {
|
func stateBasicString(l *Lexer) stateFn {
|
||||||
l.buffer.Reset()
|
|
||||||
l.pushState(func(l *Lexer) stateFn {
|
l.pushState(func(l *Lexer) stateFn {
|
||||||
s, err := l.buffer.AsInterpretedString()
|
err := l.emitInterpreted(ItemString)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return l.errorf("Invalid data in string: %s", err)
|
return l.errorf("Invalid data in string: %s", err)
|
||||||
}
|
}
|
||||||
l.emit(ItemString, s)
|
|
||||||
return stateKeyValuePair
|
return stateKeyValuePair
|
||||||
})
|
})
|
||||||
return stateParseBasicString
|
return stateParseBasicString
|
||||||
|
@ -192,10 +200,9 @@ func stateMultiLineBasicString(l *Lexer) stateFn {
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateEndOfFile(l *Lexer) stateFn {
|
func stateEndOfFile(l *Lexer) stateFn {
|
||||||
i := l.peek()
|
if l.atEndOfFile() {
|
||||||
if i == endOfFile {
|
|
||||||
l.emit(ItemEOF, "EOF")
|
l.emit(ItemEOF, "EOF")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return l.unexpectedTokenError("end of file")
|
return l.unexpectedInputError("end of file")
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
package lexer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mmakaay/toml/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInvalidUtf8Data(t *testing.T) {
|
||||||
|
runStatesT(t, statesT{
|
||||||
|
"invalid UTF8 data", "\xbc", "",
|
||||||
|
"Unexpected non-UTF8 data (expected end of file)"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmptyInput(t *testing.T) {
|
||||||
|
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWhiteSpaceAndNewlines(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"space", " ", "", ""},
|
||||||
|
{"tab", "\t", "", ""},
|
||||||
|
{"newline", "\n", "", ""},
|
||||||
|
{"carriage return", "\r", "", ""},
|
||||||
|
{"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestComments(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"empty comment", "#", "#()", ""},
|
||||||
|
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||||
|
{"basic comment", "#chicken", "#(chicken)", ""},
|
||||||
|
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||||
|
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||||
|
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||||
|
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||||
|
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestKeyWithoutAssignment(t *testing.T) {
|
||||||
|
err := "Unexpected end of file (expected a value assignment)"
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"bare with whitespace", " a ", []string{"[a]"}, err},
|
||||||
|
{"bare lower", "abcdefghijklmnopqrstuvwxyz", []string{"[abcdefghijklmnopqrstuvwxyz]"}, err},
|
||||||
|
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", []string{"[ABCDEFGHIJKLMNOPQRSTUVWXYZ]"}, err},
|
||||||
|
{"bare numbers", "0123456789", []string{"[0123456789]"}, err},
|
||||||
|
{"bare underscore", "_", []string{"[_]"}, err},
|
||||||
|
{"bare dash", "-", []string{"[-]"}, err},
|
||||||
|
{"bare big mix", "-hey_good_Lookin123-", []string{"[-hey_good_Lookin123-]"}, err},
|
||||||
|
{"bare dotted", "a._.c", []string{"[a]", ".", "[_]", ".", "[c]"}, err},
|
||||||
|
{"bare dotted with whitespace", " a .\t\t b\t ", []string{"[a]", ".", "[b]"}, err},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
||||||
|
err := "Unexpected end of file (expected a value)"
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"bare", "a=", "[a]=", err},
|
||||||
|
{"double equal sign", "a==", "[a]=", "Unexpected token '=' (expected a value)"},
|
||||||
|
{"bare dotted", "a.b=", "[a].[b]=", err},
|
||||||
|
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUnterminatedBasicString(t *testing.T) {
|
||||||
|
runStatesT(t, statesT{
|
||||||
|
"missing closing quote", `a="value`, "[a]=",
|
||||||
|
"Unexpected end of file (expected basic string token)"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`},
|
||||||
|
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`},
|
||||||
|
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`},
|
||||||
|
})
|
||||||
|
|
||||||
|
// No need to write all test cases for disallowed characters by hand.
|
||||||
|
for i := 0x00; i <= 0x1F; i++ {
|
||||||
|
name := fmt.Sprintf("control character %x", rune(i))
|
||||||
|
runStatesT(
|
||||||
|
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
|
||||||
|
fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmptyBasicString(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"empty", `a=""`, "[a]=STR()", ""},
|
||||||
|
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
|
||||||
|
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
|
||||||
|
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
|
||||||
|
{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
||||||
|
{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBasicString(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
|
||||||
|
{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
||||||
|
runStatesT(t, statesT{
|
||||||
|
"invalid escape sequence", `a="\x"`, "[a]=", `Invalid escape sequence \x in string value`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBasicStringEscapes(t *testing.T) {
|
||||||
|
runStatesTs(t, []statesT{
|
||||||
|
{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
|
||||||
|
{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
|
||||||
|
{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
|
||||||
|
{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
|
||||||
|
{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
|
||||||
|
{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
|
||||||
|
{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
|
||||||
|
{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
|
||||||
|
{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
|
||||||
|
{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// statesT describes a single lexer test case.
type statesT struct {
	// name identifies the case in failure messages; in is the lexer input.
	name, in string
	// out is the expected lexer output: either a []string that is compared
	// item by item, or a string that is compared against the concatenated
	// string representations of all items.
	out interface{}
	// err is the expected error message, or "" when no error is expected.
	err string
}
|
||||||
|
|
||||||
|
func runStatesTs(t *testing.T, tests []statesT) {
|
||||||
|
for _, c := range tests {
|
||||||
|
runStatesT(t, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func runStatesT(t *testing.T, c statesT) {
|
||||||
|
l, err := lexer.Lex(c.in).ToArray()
|
||||||
|
if err == nil && c.err != "" {
|
||||||
|
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
||||||
|
}
|
||||||
|
if err != nil && c.err == "" {
|
||||||
|
t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
|
||||||
|
}
|
||||||
|
if err != nil && c.err != "" && err.Error() != c.err {
|
||||||
|
t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
|
||||||
|
}
|
||||||
|
switch expected := c.out.(type) {
|
||||||
|
case []string:
|
||||||
|
if len(expected) != len(l) {
|
||||||
|
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
||||||
|
}
|
||||||
|
for i, e := range expected {
|
||||||
|
if l[i].String() != e {
|
||||||
|
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case string:
|
||||||
|
a := make([]string, len(l))
|
||||||
|
for _, v := range l {
|
||||||
|
a = append(a, v.String())
|
||||||
|
}
|
||||||
|
actual := strings.Join(a, "")
|
||||||
|
if actual != expected {
|
||||||
|
t.Errorf("[%s] Unexpected lexer output:\nexpected; %s\nactual: %s\n", c.name, expected, actual)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,7 +19,7 @@ func (b *StringBuffer) Reset() *StringBuffer {
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddString adds the runes of the input string to the string buffer.
|
// WriteString adds the runes of the input string to the string buffer.
|
||||||
func (b *StringBuffer) WriteString(s string) *StringBuffer {
|
func (b *StringBuffer) WriteString(s string) *StringBuffer {
|
||||||
for _, r := range s {
|
for _, r := range s {
|
||||||
b.WriteRune(r)
|
b.WriteRune(r)
|
||||||
|
|
|
@ -23,7 +23,7 @@ func TestResetResetsBuffer(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type testCase struct {
|
type stringbufT struct {
|
||||||
name string
|
name string
|
||||||
in string
|
in string
|
||||||
out string
|
out string
|
||||||
|
@ -37,7 +37,7 @@ const (
|
||||||
|
|
||||||
func TestAsLiteralString(t *testing.T) {
|
func TestAsLiteralString(t *testing.T) {
|
||||||
b := lexer.StringBuffer{}
|
b := lexer.StringBuffer{}
|
||||||
for _, c := range []testCase{
|
for _, c := range []stringbufT{
|
||||||
{"empty string", ``, ``, OK},
|
{"empty string", ``, ``, OK},
|
||||||
{"simple string", `Simple string!`, `Simple string!`, OK},
|
{"simple string", `Simple string!`, `Simple string!`, OK},
|
||||||
{"single quote", `'`, `'`, OK},
|
{"single quote", `'`, `'`, OK},
|
||||||
|
@ -57,7 +57,7 @@ func TestAsLiteralString(t *testing.T) {
|
||||||
|
|
||||||
func TestAsInterpretedString(t *testing.T) {
|
func TestAsInterpretedString(t *testing.T) {
|
||||||
b := lexer.StringBuffer{}
|
b := lexer.StringBuffer{}
|
||||||
for _, c := range []testCase{
|
for _, c := range []stringbufT{
|
||||||
{"empty string", "", "", OK},
|
{"empty string", "", "", OK},
|
||||||
{"one character", "Simple string!", "Simple string!", OK},
|
{"one character", "Simple string!", "Simple string!", OK},
|
||||||
{"escaped single quote", `\'`, "", FAIL},
|
{"escaped single quote", `\'`, "", FAIL},
|
||||||
|
|
Loading…
Reference in New Issue