Code cleanup and refactoring run, both functional code and the tests.
This commit is contained in:
parent
6636a7a672
commit
cbc4f04179
|
@ -12,6 +12,7 @@ const (
|
|||
ItemComment // Comment string, starts with # till end of line
|
||||
ItemKey // Key of a key/value pair
|
||||
ItemKeyDot // Dot for a dotted key
|
||||
ItemAssignment // Value assignment coming up (=)
|
||||
ItemString // A value of type string
|
||||
)
|
||||
|
||||
|
@ -26,26 +27,26 @@ func (i Item) String() string {
|
|||
switch i.Type {
|
||||
case ItemEOF:
|
||||
return "EOF"
|
||||
case ItemError:
|
||||
return "Error: " + i.Value
|
||||
case ItemKey:
|
||||
return fmt.Sprintf("[%s]", i.Value)
|
||||
case ItemKeyDot:
|
||||
return "."
|
||||
case ItemAssignment:
|
||||
return "="
|
||||
}
|
||||
return fmt.Sprintf("%s(%q)", i.Type, i.Value)
|
||||
return fmt.Sprintf("%s(%s)", i.Type, i.Value)
|
||||
}
|
||||
|
||||
// String returns a string representation of the lexer item type.
|
||||
func (i itemType) String() string {
|
||||
switch i {
|
||||
case ItemError:
|
||||
return "Error"
|
||||
return "ERR"
|
||||
case ItemComment:
|
||||
return "Comment"
|
||||
case ItemKey:
|
||||
return "Key"
|
||||
case ItemKeyDot:
|
||||
return "KeyDot"
|
||||
return "#"
|
||||
case ItemString:
|
||||
return "String"
|
||||
return "STR"
|
||||
default:
|
||||
return fmt.Sprintf("<type id %d>", i)
|
||||
panic(fmt.Sprintf("No translation available for type id %d", i))
|
||||
}
|
||||
}
|
||||
|
|
206
lexer/lexer.go
206
lexer/lexer.go
|
@ -12,7 +12,6 @@ type Lexer struct {
|
|||
input string // the scanned input string
|
||||
state stateFn // a function that handles the current state
|
||||
stack []stateFn // state function stack, for nested parsing
|
||||
start int // start position of the currently scanned item
|
||||
pos int // current scanning position in the input
|
||||
width int // width of the last rune read, for supporting backup()
|
||||
buffer StringBuffer // an efficient buffer, used to build string values
|
||||
|
@ -99,29 +98,44 @@ func (l *Lexer) popState() stateFn {
|
|||
return tail
|
||||
}
|
||||
|
||||
// TODO no longer needed?
|
||||
// getAcceptedString returns the string as accepted by the
|
||||
// accept* methods so far.
|
||||
func (l *Lexer) getAcceptedString() string {
|
||||
return l.input[l.start:l.pos]
|
||||
}
|
||||
|
||||
// emit passes a scanned item back to the client.
|
||||
func (l *Lexer) emit(t itemType, v string) {
|
||||
l.items <- Item{t, v}
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// TODO no longer needed with the string builder?
|
||||
// ignore skips over the pending input before the current position.
|
||||
func (l *Lexer) ignore() {
|
||||
l.start = l.pos
|
||||
}
|
||||
|
||||
// atEndOfFile returns true when there is no more data available in the input.
|
||||
func (l *Lexer) atEndOfFile() bool {
|
||||
return l.pos >= len(l.input)
|
||||
}
|
||||
|
||||
// emit passes a lexer item back to the client, including the provided string.
|
||||
func (l *Lexer) emit(t itemType, s string) {
|
||||
l.items <- Item{t, s}
|
||||
l.buffer.Reset()
|
||||
}
|
||||
|
||||
// emitLiteral passes a lexer item back to the client, including the accumulated
|
||||
// string buffer data as a literal string.
|
||||
func (l *Lexer) emitLiteral(t itemType) {
|
||||
l.emit(t, l.buffer.AsLiteralString())
|
||||
}
|
||||
|
||||
// emitTrimmedLiteral passes a lexer item back to the client, including the
|
||||
// accumulated string buffer data as a literal string with whitespace
|
||||
// trimmed from it.
|
||||
func (l *Lexer) emitTrimmedLiteral(t itemType) {
|
||||
l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
|
||||
}
|
||||
|
||||
// emitInterpreted passes a lexer item back to the client, including the
|
||||
// accumulated string buffer data as an interpreted string (handling escape
|
||||
// codes like \n, \t, \uXXXX, etc.)
|
||||
// This method might return an error, in case there is data in the
|
||||
// string buffer that is not valid for string interpretation.
|
||||
func (l *Lexer) emitInterpreted(t itemType) error {
|
||||
s, err := l.buffer.AsInterpretedString()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
l.emit(t, s)
|
||||
return nil
|
||||
}
|
||||
|
||||
// backup steps back one rune
|
||||
// Can be called only once per call of next.
|
||||
func (l *Lexer) backup() {
|
||||
|
@ -129,16 +143,119 @@ func (l *Lexer) backup() {
|
|||
}
|
||||
|
||||
// peek returns but does not advance to the next rune(s) in the input.
|
||||
func (l *Lexer) peek() rune {
|
||||
r := l.next()
|
||||
l.backup()
|
||||
return r
|
||||
// Returns the rune, its width and a boolean. The boolean will be false in case
|
||||
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
||||
func (l *Lexer) peek() (rune, int, bool) {
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
return utf8.RuneError, w, false
|
||||
default:
|
||||
return r, w, true
|
||||
}
|
||||
}
|
||||
|
||||
// TODO still needed with the string buffer?
|
||||
// accept consumes the next rune if it's from the valid set of runes.
|
||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||
// Returns a slice of runes and a boolean. The boolean will be false in case
|
||||
// less upcoming runes can be peeked than the requested amount
|
||||
// (end of data or invalid UTF8 character).
|
||||
func (l *Lexer) peekMulti(amount int) ([]rune, bool) {
|
||||
offset := 0
|
||||
var peeked []rune
|
||||
for i := 0; i < amount; i++ {
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos+offset:])
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
return peeked, false
|
||||
default:
|
||||
offset += w
|
||||
peeked = append(peeked, r)
|
||||
}
|
||||
}
|
||||
return peeked, true
|
||||
}
|
||||
|
||||
// acceptNext adds the next rune from the input to the string buffer.
|
||||
// If no rune could be read (end of file or invalid UTF8 data),
|
||||
// then false is returned.
|
||||
func (l *Lexer) acceptNext() bool {
|
||||
r := l.next()
|
||||
if r == endOfFile || r == utf8.RuneError {
|
||||
return false
|
||||
}
|
||||
l.buffer.WriteRune(r)
|
||||
return true
|
||||
}
|
||||
|
||||
// acceptFrom adds the next rune from the input to the string buffer
|
||||
// when it matches in the provided runes. If the next rune does
|
||||
// not match, false is returned.
|
||||
func (l *Lexer) acceptFrom(runes string) bool {
|
||||
r := l.next()
|
||||
if strings.IndexRune(runes, r) >= 0 {
|
||||
l.buffer.WriteRune(r)
|
||||
return true
|
||||
}
|
||||
l.backup()
|
||||
return false
|
||||
}
|
||||
|
||||
// acceptRun adds consecutive runes from the input to the string
|
||||
// buffer when they match the provided runes. If no runes were added
|
||||
// at all, false it returned.
|
||||
func (l *Lexer) acceptRun(runes string) bool {
|
||||
accepted := false
|
||||
for l.acceptFrom(runes) {
|
||||
accepted = true
|
||||
}
|
||||
return accepted
|
||||
}
|
||||
|
||||
// TODO meh... ugly rune.
|
||||
var endOfFile rune = -1
|
||||
|
||||
// next returns the next rune from the input.
|
||||
func (l *Lexer) next() rune {
|
||||
l.width = 0
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
switch {
|
||||
case r == utf8.RuneError && w == 0:
|
||||
return endOfFile
|
||||
case r == utf8.RuneError:
|
||||
return utf8.RuneError
|
||||
default:
|
||||
l.width = w
|
||||
l.pos += w
|
||||
return r
|
||||
}
|
||||
}
|
||||
|
||||
// skip skips a rune from the set of accepted runes.
|
||||
// Returns true when a rune was skipped.
|
||||
func (l *Lexer) skip(runes string) bool {
|
||||
r, w, _ := l.peek()
|
||||
if strings.IndexRune(runes, r) >= 0 {
|
||||
l.pos += w
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// skipRun skips a run of runes from the set of accepted runes.
|
||||
// Returns true when one or more runes were skipped.
|
||||
func (l *Lexer) skipRun(runes string) bool {
|
||||
didSkip := false
|
||||
for l.skip(runes) {
|
||||
didSkip = true
|
||||
}
|
||||
return didSkip
|
||||
}
|
||||
|
||||
// accept adds the next rune to the string buffer and returns true if it's
|
||||
// from the valid set of runes. Otherwise false is returned.
|
||||
func (l *Lexer) accept(runes string) bool {
|
||||
if strings.IndexRune(runes, l.next()) >= 0 {
|
||||
r := l.next()
|
||||
if strings.IndexRune(runes, r) >= 0 {
|
||||
return true
|
||||
}
|
||||
l.backup()
|
||||
|
@ -187,34 +304,10 @@ func (l *Lexer) acceptWhile(runes string) bool {
|
|||
return accepted
|
||||
}
|
||||
|
||||
// skip skips a run of runes from the set of accepted runs.
|
||||
func (l *Lexer) skip(runes string) {
|
||||
if l.acceptWhile(runes) {
|
||||
l.ignore()
|
||||
}
|
||||
}
|
||||
|
||||
// skipUntil skips a run of runes, until a rune from the set of
|
||||
// runes of EOF is reached.
|
||||
func (l *Lexer) skipUntil(runes string) {
|
||||
if l.acceptUntil(runes) {
|
||||
l.ignore()
|
||||
}
|
||||
}
|
||||
|
||||
// TODO meh... ugly rune.
|
||||
var endOfFile rune = -1
|
||||
|
||||
// next returns the next rune in the input.
|
||||
func (l *Lexer) next() rune {
|
||||
if l.atEndOfFile() {
|
||||
l.width = 0
|
||||
return endOfFile // TODO phase out this bizarro rune?
|
||||
}
|
||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||
l.width = w
|
||||
l.pos += w
|
||||
return r
|
||||
l.acceptUntil(runes)
|
||||
}
|
||||
|
||||
// error returns an error token and terminates the scan
|
||||
|
@ -227,15 +320,16 @@ func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (l *Lexer) unexpectedTokenError(expected string) stateFn {
|
||||
func (l *Lexer) unexpectedInputError(expected string) stateFn {
|
||||
var actual string
|
||||
switch {
|
||||
case l.peek() == endOfFile:
|
||||
case l.atEndOfFile(): // TODO maybe not hit anymore after refactoring?
|
||||
actual = "end of file"
|
||||
case !utf8.ValidString(l.input[l.start:]):
|
||||
case !utf8.ValidString(l.input[l.pos:]):
|
||||
actual = "non-UTF8 data"
|
||||
default:
|
||||
actual = fmt.Sprintf("token '%c'", l.peek())
|
||||
r, _, _ := l.peek()
|
||||
actual = fmt.Sprintf("token '%c'", r)
|
||||
}
|
||||
return l.errorf("Unexpected %s (expected %s)", actual, expected)
|
||||
}
|
||||
|
|
|
@ -1,175 +0,0 @@
|
|||
package lexer_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/mmakaay/toml/lexer"
|
||||
)
|
||||
|
||||
func TestInvalidUtf8Data(t *testing.T) {
|
||||
assertFailureAndCheck(t, "\xbc", []string{}, "Unexpected non-UTF8 data (expected end of file)")
|
||||
}
|
||||
|
||||
func TestEmptyInput(t *testing.T) {
|
||||
assertSuccessAndCheck(t, "", []string{})
|
||||
}
|
||||
func TestWhiteSpace(t *testing.T) {
|
||||
assertSuccessAndCheck(t, " ", []string{})
|
||||
assertSuccessAndCheck(t, "\t", []string{})
|
||||
assertSuccessAndCheck(t, " \t \t ", []string{})
|
||||
}
|
||||
func TestWhiteSpaceAndNewlines(t *testing.T) {
|
||||
assertSuccessAndCheck(t, "\n", []string{})
|
||||
assertSuccessAndCheck(t, "\n \t\r\n", []string{})
|
||||
}
|
||||
func TestComments(t *testing.T) {
|
||||
assertSuccessAndCheck(t, "#", []string{`Comment("#")`})
|
||||
assertSuccessAndCheck(t, " \t \t #", []string{`Comment("#")`})
|
||||
assertSuccessAndCheck(t, " \t \t # not empty", []string{`Comment("# not empty")`})
|
||||
assertSuccessAndCheck(t, " \t \t # not empty\r\r\r\n", []string{`Comment("# not empty")`})
|
||||
assertSuccessAndCheck(t, "\n \t\r\n# AAP\r\n", []string{`Comment("# AAP")`})
|
||||
assertSuccessAndCheck(t,
|
||||
"# two lines\n# of comments\n",
|
||||
[]string{`Comment("# two lines")`, `Comment("# of comments")`})
|
||||
assertSuccessAndCheck(t,
|
||||
`# \tcomment\nwith escape-y chars`,
|
||||
[]string{`Comment("# \\tcomment\\nwith escape-y chars")`})
|
||||
}
|
||||
|
||||
func TestBareKeyWithoutValue(t *testing.T) {
|
||||
err := "Unexpected end of file (expected an '=' value assignment)"
|
||||
assertFailureAndCheck(t, "a", []string{`Key("a")`}, err)
|
||||
assertFailureAndCheck(t, "_", []string{`Key("_")`}, err)
|
||||
assertFailureAndCheck(t, " a", []string{`Key("a")`}, err)
|
||||
assertFailureAndCheck(t, " a ", []string{`Key("a")`}, err)
|
||||
assertFailureAndCheck(t, "ab", []string{`Key("ab")`}, err)
|
||||
assertFailureAndCheck(t, "Ab", []string{`Key("Ab")`}, err)
|
||||
assertFailureAndCheck(t, "Ab1", []string{`Key("Ab1")`}, err)
|
||||
assertFailureAndCheck(t, "_Ab1", []string{`Key("_Ab1")`}, err)
|
||||
assertFailureAndCheck(t, "_-Ab1", []string{`Key("_-Ab1")`}, err)
|
||||
assertFailureAndCheck(t, "_-Ab1_this-is_GOOD987", []string{`Key("_-Ab1_this-is_GOOD987")`}, err)
|
||||
}
|
||||
|
||||
func TestDottedKey(t *testing.T) {
|
||||
err := "Unexpected end of file (expected an '=' value assignment)"
|
||||
assertFailureAndCheck(t, "a.b", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
|
||||
assertFailureAndCheck(t, " a .\t\t b\t ", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
|
||||
}
|
||||
|
||||
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
||||
err := "Unexpected end of file (expected a value)"
|
||||
assertFailureAndCheck(t, " some_cool_key = ", []string{`Key("some_cool_key")`}, err)
|
||||
}
|
||||
|
||||
func TestUnterminatedBasicString(t *testing.T) {
|
||||
assertFailure(t, `key="value`, "Unexpected end of file (expected basic string token)")
|
||||
}
|
||||
|
||||
func TestBasicStringWithNewline(t *testing.T) {
|
||||
assertFailure(t, "key=\"value\nwith\nnewlines\"", "ohoh")
|
||||
}
|
||||
|
||||
func TestEmptyBasicString(t *testing.T) {
|
||||
assertSuccessAndCheck(t, `a=""`, []string{`Key("a")`, `String("")`})
|
||||
assertSuccessAndCheck(t, `a=""#hi`, []string{`Key("a")`, `String("")`, `Comment("#hi")`})
|
||||
assertSuccessAndCheck(t, `a = ""`, []string{`Key("a")`, `String("")`})
|
||||
assertSuccessAndCheck(t, `a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `String("")`})
|
||||
assertSuccessAndCheck(t, `a=""b=""`, []string{`Key("a")`, `String("")`, `Key("b")`, `String("")`})
|
||||
}
|
||||
func TestBasicString(t *testing.T) {
|
||||
assertSuccessAndCheck(t, `_ = "b"`,
|
||||
[]string{
|
||||
`Key("_")`,
|
||||
`String("b")`})
|
||||
assertSuccessAndCheck(t, `thing = "A cool ʎǝʞ" # huh, it's up-side down!!`,
|
||||
[]string{
|
||||
`Key("thing")`,
|
||||
`String("A cool ʎǝʞ")`,
|
||||
`Comment("# huh, it's up-side down!!")`})
|
||||
}
|
||||
|
||||
func TestInvalidEscapeSequence(t *testing.T) {
|
||||
assertFailure(t, `a="\x"`, `Invalid escape sequence \x in string value`)
|
||||
}
|
||||
func TestBasicStringEscapes(t *testing.T) {
|
||||
for in, out := range map[string]string{
|
||||
`\b`: "\b",
|
||||
`\t`: "\t",
|
||||
`\n`: "\n",
|
||||
`\f`: "\f",
|
||||
`\r`: "\r",
|
||||
`\"`: "\"",
|
||||
`\b\t\nhuh\f\r\"`: "\b\t\nhuh\f\r\"",
|
||||
`\u2318`: "⌘",
|
||||
`\U0001014D`: "𐅍",
|
||||
} {
|
||||
l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
|
||||
if out != l[1].Value {
|
||||
t.Fatalf("Unexpected result when parsing '%s'\nexpected: %q\nactual: %q", in, out, l[1].Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// func TestBasicStringUnicodeEscapes(t *testing.T) {
|
||||
// for in, out := range map[string]string{
|
||||
// `\u`: "\b",
|
||||
// } {
|
||||
// l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
|
||||
// s := l[2]
|
||||
// if out != s.Value {
|
||||
// t.Fatalf("Unexpected result when parsing '%s'", in)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
func TestTwoKeyValuePairs(t *testing.T) {
|
||||
assertSuccessAndCheck(t, "a=\"Hello\" #comment1\nb=\"World!\"#comment2\r\n",
|
||||
[]string{
|
||||
`Key("a")`,
|
||||
`String("Hello")`,
|
||||
`Comment("#comment1")`,
|
||||
`Key("b")`,
|
||||
`String("World!")`,
|
||||
`Comment("#comment2")`})
|
||||
}
|
||||
|
||||
func assertSuccessAndCheck(t *testing.T, input string, expected []string) {
|
||||
l := assertSuccess(t, input)
|
||||
assertItems(t, l, expected)
|
||||
}
|
||||
|
||||
func assertFailureAndCheck(t *testing.T, input string, expected []string, expectedErr string) {
|
||||
l := assertFailure(t, input, expectedErr)
|
||||
assertItems(t, l, expected)
|
||||
}
|
||||
|
||||
func assertFailure(t *testing.T, input string, expectedErr string) []lexer.Item {
|
||||
l, err := lexer.Lex(input).ToArray()
|
||||
if err == nil {
|
||||
t.Fatalf("Expected lexer error '%s', but no error occurred", expectedErr)
|
||||
}
|
||||
if err.Error() != expectedErr {
|
||||
t.Fatalf("Mismatch between expected and actual error:\nExpected: %s\nActual: %s\n", expectedErr, err)
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func assertSuccess(t *testing.T, input string) []lexer.Item {
|
||||
l, err := lexer.Lex(input).ToArray()
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected lexer error: %s", err)
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func assertItems(t *testing.T, l []lexer.Item, expected []string) {
|
||||
if len(expected) != len(l) {
|
||||
t.Fatalf("Unexpected number of lexer items: %d (expected: %d)", len(l), len(expected))
|
||||
}
|
||||
for i, e := range expected {
|
||||
if l[i].String() != e {
|
||||
t.Fatalf("Unexpected lexer item at index %d: %s (expected: %s)", i, l[i], e)
|
||||
}
|
||||
}
|
||||
}
|
143
lexer/states.go
143
lexer/states.go
|
@ -1,6 +1,6 @@
|
|||
package lexer
|
||||
|
||||
// stateFn represents the state of the scanner as a function
|
||||
// stateFn represents the state of the lexer as a function
|
||||
// that returns the next state.
|
||||
type stateFn func(*Lexer) stateFn
|
||||
|
||||
|
@ -19,15 +19,17 @@ const (
|
|||
singleQuote string = "'"
|
||||
doubleQuote string = "\""
|
||||
backslash string = "\\"
|
||||
someQuote string = singleQuote + doubleQuote
|
||||
bareKey string = lower + upper + digits + underscore + dash
|
||||
startOfKey string = bareKey + someQuote
|
||||
quotable string = `btnfr\"`
|
||||
quoteChars string = singleQuote + doubleQuote
|
||||
bareKeyChars string = lower + upper + digits + underscore + dash
|
||||
startOfKey string = bareKeyChars + quoteChars
|
||||
escapeChars string = `btnfr"\`
|
||||
shortUtf8Escape string = "u"
|
||||
longUtf8Escape string = "U"
|
||||
)
|
||||
|
||||
func stateKeyValuePair(l *Lexer) stateFn {
|
||||
l.skip(whitespace + carriageReturn + newline)
|
||||
if l.upcoming(hash) {
|
||||
l.skipRun(whitespace + carriageReturn + newline)
|
||||
if l.skip(hash) {
|
||||
return stateComment
|
||||
}
|
||||
if l.upcoming(startOfKey) {
|
||||
|
@ -38,36 +40,34 @@ func stateKeyValuePair(l *Lexer) stateFn {
|
|||
|
||||
// A '#' hash symbol marks the rest of the line as a comment.
|
||||
func stateComment(l *Lexer) stateFn {
|
||||
l.buffer.Reset()
|
||||
for {
|
||||
switch {
|
||||
case l.atEndOfFile() || l.accept(newline):
|
||||
s := l.buffer.AsLiteralString()
|
||||
l.emit(ItemComment, s)
|
||||
case l.atEndOfFile() || l.skip(newline):
|
||||
l.emitTrimmedLiteral(ItemComment)
|
||||
return stateKeyValuePair
|
||||
case l.accept(carriageReturn):
|
||||
l.ignore()
|
||||
default:
|
||||
l.buffer.WriteRune(l.next())
|
||||
if !l.acceptNext() {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A key may be either bare, quoted or dotted.
|
||||
func stateKey(l *Lexer) stateFn {
|
||||
if l.upcoming(bareKey) {
|
||||
return stateBareKey
|
||||
if l.acceptFrom(bareKeyChars) {
|
||||
return statebareKeyChars
|
||||
}
|
||||
return l.unexpectedTokenError("a valid key name")
|
||||
return l.unexpectedInputError("a valid key name")
|
||||
}
|
||||
|
||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||
// keys are allowed to be composed of only ASCII digits,
|
||||
// e.g. 1234, but are always interpreted as strings.
|
||||
func stateBareKey(l *Lexer) stateFn {
|
||||
l.acceptWhile(bareKey)
|
||||
l.emit(ItemKey, l.getAcceptedString())
|
||||
func statebareKeyChars(l *Lexer) stateFn {
|
||||
l.acceptRun(bareKeyChars)
|
||||
l.emitLiteral(ItemKey)
|
||||
return stateEndOfKeyOrKeyDot
|
||||
}
|
||||
|
||||
|
@ -76,10 +76,10 @@ func stateBareKey(l *Lexer) stateFn {
|
|||
func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
|
||||
// Whitespace around dot-separated parts is ignored, however,
|
||||
// best practice is to not use any extraneous whitespace.
|
||||
l.skip(whitespace)
|
||||
if l.accept(dot) {
|
||||
l.emit(ItemKeyDot, ".")
|
||||
l.skip(whitespace)
|
||||
l.skipRun(whitespace)
|
||||
if l.skip(dot) {
|
||||
l.emit(ItemKeyDot, "")
|
||||
l.skipRun(whitespace)
|
||||
return stateKey
|
||||
}
|
||||
return stateKeyAssignment
|
||||
|
@ -90,70 +90,57 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
|
|||
// sign, and value must be on the same line (though some values can
|
||||
// be broken over multiple lines).
|
||||
func stateKeyAssignment(l *Lexer) stateFn {
|
||||
l.skip(whitespace)
|
||||
if l.accept(equal) {
|
||||
l.skip(whitespace)
|
||||
l.skipRun(whitespace)
|
||||
if l.skip(equal) {
|
||||
l.emit(ItemAssignment, "")
|
||||
l.skipRun(whitespace)
|
||||
return stateValue
|
||||
}
|
||||
return l.unexpectedTokenError("an '=' value assignment")
|
||||
return l.unexpectedInputError("a value assignment")
|
||||
}
|
||||
|
||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||
func stateValue(l *Lexer) stateFn {
|
||||
l.skip(whitespace)
|
||||
if l.upcoming(someQuote) {
|
||||
l.skipRun(whitespace)
|
||||
if l.upcoming(quoteChars) {
|
||||
return stateStringValue
|
||||
}
|
||||
return l.unexpectedTokenError("a value")
|
||||
return l.unexpectedInputError("a value")
|
||||
}
|
||||
|
||||
// There are four ways to express strings: basic, multi-line basic, literal,
|
||||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||
func stateStringValue(l *Lexer) stateFn {
|
||||
if l.accept(doubleQuote) {
|
||||
// Basic strings are surrounded by quotation marks.
|
||||
if l.skip(doubleQuote) {
|
||||
return stateBasicStringValue
|
||||
}
|
||||
return l.unexpectedTokenError("a string value")
|
||||
return l.unexpectedInputError("a string value")
|
||||
}
|
||||
|
||||
// Basic strings are surrounded by quotation marks. Any Unicode character
|
||||
// may be used except those that must be escaped: quotation mark, backslash,
|
||||
// and the control characters (U+0000 to U+001F, U+007F).
|
||||
//
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
//
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
//
|
||||
// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
|
||||
// The escape codes must be valid Unicode scalar values.
|
||||
//
|
||||
// All other escape sequences not listed above are reserved and,
|
||||
// if used, TOML should produce an error.
|
||||
func stateBasicStringValue(l *Lexer) stateFn {
|
||||
// Possibly a """ multi-line string start,
|
||||
// possibly the end of an "" empty string.
|
||||
if l.accept(doubleQuote) {
|
||||
if l.skip(doubleQuote) {
|
||||
// It's a """ multi-line string.
|
||||
if l.accept(doubleQuote) {
|
||||
l.ignore()
|
||||
if l.skip(doubleQuote) {
|
||||
return stateMultiLineBasicString
|
||||
}
|
||||
// It's an "" empty string.
|
||||
l.ignore()
|
||||
l.emit(ItemString, "")
|
||||
return stateKeyValuePair
|
||||
}
|
||||
l.ignore()
|
||||
return stateBasicString
|
||||
}
|
||||
|
||||
const invalidBasicStringCharacters string = "" +
|
||||
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
||||
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
||||
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
||||
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
||||
"\u007F"
|
||||
|
||||
func stateParseBasicString(l *Lexer) stateFn {
|
||||
for {
|
||||
switch {
|
||||
|
@ -162,26 +149,47 @@ func stateParseBasicString(l *Lexer) stateFn {
|
|||
case l.accept(doubleQuote):
|
||||
return l.popState()
|
||||
case l.accept(backslash):
|
||||
if l.upcoming(quotable) {
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
|
||||
// The escape codes must be valid Unicode scalar values.
|
||||
switch {
|
||||
case l.upcoming(escapeChars):
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
l.buffer.WriteRune('\\')
|
||||
l.buffer.WriteRune(l.next())
|
||||
} else {
|
||||
case l.upcoming(shortUtf8Escape):
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
return l.errorf("Not yet implemented: short utf8")
|
||||
case l.upcoming(longUtf8Escape):
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
return l.errorf("Not yet implemented: long utf8")
|
||||
default:
|
||||
// All other escape sequences not listed above are reserved and,
|
||||
// if used, TOML should produce an error.
|
||||
return l.errorf("Invalid escape sequence \\%c in string value", l.next())
|
||||
}
|
||||
case l.upcoming(invalidBasicStringCharacters):
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||
return l.errorf("Invalid character in basic string: %q", l.next())
|
||||
default:
|
||||
l.buffer.WriteRune(l.next())
|
||||
l.acceptNext()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stateBasicString(l *Lexer) stateFn {
|
||||
l.buffer.Reset()
|
||||
l.pushState(func(l *Lexer) stateFn {
|
||||
s, err := l.buffer.AsInterpretedString()
|
||||
err := l.emitInterpreted(ItemString)
|
||||
if err != nil {
|
||||
return l.errorf("Invalid data in string: %s", err)
|
||||
}
|
||||
l.emit(ItemString, s)
|
||||
return stateKeyValuePair
|
||||
})
|
||||
return stateParseBasicString
|
||||
|
@ -192,10 +200,9 @@ func stateMultiLineBasicString(l *Lexer) stateFn {
|
|||
}
|
||||
|
||||
func stateEndOfFile(l *Lexer) stateFn {
|
||||
i := l.peek()
|
||||
if i == endOfFile {
|
||||
if l.atEndOfFile() {
|
||||
l.emit(ItemEOF, "EOF")
|
||||
return nil
|
||||
}
|
||||
return l.unexpectedTokenError("end of file")
|
||||
return l.unexpectedInputError("end of file")
|
||||
}
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
package lexer_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/mmakaay/toml/lexer"
|
||||
)
|
||||
|
||||
func TestInvalidUtf8Data(t *testing.T) {
|
||||
runStatesT(t, statesT{
|
||||
"invalid UTF8 data", "\xbc", "",
|
||||
"Unexpected non-UTF8 data (expected end of file)"})
|
||||
}
|
||||
|
||||
func TestEmptyInput(t *testing.T) {
|
||||
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||
}
|
||||
|
||||
func TestWhiteSpaceAndNewlines(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"space", " ", "", ""},
|
||||
{"tab", "\t", "", ""},
|
||||
{"newline", "\n", "", ""},
|
||||
{"carriage return", "\r", "", ""},
|
||||
{"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestComments(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"empty comment", "#", "#()", ""},
|
||||
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||
{"basic comment", "#chicken", "#(chicken)", ""},
|
||||
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestKeyWithoutAssignment(t *testing.T) {
|
||||
err := "Unexpected end of file (expected a value assignment)"
|
||||
runStatesTs(t, []statesT{
|
||||
{"bare with whitespace", " a ", []string{"[a]"}, err},
|
||||
{"bare lower", "abcdefghijklmnopqrstuvwxyz", []string{"[abcdefghijklmnopqrstuvwxyz]"}, err},
|
||||
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", []string{"[ABCDEFGHIJKLMNOPQRSTUVWXYZ]"}, err},
|
||||
{"bare numbers", "0123456789", []string{"[0123456789]"}, err},
|
||||
{"bare underscore", "_", []string{"[_]"}, err},
|
||||
{"bare dash", "-", []string{"[-]"}, err},
|
||||
{"bare big mix", "-hey_good_Lookin123-", []string{"[-hey_good_Lookin123-]"}, err},
|
||||
{"bare dotted", "a._.c", []string{"[a]", ".", "[_]", ".", "[c]"}, err},
|
||||
{"bare dotted with whitespace", " a .\t\t b\t ", []string{"[a]", ".", "[b]"}, err},
|
||||
})
|
||||
}
|
||||
|
||||
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
||||
err := "Unexpected end of file (expected a value)"
|
||||
runStatesTs(t, []statesT{
|
||||
{"bare", "a=", "[a]=", err},
|
||||
{"double equal sign", "a==", "[a]=", "Unexpected token '=' (expected a value)"},
|
||||
{"bare dotted", "a.b=", "[a].[b]=", err},
|
||||
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
|
||||
})
|
||||
}
|
||||
|
||||
func TestUnterminatedBasicString(t *testing.T) {
|
||||
runStatesT(t, statesT{
|
||||
"missing closing quote", `a="value`, "[a]=",
|
||||
"Unexpected end of file (expected basic string token)"})
|
||||
}
|
||||
|
||||
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`},
|
||||
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`},
|
||||
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`},
|
||||
})
|
||||
|
||||
// No need to write all test cases for disallowed characters by hand.
|
||||
for i := 0x00; i <= 0x1F; i++ {
|
||||
name := fmt.Sprintf("control character %x", rune(i))
|
||||
runStatesT(
|
||||
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
|
||||
fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmptyBasicString(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"empty", `a=""`, "[a]=STR()", ""},
|
||||
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
|
||||
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
|
||||
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
|
||||
{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
||||
{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestBasicString(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
|
||||
{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
||||
runStatesT(t, statesT{
|
||||
"invalid escape sequence", `a="\x"`, "[a]=", `Invalid escape sequence \x in string value`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestBasicStringEscapes(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
|
||||
{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
|
||||
{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
|
||||
{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
|
||||
{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
|
||||
{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
|
||||
{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
|
||||
{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
|
||||
{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
|
||||
{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
|
||||
})
|
||||
}
|
||||
|
||||
type statesT struct {
|
||||
name string
|
||||
in string
|
||||
out interface{}
|
||||
err string
|
||||
}
|
||||
|
||||
func runStatesTs(t *testing.T, tests []statesT) {
|
||||
for _, c := range tests {
|
||||
runStatesT(t, c)
|
||||
}
|
||||
}
|
||||
|
||||
func runStatesT(t *testing.T, c statesT) {
|
||||
l, err := lexer.Lex(c.in).ToArray()
|
||||
if err == nil && c.err != "" {
|
||||
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
||||
}
|
||||
if err != nil && c.err == "" {
|
||||
t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
|
||||
}
|
||||
if err != nil && c.err != "" && err.Error() != c.err {
|
||||
t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
|
||||
}
|
||||
switch expected := c.out.(type) {
|
||||
case []string:
|
||||
if len(expected) != len(l) {
|
||||
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
||||
}
|
||||
for i, e := range expected {
|
||||
if l[i].String() != e {
|
||||
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i])
|
||||
}
|
||||
}
|
||||
case string:
|
||||
a := make([]string, len(l))
|
||||
for _, v := range l {
|
||||
a = append(a, v.String())
|
||||
}
|
||||
actual := strings.Join(a, "")
|
||||
if actual != expected {
|
||||
t.Errorf("[%s] Unexpected lexer output:\nexpected; %s\nactual: %s\n", c.name, expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -19,7 +19,7 @@ func (b *StringBuffer) Reset() *StringBuffer {
|
|||
return b
|
||||
}
|
||||
|
||||
// AddString adds the runes of the input string to the string buffer.
|
||||
// WriteString adds the runes of the input string to the string buffer.
|
||||
func (b *StringBuffer) WriteString(s string) *StringBuffer {
|
||||
for _, r := range s {
|
||||
b.WriteRune(r)
|
||||
|
|
|
@ -23,7 +23,7 @@ func TestResetResetsBuffer(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
type testCase struct {
|
||||
type stringbufT struct {
|
||||
name string
|
||||
in string
|
||||
out string
|
||||
|
@ -37,7 +37,7 @@ const (
|
|||
|
||||
func TestAsLiteralString(t *testing.T) {
|
||||
b := lexer.StringBuffer{}
|
||||
for _, c := range []testCase{
|
||||
for _, c := range []stringbufT{
|
||||
{"empty string", ``, ``, OK},
|
||||
{"simple string", `Simple string!`, `Simple string!`, OK},
|
||||
{"single quote", `'`, `'`, OK},
|
||||
|
@ -57,7 +57,7 @@ func TestAsLiteralString(t *testing.T) {
|
|||
|
||||
func TestAsInterpretedString(t *testing.T) {
|
||||
b := lexer.StringBuffer{}
|
||||
for _, c := range []testCase{
|
||||
for _, c := range []stringbufT{
|
||||
{"empty string", "", "", OK},
|
||||
{"one character", "Simple string!", "Simple string!", OK},
|
||||
{"escaped single quote", `\'`, "", FAIL},
|
||||
|
|
Loading…
Reference in New Issue