diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..722d5e7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.vscode diff --git a/lexer/items.go b/lexer/items.go index d7164d9..fa68b8f 100644 --- a/lexer/items.go +++ b/lexer/items.go @@ -7,13 +7,12 @@ type itemType int // Definition of all the lexer item types for the TOML lexer. const ( - ItemError itemType = iota // An error occurred - ItemEOF // End of input reached - ItemComment // Comment string, starts with # till en of line - ItemKey // Key of a key/value pair - ItemKeyDot // Dot for a dotted key - ItemKeyValueAssignment // Equal sign for a key/value pair assignment - ItemStringValue // A value of type string + ItemError itemType = iota // An error occurred + ItemEOF // End of input reached + ItemComment // Comment string, starts with # till end of line + ItemKey // Key of a key/value pair + ItemKeyDot // Dot for a dotted key + ItemString // A value of type string ) // Item represents a lexer item returned from the scanner. @@ -44,10 +43,8 @@ func (i itemType) String() string { return "Key" case ItemKeyDot: return "KeyDot" - case ItemKeyValueAssignment: - return "Assignment" - case ItemStringValue: - return "StringValue" + case ItemString: + return "String" default: return fmt.Sprintf("", i) } diff --git a/lexer/lexer.go b/lexer/lexer.go index 0f39ba5..bf3fdff 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -116,6 +116,10 @@ func (l *Lexer) ignore() { l.start = l.pos } +func (l *Lexer) atEndOfFile() bool { + return l.pos >= len(l.input) +} + // backup steps back one rune // Can be called only once per call of next. func (l *Lexer) backup() { @@ -194,14 +198,20 @@ func (l *Lexer) skipUntil(runes string) { } } -func (l *Lexer) newString() { +// resetStringBuilder initializes a new string builder, used for building +// a string by interpreting input data, e.g. for translating +// double quoted strings with escape codes into an actual +// Go string value. 
+func (l *Lexer) resetStringBuilder() { l.strValue.Reset() } +// addToString adds a rune to the string builder. func (l *Lexer) addToString(r rune) { l.strValue.WriteRune(r) } +// getString returns the runes in the string builder as a string value. func (l *Lexer) getString() string { return l.strValue.String() } @@ -210,9 +220,9 @@ var endOfFile rune = -1 // next returns the next rune in the input. func (l *Lexer) next() rune { - if l.pos >= len(l.input) { + if l.atEndOfFile() { l.width = 0 - return endOfFile + return endOfFile // TODO phase out this bizarro rune? } r, w := utf8.DecodeRuneInString(l.input[l.pos:]) l.width = w @@ -242,3 +252,7 @@ func (l *Lexer) unexpectedTokenError(expected string) stateFn { } return l.errorf("Unexpected %s (expected %s)", actual, expected) } + +func (l *Lexer) unexpectedEndOfFile(expected string) stateFn { + return l.errorf("Unexpected end of file (expected %s)", expected) +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index dbf2b26..1bacd2e 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -35,7 +35,7 @@ func TestWhitespacePlusComment(t *testing.T) { func TestBareKeyWithoutValue(t *testing.T) { err := "Unexpected end of file (expected an '=' value assignment)" - assertFailureAndCheck(t, "=", []string{`Key("a")`}, err) + assertFailureAndCheck(t, "a", []string{`Key("a")`}, err) assertFailureAndCheck(t, " a", []string{`Key("a")`}, err) assertFailureAndCheck(t, " a ", []string{`Key("a")`}, err) assertFailureAndCheck(t, "ab", []string{`Key("ab")`}, err) @@ -54,33 +54,40 @@ func TestDottedKey(t *testing.T) { func TestKeyWithAssignmentButNoValue(t *testing.T) { err := "Unexpected end of file (expected a value)" - assertFailureAndCheck(t, " some_cool_key = ", []string{`Key("some_cool_key")`, `Assignment("=")`}, err) + assertFailureAndCheck(t, " some_cool_key = ", []string{`Key("some_cool_key")`}, err) } -func TestEmptyBasicStringValue(t *testing.T) { - assertSuccessAndCheck(t, `a=""`, []string{`Key("a")`, 
`Assignment("=")`, `StringValue("")`}) - assertSuccessAndCheck(t, `a=""#hi`, []string{`Key("a")`, `Assignment("=")`, `StringValue("")`, `Comment("#hi")`}) - assertSuccessAndCheck(t, `a = ""`, []string{`Key("a")`, `Assignment("=")`, `StringValue("")`}) - assertSuccessAndCheck(t, `a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `Assignment("=")`, `StringValue("")`}) +func TestUnterminatedBasicString(t *testing.T) { + assertFailure(t, `key="value`, "Unexpected end of file (expected basic string token)") } -func TestBasicStringValue(t *testing.T) { + +func TestBasicStringWithNewline(t *testing.T) { + assertFailure(t, "key=\"value\nwith\nnewlines\"", "ohoh") +} + +func TestEmptyBasicString(t *testing.T) { + assertSuccessAndCheck(t, `a=""`, []string{`Key("a")`, `String("")`}) + assertSuccessAndCheck(t, `a=""#hi`, []string{`Key("a")`, `String("")`, `Comment("#hi")`}) + assertSuccessAndCheck(t, `a = ""`, []string{`Key("a")`, `String("")`}) + assertSuccessAndCheck(t, `a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `String("")`}) + assertSuccessAndCheck(t, `a=""b=""`, []string{`Key("a")`, `String("")`, `Key("b")`, `String("")`}) +} +func TestBasicString(t *testing.T) { assertSuccessAndCheck(t, `_ = "b"`, []string{ `Key("_")`, - `Assignment("=")`, - `StringValue("b")`}) + `String("b")`}) assertSuccessAndCheck(t, `thing = "A cool ʎǝʞ" # huh, it's up-side down!!`, []string{ `Key("thing")`, - `Assignment("=")`, - `StringValue("A cool ʎǝʞ")`, + `String("A cool ʎǝʞ")`, `Comment("# huh, it's up-side down!!")`}) } func TestInvalidEscapeSequence(t *testing.T) { assertFailure(t, `a="\x"`, `Invalid escape sequence \x in string value`) } -func TestBasicStringValueEscapes(t *testing.T) { +func TestBasicStringEscapes(t *testing.T) { for in, out := range map[string]string{ `\b`: "\b", `\t`: "\t", @@ -91,8 +98,7 @@ func TestBasicStringValueEscapes(t *testing.T) { `\b\t\n\f\r\"`: "\b\t\n\f\r\"", } { l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in)) - s := l[2] - if out 
!= s.Value { + if out != l[1].Value { t.Fatalf("Unexpected result when parsing '%s'", in) } } @@ -114,12 +120,10 @@ func TestTwoKeyValuePairs(t *testing.T) { assertSuccessAndCheck(t, "a=\"Hello\" #comment1\nb=\"World!\"#comment2\r\n", []string{ `Key("a")`, - `Assignment("=")`, - `StringValue("Hello")`, + `String("Hello")`, `Comment("#comment1")`, `Key("b")`, - `Assignment("=")`, - `StringValue("World!")`, + `String("World!")`, `Comment("#comment2")`}) } diff --git a/lexer/states.go b/lexer/states.go index be877d4..d86b348 100644 --- a/lexer/states.go +++ b/lexer/states.go @@ -6,8 +6,9 @@ type stateFn func(*Lexer) stateFn const ( whitespace string = " \t" - newline string = "\r\n" - startOfComment string = "#" + carriageReturn string = "\r" + newline string = "\n" + hash string = "#" equal string = "=" lower string = "abcdefghijklmnopqrstuvwxyz" upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -19,15 +20,13 @@ const ( doubleQuote string = "\"" backslash string = "\\" someQuote string = singleQuote + doubleQuote - singleQuote3 string = singleQuote + singleQuote + singleQuote - doubleQuote3 string = doubleQuote + doubleQuote + doubleQuote bareKey string = lower + upper + digits + underscore + dash startOfKey string = bareKey + someQuote ) func stateKeyValuePair(l *Lexer) stateFn { - l.skip(whitespace + newline) - if l.upcoming(startOfComment) { + l.skip(whitespace + carriageReturn + newline) + if l.upcoming(hash) { return stateComment } if l.upcoming(startOfKey) { @@ -36,12 +35,20 @@ func stateKeyValuePair(l *Lexer) stateFn { return stateEndOfFile } -// A hash symbol marks the rest of the line as a comment. +// A '#' hash symbol marks the rest of the line as a comment. 
func stateComment(l *Lexer) stateFn { - l.acceptUntil(newline) - l.emit(ItemComment, l.getAcceptedString()) - l.skip(newline) - return stateKeyValuePair + l.resetStringBuilder() + for { + switch { + case l.atEndOfFile() || l.accept(newline): + l.emit(ItemComment, l.getString()) + return stateKeyValuePair + case l.accept(carriageReturn): + l.ignore() + default: + l.addToString(l.next()) + } + } } // A key may be either bare, quoted or dotted. @@ -83,7 +90,6 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn { func stateKeyAssignment(l *Lexer) stateFn { l.skip(whitespace) if l.accept(equal) { - l.emit(ItemKeyValueAssignment, "=") l.skip(whitespace) return stateValue } @@ -118,7 +124,7 @@ func stateBasicStringValue(l *Lexer) stateFn { } // An "" empty string. l.ignore() - l.emit(ItemStringValue, "") + l.emit(ItemString, "") return stateKeyValuePair } l.ignore() @@ -160,9 +166,9 @@ var basicEscapes = map[rune]rune{ func stateParseBasicString(l *Lexer) stateFn { for { switch { - case l.upcoming(endOfFile): - l.unexpectedTokenError("basic string token") - case l.upcoming(doubleQuote): + case l.atEndOfFile(): + return l.unexpectedEndOfFile("basic string token") + case l.accept(doubleQuote): return l.popState() case l.accept(backslash): r := l.next() @@ -178,31 +184,12 @@ func stateParseBasicString(l *Lexer) stateFn { } func stateBasicString(l *Lexer) stateFn { - l.newString() - l.pushState(stateBasicStringEnd) + l.resetStringBuilder() + l.pushState(func(l *Lexer) stateFn { + l.emit(ItemString, l.getString()) + return stateKeyValuePair + }) return stateParseBasicString - -parsing: - for { - r := l.next() - if r == endOfFile { - break - } - if r == '"' { - l.emit(ItemStringValue, l.getString()) - return stateKeyValuePair - } - if r == '\\' { - r = l.next() - if escaped, ok := basicEscapes[r]; ok { - l.addToString(escaped) - continue parsing - } - return l.errorf("Invalid escape sequence \\%c in string value", r) - } - l.addToString(r) - } - return l.unexpectedTokenError("valid 
basic string rune") } func stateMultiLineBasicString(l *Lexer) stateFn {