Initial import, work in progress.
This commit is contained in:
commit
f6efd34b31
|
@ -0,0 +1,54 @@
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// itemType represents the type of lexer items.
type itemType int

// Definition of all the lexer item types for the TOML lexer.
const (
	ItemError              itemType = iota // An error occurred
	ItemEOF                                // End of input reached
	ItemComment                            // Comment string, starts with # until end of line
	ItemKey                                // Key of a key/value pair
	ItemKeyDot                             // Dot for a dotted key
	ItemKeyValueAssignment                 // Equal sign for a key/value pair assignment
	ItemStringValue                        // A value of type string
)

// Item represents a lexer item returned from the scanner.
type Item struct {
	Type  itemType // the type of the item, e.g. ItemKey, ItemStringValue
	Value string   // the literal value of the item, e.g. "10.42", "some key"
}
|
||||||
|
|
||||||
|
// String returns a string representation of the lexer item.
|
||||||
|
func (i Item) String() string {
|
||||||
|
switch i.Type {
|
||||||
|
case ItemEOF:
|
||||||
|
return "EOF"
|
||||||
|
case ItemError:
|
||||||
|
return "Error: " + i.Value
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%s(%q)", i.Type, i.Value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns a string representation of the lexer item type.
|
||||||
|
func (i itemType) String() string {
|
||||||
|
switch i {
|
||||||
|
case ItemError:
|
||||||
|
return "Error"
|
||||||
|
case ItemComment:
|
||||||
|
return "Comment"
|
||||||
|
case ItemKey:
|
||||||
|
return "Key"
|
||||||
|
case ItemKeyDot:
|
||||||
|
return "KeyDot"
|
||||||
|
case ItemKeyValueAssignment:
|
||||||
|
return "Assignment"
|
||||||
|
case ItemStringValue:
|
||||||
|
return "StringValue"
|
||||||
|
default:
|
||||||
|
return fmt.Sprintf("<type id %d>", i)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,244 @@
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lexer holds the state of the scanner.
type Lexer struct {
	input    string          // the scanned input string
	state    stateFn         // the current state function
	stack    []stateFn       // state function stack, for nested parsing
	start    int             // start position of the currently scanned item
	pos      int             // current scanning position in the input
	width    int             // width of the last rune read, used by backup()
	strValue strings.Builder // used to build string values
	items    chan Item       // channel of scanned items
	nextItem Item            // the current item as reached by Next() and retrieved by Get()
	err      error           // an error message when lexing failed, retrieved by Error()
}
|
||||||
|
|
||||||
|
// Lex takes an input string and initializes the TOML lexer for it.
// Usage:
//
//   l := lexer.Lex("...inputstring...")
//   for l.Next() {
//       item := l.Get()
//       ... handle item ...
//   }
//   if e := l.Error(); e != nil {
//       ... handle error message ...
//   }
func Lex(input string) *Lexer {
	return &Lexer{
		input: input,
		state: stateKeyValuePair,
		// Buffered so a state function can emit an item without
		// blocking before Next() reads it.
		items: make(chan Item, 2),
	}
}
|
||||||
|
|
||||||
|
// Next advances to the next lexer item in the input string.
// When a next item was found, then true is returned.
// On error or reaching the end of the input, false is returned;
// the error (if any) can then be retrieved using Error().
func (l *Lexer) Next() bool {
	if l.state == nil {
		panic("This should not happen: nil state reached, but entering Next()")
	}
	for {
		select {
		case i := <-l.items:
			// EOF and error items terminate the scan; they are not
			// handed to the caller as regular items.
			if i.Type == ItemEOF {
				return false
			}
			if i.Type == ItemError {
				l.err = errors.New(i.Value)
				return false
			}
			l.nextItem = i
			return true
		default:
			// No item buffered yet: run the state machine one step
			// so it can emit more items.
			l.state = l.state(l)
		}
	}
}
|
||||||
|
|
||||||
|
// Error returns the error that the lexer ran into during scanning,
// or nil when no error occurred so far.
func (l *Lexer) Error() error {
	return l.err
}

// ToArray returns lexer items as an array.
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func (l *Lexer) ToArray() ([]Item, error) {
	var items []Item
	for l.Next() {
		items = append(items, l.Get())
	}
	return items, l.Error()
}

// Get returns the next lexer item, as reached by Next()
func (l *Lexer) Get() Item {
	return l.nextItem
}
|
||||||
|
|
||||||
|
// pushState adds the state function to its stack.
// This is used for implementing nested parsing.
func (l *Lexer) pushState(state stateFn) {
	l.stack = append(l.stack, state)
}
|
||||||
|
|
||||||
|
// popState pops the last pushed state from its stack.
|
||||||
|
func (l *Lexer) popState() stateFn {
|
||||||
|
last := len(l.stack) - 1
|
||||||
|
head, tail := l.stack[:last], l.stack[last]
|
||||||
|
l.stack = head
|
||||||
|
return tail
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAcceptedString returns the string as accepted by the
// accept* methods so far (the input from start up to pos).
func (l *Lexer) getAcceptedString() string {
	return l.input[l.start:l.pos]
}

// emit passes a scanned item back to the client and starts
// a new pending item at the current position.
func (l *Lexer) emit(t itemType, v string) {
	l.items <- Item{t, v}
	l.start = l.pos
}

// ignore skips over the pending input before the current position.
func (l *Lexer) ignore() {
	l.start = l.pos
}
|
||||||
|
|
||||||
|
// backup steps back one rune.
// Can be called only once per call of next, since only the width
// of the last read rune is tracked.
func (l *Lexer) backup() {
	l.pos -= l.width
}

// peek returns but does not advance to the next rune(s) in the input.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
|
||||||
|
|
||||||
|
// accept consumes the next rune if it's from the valid set of runes.
|
||||||
|
func (l *Lexer) accept(runes string) bool {
|
||||||
|
if strings.IndexRune(runes, l.next()) >= 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
l.backup()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// upcoming tells whether the next rune in the input is from the
// set of valid runes, without consuming that rune.
func (l *Lexer) upcoming(runes string) bool {
	if l.accept(runes) {
		l.backup()
		return true
	}
	return false
}
|
||||||
|
|
||||||
|
// acceptNot consumes the next rune if it's not from the set of runes.
|
||||||
|
func (l *Lexer) acceptNot(runes string) bool {
|
||||||
|
r := l.next()
|
||||||
|
if r == endOfFile {
|
||||||
|
l.backup()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if strings.IndexRune(runes, r) < 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
l.backup()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptUntil consumes a run of runes until ones from the
|
||||||
|
// valid set is encountered.
|
||||||
|
func (l *Lexer) acceptUntil(runes string) bool {
|
||||||
|
accepted := false
|
||||||
|
for l.acceptNot(runes) {
|
||||||
|
accepted = true
|
||||||
|
}
|
||||||
|
return accepted
|
||||||
|
}
|
||||||
|
|
||||||
|
// acceptWhile consumes a run of runes from the set of accepted runes.
// It reports whether at least one rune was consumed.
func (l *Lexer) acceptWhile(runes string) bool {
	accepted := false
	for l.accept(runes) {
		accepted = true
	}
	return accepted
}
|
||||||
|
|
||||||
|
// skip skips a run of runes from the set of accepted runes.
func (l *Lexer) skip(runes string) {
	if l.acceptWhile(runes) {
		l.ignore()
	}
}

// skipUntil skips a run of runes, until a rune from the set of
// runes or EOF is reached.
func (l *Lexer) skipUntil(runes string) {
	if l.acceptUntil(runes) {
		l.ignore()
	}
}
|
||||||
|
|
||||||
|
// newString clears the string builder, to start building a new
// string value.
func (l *Lexer) newString() {
	l.strValue.Reset()
}

// addToString adds a single rune to the string value being built.
func (l *Lexer) addToString(r rune) {
	l.strValue.WriteRune(r)
}

// getString returns the string value as built up so far.
func (l *Lexer) getString() string {
	return l.strValue.String()
}

// endOfFile is a sentinel rune, returned by next() and peek()
// when the end of the input has been reached.
var endOfFile rune = -1
|
||||||
|
|
||||||
|
// next returns the next rune in the input.
|
||||||
|
func (l *Lexer) next() rune {
|
||||||
|
if l.pos >= len(l.input) {
|
||||||
|
l.width = 0
|
||||||
|
return endOfFile
|
||||||
|
}
|
||||||
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||||||
|
l.width = w
|
||||||
|
l.pos += w
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
// errorf emits an error item containing the formatted error message
// and terminates the scan by returning a nil state function.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	l.items <- Item{
		ItemError,
		fmt.Sprintf(format, args...),
	}
	return nil
}
|
||||||
|
|
||||||
|
// unexpectedTokenError emits an error item that describes what input
// was expected at the current position and what was actually found
// there: a token, non-UTF8 data or the end of the file.
func (l *Lexer) unexpectedTokenError(expected string) stateFn {
	var actual string
	switch {
	case l.peek() == endOfFile:
		actual = "end of file"
	case !utf8.ValidString(l.input[l.start:]):
		actual = "non-UTF8 data"
	default:
		actual = fmt.Sprintf("token '%c'", l.peek())
	}
	return l.errorf("Unexpected %s (expected %s)", actual, expected)
}
|
|
@ -0,0 +1,164 @@
|
||||||
|
package lexer_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/mmakaay/toml/lexer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Input that is not valid UTF-8 must be rejected with a clear error.
func TestInvalidUtf8Data(t *testing.T) {
	assertFailureAndCheck(t, "\xbc", []string{}, "Unexpected non-UTF8 data (expected end of file)")
}

// Empty input yields no lexer items and no error.
func TestEmptyInput(t *testing.T) {
	assertSuccessAndCheck(t, "", []string{})
}

// Whitespace-only input is ignored completely.
func TestWhiteSpace(t *testing.T) {
	assertSuccessAndCheck(t, " ", []string{})
	assertSuccessAndCheck(t, "\t", []string{})
	assertSuccessAndCheck(t, " \t \t ", []string{})
}

// Newlines (plain and DOS-style) are ignored as well.
func TestWhiteSpaceAndNewlines(t *testing.T) {
	assertSuccessAndCheck(t, "\n", []string{})
	assertSuccessAndCheck(t, "\n \t\r\n", []string{})
}

// A comment runs from the '#' up to (but not including) the line end.
func TestWhitespacePlusComment(t *testing.T) {
	assertSuccessAndCheck(t, "#", []string{`Comment("#")`})
	assertSuccessAndCheck(t, " \t \t #", []string{`Comment("#")`})
	assertSuccessAndCheck(t, " \t \t # not empty", []string{`Comment("# not empty")`})
	assertSuccessAndCheck(t, " \t \t # not empty\r\r\r\n", []string{`Comment("# not empty")`})
	assertSuccessAndCheck(t, "\n \t\r\n# AAP\r\n", []string{`Comment("# AAP")`})
	assertSuccessAndCheck(t, "# two lines\n# of comments\n",
		[]string{`Comment("# two lines")`, `Comment("# of comments")`})
}
|
||||||
|
|
||||||
|
func TestBareKeyWithoutValue(t *testing.T) {
|
||||||
|
err := "Unexpected end of file (expected an '=' value assignment)"
|
||||||
|
assertFailureAndCheck(t, "=", []string{`Key("a")`}, err)
|
||||||
|
assertFailureAndCheck(t, " a", []string{`Key("a")`}, err)
|
||||||
|
assertFailureAndCheck(t, " a ", []string{`Key("a")`}, err)
|
||||||
|
assertFailureAndCheck(t, "ab", []string{`Key("ab")`}, err)
|
||||||
|
assertFailureAndCheck(t, "Ab", []string{`Key("Ab")`}, err)
|
||||||
|
assertFailureAndCheck(t, "Ab1", []string{`Key("Ab1")`}, err)
|
||||||
|
assertFailureAndCheck(t, "_Ab1", []string{`Key("_Ab1")`}, err)
|
||||||
|
assertFailureAndCheck(t, "_-Ab1", []string{`Key("_-Ab1")`}, err)
|
||||||
|
assertFailureAndCheck(t, "_-Ab1_this-is_GOOD987", []string{`Key("_-Ab1_this-is_GOOD987")`}, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dotted keys are emitted as Key/KeyDot/Key sequences; whitespace
// around the dot separator is ignored.
func TestDottedKey(t *testing.T) {
	err := "Unexpected end of file (expected an '=' value assignment)"
	assertFailureAndCheck(t, "a.b", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
	assertFailureAndCheck(t, " a .\t\t b\t ", []string{`Key("a")`, `KeyDot(".")`, `Key("b")`}, err)
}

// An assignment without a value on the right-hand side is an error.
func TestKeyWithAssignmentButNoValue(t *testing.T) {
	err := "Unexpected end of file (expected a value)"
	assertFailureAndCheck(t, " some_cool_key = ", []string{`Key("some_cool_key")`, `Assignment("=")`}, err)
}

// An empty basic string "" yields an empty StringValue item.
func TestEmptyBasicStringValue(t *testing.T) {
	assertSuccessAndCheck(t, `a=""`, []string{`Key("a")`, `Assignment("=")`, `StringValue("")`})
	assertSuccessAndCheck(t, `a=""#hi`, []string{`Key("a")`, `Assignment("=")`, `StringValue("")`, `Comment("#hi")`})
	assertSuccessAndCheck(t, `a = ""`, []string{`Key("a")`, `Assignment("=")`, `StringValue("")`})
	assertSuccessAndCheck(t, `a.b = ""`, []string{`Key("a")`, `KeyDot(".")`, `Key("b")`, `Assignment("=")`, `StringValue("")`})
}

// Basic strings may contain any Unicode character (here: flipped text).
func TestBasicStringValue(t *testing.T) {
	assertSuccessAndCheck(t, `_ = "b"`,
		[]string{
			`Key("_")`,
			`Assignment("=")`,
			`StringValue("b")`})
	assertSuccessAndCheck(t, `thing = "A cool ʎǝʞ" # huh, it's up-side down!!`,
		[]string{
			`Key("thing")`,
			`Assignment("=")`,
			`StringValue("A cool ʎǝʞ")`,
			`Comment("# huh, it's up-side down!!")`})
}
|
||||||
|
|
||||||
|
// Escape sequences outside the supported set must fail the scan.
func TestInvalidEscapeSequence(t *testing.T) {
	assertFailure(t, `a="\x"`, `Invalid escape sequence \x in string value`)
}

// The compact escape sequences must be decoded into their
// corresponding characters in the resulting string value.
func TestBasicStringValueEscapes(t *testing.T) {
	for in, out := range map[string]string{
		`\b`:           "\b",
		`\t`:           "\t",
		`\n`:           "\n",
		`\f`:           "\f",
		`\r`:           "\r",
		`\"`:           "\"",
		`\b\t\n\f\r\"`: "\b\t\n\f\r\"",
	} {
		l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
		// Item layout is Key, Assignment, StringValue; index 2 is the value.
		s := l[2]
		if out != s.Value {
			t.Fatalf("Unexpected result when parsing '%s'", in)
		}
	}
}
|
||||||
|
|
||||||
|
// func TestBasicStringUnicodeEscapes(t *testing.T) {
|
||||||
|
// for in, out := range map[string]string{
|
||||||
|
// `\u`: "\b",
|
||||||
|
// } {
|
||||||
|
// l := assertSuccess(t, fmt.Sprintf(`x="%s"`, in))
|
||||||
|
// s := l[2]
|
||||||
|
// if out != s.Value {
|
||||||
|
// t.Fatalf("Unexpected result when parsing '%s'", in)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Multiple key/value pairs (with trailing comments) are scanned in order.
func TestTwoKeyValuePairs(t *testing.T) {
	assertSuccessAndCheck(t, "a=\"Hello\" #comment1\nb=\"World!\"#comment2\r\n",
		[]string{
			`Key("a")`,
			`Assignment("=")`,
			`StringValue("Hello")`,
			`Comment("#comment1")`,
			`Key("b")`,
			`Assignment("=")`,
			`StringValue("World!")`,
			`Comment("#comment2")`})
}
|
||||||
|
|
||||||
|
// assertSuccessAndCheck scans the input, requires the scan to succeed
// and compares the scanned items against the expected ones.
func assertSuccessAndCheck(t *testing.T, input string, expected []string) {
	l := assertSuccess(t, input)
	assertItems(t, l, expected)
}

// assertFailureAndCheck scans the input, requires the scan to fail with
// the expected error and compares the partial result against expected.
func assertFailureAndCheck(t *testing.T, input string, expected []string, expectedErr string) {
	l := assertFailure(t, input, expectedErr)
	assertItems(t, l, expected)
}

// assertFailure scans the input and requires the scan to fail with
// the expected error message; the partial item list is returned.
func assertFailure(t *testing.T, input string, expectedErr string) []lexer.Item {
	l, err := lexer.Lex(input).ToArray()
	if err == nil {
		t.Fatalf("Expected lexer error '%s', but no error occurred", expectedErr)
	}
	if err.Error() != expectedErr {
		t.Fatalf("Mismatch between expected and actual error:\nExpected: %s\nActual: %s\n", expectedErr, err)
	}
	return l
}

// assertSuccess scans the input, requires the scan to succeed and
// returns the scanned items.
func assertSuccess(t *testing.T, input string) []lexer.Item {
	l, err := lexer.Lex(input).ToArray()
	if err != nil {
		t.Fatalf("Unexpected lexer error: %s", err)
	}
	return l
}

// assertItems compares the string representations of the scanned
// items against the expected ones, position by position.
func assertItems(t *testing.T, l []lexer.Item, expected []string) {
	if len(expected) != len(l) {
		t.Fatalf("Unexpected number of lexer items: %d (expected: %d)", len(l), len(expected))
	}
	for i, e := range expected {
		if l[i].String() != e {
			t.Fatalf("Unexpected lexer item at index %d: %s (expected: %s)", i, l[i], e)
		}
	}
}
|
|
@ -0,0 +1,219 @@
|
||||||
|
package lexer
|
||||||
|
|
||||||
|
// stateFn represents the state of the scanner as a function
// that returns the next state.
type stateFn func(*Lexer) stateFn

// Rune sets and tokens, as used by the accept*/skip* methods
// in the various lexer state functions.
const (
	whitespace     string = " \t"
	newline        string = "\r\n"
	startOfComment string = "#"
	equal          string = "="
	lower          string = "abcdefghijklmnopqrstuvwxyz"
	upper          string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	digits         string = "0123456789"
	dot            string = "."
	underscore     string = "_"
	dash           string = "-"
	singleQuote    string = "'"
	doubleQuote    string = "\""
	backslash      string = "\\"
	someQuote      string = singleQuote + doubleQuote
	singleQuote3   string = singleQuote + singleQuote + singleQuote
	doubleQuote3   string = doubleQuote + doubleQuote + doubleQuote
	bareKey        string = lower + upper + digits + underscore + dash
	startOfKey     string = bareKey + someQuote
)
|
||||||
|
|
||||||
|
// stateKeyValuePair is the top-level state: it skips leading
// whitespace and newlines, then dispatches to a comment, a key
// or the end-of-file state.
func stateKeyValuePair(l *Lexer) stateFn {
	l.skip(whitespace + newline)
	if l.upcoming(startOfComment) {
		return stateComment
	}
	if l.upcoming(startOfKey) {
		return stateKey
	}
	return stateEndOfFile
}

// A hash symbol marks the rest of the line as a comment.
func stateComment(l *Lexer) stateFn {
	l.acceptUntil(newline)
	l.emit(ItemComment, l.getAcceptedString())
	l.skip(newline)
	return stateKeyValuePair
}
|
||||||
|
|
||||||
|
// A key may be either bare, quoted or dotted.
// Note: only bare keys are implemented at this point.
func stateKey(l *Lexer) stateFn {
	if l.upcoming(bareKey) {
		return stateBareKey
	}
	return l.unexpectedTokenError("a valid key name")
}

// Bare keys may only contain ASCII letters, ASCII digits,
// underscores, and dashes (A-Za-z0-9_-). Note that bare
// keys are allowed to be composed of only ASCII digits,
// e.g. 1234, but are always interpreted as strings.
func stateBareKey(l *Lexer) stateFn {
	l.acceptWhile(bareKey)
	l.emit(ItemKey, l.getAcceptedString())
	return stateEndOfKeyOrKeyDot
}
|
||||||
|
|
||||||
|
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together.
func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
	// Whitespace around dot-separated parts is ignored, however,
	// best practice is to not use any extraneous whitespace.
	l.skip(whitespace)
	if l.accept(dot) {
		l.emit(ItemKeyDot, ".")
		l.skip(whitespace)
		return stateKey
	}
	return stateKeyAssignment
}
|
||||||
|
|
||||||
|
// Keys are on the left of the equals sign and values are on the right.
// Whitespace is ignored around key names and values. The key, equals
// sign, and value must be on the same line (though some values can
// be broken over multiple lines).
func stateKeyAssignment(l *Lexer) stateFn {
	l.skip(whitespace)
	if l.accept(equal) {
		l.emit(ItemKeyValueAssignment, "=")
		l.skip(whitespace)
		return stateValue
	}
	return l.unexpectedTokenError("an '=' value assignment")
}
|
||||||
|
|
||||||
|
// stateValue scans the value part of a key/value pair.
// Note: only string values are implemented at this point.
func stateValue(l *Lexer) stateFn {
	l.skip(whitespace)
	if l.upcoming(someQuote) {
		return stateStringValue
	}
	return l.unexpectedTokenError("a value")
}

// There are four ways to express strings: basic, multi-line basic, literal,
// and multi-line literal. All strings must contain only valid UTF-8 characters.
// Note: only basic strings are implemented at this point.
func stateStringValue(l *Lexer) stateFn {
	if l.accept(doubleQuote) {
		return stateBasicStringValue
	}
	return l.unexpectedTokenError("a string value")
}
|
||||||
|
|
||||||
|
// stateBasicStringValue is entered after a single double quote has been
// consumed. It distinguishes between a """ multi-line string start, an
// "" empty string and a regular non-empty basic string.
func stateBasicStringValue(l *Lexer) stateFn {
	// Possibly a """ multi-line string start,
	// possibly the end of an "" empty string.
	if l.accept(doubleQuote) {
		// A """ multi-line string.
		if l.accept(doubleQuote) {
			l.ignore()
			return stateMultiLineBasicString
		}
		// An "" empty string.
		l.ignore()
		l.emit(ItemStringValue, "")
		return stateKeyValuePair
	}
	l.ignore()
	return stateBasicString
}
|
||||||
|
|
||||||
|
// Basic strings are surrounded by quotation marks. Any Unicode character
// may be used except those that must be escaped: quotation mark, backslash,
// and the control characters (U+0000 to U+001F, U+007F).
//
// For convenience, some popular characters have a compact escape sequence.
//
//   \b         - backspace       (U+0008)
//   \t         - tab             (U+0009)
//   \n         - linefeed       (U+000A)
//   \f         - form feed      (U+000C)
//   \r         - carriage return (U+000D)
//   \"         - quote           (U+0022)
//   \\         - backslash       (U+005C)
//   \uXXXX     - unicode         (U+XXXX)
//   \UXXXXXXXX - unicode         (U+XXXXXXXX)
//
// Any Unicode character may be escaped with the \uXXXX or \UXXXXXXXX forms.
// The escape codes must be valid Unicode scalar values.
//
// All other escape sequences not listed above are reserved and,
// if used, TOML should produce an error.

// basicEscapes maps the character of a compact escape sequence to the
// character it represents. The \uXXXX and \UXXXXXXXX forms are not in
// this map; they require dedicated parsing.
// IMPROVEMENT: rune literals replace the former opaque rune(8)-style
// magic numbers; the values are identical.
var basicEscapes = map[rune]rune{
	'b':  '\b', // backspace (U+0008)
	't':  '\t', // tab (U+0009)
	'n':  '\n', // linefeed (U+000A)
	'f':  '\f', // form feed (U+000C)
	'r':  '\r', // carriage return (U+000D)
	'"':  '"',  // quote (U+0022)
	'\\': '\\', // backslash (U+005C)
}
|
||||||
|
|
||||||
|
func stateParseBasicString(l *Lexer) stateFn {
|
||||||
|
for {
|
||||||
|
switch {
|
||||||
|
case l.upcoming(endOfFile):
|
||||||
|
l.unexpectedTokenError("basic string token")
|
||||||
|
case l.upcoming(doubleQuote):
|
||||||
|
return l.popState()
|
||||||
|
case l.accept(backslash):
|
||||||
|
r := l.next()
|
||||||
|
if escaped, ok := basicEscapes[r]; ok {
|
||||||
|
l.addToString(escaped)
|
||||||
|
} else {
|
||||||
|
return l.errorf("Invalid escape sequence \\%c in string value", r)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
l.addToString(l.next())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func stateBasicString(l *Lexer) stateFn {
|
||||||
|
l.newString()
|
||||||
|
l.pushState(stateBasicStringEnd)
|
||||||
|
return stateParseBasicString
|
||||||
|
|
||||||
|
parsing:
|
||||||
|
for {
|
||||||
|
r := l.next()
|
||||||
|
if r == endOfFile {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if r == '"' {
|
||||||
|
l.emit(ItemStringValue, l.getString())
|
||||||
|
return stateKeyValuePair
|
||||||
|
}
|
||||||
|
if r == '\\' {
|
||||||
|
r = l.next()
|
||||||
|
if escaped, ok := basicEscapes[r]; ok {
|
||||||
|
l.addToString(escaped)
|
||||||
|
continue parsing
|
||||||
|
}
|
||||||
|
return l.errorf("Invalid escape sequence \\%c in string value", r)
|
||||||
|
}
|
||||||
|
l.addToString(r)
|
||||||
|
}
|
||||||
|
return l.unexpectedTokenError("valid basic string rune")
|
||||||
|
}
|
||||||
|
|
||||||
|
// stateMultiLineBasicString scans a """ multi-line basic string.
// TODO: not yet implemented; for now this always fails the scan.
func stateMultiLineBasicString(l *Lexer) stateFn {
	return l.errorf("Not yet implemented")
}
|
||||||
|
|
||||||
|
func stateEndOfFile(l *Lexer) stateFn {
|
||||||
|
i := l.peek()
|
||||||
|
if i == endOfFile {
|
||||||
|
l.emit(ItemEOF, "EOF")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return l.unexpectedTokenError("end of file")
|
||||||
|
}
|
Loading…
Reference in New Issue