diff --git a/FIXME b/FIXME new file mode 100644 index 0000000..b4a3694 --- /dev/null +++ b/FIXME @@ -0,0 +1,2 @@ +key-no-eol I do accept "key1=1 key2=1". The spec does not deny that. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4969bd1 --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +all: + @(cd cmd/toml-test-decoder; go build) + +test: + @(cd ast; go test) + @(cd parse; go test) + +sushi-test: + @(cd cmd/toml-test-decoder; go build; ${GOPATH}/bin/toml-test ./toml-test-decoder) diff --git a/ast/string.go b/ast/string.go index a4a3bba..d658143 100644 --- a/ast/string.go +++ b/ast/string.go @@ -30,7 +30,7 @@ func (value Value) String() string { case TypeArrayOfTables: fallthrough case TypeArray: - values := make(Key, len(value.Data)) + values := make([]string, len(value.Data)) for i, value := range value.Data { values[i] = value.(*Value).String() } diff --git a/cmd/toml-test-decoder/main.go b/cmd/toml-test-decoder/main.go index 9393fc7..124375e 100644 --- a/cmd/toml-test-decoder/main.go +++ b/cmd/toml-test-decoder/main.go @@ -4,15 +4,23 @@ package main import ( + //"encoding/json" "encoding/json" "flag" "fmt" + "sort" + "strings" + "time" + + //"fmt" "log" "os" "path" - "time" - "github.com/mmakaay/toml" + //"time" + + "git.makaay.nl/mauricem/go-toml/ast" + "git.makaay.nl/mauricem/go-toml/parse" ) func init() { @@ -34,58 +42,120 @@ func main() { flag.Usage() } - var tmp interface{} - if _, err := toml.DecodeReader(os.Stdin, &tmp); err != nil { + toml, err := parse.Run(os.Stdin) + if err != nil { log.Fatalf("Error decoding TOML: %s", err) } - typedTmp := translate(tmp) - if err := json.NewEncoder(os.Stdout).Encode(typedTmp); err != nil { + sushi := makeSushi(ast.NewValue(ast.TypeTable, toml)) + var v = new(interface{}) + if err := json.NewDecoder(strings.NewReader(sushi)).Decode(v); err != nil { + log.Fatalf("Error decoding JSON: %s\n%s\n", err, sushi) + } + encoder := json.NewEncoder(os.Stdout) + encoder.SetIndent("", " ") + if err := encoder.Encode(v); err != nil { log.Fatalf("Error encoding JSON: %s", err) } } -func translate(tomlData interface{}) interface{} { - switch orig := tomlData.(type) { - case map[string]interface{}: - typed := make(map[string]interface{}, len(orig)) - for k, v := range orig { - typed[k] = translate(v) - } - return typed - case []map[string]interface{}: - typed := make([]map[string]interface{}, len(orig)) - for i, v := range orig { - typed[i] = translate(v).(map[string]interface{}) - } - return typed - case []interface{}: - typed := make([]interface{}, len(orig)) - for i, v := range orig { - typed[i] = translate(v) - } +// func translate(node *ast.Value) interface{} { +// switch node.Type { +// case ast.TypeTable: +// typed := make(map[string]interface{}, len(node.Data)) +// for k, v := range node.Data[0].(ast.Table) { +// typed[k] = translate(v) +// } +// return typed +// case ast.TypeArrayOfTables: +// typed := make([]map[string]interface{}, len(node.Data)) +// for i, v := range node.Data { +// value := v.(*ast.Value) +// typed[i] = translate(value).(map[string]interface{}) +// } +// return typed +// case []interface{}: +// typed := make([]interface{}, len(orig)) +// for i, v := range orig { +// typed[i] = translate(v) +// } - // We don't really need to tag arrays, but let's be future proof. - // (If TOML ever supports tuples, we'll need this.) - return tag("array", typed) - case time.Time: - return tag("datetime", orig.Format("2006-01-02T15:04:05Z")) - case bool: - return tag("bool", fmt.Sprintf("%v", orig)) - case int64: - return tag("integer", fmt.Sprintf("%d", orig)) - case float64: - return tag("float", fmt.Sprintf("%v", orig)) - case string: - return tag("string", orig) - } +// // We don't really need to tag arrays, but let's be future proof. +// // (If TOML ever supports tuples, we'll need this.) +// return tag("array", typed) +// case time.Time: +// return tag("datetime", orig.Format("2006-01-02T15:04:05Z")) +// case bool: +// return tag("bool", fmt.Sprintf("%v", orig)) +// case int64: +// return tag("integer", fmt.Sprintf("%d", orig)) +// case float64: +// return tag("float", fmt.Sprintf("%v", orig)) +// case string: +// return tag("string", orig) +// } - panic(fmt.Sprintf("Unknown type: %T", tomlData)) -} +// panic(fmt.Sprintf("Unknown type: %T", tomlData)) +// } -func tag(typeName string, data interface{}) map[string]interface{} { - return map[string]interface{}{ - "type": typeName, - "value": data, +// func tag(typeName string, data interface{}) map[string]interface{} { +// return map[string]interface{}{ +// "type": typeName, +// "value": data, +// } +// } + +func makeSushi(value *ast.Value) string { + switch value.Type { + case ast.TypeString: + return renderValue("string", value.Data[0].(string)) + case ast.TypeInteger: + return renderValue("integer", fmt.Sprintf("%d", value.Data[0].(int64))) + case ast.TypeFloat: + return renderValue("float", fmt.Sprintf("%v", value.Data[0].(float64))) + case ast.TypeBoolean: + return renderValue("bool", fmt.Sprintf("%t", value.Data[0].(bool))) + case ast.TypeOffsetDateTime: + return renderValue("datetime", value.Data[0].(time.Time).Format(time.RFC3339Nano)) + case ast.TypeLocalDateTime: + return renderValue("local_datetime", value.Data[0].(time.Time).Format("2006-01-02 15:04:05.999999999")) + case ast.TypeLocalDate: + return renderValue("local_date", value.Data[0].(time.Time).Format("2006-01-02")) + case ast.TypeLocalTime: + return renderValue("local_time", value.Data[0].(time.Time).Format("15:04:05.999999999")) + case ast.TypeArrayOfTables: + fallthrough + case ast.TypeArray: + values := make([]string, len(value.Data)) + isArrayOfTables := false + for i, value := range value.Data { + isArrayOfTables = value.(*ast.Value).Type == ast.TypeTable + values[i] = makeSushi(value.(*ast.Value)) + } + if isArrayOfTables { + return fmt.Sprintf("[%s]", strings.Join(values, ", ")) + } else { + return fmt.Sprintf(`{"type": "array", "value": [%s]}`, strings.Join(values, ", ")) + } + case ast.TypeTable: + pairs := value.Data[0].(ast.Table) + keys := make([]string, len(pairs)) + i := 0 + for k := range pairs { + keys[i] = k + i++ + } + sort.Strings(keys) + values := make([]string, len(pairs)) + for i, k := range keys { + values[i] = fmt.Sprintf("%q: %s", k, makeSushi(pairs[k])) + } + return fmt.Sprintf("{%s}", strings.Join(values, ", ")) + default: + return renderValue(string(value.Type), fmt.Sprintf("%q", value.Data[0])) } } + +func renderValue(t string, v string) string { + return fmt.Sprintf("{%q: %q, %q: %q}", "type", t, "value", v) +} diff --git a/cmd/toml-test-decoder/test.toml b/cmd/toml-test-decoder/test.toml new file mode 100644 index 0000000..890a413 --- /dev/null +++ b/cmd/toml-test-decoder/test.toml @@ -0,0 +1,12 @@ +a=123 +b=""" +hallo dan +""" +[table] +data="ok" +[[good]] +job=true +[[good]] +well="done!" +grade="A+" +when=2019-01-01 12:00:00Z diff --git a/cmd/toml-test-decoder/toml-test-decoder b/cmd/toml-test-decoder/toml-test-decoder new file mode 100755 index 0000000..7d3e27a Binary files /dev/null and b/cmd/toml-test-decoder/toml-test-decoder differ diff --git a/go.mod b/go.mod index df906fa..380a00e 100644 --- a/go.mod +++ b/go.mod @@ -5,5 +5,3 @@ go 1.12 require ( git.makaay.nl/mauricem/go-parsekit v0.0.0 ) - -replace git.makaay.nl/mauricem/go-parsekit v0.0.0 => ../go-parsekit diff --git a/parse/keyvaluepair.go b/parse/keyvaluepair.go index d4f7a36..dc2668f 100644 --- a/parse/keyvaluepair.go +++ b/parse/keyvaluepair.go @@ -32,6 +32,9 @@ var ( // around dot-separated parts are ignored, however, best practice is to // not use any extraneous blanks. keySeparatorDot = c.Seq(dropBlanks, a.Dot, dropBlanks) + + // After a value, the line must end. There can be an optional comment. + endOfLineAfterValue = c.Seq(dropBlanks, a.EndOfLine.Or(dropComment)) ) func (t *parser) startKeyValuePair(p *parse.API) { @@ -41,6 +44,8 @@ func (t *parser) startKeyValuePair(p *parse.API) { err := t.SetKeyValuePair(key, value) if err != nil { p.Error("%s", err) + } else if !p.Accept(endOfLineAfterValue) { + p.Expected("end of line") } } } diff --git a/parse/keyvaluepair_test.go b/parse/keyvaluepair_test.go index 585d6c7..479a12e 100644 --- a/parse/keyvaluepair_test.go +++ b/parse/keyvaluepair_test.go @@ -14,7 +14,7 @@ func TestKey(t *testing.T) { {"key1.key2=0", `{"key1": {"key2": 0}}`, ``}, {"key . with . spaces=0", `{"key": {"with": {"spaces": 0}}}`, ``}, {"key \t . \twithtabs\t . \tandspaces=0", `{"key": {"withtabs": {"andspaces": 0}}}`, ``}, - {"key1='value1' key2='value2' # on same line", `{"key1": "value1", "key2": "value2"}`, ``}, + {"key1='value1' key2='value2' # on same line", `{"key1": "value1"}`, `unexpected input (expected end of line) at line 1, column 14`}, // Single quoted key tests {"''=0", `{"": 0}`, ``}, {"'single quoted'=0", `{"single quoted": 0}`, ``}, @@ -47,7 +47,7 @@ func TestKeyValuePair(t *testing.T) { {`"ʎǝʞ" = "value"`, `{"ʎǝʞ": "value"}`, ``}, {`key = "value" # This is a comment at the end of a line`, `{"key": "value"}`, ``}, {`another = "# This is not a comment"`, `{"another": "# This is not a comment"}`, ``}, - {"key1=\"value1\"key2=\"value2\"\r\nkey3a.key3b=\"value3\"", `{"key1": "value1", "key2": "value2", "key3a": {"key3b": "value3"}}`, ``}, + {"key1=\"value1\"\nkey2=\"value2\"\r\nkey3a.key3b=\"value3\"", `{"key1": "value1", "key2": "value2", "key3a": {"key3b": "value3"}}`, ``}, {"with=\"comments\"# boring \nanother.cool =\"one\" \t # to the end\r\n", `{"another": {"cool": "one"}, "with": "comments"}`, ``}, {"key='value'\nkey='another value'", `{"key": "value"}`, `invalid key/value pair: string value already exists at key [key] at line 2, column 20`}, } { diff --git a/parse/parse_test.go b/parse/parse_test.go index 9c25955..4fd7b36 100644 --- a/parse/parse_test.go +++ b/parse/parse_test.go @@ -7,17 +7,17 @@ import ( ) func ExampleRun() { - doc, err := parse.Run("key = 'value' key2 = 'another value'") + doc, err := parse.Run("key = 'value'\nkey2 = 'another value'") fmt.Println(doc, err) doc, err = parse.Run("key = 'value'\n[table]\nanother_key = 'another one'") fmt.Println(doc, err) - doc, err = parse.Run("key1 = 'valid' key2 = invalid") + doc, err = parse.Run("key1 = 'valid'\nkey2 = invalid") fmt.Println(doc, err) // Output: // {"key": "value", "key2": "another value"} // {"key": "value", "table": {"another_key": "another one"}} - // {"key1": "valid"} unexpected input (expected a value) at line 1, column 23 + // {"key1": "valid"} unexpected input (expected a value) at line 2, column 8 } diff --git a/parse/value_number_test.go b/parse/value_number_test.go index 8965596..92a6166 100644 --- a/parse/value_number_test.go +++ b/parse/value_number_test.go @@ -24,7 +24,7 @@ func TestInteger(t *testing.T) { {`x=-17`, `{"x": -17}`, ``}, {`x=1234`, `{"x": 1234}`, ``}, {`x=_`, `{}`, `unexpected input (expected a value) at line 1, column 3`}, - {`x=1_`, `{"x": 1}`, `unexpected end of file (expected a value assignment) at line 1, column 5`}, + {`x=1_`, `{"x": 1}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=1_000`, `{"x": 1000}`, ``}, {`x=5_349_221`, `{"x": 5349221}`, ``}, {`x=1_2_3_4_5`, `{"x": 12345}`, ``}, @@ -47,7 +47,7 @@ func TestInteger(t *testing.T) { {`x=0xf_f`, `{"x": 255}`, ``}, {`x=0x0_0_f_f`, `{"x": 255}`, ``}, {`x=0xdead_beef`, `{"x": 3735928559}`, ``}, - {`x=0xgood_beef`, `{"x": 0}`, `unexpected end of file (expected a value assignment) at line 1, column 14`}, + {`x=0xgood_beef`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=0x7FFFFFFFFFFFFFFF`, `{"x": 9223372036854775807}`, ``}, {`x=0x8000000000000000`, `{}`, `invalid integer value 0x8000000000000000: strconv.ParseInt: parsing "8000000000000000": value out of range at line 1, column 21`}, //Octal @@ -57,7 +57,7 @@ func TestInteger(t *testing.T) { {`x=0o10`, `{"x": 8}`, ``}, {`x=0o1_6`, `{"x": 14}`, ``}, {`x=0o0_0_1_1_1`, `{"x": 73}`, ``}, - {`x=0o9`, `{"x": 0}`, `unexpected end of file (expected a value assignment) at line 1, column 6`}, + {`x=0o9`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=0o777777777777777777777`, `{"x": 9223372036854775807}`, ``}, {`x=0o1000000000000000000000`, `{}`, `invalid integer value 0o1000000000000000000000: strconv.ParseInt: parsing "1000000000000000000000": value out of range at line 1, column 27`}, // Binary @@ -68,7 +68,7 @@ func TestInteger(t *testing.T) { {`x=0b0100`, `{"x": 4}`, ``}, {`x=0b00001000`, `{"x": 8}`, ``}, {`x=0b0001_0000`, `{"x": 16}`, ``}, - {`x=0b9`, `{"x": 0}`, `unexpected end of file (expected a value assignment) at line 1, column 6`}, + {`x=0b9`, `{"x": 0}`, `unexpected input (expected end of line) at line 1, column 4`}, {`x=0b1_1_0_1_1`, `{"x": 27}`, ``}, {`x=0b11111111_11111111`, `{"x": 65535}`, ``}, {`x=0b01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111`, `{"x": 9223372036854775807}`, ``}, diff --git a/parse/value_string.go b/parse/value_string.go index 4744ad4..6902b8a 100644 --- a/parse/value_string.go +++ b/parse/value_string.go @@ -2,6 +2,7 @@ package parse import ( "strings" + "unicode/utf8" "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-toml/ast" @@ -87,18 +88,25 @@ func (t *parser) parseBasicString(name string, p *parse.API) (string, bool) { case p.Peek(controlCharacter): p.Error("invalid character in %s: %q (must be escaped)", name, p.Result().Rune(0)) return sb.String(), false - case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))): - sb.WriteString(p.Result().Value(0).(string)) + case p.Accept(tok.StrInterpreted(nil, validEscape)): + s := p.Result().Value(0).(string) + for _, r := range s { + if r == utf8.RuneError { + p.Error("invalid UTF8 rune") + return sb.String(), false + } + sb.WriteRune(r) + } case p.Peek(a.Backslash): p.Error("invalid escape sequence") return sb.String(), false case p.Accept(m.Drop(a.DoubleQuote)): return sb.String(), true - case p.Accept(a.ValidRune): - sb.WriteString(p.Result().String()) case p.Peek(a.InvalidRune): p.Error("invalid UTF8 rune") return sb.String(), false + case p.Accept(a.ValidRune): + sb.WriteString(p.Result().String()) default: p.Expected(`closing quotation marks`) return sb.String(), false @@ -128,11 +136,11 @@ func (t *parser) parseLiteralString(name string, p *parse.API) (string, bool) { case p.Peek(controlCharacter): p.Error("invalid character in %s: %q (no control chars allowed, except for tab)", name, p.Result().Rune(0)) return sb.String(), false - case p.Accept(a.ValidRune): - sb.WriteString(p.Result().String()) case p.Peek(a.InvalidRune): p.Error("invalid UTF8 rune") return sb.String(), false + case p.Accept(a.ValidRune): + sb.WriteString(p.Result().String()) default: p.Expected("closing single quote") return sb.String(), false @@ -177,7 +185,14 @@ func (t *parser) parseMultiLineBasicString(p *parse.API) (string, bool) { p.Error("invalid character in multi-line basic string: %q (must be escaped)", p.Result().Rune(0)) return sb.String(), false case p.Accept(tok.StrInterpreted(nil, c.OneOrMore(validEscape))): - sb.WriteString(p.Result().Value(0).(string)) + s := p.Result().Value(0).(string) + for _, r := range s { + if r == utf8.RuneError { + p.Error("invalid UTF8 rune") + return sb.String(), false + } + sb.WriteRune(r) + } case p.Accept(lineEndingBackslash): // NOOP, the line-ending backslash sequence is skipped. case p.Peek(a.Backslash): diff --git a/parse/value_string_test.go b/parse/value_string_test.go index 35e95c5..519e236 100644 --- a/parse/value_string_test.go +++ b/parse/value_string_test.go @@ -50,7 +50,7 @@ func TestString(t *testing.T) { } } -func TestBasipString(t *testing.T) { +func TestBasicString(t *testing.T) { for _, test := range []parseTest{ {`x="no end quote`, `{}`, `unexpected end of file (expected closing quotation marks) at line 1, column 16`}, {`x=""`, `{"x": ""}`, ``}, @@ -60,6 +60,7 @@ func TestBasipString(t *testing.T) { {`x="A cool UTF8 ƃuıɹʇs"`, `{"x": "A cool UTF8 ƃuıɹʇs"}`, ``}, {`x="A string with UTF8 escape \u2318"`, `{"x": "A string with UTF8 escape ⌘"}`, ``}, {"x=\"Invalid character for UTF \xcd\"", `{}`, `invalid UTF8 rune at line 1, column 30`}, + {"x=\"\\uD801\"", `{}`, `invalid UTF8 rune at line 1, column 10`}, {"x=\"Character that mus\t be escaped\"", `{}`, `invalid character in string value: '\t' (must be escaped) at line 1, column 22`}, {"x=\"Character that must be escaped \u0000\"", `{}`, `invalid character in string value: '\x00' (must be escaped) at line 1, column 35`}, {"x=\"Character that must be escaped \x7f\"", `{}`, `invalid character in string value: '\u007f' (must be escaped) at line 1, column 35`}, @@ -69,7 +70,7 @@ func TestBasipString(t *testing.T) { } } -func TestMultiLineBasipString(t *testing.T) { +func TestMultiLineBasicString(t *testing.T) { for _, test := range []parseTest{ {`x="""missing close quote""`, `{}`, `unexpected end of file (expected closing three quotation marks) at line 1, column 27`}, {`x=""""""`, `{"x": ""}`, ``}, @@ -120,7 +121,7 @@ func TestMultiLineLiteralString(t *testing.T) { } } -func TestBasipStringWithUnescapedControlCharacters(t *testing.T) { +func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { // A quick check for almost all characters that must be escaped. // The missing one (\x7f) is covered in the previous test. for i := 0x00; i <= 0x1F; i++ { diff --git a/toml.code-workspace b/toml.code-workspace deleted file mode 100644 index 362d7c2..0000000 --- a/toml.code-workspace +++ /dev/null @@ -1,7 +0,0 @@ -{ - "folders": [ - { - "path": "." - } - ] -} \ No newline at end of file diff --git a/toml.go b/toml.go new file mode 100644 index 0000000..0df11bc --- /dev/null +++ b/toml.go @@ -0,0 +1,8 @@ +// Package toml provides a decoder and an encoder for TOML. +// TOML stands for Tom's Obvious, Minimal Language. +// +// TOML specification: https://github.com/toml-lang/toml +// +// Compatible with TOML version +// [v0.5.0](https://github.com/toml-lang/toml/blob/master/versions/en/toml-v0.5.0.md) +package toml