diff --git a/keyvaluepair.go b/keyvaluepair.go
index 14550ab..71c47af 100644
--- a/keyvaluepair.go
+++ b/keyvaluepair.go
@@ -102,13 +102,3 @@ func (t *parser) startAssignment(p *parse.API) {
 		p.Expected("a value assignment")
 	}
 }
-
-// Values must be of the following types: String, Integer, Float, Boolean,
-// Datetime, Array, or Inline Table. Unspecified values are invalid.
-func (t *parser) startValue(p *parse.API) {
-	if p.Peek(c.Any(a.SingleQuote, a.DoubleQuote)) {
-		p.Handle(t.startString)
-	} else {
-		p.Expected("a value")
-	}
-}
diff --git a/keyvaluepair_test.go b/keyvaluepair_test.go
index 9c32c04..eb328f0 100644
--- a/keyvaluepair_test.go
+++ b/keyvaluepair_test.go
@@ -45,16 +45,6 @@ func TestAssignment(t *testing.T) {
 	}
 }
 
-func TestValue(t *testing.T) {
-	for _, test := range []parseTest{
-		{``, []string{`Error: unexpected end of file (expected a value) at start of file`}},
-		{`"basic string value"`, []string{`string("basic string value")`}},
-	} {
-		p := &parser{}
-		testParseHandler(t, p, p.startValue, test)
-	}
-}
-
 func TestKeyValuePair(t *testing.T) {
 	for _, test := range []parseTest{
 		{"", []string{}},
@@ -80,3 +70,75 @@ func TestKeyValuePair(t *testing.T) {
 		testParseHandler(t, p, p.startKeyValuePair, test)
 	}
 }
+
+func TestKeyValuePair_ForAllTypes(t *testing.T) {
+	for _, test := range []parseTest{
+		{"string='literal'", []string{`key("string")`, `assign`, `string("literal")`}},
+		{"string='''literal\nmulti-line'''", []string{`key("string")`, `assign`, `string("literal\nmulti-line")`}},
+		{`string="basic"`, []string{`key("string")`, `assign`, `string("basic")`}},
+		{"string=\"\"\"basic\nmulti-line\"\"\"", []string{`key("string")`, `assign`, `string("basic\nmulti-line")`}},
+		{"integer=1_234_567", []string{`key("integer")`, `assign`, `integer(1234567)`}},
+		{"integer=42", []string{`key("integer")`, `assign`, `integer(42)`}},
+		{"integer=0x42", []string{`key("integer")`, `assign`, `integer(66)`}},
+		{"integer=0o42", []string{`key("integer")`, `assign`, `integer(34)`}},
+		{"integer=0b101010", []string{`key("integer")`, `assign`, `integer(42)`}},
+		{"float=42.37", []string{`key("float")`, `assign`, `float(42.37)`}},
+		{"float=42e+37", []string{`key("float")`, `assign`, `float(4.2e+38)`}},
+		{"float=42.37e-11", []string{`key("float")`, `assign`, `float(4.237e-10)`}},
+		{"boolean=true", []string{`key("boolean")`, `assign`, `boolean(true)`}},
+		{"boolean=false", []string{`key("boolean")`, `assign`, `boolean(false)`}},
+		{"date=2019-01-01", []string{`key("date")`, `assign`, `date(2019-01-01 00:00:00 +0000 UTC)`}},
+		{"time=15:03:11", []string{`key("time")`, `assign`, `time(0000-01-01 15:03:11 +0000 UTC)`}},
+		{"datetime=2021-02-01 15:03:11.123", []string{`key("datetime")`, `assign`, `datetime(2021-02-01 15:03:11.123 +0000 UTC)`}},
+		{"offset_datetime=1111-11-11 11:11:11.111111111+11:11", []string{`key("offset_datetime")`, `assign`, `offset_datetime(1111-11-11 11:11:11.111111111 +1111 +1111)`}},
+	} {
+		p := &parser{}
+		testParseHandler(t, p, p.startKeyValuePair, test)
+	}
+}
+
+func TestKeyValuePair_ExamplesFromSpecification(t *testing.T) {
+	for _, test := range []parseTest{
+		{"int1 = +99", []string{`key("int1")`, `assign`, `integer(99)`}},
+		{"int2 = 42", []string{`key("int2")`, `assign`, `integer(42)`}},
+		{"int3 = 0", []string{`key("int3")`, `assign`, `integer(0)`}},
+		{"int4 = -17", []string{`key("int4")`, `assign`, `integer(-17)`}},
+		{"int5 = 1_000", []string{`key("int5")`, `assign`, `integer(1000)`}},
+		{"int6 = 5_349_221", []string{`key("int6")`, `assign`, `integer(5349221)`}},
+		{"int7 = 1_2_3_4_5 # VALID but discouraged", []string{`key("int7")`, `assign`, `integer(12345)`, `comment("# VALID but discouraged")`}},
+		{"hex1 = 0xDEADBEEF", []string{`key("hex1")`, `assign`, `integer(3735928559)`}},
+		{"hex2 = 0xdeadbeef", []string{`key("hex2")`, `assign`, `integer(3735928559)`}},
+		{"hex3 = 0xdead_beef", []string{`key("hex3")`, `assign`, `integer(3735928559)`}},
+		{"oct1 = 0o01234567", []string{`key("oct1")`, `assign`, `integer(342391)`}},
+		{"oct2 = 0o755", []string{`key("oct2")`, `assign`, `integer(493)`}},
+		{"bin1 = 0b11010110", []string{`key("bin1")`, `assign`, `integer(214)`}},
+		{"flt1 = +1.0", []string{`key("flt1")`, `assign`, `float(1)`}},
+		{"flt2 = 3.1415", []string{`key("flt2")`, `assign`, `float(3.1415)`}},
+		{"flt3 = -0.01", []string{`key("flt3")`, `assign`, `float(-0.01)`}},
+		{"flt4 = 5e+22", []string{`key("flt4")`, `assign`, `float(5e+22)`}},
+		{"flt5 = 1e6", []string{`key("flt5")`, `assign`, `float(1e+06)`}},
+		{"flt6 = -2E-2", []string{`key("flt6")`, `assign`, `float(-0.02)`}},
+		{"flt7 = 6.626e-34", []string{`key("flt7")`, `assign`, `float(6.626e-34)`}},
+		{"flt8 = 224_617.445_991_228", []string{`key("flt8")`, `assign`, `float(224617.445991228)`}},
+		{"sf1 = inf # positive infinity", []string{`key("sf1")`, `assign`, `float(+Inf)`, `comment("# positive infinity")`}},
+		{"sf2 = +inf # positive infinity", []string{`key("sf2")`, `assign`, `float(+Inf)`, `comment("# positive infinity")`}},
+		{"sf3 = -inf # negative infinity", []string{`key("sf3")`, `assign`, `float(-Inf)`, `comment("# negative infinity")`}},
+		{"sf4 = nan # actual sNaN/qNaN encoding is implementation specific", []string{`key("sf4")`, `assign`, `float(NaN)`, `comment("# actual sNaN/qNaN encoding is implementation specific")`}},
+		{"sf5 = +nan # same as `nan`", []string{`key("sf5")`, `assign`, `float(NaN)`, "comment(\"# same as `nan`\")"}},
+		{"sf6 = -nan # valid, actual encoding is implementation specific", []string{`key("sf6")`, `assign`, `float(NaN)`, `comment("# valid, actual encoding is implementation specific")`}},
+		{"bool1 = true", []string{`key("bool1")`, `assign`, `boolean(true)`}},
+		{"bool2 = false", []string{`key("bool2")`, `assign`, `boolean(false)`}},
+		{"odt1 = 1979-05-27T07:32:00Z", []string{`key("odt1")`, `assign`, `offset_datetime(1979-05-27 07:32:00 +0000 UTC)`}},
+		{"odt2 = 1979-05-27T00:32:00-07:00", []string{`key("odt2")`, `assign`, `offset_datetime(1979-05-27 00:32:00 -0700 -0700)`}},
+		{"odt3 = 1979-05-27T00:32:00.999999-07:00", []string{`key("odt3")`, `assign`, `offset_datetime(1979-05-27 00:32:00.999999 -0700 -0700)`}},
+		{"odt4 = 1979-05-27 07:32:00Z", []string{`key("odt4")`, `assign`, `offset_datetime(1979-05-27 07:32:00 +0000 UTC)`}},
+		{"ldt1 = 1979-05-27T07:32:00", []string{`key("ldt1")`, `assign`, `datetime(1979-05-27 07:32:00 +0000 UTC)`}},
+		{"ldt2 = 1979-05-27T00:32:00.999999", []string{`key("ldt2")`, `assign`, `datetime(1979-05-27 00:32:00.999999 +0000 UTC)`}},
+		{"ld1 = 1979-05-27", []string{`key("ld1")`, `assign`, `date(1979-05-27 00:00:00 +0000 UTC)`}},
+		{"lt1 = 07:32:00", []string{`key("lt1")`, `assign`, `time(0000-01-01 07:32:00 +0000 UTC)`}},
+		{"lt2 = 00:32:00.999999", []string{`key("lt2")`, `assign`, `time(0000-01-01 00:32:00.999999 +0000 UTC)`}},
+	} {
+		p := &parser{}
+		testParseHandler(t, p, p.startKeyValuePair, test)
+	}
+}
diff --git a/toml.go b/toml.go
index af3405b..9e68e15 100644
--- a/toml.go
+++ b/toml.go
@@ -14,14 +14,18 @@ type cmdType string
 
 // Command types that are emitted by the parser.
 const (
-	cComment     cmdType = "comment" // a # comment at the end of the line
-	cKey                 = "key"     // set key name
-	cKeyDot              = "keydot"  // new key stack level
-	cAssign              = "assign"  // assign a value
-	csetStrVal           = "string"  // set a string value
-	csetIntVal           = "integer" // set an integer value
-	csetFloatVal         = "float"   // set a float value
-	csetBoolVal          = "boolean" // set a boolean value
+	cComment        cmdType = "comment"         // a # comment at the end of the line
+	cKey            cmdType = "key"             // set key name
+	cKeyDot         cmdType = "keydot"          // new key stack level
+	cAssign         cmdType = "assign"          // assign a value
+	csetStrVal      cmdType = "string"          // set a string value
+	csetIntVal      cmdType = "integer"         // set an integer value
+	csetFloatVal    cmdType = "float"           // set a float value
+	csetBoolVal     cmdType = "boolean"         // set a boolean value
+	coffsetDateTime cmdType = "offset_datetime" // set a date/time value with timezone information
+	clocalDateTime  cmdType = "datetime"        // set a local date/time value
+	clocalDate      cmdType = "date"            // set a local date value
+	clocalTime      cmdType = "time"            // set a local time value
 )
 
 type parser struct {
diff --git a/value.go b/value.go
new file mode 100644
index 0000000..fe2c45d
--- /dev/null
+++ b/value.go
@@ -0,0 +1,30 @@
+package parser
+
+import (
+	"git.makaay.nl/mauricem/go-parsekit/parse"
+)
+
+// Values must be of the following types: String, Integer, Float, Boolean,
+// Datetime, Array, or Inline Table. Unspecified values are invalid.
+func (t *parser) startValue(p *parse.API) {
+	switch {
+	case p.Peek(c.Any(a.SingleQuote, a.DoubleQuote)):
+		p.Handle(t.startString)
+	case p.Peek(a.Runes('t', 'f')):
+		p.Handle(t.startBoolean)
+	case p.Peek(a.Plus.Or(a.Minus)):
+		p.Handle(t.startNumber)
+	case p.Peek(a.Runes('i', 'n')):
+		p.Handle(t.startNumber)
+	case p.Peek(a.Digit):
+		// Digits followed by '-' or ':' start a date or time; any other
+		// value that starts with a digit is handled as a number.
+		if p.Peek(a.Digits.Then(a.Minus.Or(a.Colon))) {
+			p.Handle(t.startDateTime)
+		} else {
+			p.Handle(t.startNumber)
+		}
+	default:
+		p.Expected("a value")
+	}
+}
diff --git a/value_datetime.go b/value_datetime.go
new file mode 100644
index 0000000..33dd40f
--- /dev/null
+++ b/value_datetime.go
@@ -0,0 +1,101 @@
+package parser
+
+import (
+	"time"
+
+	"git.makaay.nl/mauricem/go-parsekit/parse"
+)
+
+var (
+	// Note: in the definitions below, the token types are chosen based on the
+	// formatting definitions as used by https://golang.org/src/time/format.go
+
+	// To unambiguously represent a specific instant in time, you may use an
+	// RFC 3339 formatted date-time with offset.
+	//
+	//   odt1 = 1979-05-27T07:32:00Z
+	//   odt2 = 1979-05-27T00:32:00-07:00
+	//   odt3 = 1979-05-27T00:32:00.999999-07:00
+	//
+	// If you include only the time portion of an RFC 3339 formatted date-time,
+	// it will represent that time of day without any relation to a specific
+	// day or any offset or timezone.
+	//
+	//   lt1 = 07:32:00
+	//   lt2 = 00:32:00.999999
+	year     = a.Digit.Times(4)
+	month    = a.Digit.Times(2)
+	day      = a.Digit.Times(2)
+	yyyymmdd = c.Seq(year, a.Minus, month, a.Minus, day)
+	dateTok  = tok.Str("2006-01-02", yyyymmdd)
+	hour     = a.Digit.Times(2)
+	minute   = a.Digit.Times(2)
+	seconds  = a.Digit.Times(2)
+	hhmmss   = c.Seq(hour, a.Colon, minute, a.Colon, seconds)
+	timeTok  = tok.Str("15:04:05", hhmmss)
+
+	// The precision of fractional seconds is implementation specific, but at
+	// least millisecond precision is expected. If the value contains greater
+	// precision than the implementation can support, the additional precision
+	// must be truncated, not rounded.
+	//
+	// Here, at most 9 fractional digits are kept and any further digits are
+	// dropped from the match.
+	micro    = a.Dot.Then(c.MinMax(1, 9, a.Digit).Then(m.Drop(c.ZeroOrMore(a.Digit))))
+	microTok = tok.Str(".999999999", micro.Optional())
+
+	// For the sake of readability, you may replace the T delimiter between
+	// date and time with a space (as permitted by RFC 3339 section 5.6).
+	//
+	//   odt4 = 1979-05-27 07:32:00Z
+	tdelimTok = tok.Str("T", a.Rune('T')).Or(tok.Str(" ", a.Rune(' ')))
+
+	// If you omit the offset from an RFC 3339 formatted date-time, it will
+	// represent the given date-time without any relation to an offset or
+	// timezone.
+	//
+	//   ldt1 = 1979-05-27T07:32:00
+	//   ldt2 = 1979-05-27T00:32:00.999999
+	//
+	// It cannot be converted to an instant in time without additional
+	// information. Conversion to an instant, if required, is
+	// implementation-specific.
+	zulu   = a.Rune('Z')
+	offset = c.Seq(a.Runes('+', '-'), hour, a.Colon, minute)
+	tz     = zulu.Or(offset)
+	tzTok  = tok.Str("Z07:00", tz)
+
+	// The full date/time parse format, based on the above definitions.
+	offsetDateTime = tok.Str(coffsetDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok, tzTok))
+	localDateTime  = tok.Str(clocalDateTime, c.Seq(dateTok, tdelimTok, timeTok, microTok))
+	localDate      = tok.Str(clocalDate, dateTok)
+	localTime      = tok.Str(clocalTime, c.Seq(timeTok, microTok))
+	datetime       = c.Any(offsetDateTime, localDateTime, localDate, localTime)
+)
+
+// startDateTime parses a TOML date and/or time value and emits it as a
+// command. An accepted match yields one outer token, whose type is the
+// command type to emit and whose runes hold the matched text, followed by
+// tokens whose types are fragments of a Go time layout. Concatenated, those
+// fragments form the layout that time.Parse applies to the matched text.
+func (t *parser) startDateTime(p *parse.API) {
+	if p.Accept(datetime) {
+		tokens := p.Result().Tokens()
+		valueType := tokens[0].Type.(cmdType)
+		layout := ""
+		for _, l := range tokens[1:] {
+			layout += l.Type.(string)
+		}
+		input := string(tokens[0].Runes)
+		value, err := time.Parse(layout, input)
+		if err == nil {
+			t.emitCommand(valueType, value)
+		} else {
+			// The input matched the grammar, but time.Parse rejected it
+			// (for example because of a month or hour that is out of range).
+			p.Error("Cannot parse value %s: %s", input, err)
+		}
+	} else {
+		p.Expected("a date and/or time")
+	}
+}
diff --git a/value_datetime_test.go b/value_datetime_test.go
new file mode 100644
index 0000000..9d6e322
--- /dev/null
+++ b/value_datetime_test.go
@@ -0,0 +1,32 @@
+package parser
+
+import (
+	"testing"
+)
+
+func TestDateTime(t *testing.T) {
+	for _, test := range []parseTest{
+		{``, []string{`Error: unexpected end of file (expected a date and/or time) at start of file`}},
+		{`1979-05-27`, []string{`date(1979-05-27 00:00:00 +0000 UTC)`}},
+		{`00:00:00`, []string{`time(0000-01-01 00:00:00 +0000 UTC)`}},
+		{`23:59:59`, []string{`time(0000-01-01 23:59:59 +0000 UTC)`}},
+		{`12:10:08.12121212121212`, []string{`time(0000-01-01 12:10:08.121212121 +0000 UTC)`}},
+		{`1979-05-28T01:01:01`, []string{`datetime(1979-05-28 01:01:01 +0000 UTC)`}},
+		{`1979-05-28 01:01:01`, []string{`datetime(1979-05-28 01:01:01 +0000 UTC)`}},
+		{`1979-05-27T07:32:00Z`, []string{`offset_datetime(1979-05-27 07:32:00 +0000 UTC)`}},
+		{`1979-05-27 07:33:00Z`, []string{`offset_datetime(1979-05-27 07:33:00 +0000 UTC)`}},
+		{`1979-05-27 07:34:00+07:00`, []string{`offset_datetime(1979-05-27 07:34:00 +0700 +0700)`}},
+		{`1979-05-27 07:34:00-07:00`, []string{`offset_datetime(1979-05-27 07:34:00 -0700 -0700)`}},
+		{`1985-03-31 23:59:59+00:00`, []string{`offset_datetime(1985-03-31 23:59:59 +0000 UTC)`}},
+		{`2000-09-10 00:00:00.000000000+00:00`, []string{`offset_datetime(2000-09-10 00:00:00 +0000 UTC)`}},
+		{`2003-11-01 01:02:03.999999999999+10:00`, []string{`offset_datetime(2003-11-01 01:02:03.999999999 +1000 +1000)`}},
+		{`2021-02-01 10:10:10.101010203040Z`, []string{`offset_datetime(2021-02-01 10:10:10.101010203 +0000 UTC)`}},
+		// TODO ugly column, should be at start or at the actual wrong part
+		{`2000-13-01`, []string{`Error: Cannot parse value 2000-13-01: parsing time "2000-13-01": month out of range at line 1, column 11`}},
+		{`2000-02-31`, []string{`Error: Cannot parse value 2000-02-31: parsing time "2000-02-31": day out of range at line 1, column 11`}},
+		{`25:01:01`, []string{`Error: Cannot parse value 25:01:01: parsing time "25:01:01": hour out of range at line 1, column 9`}},
+	} {
+		p := &parser{}
+		testParseHandler(t, p, p.startDateTime, test)
+	}
+}
diff --git a/value_number.go b/value_number.go
index 2b28c1a..b3702bf 100644
--- a/value_number.go
+++ b/value_number.go
@@ -13,15 +13,14 @@ var (
 	// numbers, you may use underscores between digits to enhance readability.
 	// Each underscore must be surrounded by at least one digit on each side.
 	// Leading zeros are not allowed.
-	integerPrefix    = a.Signed(a.DigitNotZero.Then(a.Digits.Optional())).Or(a.Signed(zero))
+	integerPrefix    = a.Signed(a.DigitNotZero.Then(a.Digits.Optional())).Or(a.Signed(a.Zero))
 	underscoreDigits = m.Drop(a.Underscore).Then(a.Digits)
 	integerSuffix    = c.ZeroOrMore(underscoreDigits)
 	integer          = integerPrefix.Then(integerSuffix)
 
 	// Integer values -0 and +0 are valid and identical to an unprefixed zero.
-	zero      = a.Rune('0')
-	plusZero  = a.Plus.Then(zero)
-	minusZero = a.Minus.Then(zero)
+	plusZero  = a.Plus.Then(a.Zero)
+	minusZero = a.Minus.Then(a.Zero)
 
 	// Non-negative integer values may also be expressed in hexadecimal, octal,
 	// or binary. In these formats, leading + is not allowed and leading zeros
@@ -78,7 +77,7 @@ func (t *parser) startNumber(p *parse.API) {
 		} else {
 			t.emitCommand(csetFloatVal, math.Inf(+1))
 		}
-	case p.Accept(zero):
+	case p.Accept(a.Zero):
 		p.Handle(t.startIntegerStartingWithZero)
 	case p.Accept(tok.Int64(nil, integer)):
 		t.emitCommand(csetIntVal, p.Result().Value(0).(int64))
diff --git a/value_test.go b/value_test.go
new file mode 100644
index 0000000..4453c2e
--- /dev/null
+++ b/value_test.go
@@ -0,0 +1,45 @@
+package parser
+
+import (
+	"testing"
+)
+
+func TestValue(t *testing.T) {
+	for _, test := range []parseTest{
+		{``, []string{`Error: unexpected end of file (expected a value) at start of file`}},
+		{`"basic s\tring value"`, []string{`string("basic s\tring value")`}},
+		{`'literal s\tring value'`, []string{`string("literal s\\tring value")`}},
+		{"\"\"\"basic multi-line\nstring value\"\"\"", []string{`string("basic multi-line\nstring value")`}},
+		{"'''literal multi-line\nstring value'''", []string{`string("literal multi-line\nstring value")`}},
+		{"true", []string{`boolean(true)`}},
+		{"false", []string{`boolean(false)`}},
+		{"0", []string{`integer(0)`}},
+		{"+0", []string{`integer(0)`}},
+		{"-0", []string{`integer(0)`}},
+		{"0.0", []string{`float(0)`}},
+		{"+0.0", []string{`float(0)`}},
+		{"-0.0", []string{`float(-0)`}},
+		{"1234", []string{`integer(1234)`}},
+		{"-1234", []string{`integer(-1234)`}},
+		{"+9_8_7.6_5_4e-321", []string{`float(9.8765e-319)`}},
+		{"-1_234.5678e-33", []string{`float(-1.2345678e-30)`}},
+		{"inf", []string{`float(+Inf)`}},
+		{"+inf", []string{`float(+Inf)`}},
+		{"-inf", []string{`float(-Inf)`}},
+		{"nan", []string{`float(NaN)`}},
+		{"+nan", []string{`float(NaN)`}},
+		{"-nan", []string{`float(NaN)`}},
+		{"2019-06-19", []string{`date(2019-06-19 00:00:00 +0000 UTC)`}},
+		{"08:38:54", []string{`time(0000-01-01 08:38:54 +0000 UTC)`}},
+		{"2019-06-19 08:38:54", []string{`datetime(2019-06-19 08:38:54 +0000 UTC)`}},
+		{"2019-06-19T08:38:54", []string{`datetime(2019-06-19 08:38:54 +0000 UTC)`}},
+		{"2019-06-19 08:38:54", []string{`datetime(2019-06-19 08:38:54 +0000 UTC)`}},
+		{"2019-06-19T08:38:54.88888", []string{`datetime(2019-06-19 08:38:54.88888 +0000 UTC)`}},
+		{"1979-05-27T07:32:00Z", []string{`offset_datetime(1979-05-27 07:32:00 +0000 UTC)`}},
+		{"1979-05-27T00:32:00-07:00", []string{`offset_datetime(1979-05-27 00:32:00 -0700 -0700)`}},
+		{"1979-05-27T00:32:00.999999-07:00", []string{`offset_datetime(1979-05-27 00:32:00.999999 -0700 -0700)`}},
+	} {
+		p := &parser{}
+		testParseHandler(t, p, p.startValue, test)
+	}
+}