Added some more straightforwardness to the pattern magic coding. I can now write stuff like p.After(upper, upper, hex4).Store() to store runes in the string buffer when the match is complete. Other options instead of Store() are, for now, Backup() (making it more of a peek) or Ignore() (skipping over the scanned text). I think this methodology forms a nice mental representation for the coder who uses the library. It's close to how we think about parsing (or at least how I do).
This commit is contained in:
parent
666cff3af3
commit
4556520582
|
@ -39,29 +39,27 @@ func (p *P) EmitInterpreted(t ItemType) error {
|
|||
}
|
||||
|
||||
// EmitError emits a Parser error item to the client.
|
||||
func (p *P) EmitError(format string, args ...interface{}) StateFn {
|
||||
func (p *P) EmitError(format string, args ...interface{}) {
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.Emit(ItemError, message)
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnexpectedInput is used by a parser implementation to emit an
|
||||
// error item that tells the client that an unexpected rune was
|
||||
// encountered in the input.
|
||||
// The parameter 'expected' is used to provide some context to the error.
|
||||
func (p *P) UnexpectedInput(expected string) StateFn {
|
||||
func (p *P) UnexpectedInput(expected string) {
|
||||
// next() takes care of error messages in cases where ok == false.
|
||||
// Therefore, we only provide an error message for the ok case here.
|
||||
if r, ok := p.next(); ok {
|
||||
return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
||||
p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnexpectedEndOfFile is used by a parser implementation to emit an
|
||||
// error item that tells the client that more data was expected from
|
||||
// the input.
|
||||
// The parameter 'expected' is used to provide some context to the error.
|
||||
func (p *P) UnexpectedEndOfFile(expected string) StateFn {
|
||||
return p.EmitError("Unexpected end of file (expected %s)", expected)
|
||||
func (p *P) UnexpectedEndOfFile(expected string) {
|
||||
p.EmitError("Unexpected end of file (expected %s)", expected)
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ import (
|
|||
// read, then false is returned. Both are considered error cases,
|
||||
// and for that reason these automatically emit an error to the client.
|
||||
func (p *P) next() (rune, bool) {
|
||||
r, w, ok := p.peek()
|
||||
r, w, ok := p.peek(0)
|
||||
if ok {
|
||||
p.advanceCursor(r, w)
|
||||
return r, true
|
||||
|
@ -27,8 +27,8 @@ func (p *P) next() (rune, bool) {
|
|||
// Returns the rune, its width in bytes and a boolean.
|
||||
// The boolean will be false in case no upcoming rune can be peeked
|
||||
// (end of data or invalid UTF8 character).
|
||||
func (p *P) peek() (rune, int, bool) {
|
||||
peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
|
||||
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
||||
peeked, width := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
||||
return peeked, width, peeked != utf8.RuneError
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
|
|||
// moved forward, false otherwise.
|
||||
// A callback function can be provided to specify what to do with
|
||||
// the runes that are encountered in the input.
|
||||
func (p *P) progress(callback func(rune), patterns ...string) bool {
|
||||
func (p *P) progress(callback func(rune), patterns ...interface{}) bool {
|
||||
if runes, widths, ok := p.Match(patterns...); ok {
|
||||
for i, r := range runes {
|
||||
callback(r)
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// AtEndOfFile returns true when there is no more data available in the input.
|
||||
|
@ -42,28 +44,56 @@ func (p *P) AcceptEndOfLine() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Match checks if the upcoming runes satisfy all provided patterns.
|
||||
// It returns a slice of runes that were found, a slice containing
|
||||
// their respective byte widths, and a boolean indicating whether
|
||||
// or not all provided patterns were satisfied by the input data.
|
||||
func (p *P) Match(patterns ...string) ([]rune, []int, bool) {
|
||||
peeked, widths, ok := p.peekMulti(len(patterns))
|
||||
if ok {
|
||||
for i, r := range patterns {
|
||||
if strings.IndexRune(r, peeked[i]) < 0 {
|
||||
return peeked, widths, false
|
||||
}
|
||||
}
|
||||
return peeked, widths, true
|
||||
func (p *P) Match(patterns ...interface{}) ([]rune, []int, bool) {
|
||||
return p.match(0, patterns...)
|
||||
}
|
||||
|
||||
func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
|
||||
var runes []rune
|
||||
var widths []int
|
||||
|
||||
addRune := func(r rune, w int) {
|
||||
offset += w
|
||||
runes = append(runes, r)
|
||||
widths = append(widths, w)
|
||||
}
|
||||
return peeked, widths, false
|
||||
|
||||
for _, pattern := range patterns {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
||||
if r == utf8.RuneError {
|
||||
return runes, widths, false
|
||||
}
|
||||
switch pattern := pattern.(type) {
|
||||
case []interface{}:
|
||||
rs, ws, matched := p.match(offset, pattern...)
|
||||
for i, r := range rs {
|
||||
addRune(r, ws[i])
|
||||
}
|
||||
if !matched {
|
||||
return runes, widths, false
|
||||
}
|
||||
case string:
|
||||
if strings.IndexRune(pattern, r) < 0 {
|
||||
return runes, widths, false
|
||||
}
|
||||
addRune(r, w)
|
||||
case rune:
|
||||
if pattern != r {
|
||||
return runes, widths, false
|
||||
}
|
||||
addRune(r, w)
|
||||
default:
|
||||
panic(fmt.Sprintf("Not rune matching implemented for pattern of type %T", pattern))
|
||||
}
|
||||
}
|
||||
return runes, widths, true
|
||||
}
|
||||
|
||||
// Upcoming checks if the upcoming runes satisfy all provided patterns.
|
||||
// Returns true if all provided patterns are satisfied.
|
||||
// This is basically the same as the Match method, but with only
|
||||
// the boolean return parameter for programmer convenience.
|
||||
func (p *P) Upcoming(patterns ...string) bool {
|
||||
func (p *P) Upcoming(patterns ...interface{}) bool {
|
||||
_, _, ok := p.Match(patterns...)
|
||||
return ok
|
||||
}
|
||||
|
@ -79,10 +109,50 @@ func (p *P) AcceptAny() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
type afterFollowup struct {
|
||||
p *P
|
||||
runes []rune
|
||||
widths []int
|
||||
ok bool
|
||||
}
|
||||
|
||||
func (a *afterFollowup) Store() bool {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
a.p.buffer.writeRune(r)
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
func (a *afterFollowup) Ignore() bool {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
func (a *afterFollowup) Backup() bool {
|
||||
return a.ok
|
||||
}
|
||||
|
||||
func (p *P) After(patterns ...interface{}) *afterFollowup {
|
||||
runes, widths, ok := p.Match(patterns...)
|
||||
return &afterFollowup{
|
||||
p: p,
|
||||
runes: runes,
|
||||
widths: widths,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
// AcceptMatching adds the next runes to the string buffer, but only
|
||||
// if the upcoming runes satisfy the provided patterns.
|
||||
// When runes were added then true is returned, false otherwise.
|
||||
func (p *P) AcceptMatching(patterns ...string) bool {
|
||||
func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
||||
return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
||||
}
|
||||
|
||||
|
@ -99,7 +169,7 @@ func (p *P) AcceptConsecutive(pattern string) bool {
|
|||
|
||||
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
||||
// Returns true when one or more runes were skipped.
|
||||
func (p *P) SkipMatching(patterns ...string) bool {
|
||||
func (p *P) SkipMatching(patterns ...interface{}) bool {
|
||||
if runes, widths, ok := p.Match(patterns...); ok {
|
||||
for i, r := range runes {
|
||||
p.advanceCursor(r, widths[i])
|
||||
|
|
|
@ -4,10 +4,10 @@ package parsekit
|
|||
// and initializes the parser for it.
|
||||
func New(input string, startState StateFn) *P {
|
||||
return &P{
|
||||
input: input,
|
||||
len: len(input),
|
||||
state: startState,
|
||||
items: make(chan Item, 2),
|
||||
input: input,
|
||||
len: len(input),
|
||||
nextState: startState,
|
||||
items: make(chan Item, 2),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,11 @@ func (p *P) Next() (Item, *Error, bool) {
|
|||
return i, nil, true
|
||||
}
|
||||
default:
|
||||
p.state = p.state(p)
|
||||
if p.nextState == nil {
|
||||
panic("No next state was scheduled for the parser")
|
||||
}
|
||||
p.state = p.nextState
|
||||
p.state(p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,30 @@
|
|||
package parsekit
|
||||
|
||||
func (p *P) QueueStates(states ...StateFn) StateFn {
|
||||
first, followup := states[0], states[1:]
|
||||
for reverse := range followup {
|
||||
p.PushState(followup[len(followup)-reverse-1])
|
||||
}
|
||||
return first
|
||||
func (p *P) RouteRepeat() {
|
||||
p.nextState = p.state
|
||||
return
|
||||
}
|
||||
|
||||
type RouteFollowup struct {
|
||||
p *P
|
||||
}
|
||||
|
||||
func (p *P) RouteTo(state StateFn) *RouteFollowup {
|
||||
p.nextState = state
|
||||
return &RouteFollowup{p}
|
||||
}
|
||||
|
||||
func (r *RouteFollowup) ThenTo(state StateFn) *RouteFollowup {
|
||||
r.p.PushState(state)
|
||||
return r
|
||||
}
|
||||
|
||||
func (r *RouteFollowup) ThenReturnHere() {
|
||||
r.p.PushState(r.p.state)
|
||||
}
|
||||
|
||||
func (p *P) RouteReturn() {
|
||||
p.nextState = p.PopState()
|
||||
}
|
||||
|
||||
func (p *P) ToChildState(state StateFn) StateFn {
|
||||
|
|
|
@ -2,7 +2,8 @@ package parsekit
|
|||
|
||||
// P holds the internal state of the parser.
|
||||
type P struct {
|
||||
state StateFn // a function that handles the current state
|
||||
state StateFn // the function that handles the current state
|
||||
nextState StateFn // the function that will handle the next state
|
||||
stack []StateFn // state function stack, for nested parsing
|
||||
input string // the scanned input
|
||||
len int // the total length of the input in bytes
|
||||
|
@ -18,7 +19,7 @@ type P struct {
|
|||
|
||||
// StateFn represents the state of the parser as a function
|
||||
// that returns the next state.
|
||||
type StateFn func(*P) StateFn
|
||||
type StateFn func(*P)
|
||||
|
||||
// ItemType represents the type of a parser Item.
|
||||
type ItemType int
|
||||
|
|
|
@ -1,48 +0,0 @@
|
|||
package parser
|
||||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// Item types that are produced by this parser.
|
||||
const (
|
||||
ItemComment parsekit.ItemType = iota // Comment string
|
||||
ItemKey // Key of a key/value pair
|
||||
ItemKeyDot // Dot for a dotted key
|
||||
ItemAssignment // Value assignment coming up (=)
|
||||
ItemString // A value of type string
|
||||
)
|
||||
|
||||
const (
|
||||
whitespace string = " \t"
|
||||
carriageReturn string = "\r"
|
||||
newline string = "\n"
|
||||
hash string = "#"
|
||||
equal string = "="
|
||||
lower string = "abcdefghijklmnopqrstuvwxyz"
|
||||
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
digits string = "0123456789"
|
||||
hex string = digits + "abcdefABCDEF"
|
||||
dot string = "."
|
||||
underscore string = "_"
|
||||
dash string = "-"
|
||||
singleQuote string = "'"
|
||||
doubleQuote string = "\""
|
||||
backslash string = "\\"
|
||||
quoteChars string = singleQuote + doubleQuote
|
||||
bareKeyChars string = lower + upper + digits + underscore + dash
|
||||
startOfKey string = bareKeyChars + quoteChars
|
||||
escapeChars string = `btnfr"\`
|
||||
shortUtf8Escape string = "u"
|
||||
longUtf8Escape string = "U"
|
||||
)
|
||||
|
||||
var (
|
||||
doubleQuote3 = []string{doubleQuote, doubleQuote, doubleQuote}
|
||||
shortUtf8Match = []string{backslash, "u", hex, hex, hex, hex}
|
||||
longUtf8Match = []string{backslash, "U", hex, hex, hex, hex, hex, hex, hex, hex}
|
||||
)
|
||||
|
||||
// NewParser creates a new parser, using the provided input string
|
||||
// as the data to parse.
|
||||
func NewParser(input string) *parsekit.P {
|
||||
return parsekit.New(input, stateKeyValuePair)
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
package parser
|
||||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// Item types that are produced by this parser.
|
||||
const (
|
||||
ItemComment parsekit.ItemType = iota // Comment string
|
||||
ItemKey // Key of a key/value pair
|
||||
ItemKeyDot // Dot for a dotted key
|
||||
ItemAssignment // Value assignment coming up (=)
|
||||
ItemString // A value of type string
|
||||
)
|
||||
|
||||
const (
|
||||
whitespace string = " \t"
|
||||
carriageReturn string = "\r"
|
||||
newline string = "\n"
|
||||
hash string = "#"
|
||||
equal string = "="
|
||||
lower string = "abcdefghijklmnopqrstuvwxyz"
|
||||
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
digits string = "0123456789"
|
||||
hex string = digits + "abcdefABCDEF"
|
||||
dot string = "."
|
||||
underscore string = "_"
|
||||
dash string = "-"
|
||||
singleQuote string = "'"
|
||||
doubleQuote string = "\""
|
||||
backslash string = "\\"
|
||||
quoteChars string = singleQuote + doubleQuote
|
||||
bareKeyChars string = lower + upper + digits + underscore + dash
|
||||
startOfKey string = bareKeyChars + quoteChars
|
||||
validEscapeChars string = `btnfr"\`
|
||||
mustBeEscaped string = "" +
|
||||
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
||||
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
||||
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
||||
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
||||
"\u007F"
|
||||
)
|
||||
|
||||
var (
|
||||
doubleQuote3 = []interface{}{doubleQuote, doubleQuote, doubleQuote}
|
||||
hex4 = []interface{}{hex, hex, hex, hex}
|
||||
shortUtf8Match = []interface{}{backslash, 'u', hex4}
|
||||
longUtf8Match = []interface{}{backslash, 'U', hex4, hex4}
|
||||
)
|
||||
|
||||
// NewParser creates a new parser, using the provided input string
|
||||
// as the data to parse.
|
||||
func NewParser(input string) *parsekit.P {
|
||||
return parsekit.New(input, stateKeyValuePair)
|
||||
}
|
|
@ -5,19 +5,19 @@ import (
|
|||
)
|
||||
|
||||
// A '#' hash symbol marks the rest of the line as a comment.
|
||||
func stateCommentStart(p *parsekit.P) parsekit.StateFn {
|
||||
func stateCommentStart(p *parsekit.P) {
|
||||
p.SkipConsecutive(hash)
|
||||
return stateCommentContent
|
||||
p.RouteTo(stateCommentContent)
|
||||
}
|
||||
|
||||
// All characters up to the end of the line are included in the comment.
|
||||
func stateCommentContent(p *parsekit.P) parsekit.StateFn {
|
||||
func stateCommentContent(p *parsekit.P) {
|
||||
switch {
|
||||
case p.AtEndOfLine():
|
||||
p.EmitLiteralTrim(ItemComment)
|
||||
return p.ToParentState()
|
||||
p.RouteReturn()
|
||||
default:
|
||||
p.AcceptAny()
|
||||
return stateCommentContent
|
||||
p.RouteRepeat()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,10 @@ package parser
|
|||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
func stateEndOfFile(p *parsekit.P) parsekit.StateFn {
|
||||
func stateEndOfFile(p *parsekit.P) {
|
||||
if p.AtEndOfFile() {
|
||||
p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
|
||||
} else {
|
||||
p.UnexpectedInput("end of file")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -3,61 +3,64 @@ package parser
|
|||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// The primary building block of a TOML document is the key/value pair.
|
||||
func stateKeyValuePair(p *parsekit.P) parsekit.StateFn {
|
||||
func stateKeyValuePair(p *parsekit.P) {
|
||||
switch {
|
||||
case p.SkipConsecutive(whitespace + carriageReturn + newline):
|
||||
return stateKeyValuePair
|
||||
case p.Upcoming(hash):
|
||||
return p.ToChildState(stateCommentStart)
|
||||
case p.Upcoming(startOfKey):
|
||||
return stateKey
|
||||
case p.After(whitespace + carriageReturn + newline).Ignore():
|
||||
p.RouteRepeat()
|
||||
case p.After(hash).Backup():
|
||||
p.RouteTo(stateCommentStart).ThenReturnHere()
|
||||
case p.After(startOfKey).Backup():
|
||||
p.RouteTo(stateKey)
|
||||
default:
|
||||
return stateEndOfFile
|
||||
p.RouteTo(stateEndOfFile)
|
||||
}
|
||||
}
|
||||
|
||||
// A key may be either bare, quoted or dotted.
|
||||
func stateKey(p *parsekit.P) parsekit.StateFn {
|
||||
if p.AcceptMatching(bareKeyChars) {
|
||||
return statebareKeyChars
|
||||
func stateKey(p *parsekit.P) {
|
||||
if p.After(bareKeyChars).Backup() {
|
||||
p.RouteTo(statebareKey)
|
||||
} else {
|
||||
p.UnexpectedInput("a valid key name")
|
||||
}
|
||||
return p.UnexpectedInput("a valid key name")
|
||||
}
|
||||
|
||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||
// keys are allowed to be composed of only ASCII digits,
|
||||
// e.g. 1234, but are always interpreted as strings.
|
||||
func statebareKeyChars(p *parsekit.P) parsekit.StateFn {
|
||||
p.AcceptConsecutive(bareKeyChars)
|
||||
func statebareKey(p *parsekit.P) {
|
||||
p.AcceptConsecutive(bareKeyChars) // TODO make a plan for adding this to After()
|
||||
p.EmitLiteral(ItemKey)
|
||||
return stateEndOfKeyOrKeyDot
|
||||
p.RouteTo(stateEndOfKeyOrKeyDot)
|
||||
}
|
||||
|
||||
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
||||
// This allows for grouping similar properties together:
|
||||
func stateEndOfKeyOrKeyDot(p *parsekit.P) parsekit.StateFn {
|
||||
func stateEndOfKeyOrKeyDot(p *parsekit.P) {
|
||||
// Whitespace around dot-separated parts is ignored, however,
|
||||
// best practice is to not use any extraneous whitespace.
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.SkipMatching(dot) {
|
||||
p.Emit(ItemKeyDot, "")
|
||||
if p.After(dot).Store() {
|
||||
p.SkipConsecutive(whitespace)
|
||||
return stateKey
|
||||
p.EmitLiteral(ItemKeyDot)
|
||||
p.RouteTo(stateKey)
|
||||
} else {
|
||||
p.RouteTo(stateKeyAssignment)
|
||||
}
|
||||
return stateKeyAssignment
|
||||
}
|
||||
|
||||
// Keys are on the left of the equals sign and values are on the right.
|
||||
// Whitespace is ignored around key names and values. The key, equals
|
||||
// sign, and value must be on the same line (though some values can
|
||||
// be broken over multiple lines).
|
||||
func stateKeyAssignment(p *parsekit.P) parsekit.StateFn {
|
||||
func stateKeyAssignment(p *parsekit.P) {
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.SkipMatching(equal) {
|
||||
p.Emit(ItemAssignment, "")
|
||||
if p.After(equal).Store() {
|
||||
p.EmitLiteral(ItemAssignment)
|
||||
p.SkipConsecutive(whitespace)
|
||||
return stateValue
|
||||
p.RouteTo(stateValue)
|
||||
} else {
|
||||
p.UnexpectedInput("a value assignment")
|
||||
}
|
||||
return p.UnexpectedInput("a value assignment")
|
||||
}
|
||||
|
|
|
@ -6,14 +6,52 @@ import "github.com/mmakaay/toml/parsekit"
|
|||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
||||
// * Basic strings are surrounded by quotation marks.
|
||||
func stateStringValue(p *parsekit.P) parsekit.StateFn {
|
||||
func stateStringValue(p *parsekit.P) {
|
||||
switch {
|
||||
case p.SkipMatching(doubleQuote3...):
|
||||
return stateMultiLineBasicString
|
||||
case p.SkipMatching(doubleQuote3):
|
||||
p.RouteTo(stateMultiLineBasicString)
|
||||
case p.SkipMatching(doubleQuote):
|
||||
return p.QueueStates(stateParseString, stateBasicStringSpecific)
|
||||
p.RouteTo(parseString).ThenTo(basicStringSpecifics)
|
||||
default:
|
||||
p.UnexpectedInput("a string value")
|
||||
}
|
||||
}
|
||||
|
||||
func stateMultiLineBasicString(p *parsekit.P) {
|
||||
p.EmitError("Not yet implemented")
|
||||
}
|
||||
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
//
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
//
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||
func parseString(p *parsekit.P) {
|
||||
switch {
|
||||
case p.AtEndOfFile():
|
||||
p.UnexpectedEndOfFile("basic string token")
|
||||
case p.After(backslash, validEscapeChars).Store() ||
|
||||
p.After(shortUtf8Match).Store() ||
|
||||
p.After(longUtf8Match).Store():
|
||||
p.RouteRepeat()
|
||||
case p.After(mustBeEscaped).Backup():
|
||||
r, _, _ := p.Match(mustBeEscaped)
|
||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||
case p.After(backslash).Backup() || p.After(doubleQuote).Backup():
|
||||
p.RouteReturn()
|
||||
default:
|
||||
p.AcceptAny()
|
||||
p.RouteRepeat()
|
||||
}
|
||||
return p.UnexpectedInput("a string value")
|
||||
}
|
||||
|
||||
// Specific handling of input for basic strings.
|
||||
|
@ -21,64 +59,17 @@ func stateStringValue(p *parsekit.P) parsekit.StateFn {
|
|||
// * No additional \escape sequences are allowed. What the spec say about this:
|
||||
// "All other escape sequences [..] are reserved and, if used, TOML should
|
||||
// produce an error."
|
||||
|
||||
func stateBasicStringSpecific(p *parsekit.P) parsekit.StateFn {
|
||||
func basicStringSpecifics(p *parsekit.P) {
|
||||
switch {
|
||||
case p.SkipMatching(doubleQuote):
|
||||
if err := p.EmitInterpreted(ItemString); err != nil {
|
||||
return p.EmitError("Invalid data in string: %s", err)
|
||||
case p.After(doubleQuote).Ignore():
|
||||
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
|
||||
p.EmitError("Invalid data in string: %s", err)
|
||||
} else {
|
||||
p.RouteTo(stateKeyValuePair)
|
||||
}
|
||||
return stateKeyValuePair
|
||||
case p.Upcoming(backslash):
|
||||
return p.EmitError("Invalid escape sequence")
|
||||
case p.After(backslash).Backup():
|
||||
p.EmitError("Invalid escape sequence")
|
||||
default:
|
||||
return p.QueueStates(stateParseString, stateBasicStringSpecific)
|
||||
p.RouteTo(parseString).ThenTo(basicStringSpecifics)
|
||||
}
|
||||
}
|
||||
|
||||
func stateMultiLineBasicString(p *parsekit.P) parsekit.StateFn {
|
||||
p.EmitError("Not yet implemented")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||
const invalidBasicStringCharacters string = "\"\\" +
|
||||
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
||||
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
||||
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
||||
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
||||
"\u007F"
|
||||
|
||||
func stateParseString(p *parsekit.P) parsekit.StateFn {
|
||||
switch {
|
||||
case p.AtEndOfFile():
|
||||
return p.UnexpectedEndOfFile("basic string token")
|
||||
case p.AcceptMatching(backslash, escapeChars):
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
case p.AcceptMatching(shortUtf8Match...):
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
case p.AcceptMatching(longUtf8Match...):
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
case p.Upcoming(backslash) || p.Upcoming(doubleQuote):
|
||||
// Returning to the parent state to have special cases handled,
|
||||
// because there are differences between single and multi line strings.
|
||||
return p.ToParentState()
|
||||
case p.Upcoming(invalidBasicStringCharacters):
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||
r, _, _ := p.Match(invalidBasicStringCharacters)
|
||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||
return nil
|
||||
default:
|
||||
p.AcceptAny()
|
||||
}
|
||||
return stateParseString
|
||||
}
|
||||
|
|
|
@ -4,10 +4,11 @@ import "github.com/mmakaay/toml/parsekit"
|
|||
|
||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||
func stateValue(p *parsekit.P) parsekit.StateFn {
|
||||
func stateValue(p *parsekit.P) {
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.Upcoming(quoteChars) {
|
||||
return stateStringValue
|
||||
p.RouteTo(stateStringValue)
|
||||
} else {
|
||||
p.UnexpectedInput("a value")
|
||||
}
|
||||
return p.UnexpectedInput("a value")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue