Ahhhh, found a name that clicked for the more general layer of the parser code: parsekit. That is short and tells me what it is: not a parser itself, but something to build parsers with. Now I could also name the actual parsing code the way I wanted to, namely 'toml/parser'. So it feels like the structure is settling down.
parent 3f638c59cd · commit 666cff3af3
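The split this commit settles on: parsekit owns the generic machinery (the P state type, StateFn, the Emit/Accept/Skip helpers), while a concrete parser such as toml/parser only defines its own item types and state functions and wires them into parsekit.New. The sketch below is illustrative only and not part of the commit; the package name, ItemWord type and stateWord function are made up, while the parsekit calls mirror the API added in the diff.

```go
package wordparser

import "github.com/mmakaay/toml/parsekit"

// ItemWord is a hypothetical item type, defined the same way
// toml/parser defines ItemComment, ItemKey, etc.
const ItemWord parsekit.ItemType = iota

// stateWord is a parsekit state function: it consumes some input,
// emits items, and returns the next state (or nil to stop).
func stateWord(p *parsekit.P) parsekit.StateFn {
	p.SkipConsecutive(" \t")
	if p.AtEndOfFile() {
		p.Emit(parsekit.ItemEOF, "EOF")
		return nil
	}
	if p.AcceptConsecutive("abcdefghijklmnopqrstuvwxyz") {
		p.EmitLiteral(ItemWord)
		return stateWord
	}
	return p.UnexpectedInput("a lowercase word")
}

// NewParser wires the start state into the parsekit machinery,
// just like toml/parser's NewParser does in this commit.
func NewParser(input string) *parsekit.P {
	return parsekit.New(input, stateWord)
}
```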
Makefile | 4

@@ -1,3 +1,3 @@
 test:
-	cd parser && go test
-	cd lexer && go test
+	@cd parsekit && go test
+	@cd parser && go test
@@ -1,12 +0,0 @@
-package lexer
-
-import "github.com/mmakaay/toml/parser"
-
-func stateEndOfFile(l *parser.Parser) parser.StateFn {
-	if l.AtEndOfFile() {
-		l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser?
-	} else {
-		l.UnexpectedInputError("end of file")
-	}
-	return nil
-}
@@ -1,13 +0,0 @@
-package lexer
-
-import "github.com/mmakaay/toml/parser"
-
-// Values must be of the following types: String, Integer, Float, Boolean,
-// Datetime, Array, or Inline Table. Unspecified values are invalid.
-func stateValue(l *parser.Parser) parser.StateFn {
-	l.SkipConsecutive(whitespace)
-	if l.Upcoming(quoteChars) {
-		return stateStringValue
-	}
-	return l.UnexpectedInputError("a value")
-}
@@ -0,0 +1,67 @@
+package parsekit
+
+import (
+	"fmt"
+	"strings"
+)
+
+// Emit passes a Parser item to the client, including the provided string.
+func (p *P) Emit(t ItemType, s string) {
+	p.items <- Item{t, s}
+	p.buffer.reset()
+}
+
+// EmitLiteral passes a Parser item to the client, including the accumulated
+// string buffer data as a literal string.
+func (p *P) EmitLiteral(t ItemType) {
+	p.Emit(t, p.buffer.asLiteralString())
+}
+
+// EmitLiteralTrim passes a Parser item to the client, including the
+// accumulated string buffer data as a literal string with whitespace
+// trimmed from it.
+func (p *P) EmitLiteralTrim(t ItemType) {
+	p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
+}
+
+// EmitInterpreted passes a Parser item to the client, including the
+// accumulated string buffer data a Go doubled quoted interpreted string
+// (handling escape codes like \n, \t, \uXXXX, etc.)
+// This method might return an error, in case there is data in the
+// string buffer that is not valid for string interpretation.
+func (p *P) EmitInterpreted(t ItemType) error {
+	s, err := p.buffer.asInterpretedString()
+	if err != nil {
+		return err
+	}
+	p.Emit(t, s)
+	return nil
+}
+
+// EmitError emits a Parser error item to the client.
+func (p *P) EmitError(format string, args ...interface{}) StateFn {
+	message := fmt.Sprintf(format, args...)
+	p.Emit(ItemError, message)
+	return nil
+}
+
+// UnexpectedInput is used by a parser implementation to emit an
+// error item that tells the client that an unexpected rune was
+// encountered in the input.
+// The parameter 'expected' is used to provide some context to the error.
+func (p *P) UnexpectedInput(expected string) StateFn {
+	// next() takes care of error messages in cases where ok == false.
+	// Therefore, we only provide an error message for the ok case here.
+	if r, ok := p.next(); ok {
+		return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
+	}
+	return nil
+}
+
+// UnexpectedEndOfFile is used by a parser implementation to emit an
+// error item that tells the client that more data was expected from
+// the input.
+// The parameter 'expected' is used to provide some context to the error.
+func (p *P) UnexpectedEndOfFile(expected string) StateFn {
+	return p.EmitError("Unexpected end of file (expected %s)", expected)
+}
@@ -0,0 +1,88 @@
+package parsekit
+
+import (
+	"unicode/utf8"
+)
+
+// next returns the next rune from the input and a boolean indicating if
+// reading the input was successful.
+// When the end of input is reached, or an invalid UTF8 character is
+// read, then false is returned. Both are considered error cases,
+// and for that reason these automatically emit an error to the client.
+func (p *P) next() (rune, bool) {
+	r, w, ok := p.peek()
+	if ok {
+		p.advanceCursor(r, w)
+		return r, true
+	}
+	if r == utf8.RuneError && w == 0 {
+		p.EmitError("unexpected end of file")
+	} else {
+		p.EmitError("invalid UTF8 character")
+	}
+	return r, false
+}
+
+// peek returns but does not advance the cursor to the next rune(s) in the input.
+// Returns the rune, its width in bytes and a boolean.
+// The boolean will be false in case no upcoming rune can be peeked
+// (end of data or invalid UTF8 character).
+func (p *P) peek() (rune, int, bool) {
+	peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
+	return peeked, width, peeked != utf8.RuneError
+}
+
+// peekMulti takes a peek at multiple upcoming runes in the input.
+// Returns a slice of runes, a slice containing their respective
+// widths in bytes and a boolean.
+// The boolean will be false in case less runes can be peeked than
+// the requested amount (end of data or invalid UTF8 character).
+func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
+	var runes []rune
+	var widths []int
+	offset := 0
+	for i := 0; i < amount; i++ {
+		r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
+		switch {
+		case r == utf8.RuneError:
+			return runes, widths, false
+		default:
+			offset += w
+			runes = append(runes, r)
+			widths = append(widths, w)
+		}
+	}
+	return runes, widths, true
+}
+
+// progress moves the cursor forward in the input, returning one rune
+// for every specified pattern. The cursor will only be moved forward when
+// all requested patterns can be satisfied.
+// Returns true when all patterns were satisfied and the cursor was
+// moved forward, false otherwise.
+// A callback function can be provided to specify what to do with
+// the runes that are encountered in the input.
+func (p *P) progress(callback func(rune), patterns ...string) bool {
+	if runes, widths, ok := p.Match(patterns...); ok {
+		for i, r := range runes {
+			callback(r)
+			p.advanceCursor(r, widths[i])
+		}
+		return true
+	}
+	return false
+}
+
+// advanceCursor advances the rune cursor one position in the
+// input data. While doing so, it keeps tracks of newlines,
+// so we can report on row + column positions on error.
+func (p *P) advanceCursor(r rune, w int) {
+	p.pos += w
+	if p.newline {
+		p.cursorColumn = 0
+		p.cursorRow++
+	} else {
+		p.cursorColumn++
+	}
+	p.newline = r == '\n'
+}
@@ -0,0 +1,120 @@
+package parsekit
+
+import (
+	"strings"
+)
+
+// AtEndOfFile returns true when there is no more data available in the input.
+func (p *P) AtEndOfFile() bool {
+	return p.pos >= p.len
+}
+
+// AtEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. The cursor is not moved to a new position
+// by this method.
+func (p *P) AtEndOfLine() bool {
+	return p.AtEndOfFile() ||
+		p.Upcoming("\r", "\n") ||
+		p.Upcoming("\n")
+}
+
+// SkipEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. Additionally, when not at the end of the file,
+// the cursor is moved forward to beyond the newline.
+func (p *P) SkipEndOfLine() bool {
+	return p.AtEndOfFile() ||
+		p.SkipMatching("\r", "\n") ||
+		p.SkipMatching("\n")
+}
+
+// AcceptEndOfLine returns true when the cursor is either at the end of the line
+// or at the end of the file. When not at the end of the file, a normalized
+// newline (only a '\n' character, even with '\r\n' on the input)
+// is added to the string buffer.
+func (p *P) AcceptEndOfLine() bool {
+	if p.AtEndOfFile() {
+		return true
+	}
+	if p.SkipEndOfLine() {
+		p.buffer.writeRune('\n')
+		return true
+	}
+	return false
+}
+
+// Match checks if the upcoming runes satisfy all provided patterns.
+// It returns a slice of runes that were found, a slice containing
+// their respective byte widths, and a boolean indicating whether
+// or not all provided patterns were satisfied by the input data.
+func (p *P) Match(patterns ...string) ([]rune, []int, bool) {
+	peeked, widths, ok := p.peekMulti(len(patterns))
+	if ok {
+		for i, r := range patterns {
+			if strings.IndexRune(r, peeked[i]) < 0 {
+				return peeked, widths, false
+			}
+		}
+		return peeked, widths, true
+	}
+	return peeked, widths, false
+}
+
+// Upcoming checks if the upcoming runes satisfy all provided patterns.
+// Returns true if all provided patterns are satisfied.
+// This is basically the same as the Match method, but with only
+// the boolean return parameter for programmer convenciency.
+func (p *P) Upcoming(patterns ...string) bool {
+	_, _, ok := p.Match(patterns...)
+	return ok
+}
+
+// AcceptAny adds the next rune from the input to the string buffer.
+// If no rune could be read (end of file or invalid UTF8 data),
+// then false is returned.
+func (p *P) AcceptAny() bool {
+	if r, ok := p.next(); ok {
+		p.buffer.writeRune(r)
+		return true
+	}
+	return false
+}
+
+// AcceptMatching adds the next runes to the string buffer, but only
+// if the upcoming runes satisfy the provided patterns.
+// When runes were added then true is returned, false otherwise.
+func (p *P) AcceptMatching(patterns ...string) bool {
+	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
+}
+
+// AcceptConsecutive adds consecutive runes from the input to the string
+// buffer, as long as they exist in the pattern.
+// If any runes were added then true is returned, false otherwise.
+func (p *P) AcceptConsecutive(pattern string) bool {
+	accepted := false
+	for p.AcceptMatching(pattern) {
+		accepted = true
+	}
+	return accepted
+}
+
+// SkipMatching skips runes, but only when all provided patterns are satisfied.
+// Returns true when one or more runes were skipped.
+func (p *P) SkipMatching(patterns ...string) bool {
+	if runes, widths, ok := p.Match(patterns...); ok {
+		for i, r := range runes {
+			p.advanceCursor(r, widths[i])
+		}
+		return true
+	}
+	return false
+}
+
+// SkipConsecutive skips consecutive runes from the provided pattern.
+// Returns true when one or more runes were skipped.
+func (p *P) SkipConsecutive(pattern string) bool {
+	didSkip := false
+	for p.SkipMatching(pattern) {
+		didSkip = true
+	}
+	return didSkip
+}
@@ -1,25 +1,36 @@
-package parser
+package parsekit
 
+// New takes an input string and a start state,
+// and initializes the parser for it.
+func New(input string, startState StateFn) *P {
+	return &P{
+		input: input,
+		len:   len(input),
+		state: startState,
+		items: make(chan Item, 2),
+	}
+}
+
 // Next retrieves the next parsed item.
 // When a valid item was found, then the boolean return parameter will be true.
 // On error or when successfully reaching the end of the input, false is returned.
 // When an error occurred, it will be set in the error return value, nil otherwise.
-func (l *Parser) Next() (Item, *Error, bool) {
+func (p *P) Next() (Item, *Error, bool) {
 	for {
 		select {
-		case i := <-l.items:
+		case i := <-p.items:
 			switch {
 			case i.Type == ItemEOF:
 				return i, nil, false
 			case i.Type == ItemError:
-				l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
-				return i, l.err, false
+				p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
+				return i, p.err, false
 			default:
-				l.item = i
+				p.item = i
 				return i, nil, true
 			}
 		default:
-			l.state = l.state(l)
+			p.state = p.state(p)
 		}
 	}
 }
@@ -27,10 +38,10 @@ func (l *Parser) Next() (Item, *Error, bool) {
 // ToArray returns Parser items as an array (mainly intended for testing purposes)
 // When an error occurs during scanning, a partial result will be
 // returned, accompanied by the error that occurred.
-func (l *Parser) ToArray() ([]Item, *Error) {
+func (p *P) ToArray() ([]Item, *Error) {
 	var items []Item
 	for {
-		item, err, more := l.Next()
+		item, err, more := p.Next()
 		if !more {
 			return items, err
 		}
@@ -1,6 +1,6 @@
-package parser
+package parsekit
 
-func (p *Parser) QueueStates(states ...StateFn) StateFn {
+func (p *P) QueueStates(states ...StateFn) StateFn {
 	first, followup := states[0], states[1:]
 	for reverse := range followup {
 		p.PushState(followup[len(followup)-reverse-1])
@@ -8,24 +8,24 @@ func (p *Parser) QueueStates(states ...StateFn) StateFn {
 	return first
 }
 
-func (p *Parser) ToChildState(state StateFn) StateFn {
+func (p *P) ToChildState(state StateFn) StateFn {
 	p.PushState(p.state)
 	return state
 }
 
-func (p *Parser) ToParentState() StateFn {
+func (p *P) ToParentState() StateFn {
 	state := p.PopState()
 	return state
 }
 
 // PushState adds the state function to the state stack.
 // This is used for implementing nested parsing.
-func (p *Parser) PushState(state StateFn) {
+func (p *P) PushState(state StateFn) {
 	p.stack = append(p.stack, state)
 }
 
 // PopState pops the last pushed state from the state stack.
-func (p *Parser) PopState() StateFn {
+func (p *P) PopState() StateFn {
 	last := len(p.stack) - 1
 	head, tail := p.stack[:last], p.stack[last]
 	p.stack = head
@@ -1,4 +1,4 @@
-package parser
+package parsekit
 
 import (
 	"bytes"
@@ -1,4 +1,4 @@
-package parser
+package parsekit
 
 import (
 	"testing"
@@ -1,7 +1,7 @@
-package parser
+package parsekit
 
-// Parser holds the internal state of the Parser.
-type Parser struct {
+// P holds the internal state of the parser.
+type P struct {
 	state StateFn   // a function that handles the current state
 	stack []StateFn // state function stack, for nested parsing
 	input string    // the scanned input
@@ -18,7 +18,7 @@ type Parser struct {
 
 // StateFn represents the state of the parser as a function
 // that returns the next state.
-type StateFn func(*Parser) StateFn
+type StateFn func(*P) StateFn
 
 // ItemType represents the type of a parser Item.
 type ItemType int
@@ -1,14 +1,14 @@
-package lexer
+package parser
 
-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"
 
-// Item types that are emitted by this parser.
+// Item types that are produced by this parser.
 const (
-	ItemComment parser.ItemType = iota // An error occurred
+	ItemComment parsekit.ItemType = iota // Comment string
 	ItemKey                               // Key of a key/value pair
 	ItemKeyDot                            // Dot for a dotted key
 	ItemAssignment                        // Value assignment coming up (=)
 	ItemString                            // A value of type string
 )
 
 const (
@@ -43,6 +43,6 @@ var (
 
 // NewParser creates a new parser, using the provided input string
 // as the data to parse.
-func NewParser(input string) *parser.Parser {
-	return parser.New(input, stateKeyValuePair)
+func NewParser(input string) *parsekit.P {
+	return parsekit.New(input, stateKeyValuePair)
 }
@@ -1,12 +1,12 @@
-package lexer_test
+package parser_test
 
 import (
 	"fmt"
 	"strings"
 	"testing"
 
-	"github.com/mmakaay/toml/lexer"
-	"github.com/mmakaay/toml/parser"
+	"github.com/mmakaay/toml/parsekit"
+	lexer "github.com/mmakaay/toml/parser"
 )
 
 type statesT struct {
@@ -56,8 +56,8 @@ func runStatesT(t *testing.T, c statesT) {
 	}
 }
 
-// ParserItemToString returns a string representation of the parser.Item.
-func ParserItemToString(i parser.Item) string {
+// ParserItemToString returns a string representation of the parsekit.Item.
+func ParserItemToString(i parsekit.Item) string {
 	switch i.Type {
 	case lexer.ItemComment:
 		return fmt.Sprintf("#(%s)", i.Value)
@@ -70,6 +70,6 @@ func ParserItemToString(i parser.Item) string {
 	case lexer.ItemAssignment:
 		return "="
 	default:
-		panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
+		panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
 	}
 }
@@ -1,13 +1,13 @@
-package lexer_test
+package parser_test
 
 import (
 	"testing"
 
-	"github.com/mmakaay/toml/lexer"
+	"github.com/mmakaay/toml/parser"
 )
 
 func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
-	_, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
+	_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
 	t.Logf("Got error: %s", err.Error())
 	if err.Row != 4 {
 		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
parser/parser.go | 274

@@ -1,274 +0,0 @@
-package parser
-
-import (
-	"fmt"
-	"strings"
-	"unicode/utf8"
-)
-
-// New takes an input string and a start state,
-// and initializes the parser for it.
-func New(input string, startState StateFn) *Parser {
-	return &Parser{
-		input: input,
-		len:   len(input),
-		state: startState,
-		items: make(chan Item, 2),
-	}
-}
-
-// AtEndOfFile returns true when there is no more data available in the input.
-func (p *Parser) AtEndOfFile() bool {
-	return p.pos >= p.len
-}
-
-func (p *Parser) AtEndOfLine() bool {
-	return p.AtEndOfFile() ||
-		p.Upcoming("\r", "\n") ||
-		p.Upcoming("\n")
-}
-
-func (p *Parser) SkipEndOfLine() bool {
-	return p.AtEndOfFile() ||
-		p.SkipMatching("\r", "\n") ||
-		p.SkipMatching("\n")
-}
-
-func (p *Parser) AcceptEndOfLine() bool {
-	// No newline, but we're defintely at the end of the line here.
-	if p.AtEndOfFile() {
-		return true
-	}
-	// If we see some kind of end of line, then we accept a
-	// normalized newline, which is just a '\n'. This will normalize
-	// '\r\n' into '\n'.
-	if p.SkipEndOfLine() {
-		p.buffer.writeRune('\n')
-		return true
-	}
-	return false
-}
-
-// Emit passes a Parser item to the client, including the provided string.
-func (p *Parser) Emit(t ItemType, s string) {
-	p.items <- Item{t, s}
-	p.buffer.reset()
-}
-
-// EmitLiteral passes a Parser item to the client, including the accumulated
-// string buffer data as a literal string.
-func (p *Parser) EmitLiteral(t ItemType) {
-	p.Emit(t, p.buffer.asLiteralString())
-}
-
-// EmitLiteralTrim passes a Parser item to the client, including the
-// accumulated string buffer data as a literal string with whitespace
-// trimmed from it.
-func (p *Parser) EmitLiteralTrim(t ItemType) {
-	p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
-}
-
-// EmitInterpreted passes a Parser item to the client, including the
-// accumulated string buffer data a Go doubled quoted interpreted string
-// (handling escape codes like \n, \t, \uXXXX, etc.)
-// This method might return an error, in case there is data in the
-// string buffer that is not valid for string interpretation.
-func (p *Parser) EmitInterpreted(t ItemType) error {
-	s, err := p.buffer.asInterpretedString()
-	if err != nil {
-		return err
-	}
-	p.Emit(t, s)
-	return nil
-}
-
-// EmitError emits a Parser error item to the client.
-func (p *Parser) EmitError(format string, args ...interface{}) StateFn {
-	message := fmt.Sprintf(format, args...)
-	p.Emit(ItemError, message)
-	return nil
-}
-
-// Match checks if the upcoming runes satisfy all provided patterns.
-// It returns a slice of runes that were found, their total byte width
-// and a boolean indicating whether or not all provided patterns were
-// satisfied by the input data.
-func (p *Parser) Match(patterns ...string) ([]rune, int, bool) {
-	peeked, width, ok := p.peekMulti(len(patterns))
-	if ok {
-		for i, r := range patterns {
-			if strings.IndexRune(r, peeked[i]) < 0 {
-				return peeked, width, false
-			}
-		}
-		return peeked, width, true
-	}
-	return peeked, width, false
-}
-
-// Upcoming checks if the upcoming runes satisfy all provided patterns.
-// Returns true if all provided patterns are satisfied.
-func (p *Parser) Upcoming(patterns ...string) bool {
-	_, _, ok := p.Match(patterns...)
-	return ok
-}
-
-// AcceptAny adds the next rune from the input to the string buffer.
-// If no rune could be read (end of file or invalid UTF8 data),
-// then false is returned.
-func (p *Parser) AcceptAny() bool {
-	if r, ok := p.next(); ok {
-		p.buffer.writeRune(r)
-		return true
-	}
-	return false
-}
-
-// AcceptMatching adds the next runes to the string buffer, but only
-// if the upcoming runes satisfy the provided patterns.
-// When runes were added then true is returned, false otherwise.
-func (p *Parser) AcceptMatching(patterns ...string) bool {
-	return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
-}
-
-// AcceptConsecutive adds consecutive runes from the input to the string
-// buffer, as long as they exist in the pattern.
-// If any runes were added then true is returned, false otherwise.
-func (p *Parser) AcceptConsecutive(pattern string) bool {
-	accepted := false
-	for p.AcceptMatching(pattern) {
-		accepted = true
-	}
-	return accepted
-}
-
-// SkipMatching skips runes, but only when all provided patterns are satisfied.
-// Returns true when one or more runes were skipped.
-func (p *Parser) SkipMatching(patterns ...string) bool {
-	if runes, w, ok := p.Match(patterns...); ok {
-		p.pos += w
-		for _, r := range runes {
-			p.advanceCursor(r)
-		}
-		return true
-	}
-	return false
-}
-
-// SkipConsecutive skips consecutive runes from the provided pattern.
-// Returns true when one or more runes were skipped.
-func (p *Parser) SkipConsecutive(pattern string) bool {
-	didSkip := false
-	for p.SkipMatching(pattern) {
-		didSkip = true
-	}
-	return didSkip
-}
-
-// ============================================================================
-// EMIT DATA AND ERRORS
-// ============================================================================
-
-// UnexpectedInputError is used by a parser implementation to emit an
-// error item that tells the client that an unexpected rune was
-// encountered in the input.
-// The parameter 'expected' is used to provide some context to the error.
-func (p *Parser) UnexpectedInputError(expected string) StateFn {
-	// next() takes care of error messages for ok == false.
-	if r, ok := p.next(); ok {
-		return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
-	}
-	return nil
-}
-
-// UnexpectedEndOfFile is used by a parser implementation to emit an
-// error item that tells the client that more data was expected from
-// the input.
-// The parameter 'expected' is used to provide some context to the error.
-func (p *Parser) UnexpectedEndOfFile(expected string) StateFn {
-	return p.EmitError("Unexpected end of file (expected %s)", expected)
-}
-
-// ============================================================================
-// LEXER : our lexer is quite low level, it only returns UTF8 runes
-// ============================================================================
-
-// peek returns but does not advance to the next rune(s) in the input.
-// Returns the rune, its width and a boolean. The boolean will be false in case
-// no upcoming rune can be peeked (end of data or invalid UTF8 character).
-func (p *Parser) peek() (rune, int, bool) {
-	peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
-	return peeked, width, peeked != utf8.RuneError
-}
-
-// peekMulti takes a peek at multiple upcoming runes in the input.
-// Returns a slice of runes, their total width in bytes and a boolean.
-// The boolean will be false in case less runes can be peeked than
-// the requested amount (end of data or invalid UTF8 character).
-func (p *Parser) peekMulti(amount int) ([]rune, int, bool) {
-	width := 0
-	var peeked []rune
-	for i := 0; i < amount; i++ {
-		r, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
-		switch {
-		case r == utf8.RuneError:
-			return peeked, width, false
-		default:
-			width += w
-			peeked = append(peeked, r)
-		}
-	}
-	return peeked, width, true
-}
-
-// progress moves the cursor forward in the input, returning one rune
-// for every specified pattern. The cursor is only moved forward when
-// all patterns are satisfied.
-// Returns true when all patterns were satisfied and the cursor was
-// moved forward, false otherwise.
-// A callback function can be provided to specify what to do with
-// the runes that are encountered in the input.
-func (p *Parser) progress(callback func(rune), patterns ...string) bool {
-	if runes, w, ok := p.Match(patterns...); ok {
-		p.pos += w
-		for _, r := range runes {
-			callback(r)
-			p.advanceCursor(r)
-		}
-		return true
-	}
-	return false
-}
-
-// next returns the next rune from the input and a boolean indicating if
-// reading the input was successful.
-// When the end of input is reached, or an invalid UTF8 character is
-// read, then false is returned. Both are considered error cases,
-// and for that reason these automatically emit an error to the client.
-func (p *Parser) next() (rune, bool) {
-	r, w, ok := p.peek()
-	if ok {
-		p.pos += w
-		p.advanceCursor(r)
-		return r, true
-	}
-	if r == utf8.RuneError && w == 0 {
-		p.EmitError("unexpected end of file")
-	} else {
-		p.EmitError("invalid UTF8 character")
-	}
-	return r, false
-}
-
-// advanceCursor advances the rune cursor one position in the
-// input data. While doing so, it keeps tracks of newlines,
-// so we can report on row + column positions on error.
-func (p *Parser) advanceCursor(r rune) {
-	if p.newline {
-		p.cursorColumn = 0
-		p.cursorRow++
-	} else {
-		p.cursorColumn++
-	}
-	p.newline = r == '\n'
-}
@@ -1,15 +1,17 @@
-package lexer
+package parser
 
-import "github.com/mmakaay/toml/parser"
+import (
+	"github.com/mmakaay/toml/parsekit"
+)
 
 // A '#' hash symbol marks the rest of the line as a comment.
-func stateCommentStart(p *parser.Parser) parser.StateFn {
+func stateCommentStart(p *parsekit.P) parsekit.StateFn {
 	p.SkipConsecutive(hash)
 	return stateCommentContent
 }
 
 // All characters up to the end of the line are included in the comment.
-func stateCommentContent(p *parser.Parser) parser.StateFn {
+func stateCommentContent(p *parsekit.P) parsekit.StateFn {
 	switch {
 	case p.AtEndOfLine():
 		p.EmitLiteralTrim(ItemComment)
@@ -1,4 +1,4 @@
-package lexer_test
+package parser_test
 
 import (
 	"testing"
@@ -0,0 +1,12 @@
+package parser
+
+import "github.com/mmakaay/toml/parsekit"
+
+func stateEndOfFile(p *parsekit.P) parsekit.StateFn {
+	if p.AtEndOfFile() {
+		p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
+	} else {
+		p.UnexpectedInput("end of file")
+	}
+	return nil
+}
@@ -1,15 +1,15 @@
-package lexer
+package parser
 
-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"
 
 // The primary building block of a TOML document is the key/value pair.
-func stateKeyValuePair(l *parser.Parser) parser.StateFn {
+func stateKeyValuePair(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.SkipConsecutive(whitespace + carriageReturn + newline):
+	case p.SkipConsecutive(whitespace + carriageReturn + newline):
 		return stateKeyValuePair
-	case l.Upcoming(hash):
-		return l.ToChildState(stateCommentStart)
-	case l.Upcoming(startOfKey):
+	case p.Upcoming(hash):
+		return p.ToChildState(stateCommentStart)
+	case p.Upcoming(startOfKey):
 		return stateKey
 	default:
 		return stateEndOfFile
@@ -17,32 +17,32 @@ func stateKeyValuePair(l *parser.Parser) parser.StateFn {
 	}
 }
 
 // A key may be either bare, quoted or dotted.
-func stateKey(l *parser.Parser) parser.StateFn {
-	if l.AcceptMatching(bareKeyChars) {
+func stateKey(p *parsekit.P) parsekit.StateFn {
+	if p.AcceptMatching(bareKeyChars) {
 		return statebareKeyChars
 	}
-	return l.UnexpectedInputError("a valid key name")
+	return p.UnexpectedInput("a valid key name")
 }
 
 // Bare keys may only contain ASCII letters, ASCII digits,
 // underscores, and dashes (A-Za-z0-9_-). Note that bare
 // keys are allowed to be composed of only ASCII digits,
 // e.g. 1234, but are always interpreted as strings.
-func statebareKeyChars(l *parser.Parser) parser.StateFn {
-	l.AcceptConsecutive(bareKeyChars)
-	l.EmitLiteral(ItemKey)
+func statebareKeyChars(p *parsekit.P) parsekit.StateFn {
+	p.AcceptConsecutive(bareKeyChars)
+	p.EmitLiteral(ItemKey)
 	return stateEndOfKeyOrKeyDot
 }
 
 // Dotted keys are a sequence of bare or quoted keys joined with a dot.
 // This allows for grouping similar properties together:
-func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
+func stateEndOfKeyOrKeyDot(p *parsekit.P) parsekit.StateFn {
 	// Whitespace around dot-separated parts is ignored, however,
 	// best practice is to not use any extraneous whitespace.
-	l.SkipConsecutive(whitespace)
-	if l.SkipMatching(dot) {
-		l.Emit(ItemKeyDot, "")
-		l.SkipConsecutive(whitespace)
+	p.SkipConsecutive(whitespace)
+	if p.SkipMatching(dot) {
+		p.Emit(ItemKeyDot, "")
+		p.SkipConsecutive(whitespace)
 		return stateKey
 	}
 	return stateKeyAssignment
@@ -52,12 +52,12 @@ func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
 // Whitespace is ignored around key names and values. The key, equals
 // sign, and value must be on the same line (though some values can
 // be broken over multiple lines).
-func stateKeyAssignment(l *parser.Parser) parser.StateFn {
-	l.SkipConsecutive(whitespace)
-	if l.SkipMatching(equal) {
-		l.Emit(ItemAssignment, "")
-		l.SkipConsecutive(whitespace)
+func stateKeyAssignment(p *parsekit.P) parsekit.StateFn {
+	p.SkipConsecutive(whitespace)
+	if p.SkipMatching(equal) {
+		p.Emit(ItemAssignment, "")
+		p.SkipConsecutive(whitespace)
 		return stateValue
 	}
-	return l.UnexpectedInputError("a value assignment")
+	return p.UnexpectedInput("a value assignment")
 }
@@ -1,4 +1,4 @@
-package lexer_test
+package parser_test
 
 import (
 	"testing"
@@ -1,19 +1,19 @@
-package lexer
+package parser
 
-import "github.com/mmakaay/toml/parser"
+import "github.com/mmakaay/toml/parsekit"
 
 // There are four ways to express strings: basic, multi-line basic, literal,
 // and multi-line literal. All strings must contain only valid UTF-8 characters.
 // * Multi-line basic strings are surrounded by three quotation marks on each side.
 // * Basic strings are surrounded by quotation marks.
-func stateStringValue(l *parser.Parser) parser.StateFn {
+func stateStringValue(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.SkipMatching(doubleQuote3...):
+	case p.SkipMatching(doubleQuote3...):
 		return stateMultiLineBasicString
-	case l.SkipMatching(doubleQuote):
-		return l.QueueStates(stateParseString, stateBasicStringSpecific)
+	case p.SkipMatching(doubleQuote):
+		return p.QueueStates(stateParseString, stateBasicStringSpecific)
 	}
-	return l.UnexpectedInputError("a string value")
+	return p.UnexpectedInput("a string value")
 }
 
 // Specific handling of input for basic strings.
@@ -22,7 +22,7 @@ func stateStringValue(l *parser.Parser) parser.StateFn {
 // "All other escape sequences [..] are reserved and, if used, TOML should
 // produce an error.""
 
-func stateBasicStringSpecific(p *parser.Parser) parser.StateFn {
+func stateBasicStringSpecific(p *parsekit.P) parsekit.StateFn {
 	switch {
 	case p.SkipMatching(doubleQuote):
 		if err := p.EmitInterpreted(ItemString); err != nil {
@@ -36,8 +36,8 @@ func stateBasicStringSpecific(p *parser.Parser) parser.StateFn {
 	}
 }
 
-func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
-	l.EmitError("Not yet implemented")
+func stateMultiLineBasicString(p *parsekit.P) parsekit.StateFn {
+	p.EmitError("Not yet implemented")
 	return nil
 }
 
@@ -50,11 +50,11 @@ const invalidBasicStringCharacters string = "\"\\" +
 	"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
 	"\u007F"
 
-func stateParseString(l *parser.Parser) parser.StateFn {
+func stateParseString(p *parsekit.P) parsekit.StateFn {
 	switch {
-	case l.AtEndOfFile():
-		return l.UnexpectedEndOfFile("basic string token")
-	case l.AcceptMatching(backslash, escapeChars):
+	case p.AtEndOfFile():
+		return p.UnexpectedEndOfFile("basic string token")
+	case p.AcceptMatching(backslash, escapeChars):
 		// For convenience, some popular characters have a compact escape sequence.
 		// \b - backspace (U+0008)
 		// \t - tab (U+0009)
@@ -63,22 +63,22 @@ func stateParseString(l *parser.Parser) parser.StateFn {
 		// \r - carriage return (U+000D)
 		// \" - quote (U+0022)
 		// \\ - backslash (U+005C)
-	case l.AcceptMatching(shortUtf8Match...):
+	case p.AcceptMatching(shortUtf8Match...):
 		// \uXXXX - unicode (U+XXXX)
-	case l.AcceptMatching(longUtf8Match...):
+	case p.AcceptMatching(longUtf8Match...):
 		// \UXXXXXXXX - unicode (U+XXXXXXXX)
-	case l.Upcoming(backslash) || l.Upcoming(doubleQuote):
+	case p.Upcoming(backslash) || p.Upcoming(doubleQuote):
 		// Returning to the parent state to have special cases handled,
 		// because there are differences between single and multi line strings.
-		return l.ToParentState()
-	case l.Upcoming(invalidBasicStringCharacters):
+		return p.ToParentState()
+	case p.Upcoming(invalidBasicStringCharacters):
 		// Any Unicode character may be used except those that must be escaped:
 		// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
-		r, _, _ := l.Match(invalidBasicStringCharacters)
-		l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
+		r, _, _ := p.Match(invalidBasicStringCharacters)
+		p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
 		return nil
 	default:
-		l.AcceptAny()
+		p.AcceptAny()
 	}
 	return stateParseString
 }
@@ -1,4 +1,4 @@
-package lexer_test
+package parser_test
 
 import (
 	"fmt"
@@ -0,0 +1,13 @@
+package parser
+
+import "github.com/mmakaay/toml/parsekit"
+
+// Values must be of the following types: String, Integer, Float, Boolean,
+// Datetime, Array, or Inline Table. Unspecified values are invalid.
+func stateValue(p *parsekit.P) parsekit.StateFn {
+	p.SkipConsecutive(whitespace)
+	if p.Upcoming(quoteChars) {
+		return stateStringValue
+	}
+	return p.UnexpectedInput("a value")
+}
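For completeness, the client-facing side of toml/parser is unchanged by the rename: callers still pull items with Next() or ToArray(), as the tests above do. A minimal, hypothetical usage sketch follows (the input string and output formatting are invented for illustration, not taken from the repository):

```go
package main

import (
	"fmt"

	"github.com/mmakaay/toml/parser"
)

func main() {
	// ToArray drains the parser and returns the collected items,
	// plus a non-nil *Error when parsing failed somewhere.
	items, err := parser.NewParser("answer = \"42\"").ToArray()
	if err != nil {
		fmt.Printf("parse error on row %d: %s\n", err.Row, err.Error())
		return
	}
	for _, item := range items {
		fmt.Printf("item type %d: %q\n", item.Type, item.Value)
	}
}
```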