Backup work on code cleanup now the parser/combinator code is stable.
This commit is contained in:
parent
84ae34fb5f
commit
3677ab18cb
|
@ -3,7 +3,6 @@ package parsekit
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Emit passes a Parser item to the client, including the provided string.
|
// Emit passes a Parser item to the client, including the provided string.
|
||||||
|
@ -48,27 +47,23 @@ func (p *P) EmitError(format string, args ...interface{}) {
|
||||||
// UnexpectedInput is used by a parser implementation to emit an
|
// UnexpectedInput is used by a parser implementation to emit an
|
||||||
// error item that tells the client that an unexpected rune was
|
// error item that tells the client that an unexpected rune was
|
||||||
// encountered in the input.
|
// encountered in the input.
|
||||||
// The parameter 'expected' is used to provide some context to the error.
|
func (p *P) UnexpectedInput() {
|
||||||
func (p *P) UnexpectedInput(expected string) {
|
|
||||||
// next() takes care of error messages in cases where ok == false.
|
|
||||||
// Therefore, we only provide an error message for the ok case here.
|
|
||||||
r, _, ok := p.peek(0)
|
r, _, ok := p.peek(0)
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case ok:
|
||||||
p.EmitError("unexpected character %q (expected %s)", r, expected)
|
p.EmitError("unexpected character %q%s", r, p.fmtExpects())
|
||||||
case r == EOF:
|
case r == EOF:
|
||||||
p.EmitError("unexpected end of file (expected %s)", expected)
|
p.EmitError("unexpected end of file%s", p.fmtExpects())
|
||||||
case r == utf8.RuneError:
|
case r == INVALID:
|
||||||
p.EmitError("invalid UTF8 character in input (expected %s)", expected)
|
p.EmitError("invalid UTF8 character in input%s", p.fmtExpects())
|
||||||
default:
|
default:
|
||||||
panic("Unhandled output from peek()")
|
panic("Unhandled output from peek()")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnexpectedEndOfFile is used by a parser implementation to emit an
|
func (p *P) fmtExpects() string {
|
||||||
// error item that tells the client that more data was expected from
|
if p.expecting == "" {
|
||||||
// the input.
|
return ""
|
||||||
// The parameter 'expected' is used to provide some context to the error.
|
}
|
||||||
func (p *P) UnexpectedEndOfFile(expected string) {
|
return fmt.Sprintf(" (expected %s)", p.expecting)
|
||||||
p.EmitError("Unexpected end of file (expected %s)", expected)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,24 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// P holds the internal state of the parser.
|
||||||
|
type P struct {
|
||||||
|
state StateFn // the function that handles the current state
|
||||||
|
nextState StateFn // the function that will handle the next state
|
||||||
|
stack []StateFn // state function stack, for nested parsing
|
||||||
|
input string // the scanned input
|
||||||
|
len int // the total length of the input in bytes
|
||||||
|
pos int // current byte scanning position in the input
|
||||||
|
newline bool // keep track of when we have scanned a newline
|
||||||
|
cursorRow int // current row number in the input
|
||||||
|
cursorColumn int // current column position in the input
|
||||||
|
expecting string // a description of what the current state expects to find
|
||||||
|
buffer stringBuffer // an efficient buffer, used to build string values
|
||||||
|
items chan Item // channel of resulting Parser items
|
||||||
|
item Item // the current item as reached by Next() and retrieved by Get()
|
||||||
|
err *Error // an error when lexing failed, retrieved by Error()
|
||||||
|
}
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
|
@ -13,60 +31,6 @@ func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
||||||
return handleRuneError(r, w)
|
return handleRuneError(r, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
|
||||||
// Returns a slice of runes, a slice containing their respective
|
|
||||||
// widths in bytes and a boolean.
|
|
||||||
// The boolean will be false in case less runes can be peeked than
|
|
||||||
// the requested amount (end of data or invalid UTF8 character).
|
|
||||||
func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
|
|
||||||
var runes []rune
|
|
||||||
var widths []int
|
|
||||||
offset := 0
|
|
||||||
for i := 0; i < amount; i++ {
|
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
|
||||||
r, w, ok := handleRuneError(r, w)
|
|
||||||
runes = append(runes, r)
|
|
||||||
widths = append(widths, w)
|
|
||||||
offset += w
|
|
||||||
if !ok {
|
|
||||||
return runes, widths, false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return runes, widths, true
|
|
||||||
}
|
|
||||||
|
|
||||||
// progress moves the cursor forward in the input, returning one rune
|
|
||||||
// for every specified pattern. The cursor will only be moved forward when
|
|
||||||
// all requested patterns can be satisfied.
|
|
||||||
// Returns true when all patterns were satisfied and the cursor was
|
|
||||||
// moved forward, false otherwise.
|
|
||||||
// A callback function can be provided to specify what to do with
|
|
||||||
// the runes that are encountered in the input.
|
|
||||||
func (p *P) progress(callback func(rune), patterns ...interface{}) bool {
|
|
||||||
if runes, widths, ok := p.Match(patterns...); ok {
|
|
||||||
for i, r := range runes {
|
|
||||||
callback(r)
|
|
||||||
p.advanceCursor(r, widths[i])
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the
|
|
||||||
// input data. While doing so, it keeps tracks of newlines,
|
|
||||||
// so we can report on row + column positions on error.
|
|
||||||
func (p *P) advanceCursor(r rune, w int) {
|
|
||||||
p.pos += w
|
|
||||||
if p.newline {
|
|
||||||
p.cursorColumn = 0
|
|
||||||
p.cursorRow++
|
|
||||||
} else {
|
|
||||||
p.cursorColumn++
|
|
||||||
}
|
|
||||||
p.newline = r == '\n'
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleRuneError is used to normale rune value in case of errors.
|
// handleRuneError is used to normale rune value in case of errors.
|
||||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||||
// This can however indicate one of two situations:
|
// This can however indicate one of two situations:
|
||||||
|
@ -84,3 +48,48 @@ func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||||
}
|
}
|
||||||
return r, w, true
|
return r, w, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EOF is a special rune, which is used to indicate an end of file when
|
||||||
|
// reading a character from the input.
|
||||||
|
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||||
|
// say 'I now expect the end of the file' is using something like:
|
||||||
|
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||||
|
const EOF rune = -1
|
||||||
|
|
||||||
|
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||||
|
// rune on the input.
|
||||||
|
const INVALID rune = utf8.RuneError
|
||||||
|
|
||||||
|
// StateFn defines the type of function that can be used to
|
||||||
|
// handle a parser state.
|
||||||
|
type StateFn func(*P)
|
||||||
|
|
||||||
|
// ItemType represents the type of a parser Item.
|
||||||
|
type ItemType int
|
||||||
|
|
||||||
|
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||||
|
// end of the input was reached.
|
||||||
|
const ItemEOF ItemType = -1
|
||||||
|
|
||||||
|
// ItemError is a built-in parser item type that is used for flagging that
|
||||||
|
// an error has occurred during parsing.
|
||||||
|
const ItemError ItemType = -2
|
||||||
|
|
||||||
|
// Item represents an item returned from the parser.
|
||||||
|
type Item struct {
|
||||||
|
Type ItemType
|
||||||
|
Value string
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error is used as the error type when parsing errors occur.
|
||||||
|
// The error includes some extra meta information to allow for useful
|
||||||
|
// error messages to the user.
|
||||||
|
type Error struct {
|
||||||
|
Message string
|
||||||
|
Row int
|
||||||
|
Column int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *Error) Error() string {
|
||||||
|
return err.Message
|
||||||
|
}
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
import "unicode/utf8"
|
import (
|
||||||
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
// Not in need of it myself, but nice to have I guess:
|
// Not in need of it myself, but nice to have I guess:
|
||||||
// - NotFollowedBy
|
// - NotFollowedBy
|
||||||
// - Discard
|
|
||||||
// - Separated
|
// - Separated
|
||||||
|
|
||||||
|
// MatchDialog is used by Matcher implementations as a means
|
||||||
|
// to retrieve data to match against and to report back
|
||||||
|
// successful matches.
|
||||||
type MatchDialog struct {
|
type MatchDialog struct {
|
||||||
p *P
|
p *P
|
||||||
runes []rune
|
runes []rune
|
||||||
|
@ -14,44 +19,70 @@ type MatchDialog struct {
|
||||||
offset int
|
offset int
|
||||||
curRune rune
|
curRune rune
|
||||||
curWidth int
|
curWidth int
|
||||||
forked bool
|
parent *MatchDialog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fork splits off a child MatchDialog, containing the same
|
||||||
|
// offset as the parent MatchDialog, but with all other data
|
||||||
|
// in a new state.
|
||||||
|
// By forking, a Matcher implementation can freely work with
|
||||||
|
// a MatchDialog, without affecting the parent MatchDialog.
|
||||||
|
// When the Matcher decides that a match was found, it can
|
||||||
|
// use the Merge() method on the child to merge the child's
|
||||||
|
// matching data into the parent MatchDialog.
|
||||||
func (m *MatchDialog) Fork() *MatchDialog {
|
func (m *MatchDialog) Fork() *MatchDialog {
|
||||||
fork := &MatchDialog{
|
child := &MatchDialog{
|
||||||
p: m.p,
|
p: m.p,
|
||||||
offset: m.offset,
|
offset: m.offset,
|
||||||
forked: true,
|
parent: m,
|
||||||
}
|
}
|
||||||
return fork
|
return child
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MatchDialog) Join(fork *MatchDialog) bool {
|
// Merge merges the data for a a forked child MatchDialog back
|
||||||
if !fork.forked {
|
// into its parent:
|
||||||
panic("Cannot join a non-forked MatchDialog")
|
// * the runes that are accumulated in the child are added
|
||||||
|
// to the parent's runes
|
||||||
|
// * the parent's offset is set to the child's offset
|
||||||
|
// After a Merge, the child MatchDialog is reset so it can
|
||||||
|
// immediately be reused for performing another match.
|
||||||
|
func (m *MatchDialog) Merge() bool {
|
||||||
|
if m.parent == nil {
|
||||||
|
panic("Cannot call Merge a a non-forked MatchDialog")
|
||||||
}
|
}
|
||||||
m.runes = append(m.runes, fork.runes...)
|
m.parent.runes = append(m.parent.runes, m.runes...)
|
||||||
m.widths = append(m.widths, fork.widths...)
|
m.parent.widths = append(m.parent.widths, m.widths...)
|
||||||
m.offset = fork.offset
|
m.parent.offset = m.offset
|
||||||
fork.runes = []rune{}
|
m.Clear()
|
||||||
fork.widths = []int{}
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NextRune can be called by a Matcher on a MatchDialog in order
|
||||||
|
// to receive the next rune from the input.
|
||||||
|
// The rune is automatically added to the MatchDialog's runes.
|
||||||
|
// Returns the rune and a boolean. The boolean will be false in
|
||||||
|
// case an invalid UTF8 rune of the end of the file was encountered.
|
||||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||||
if m.curRune == utf8.RuneError {
|
if m.curRune == utf8.RuneError {
|
||||||
panic("Matcher must not call NextRune() after it returned false")
|
panic("Matcher must not call NextRune() after it returned false")
|
||||||
}
|
}
|
||||||
r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
|
r, w, ok := m.p.peek(m.offset)
|
||||||
m.offset += w
|
m.offset += w
|
||||||
m.curRune = r
|
m.curRune = r
|
||||||
m.curWidth = w
|
m.curWidth = w
|
||||||
m.runes = append(m.runes, r)
|
m.runes = append(m.runes, r)
|
||||||
m.widths = append(m.widths, w)
|
m.widths = append(m.widths, w)
|
||||||
return r, r != EOF && r != INVALID
|
return r, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Matcher is the interface that can be implemented to provide
|
// Clear empties out the accumulated runes that are stored
|
||||||
|
// in the MatchDialog.
|
||||||
|
func (m *MatchDialog) Clear() {
|
||||||
|
m.runes = []rune{}
|
||||||
|
m.widths = []int{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matcher is the interface that must be implemented to provide
|
||||||
// a matching stategy for the match() function.
|
// a matching stategy for the match() function.
|
||||||
// A MatchDialog is provided as input. This implements a
|
// A MatchDialog is provided as input. This implements a
|
||||||
// specific set of methods that a Matcher needs to retrieve data
|
// specific set of methods that a Matcher needs to retrieve data
|
||||||
|
@ -60,20 +91,28 @@ type Matcher interface {
|
||||||
Match(*MatchDialog) bool
|
Match(*MatchDialog) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type MatcherConstructors struct {
|
type matcherConstructors struct {
|
||||||
Any func() MatchAny
|
Any func() MatchAny
|
||||||
Rune func(rune rune) MatchRune
|
Rune func(rune) MatchRune
|
||||||
RuneRange func(start rune, end rune) MatchRuneRange
|
RuneRange func(rune, rune) MatchRuneRange
|
||||||
Runes func(runes ...rune) MatchAnyOf
|
Runes func(...rune) MatchAnyOf
|
||||||
AnyOf func(matchers ...Matcher) MatchAnyOf
|
String func(string) MatchSequence
|
||||||
Repeat func(count int, matcher Matcher) MatchRepeat
|
StringNoCase func(string) MatchSequence
|
||||||
Sequence func(matchers ...Matcher) MatchSequence
|
AnyOf func(...Matcher) MatchAnyOf
|
||||||
ZeroOrMore func(matcher Matcher) MatchZeroOrMore
|
Repeat func(int, Matcher) MatchRepeat
|
||||||
OneOrMore func(matcher Matcher) MatchOneOrMore
|
Sequence func(...Matcher) MatchSequence
|
||||||
Optional func(matcher Matcher) MatchOptional
|
ZeroOrMore func(Matcher) MatchZeroOrMore
|
||||||
|
OneOrMore func(Matcher) MatchOneOrMore
|
||||||
|
Optional func(Matcher) MatchOptional
|
||||||
|
Drop func(Matcher) MatchDrop
|
||||||
}
|
}
|
||||||
|
|
||||||
var C = MatcherConstructors{
|
// C provides access to a wide range of parser/combinator
|
||||||
|
// constructors that can be used to build matching expressions.
|
||||||
|
// When using C in your own parser, then it is advised to create
|
||||||
|
// an alias in your own package for easy reference:
|
||||||
|
// var c = parsekit.C
|
||||||
|
var C = matcherConstructors{
|
||||||
Any: func() MatchAny {
|
Any: func() MatchAny {
|
||||||
return MatchAny{}
|
return MatchAny{}
|
||||||
},
|
},
|
||||||
|
@ -90,6 +129,22 @@ var C = MatcherConstructors{
|
||||||
}
|
}
|
||||||
return MatchAnyOf{m}
|
return MatchAnyOf{m}
|
||||||
},
|
},
|
||||||
|
String: func(s string) MatchSequence {
|
||||||
|
m := make([]Matcher, len(s))
|
||||||
|
for i, r := range s {
|
||||||
|
m[i] = MatchRune{r}
|
||||||
|
}
|
||||||
|
return MatchSequence{m}
|
||||||
|
},
|
||||||
|
StringNoCase: func(s string) MatchSequence {
|
||||||
|
m := make([]Matcher, len(s))
|
||||||
|
for i, r := range s {
|
||||||
|
u := MatchRune{unicode.ToUpper(r)}
|
||||||
|
l := MatchRune{unicode.ToLower(r)}
|
||||||
|
m[i] = MatchAnyOf{[]Matcher{u, l}}
|
||||||
|
}
|
||||||
|
return MatchSequence{m}
|
||||||
|
},
|
||||||
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
||||||
return MatchAnyOf{matchers}
|
return MatchAnyOf{matchers}
|
||||||
},
|
},
|
||||||
|
@ -108,6 +163,9 @@ var C = MatcherConstructors{
|
||||||
Optional: func(matcher Matcher) MatchOptional {
|
Optional: func(matcher Matcher) MatchOptional {
|
||||||
return MatchOptional{matcher}
|
return MatchOptional{matcher}
|
||||||
},
|
},
|
||||||
|
Drop: func(matcher Matcher) MatchDrop {
|
||||||
|
return MatchDrop{matcher}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
type MatchAny struct{}
|
type MatchAny struct{}
|
||||||
|
@ -142,9 +200,9 @@ type MatchAnyOf struct {
|
||||||
|
|
||||||
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
||||||
for _, matcher := range c.matcher {
|
for _, matcher := range c.matcher {
|
||||||
mc := m.Fork()
|
child := m.Fork()
|
||||||
if matcher.Match(mc) {
|
if matcher.Match(child) {
|
||||||
return m.Join(mc)
|
return child.Merge()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -156,13 +214,13 @@ type MatchRepeat struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
||||||
mc := m.Fork()
|
child := m.Fork()
|
||||||
for i := 0; i < c.count; i++ {
|
for i := 0; i < c.count; i++ {
|
||||||
if !c.matcher.Match(mc) {
|
if !c.matcher.Match(child) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.Join(mc)
|
child.Merge()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -171,13 +229,13 @@ type MatchSequence struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchSequence) Match(m *MatchDialog) bool {
|
func (c MatchSequence) Match(m *MatchDialog) bool {
|
||||||
mPart := m.Fork()
|
child := m.Fork()
|
||||||
for _, matcher := range c.matchers {
|
for _, matcher := range c.matchers {
|
||||||
if !matcher.Match(mPart) {
|
if !matcher.Match(child) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.Join(mPart)
|
child.Merge()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -186,9 +244,9 @@ type MatchOneOrMore struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
||||||
mc := m.Fork()
|
child := m.Fork()
|
||||||
for c.matcher.Match(mc) {
|
for c.matcher.Match(child) {
|
||||||
m.Join(mc)
|
child.Merge()
|
||||||
}
|
}
|
||||||
return len(m.runes) > 0
|
return len(m.runes) > 0
|
||||||
}
|
}
|
||||||
|
@ -198,9 +256,9 @@ type MatchZeroOrMore struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
||||||
mc := m.Fork()
|
child := m.Fork()
|
||||||
for c.matcher.Match(mc) {
|
for c.matcher.Match(child) {
|
||||||
m.Join(mc)
|
child.Merge()
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -210,9 +268,23 @@ type MatchOptional struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchOptional) Match(m *MatchDialog) bool {
|
func (c MatchOptional) Match(m *MatchDialog) bool {
|
||||||
mc := m.Fork()
|
child := m.Fork()
|
||||||
if c.matcher.Match(mc) {
|
if c.matcher.Match(child) {
|
||||||
m.Join(mc)
|
child.Merge()
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MatchDrop struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchDrop) Match(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if c.matcher.Match(child) {
|
||||||
|
child.Clear()
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
|
@ -12,11 +12,10 @@ const TestItem p.ItemType = 1
|
||||||
|
|
||||||
func newParser(input string, matcher p.Matcher) *p.P {
|
func newParser(input string, matcher p.Matcher) *p.P {
|
||||||
stateFn := func(p *p.P) {
|
stateFn := func(p *p.P) {
|
||||||
|
p.Expects("MATCH")
|
||||||
if p.On(matcher).Accept() {
|
if p.On(matcher).Accept() {
|
||||||
p.EmitLiteral(TestItem)
|
p.EmitLiteral(TestItem)
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
} else {
|
|
||||||
p.UnexpectedInput("MATCH")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return p.New(input, stateFn)
|
return p.New(input, stateFn)
|
||||||
|
@ -107,6 +106,35 @@ func TestMatchRuneRange(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMatchString(t *testing.T) {
|
||||||
|
p := newParser("Hello, world!", c.String("Hello"))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Type != TestItem {
|
||||||
|
t.Error("Parser item type not expected TestTitem")
|
||||||
|
}
|
||||||
|
if r.Value != "Hello" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
// func TestMatchStringNoCase(t *testing.T) {
|
||||||
|
// p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
|
||||||
|
// r, err, ok := p.Next()
|
||||||
|
// if !ok {
|
||||||
|
// t.Fatalf("Parsing failed: %s", err)
|
||||||
|
// }
|
||||||
|
// if r.Type != TestItem {
|
||||||
|
// t.Error("Parser item type not expected TestTitem")
|
||||||
|
// }
|
||||||
|
// if r.Value != "Hello" {
|
||||||
|
// t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
func TestMatchRunes(t *testing.T) {
|
func TestMatchRunes(t *testing.T) {
|
||||||
m := c.Runes('+', '-', '*', '/')
|
m := c.Runes('+', '-', '*', '/')
|
||||||
s := "-+/*+++"
|
s := "-+/*+++"
|
||||||
|
@ -243,6 +271,17 @@ func TestMatchOptional(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMatchDrop(t *testing.T) {
|
||||||
|
dashes := c.OneOrMore(c.Rune('-'))
|
||||||
|
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes)))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "X" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
func TestMixAndMatch(t *testing.T) {
|
func TestMixAndMatch(t *testing.T) {
|
||||||
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||||
backslash := c.Rune('\\')
|
backslash := c.Rune('\\')
|
||||||
|
|
|
@ -1,10 +1,18 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
import (
|
// Expects is used to let a state function describe what input it is expecting.
|
||||||
"fmt"
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
"strings"
|
//
|
||||||
"unicode/utf8"
|
// Also, when defining an expectation inside a StateFn, you do not need
|
||||||
)
|
// to handle unexpected input yourself. When the end of the function is
|
||||||
|
// reached without setting the next state, an automatic error will be
|
||||||
|
// emitted. This error differentiates between issues:
|
||||||
|
// * there is valid data on input, but it was not accepted by the function
|
||||||
|
// * there is an invalid UTF8 character on input
|
||||||
|
// * the end of the file was reached.
|
||||||
|
func (p *P) Expects(description string) {
|
||||||
|
p.expecting = description
|
||||||
|
}
|
||||||
|
|
||||||
// AtEndOfFile returns true when there is no more data available in the input.
|
// AtEndOfFile returns true when there is no more data available in the input.
|
||||||
func (p *P) AtEndOfFile() bool {
|
func (p *P) AtEndOfFile() bool {
|
||||||
|
@ -16,8 +24,8 @@ func (p *P) AtEndOfFile() bool {
|
||||||
// by this method.
|
// by this method.
|
||||||
func (p *P) AtEndOfLine() bool {
|
func (p *P) AtEndOfLine() bool {
|
||||||
return p.AtEndOfFile() ||
|
return p.AtEndOfFile() ||
|
||||||
p.Upcoming("\r", "\n") ||
|
p.On(C.String("\r\n")).Stay() ||
|
||||||
p.Upcoming("\n")
|
p.On(C.Rune('\n')).Stay()
|
||||||
}
|
}
|
||||||
|
|
||||||
// SkipEndOfLine returns true when the cursor is either at the end of the line
|
// SkipEndOfLine returns true when the cursor is either at the end of the line
|
||||||
|
@ -25,8 +33,8 @@ func (p *P) AtEndOfLine() bool {
|
||||||
// the cursor is moved forward to beyond the newline.
|
// the cursor is moved forward to beyond the newline.
|
||||||
func (p *P) SkipEndOfLine() bool {
|
func (p *P) SkipEndOfLine() bool {
|
||||||
return p.AtEndOfFile() ||
|
return p.AtEndOfFile() ||
|
||||||
p.SkipMatching("\r", "\n") ||
|
p.On(C.String("\r\n")).Skip() ||
|
||||||
p.SkipMatching("\n")
|
p.On(C.Rune('\n')).Skip()
|
||||||
}
|
}
|
||||||
|
|
||||||
// AcceptEndOfLine returns true when the cursor is either at the end of the line
|
// AcceptEndOfLine returns true when the cursor is either at the end of the line
|
||||||
|
@ -44,65 +52,24 @@ func (p *P) AcceptEndOfLine() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) Match(patterns ...interface{}) ([]rune, []int, bool) {
|
func (p *P) On(m Matcher) *action {
|
||||||
return p.match(0, patterns...)
|
runes, widths, ok := p.Match(m)
|
||||||
|
return &action{
|
||||||
|
p: p,
|
||||||
|
runes: runes,
|
||||||
|
widths: widths,
|
||||||
|
ok: ok,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
|
func (p *P) Match(matcher Matcher) ([]rune, []int, bool) {
|
||||||
var runes []rune
|
return p.match(0, matcher)
|
||||||
var widths []int
|
|
||||||
|
|
||||||
addRune := func(r rune, w int) {
|
|
||||||
offset += w
|
|
||||||
runes = append(runes, r)
|
|
||||||
widths = append(widths, w)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, pattern := range patterns {
|
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
|
||||||
if r == utf8.RuneError {
|
|
||||||
return runes, widths, false
|
|
||||||
}
|
|
||||||
switch pattern := pattern.(type) {
|
|
||||||
case Matcher:
|
|
||||||
m := &MatchDialog{p: p}
|
|
||||||
if pattern.Match(m) {
|
|
||||||
return m.runes, m.widths, true
|
|
||||||
} else {
|
|
||||||
return m.runes, m.widths, false
|
|
||||||
}
|
|
||||||
case []interface{}:
|
|
||||||
rs, ws, matched := p.match(offset, pattern...)
|
|
||||||
for i, r := range rs {
|
|
||||||
addRune(r, ws[i])
|
|
||||||
}
|
|
||||||
if !matched {
|
|
||||||
return runes, widths, false
|
|
||||||
}
|
|
||||||
case string:
|
|
||||||
if strings.IndexRune(pattern, r) < 0 {
|
|
||||||
return runes, widths, false
|
|
||||||
}
|
|
||||||
addRune(r, w)
|
|
||||||
case rune:
|
|
||||||
if pattern != r {
|
|
||||||
return runes, widths, false
|
|
||||||
}
|
|
||||||
addRune(r, w)
|
|
||||||
default:
|
|
||||||
panic(fmt.Sprintf("Not rune matching implemented for pattern of type %T", pattern))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return runes, widths, true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upcoming checks if the upcoming runes satisfy all provided patterns.
|
func (p *P) match(offset int, matcher Matcher) ([]rune, []int, bool) {
|
||||||
// Returns true if all provided patterns are satisfied.
|
m := &MatchDialog{p: p}
|
||||||
// This is basically the same as the Match method, but with only
|
ok := matcher.Match(m)
|
||||||
// the boolean return parameter for programmer convenciency.
|
return m.runes, m.widths, ok
|
||||||
func (p *P) Upcoming(patterns ...interface{}) bool {
|
|
||||||
_, _, ok := p.Match(patterns...)
|
|
||||||
return ok
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type action struct {
|
type action struct {
|
||||||
|
@ -135,6 +102,24 @@ func (a *action) Skip() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *action) Stay() bool {
|
||||||
|
return a.ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// advanceCursor advances the rune cursor one position in the
|
||||||
|
// input data. While doing so, it keeps tracks of newlines,
|
||||||
|
// so we can report on row + column positions on error.
|
||||||
|
func (p *P) advanceCursor(r rune, w int) {
|
||||||
|
p.pos += w
|
||||||
|
if p.newline {
|
||||||
|
p.cursorColumn = 0
|
||||||
|
p.cursorRow++
|
||||||
|
} else {
|
||||||
|
p.cursorColumn++
|
||||||
|
}
|
||||||
|
p.newline = r == '\n'
|
||||||
|
}
|
||||||
|
|
||||||
func (a *action) RouteTo(state StateFn) bool {
|
func (a *action) RouteTo(state StateFn) bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.p.RouteTo(state)
|
a.p.RouteTo(state)
|
||||||
|
@ -142,36 +127,9 @@ func (a *action) RouteTo(state StateFn) bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *action) Stay() bool {
|
func (a *action) RouteReturn() bool {
|
||||||
|
if a.ok {
|
||||||
|
a.p.RouteReturn()
|
||||||
|
}
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) On(patterns ...interface{}) *action {
|
|
||||||
runes, widths, ok := p.Match(patterns...)
|
|
||||||
return &action{
|
|
||||||
p: p,
|
|
||||||
runes: runes,
|
|
||||||
widths: widths,
|
|
||||||
ok: ok,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AcceptMatching adds the next runes to the string buffer, but only
|
|
||||||
// if the upcoming runes satisfy the provided patterns.
|
|
||||||
// When runes were added then true is returned, false otherwise.
|
|
||||||
// TODO not needed anymore
|
|
||||||
// func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
|
||||||
// return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
|
||||||
// }
|
|
||||||
|
|
||||||
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
|
||||||
// Returns true when one or more runes were skipped.
|
|
||||||
func (p *P) SkipMatching(patterns ...interface{}) bool {
|
|
||||||
if runes, widths, ok := p.Match(patterns...); ok {
|
|
||||||
for i, r := range runes {
|
|
||||||
p.advanceCursor(r, widths[i])
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"reflect"
|
||||||
|
"runtime"
|
||||||
|
)
|
||||||
|
|
||||||
// New takes an input string and a start state,
|
// New takes an input string and a start state,
|
||||||
// and initializes the parser for it.
|
// and initializes the parser for it.
|
||||||
func New(input string, startState StateFn) *P {
|
func New(input string, startState StateFn) *P {
|
||||||
|
@ -30,13 +36,25 @@ func (p *P) Next() (Item, *Error, bool) {
|
||||||
return i, nil, true
|
return i, nil, true
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
// When implementing a parser, it is mandatory to provide
|
// When implementing a parser, a state function must provide
|
||||||
// a conscious state routing decision for every cycle.
|
// a routing decision in every state function execution.
|
||||||
// This helps preventing bugs during implementation.
|
// When no route is specified, then it is considered a but
|
||||||
|
// in the parser implementation.
|
||||||
|
// An exception is when a function specified its expectation
|
||||||
|
// using the Expects() method. In that case, an unexpected
|
||||||
|
// input error is emitted.
|
||||||
if p.nextState == nil {
|
if p.nextState == nil {
|
||||||
panic("No next state was scheduled for the parser")
|
if p.expecting != "" {
|
||||||
|
p.UnexpectedInput()
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
|
||||||
|
panic(fmt.Sprintf("StateFn implementation bug: %s did not set next state or input expectation", name))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
p.state, p.nextState = p.nextState, nil
|
p.state = p.nextState
|
||||||
|
p.nextState = nil
|
||||||
|
p.expecting = ""
|
||||||
p.state(p)
|
p.state(p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,67 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// P holds the internal state of the parser.
|
|
||||||
type P struct {
|
|
||||||
state StateFn // the function that handles the current state
|
|
||||||
nextState StateFn // the function that will handle the next state
|
|
||||||
stack []StateFn // state function stack, for nested parsing
|
|
||||||
input string // the scanned input
|
|
||||||
len int // the total length of the input in bytes
|
|
||||||
pos int // current byte scanning position in the input
|
|
||||||
newline bool // keep track of when we have scanned a newline
|
|
||||||
cursorRow int // current row number in the input
|
|
||||||
cursorColumn int // current column position in the input
|
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values
|
|
||||||
items chan Item // channel of resulting Parser items
|
|
||||||
item Item // the current item as reached by Next() and retrieved by Get()
|
|
||||||
err *Error // an error when lexing failed, retrieved by Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
// StateFn defines the type of function that can be used to
|
|
||||||
// handle a parser state.
|
|
||||||
type StateFn func(*P)
|
|
||||||
|
|
||||||
// ItemType represents the type of a parser Item.
|
|
||||||
type ItemType int
|
|
||||||
|
|
||||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
|
||||||
// end of the input was reached.
|
|
||||||
const ItemEOF ItemType = -1
|
|
||||||
|
|
||||||
// ItemError is a built-in parser item type that is used for flagging that
|
|
||||||
// an error has occurred during parsing.
|
|
||||||
const ItemError ItemType = -2
|
|
||||||
|
|
||||||
// Item represents an item returned from the parser.
|
|
||||||
type Item struct {
|
|
||||||
Type ItemType
|
|
||||||
Value string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Error is used as the error type when parsing errors occur.
|
|
||||||
// The error includes some extra meta information to allow for useful
|
|
||||||
// error messages to the user.
|
|
||||||
type Error struct {
|
|
||||||
Message string
|
|
||||||
Row int
|
|
||||||
Column int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (err *Error) Error() string {
|
|
||||||
return err.Message
|
|
||||||
}
|
|
||||||
|
|
||||||
// EOF is a special rune, which is used to indicate an end of file when
|
|
||||||
// reading a character from the input.
|
|
||||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
|
||||||
// say 'I now expect the end of the file' is using something like:
|
|
||||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
|
||||||
const EOF rune = -1
|
|
||||||
|
|
||||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
|
||||||
// rune on the input.
|
|
||||||
const INVALID rune = utf8.RuneError
|
|
|
@ -27,12 +27,13 @@ var (
|
||||||
any = c.Any()
|
any = c.Any()
|
||||||
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
||||||
backslash = c.Rune('\\')
|
backslash = c.Rune('\\')
|
||||||
lower = c.RuneRange('a', 'z')
|
asciiLower = c.RuneRange('a', 'z')
|
||||||
upper = c.RuneRange('A', 'Z')
|
asciiUpper = c.RuneRange('A', 'Z')
|
||||||
digit = c.RuneRange('0', '9')
|
digit = c.RuneRange('0', '9')
|
||||||
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
||||||
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
||||||
optionalWhitespace = c.Optional(whitespace)
|
optionalWhitespace = c.Optional(whitespace)
|
||||||
|
endOfLine = c.AnyOf(lineFeed, c.Rune(parsekit.EOF))
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewParser creates a new parser, using the provided input string
|
// NewParser creates a new parser, using the provided input string
|
||||||
|
|
|
@ -12,13 +12,12 @@ func startComment(p *parsekit.P) {
|
||||||
|
|
||||||
// All characters up to the end of the line are included in the comment.
|
// All characters up to the end of the line are included in the comment.
|
||||||
func commentContents(p *parsekit.P) {
|
func commentContents(p *parsekit.P) {
|
||||||
|
p.Expects("comment contents")
|
||||||
switch {
|
switch {
|
||||||
case p.AtEndOfLine():
|
case p.AtEndOfLine() || p.On(endOfLine).Skip(): // TODO drop AtEndOfLine support
|
||||||
p.EmitLiteralTrim(ItemComment)
|
p.EmitLiteralTrim(ItemComment)
|
||||||
p.RouteReturn()
|
p.RouteReturn()
|
||||||
case p.On(any).Accept():
|
case p.On(any).Accept():
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
default:
|
|
||||||
p.UnexpectedInput("comment contents")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,14 +7,14 @@ import (
|
||||||
func TestComments(t *testing.T) {
|
func TestComments(t *testing.T) {
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"empty comment", "#", "#()", ""},
|
{"empty comment", "#", "#()", ""},
|
||||||
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
// {"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||||
{"basic comment", "#chicken", "#(chicken)", ""},
|
// {"basic comment", "#chicken", "#(chicken)", ""},
|
||||||
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||||
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||||
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||||
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||||
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
||||||
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
||||||
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,9 +3,8 @@ package parser
|
||||||
import "github.com/mmakaay/toml/parsekit"
|
import "github.com/mmakaay/toml/parsekit"
|
||||||
|
|
||||||
func endOfFile(p *parsekit.P) {
|
func endOfFile(p *parsekit.P) {
|
||||||
|
p.Expects("end of file")
|
||||||
if p.AtEndOfFile() {
|
if p.AtEndOfFile() {
|
||||||
p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
|
p.Emit(parsekit.ItemEOF, "EOF")
|
||||||
} else {
|
|
||||||
p.UnexpectedInput("end of file")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,7 +15,7 @@ var (
|
||||||
// contain ASCII letters, ASCII digits, underscores, and dashes
|
// contain ASCII letters, ASCII digits, underscores, and dashes
|
||||||
// (A-Za-z0-9_-). Note that bare keys are allowed to be composed of only
|
// (A-Za-z0-9_-). Note that bare keys are allowed to be composed of only
|
||||||
// ASCII digits, e.g. 1234, but are always interpreted as strings.
|
// ASCII digits, e.g. 1234, but are always interpreted as strings.
|
||||||
bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
|
bareKeyRune = c.AnyOf(asciiLower, asciiUpper, digit, underscore, dash)
|
||||||
bareKey = c.OneOrMore(bareKeyRune)
|
bareKey = c.OneOrMore(bareKeyRune)
|
||||||
|
|
||||||
// Quoted keys follow the exact same rules as either basic strings or
|
// Quoted keys follow the exact same rules as either basic strings or
|
||||||
|
@ -44,17 +44,16 @@ func startKeyValuePair(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func startKey(p *parsekit.P) {
|
func startKey(p *parsekit.P) {
|
||||||
switch {
|
p.Expects("a key name")
|
||||||
case p.On(bareKeyRune).RouteTo(startBareKey):
|
p.On(bareKeyRune).RouteTo(startBareKey)
|
||||||
default:
|
|
||||||
p.UnexpectedInput("a valid key name")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func startBareKey(p *parsekit.P) {
|
func startBareKey(p *parsekit.P) {
|
||||||
p.On(bareKey).Accept()
|
p.Expects("a bare key name")
|
||||||
p.EmitLiteral(ItemKey)
|
if p.On(bareKey).Accept() {
|
||||||
p.RouteTo(endOfKeyOrDot)
|
p.EmitLiteral(ItemKey)
|
||||||
|
p.RouteTo(endOfKeyOrDot)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func endOfKeyOrDot(p *parsekit.P) {
|
func endOfKeyOrDot(p *parsekit.P) {
|
||||||
|
@ -62,25 +61,21 @@ func endOfKeyOrDot(p *parsekit.P) {
|
||||||
p.Emit(ItemKeyDot, ".")
|
p.Emit(ItemKeyDot, ".")
|
||||||
p.RouteTo(startKey)
|
p.RouteTo(startKey)
|
||||||
} else {
|
} else {
|
||||||
p.RouteTo(startKeyAssignment)
|
p.RouteTo(startAssignment)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func startKeyAssignment(p *parsekit.P) {
|
func startAssignment(p *parsekit.P) {
|
||||||
|
p.Expects("a value assignment")
|
||||||
if p.On(keyAssignment).Skip() {
|
if p.On(keyAssignment).Skip() {
|
||||||
p.Emit(ItemAssignment, "=")
|
p.Emit(ItemAssignment, "=")
|
||||||
p.RouteTo(startValue)
|
p.RouteTo(startValue)
|
||||||
} else {
|
|
||||||
p.UnexpectedInput("a value assignment")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||||
func startValue(p *parsekit.P) {
|
func startValue(p *parsekit.P) {
|
||||||
switch {
|
p.Expects("a value")
|
||||||
case p.On(anyQuote).RouteTo(startString):
|
p.On(anyQuote).RouteTo(startString)
|
||||||
default:
|
|
||||||
p.UnexpectedInput("a value")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,13 +8,12 @@ var (
|
||||||
// UTF-8 characters. * Multi-line basic strings are surrounded by three
|
// UTF-8 characters. * Multi-line basic strings are surrounded by three
|
||||||
// quotation marks on each side. * Basic strings are surrounded by
|
// quotation marks on each side. * Basic strings are surrounded by
|
||||||
// quotation marks.
|
// quotation marks.
|
||||||
doubleQuote3 = c.Repeat(3, doubleQuote)
|
doubleQuote3 = c.String(`"""`)
|
||||||
|
|
||||||
// Any Unicode character may be used except those that must be escaped:
|
// Any Unicode character may be used except those that must be escaped:
|
||||||
// quotation mark, backslash, and the control characters (U+0000 to
|
// quotation mark, backslash, and the control characters (U+0000 to
|
||||||
// U+001F, U+007F).
|
// U+001F, U+007F).
|
||||||
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'),
|
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
|
||||||
c.Rune('\u007F'))
|
|
||||||
|
|
||||||
// For convenience, some popular characters have a compact escape sequence.
|
// For convenience, some popular characters have a compact escape sequence.
|
||||||
//
|
//
|
||||||
|
@ -36,35 +35,33 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
func startString(p *parsekit.P) {
|
func startString(p *parsekit.P) {
|
||||||
|
p.Expects("a string value")
|
||||||
switch {
|
switch {
|
||||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
||||||
case p.On(doubleQuote).RouteTo(startBasicString):
|
case p.On(doubleQuote).RouteTo(startBasicString):
|
||||||
default:
|
|
||||||
p.UnexpectedInput("a string value")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseBasicString(p *parsekit.P) {
|
func parseBasicString(p *parsekit.P) {
|
||||||
|
p.Expects("string contents")
|
||||||
switch {
|
switch {
|
||||||
case p.On(parsekit.EOF).Stay():
|
case p.On(charThatMustBeEscaped).Stay():
|
||||||
p.UnexpectedEndOfFile("basic string token")
|
|
||||||
case p.On(validEscape).Accept():
|
|
||||||
p.Repeat()
|
|
||||||
case p.On(charThatMustBeEscaped).Stay():
|
|
||||||
r, _, _ := p.Match(charThatMustBeEscaped)
|
r, _, _ := p.Match(charThatMustBeEscaped)
|
||||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||||
|
case p.On(validEscape).Accept():
|
||||||
|
p.Repeat()
|
||||||
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
||||||
p.RouteReturn()
|
p.RouteReturn()
|
||||||
case p.On(any).Accept():
|
case p.On(any).Accept():
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
default:
|
|
||||||
p.UnexpectedInput("string contents")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func startBasicString(p *parsekit.P) {
|
func startBasicString(p *parsekit.P) {
|
||||||
p.On(doubleQuote).Skip()
|
p.Expects("a basic string")
|
||||||
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
if p.On(doubleQuote).Skip() {
|
||||||
|
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Specific handling of input for basic strings.
|
// Specific handling of input for basic strings.
|
||||||
|
@ -88,6 +85,8 @@ func basicStringSpecifics(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func startMultiLineBasicString(p *parsekit.P) {
|
func startMultiLineBasicString(p *parsekit.P) {
|
||||||
p.On(doubleQuote3).Skip()
|
p.Expects("a multi-line basic string")
|
||||||
p.EmitError("Not yet implemented")
|
if p.On(doubleQuote3).Skip() {
|
||||||
|
p.EmitError("Not yet implemented")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,7 @@ import (
|
||||||
func TestUnterminatedBasicString(t *testing.T) {
|
func TestUnterminatedBasicString(t *testing.T) {
|
||||||
runStatesT(t, statesT{
|
runStatesT(t, statesT{
|
||||||
"missing closing quote", `a="value`, "[a]=",
|
"missing closing quote", `a="value`, "[a]=",
|
||||||
"Unexpected end of file (expected basic string token)"})
|
"unexpected end of file (expected string contents)"})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||||
|
|
Loading…
Reference in New Issue