Backup work on code cleanup now the parser/combinator code is stable.

This commit is contained in:
Maurice Makaay 2019-05-20 12:24:36 +00:00
parent 84ae34fb5f
commit 3677ab18cb
14 changed files with 354 additions and 337 deletions

View File

@ -3,7 +3,6 @@ package parsekit
import (
"fmt"
"strings"
"unicode/utf8"
)
// Emit passes a Parser item to the client, including the provided string.
@ -48,27 +47,23 @@ func (p *P) EmitError(format string, args ...interface{}) {
// UnexpectedInput is used by a parser implementation to emit an
// error item that tells the client that an unexpected rune was
// encountered in the input.
// The parameter 'expected' is used to provide some context to the error.
func (p *P) UnexpectedInput(expected string) {
// next() takes care of error messages in cases where ok == false.
// Therefore, we only provide an error message for the ok case here.
func (p *P) UnexpectedInput() {
r, _, ok := p.peek(0)
switch {
case ok:
p.EmitError("unexpected character %q (expected %s)", r, expected)
p.EmitError("unexpected character %q%s", r, p.fmtExpects())
case r == EOF:
p.EmitError("unexpected end of file (expected %s)", expected)
case r == utf8.RuneError:
p.EmitError("invalid UTF8 character in input (expected %s)", expected)
p.EmitError("unexpected end of file%s", p.fmtExpects())
case r == INVALID:
p.EmitError("invalid UTF8 character in input%s", p.fmtExpects())
default:
panic("Unhandled output from peek()")
}
}
// UnexpectedEndOfFile is used by a parser implementation to emit an
// error item that tells the client that more data was expected from
// the input.
// The parameter 'expected' is used to provide some context to the error.
func (p *P) UnexpectedEndOfFile(expected string) {
p.EmitError("Unexpected end of file (expected %s)", expected)
func (p *P) fmtExpects() string {
if p.expecting == "" {
return ""
}
return fmt.Sprintf(" (expected %s)", p.expecting)
}

View File

@ -4,6 +4,24 @@ import (
"unicode/utf8"
)
// P holds the internal state of the parser.
type P struct {
state StateFn // the function that handles the current state
nextState StateFn // the function that will handle the next state
stack []StateFn // state function stack, for nested parsing
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorRow int // current row number in the input
cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
}
// peek returns but does not advance the cursor to the next rune(s) in the input.
// Returns the rune, its width in bytes and a boolean.
// The boolean will be false in case no upcoming rune can be peeked
@ -13,60 +31,6 @@ func (p *P) peek(offsetInBytes int) (rune, int, bool) {
return handleRuneError(r, w)
}
// peekMulti takes a peek at multiple upcoming runes in the input.
// Returns a slice of runes, a slice containing their respective
// widths in bytes and a boolean.
// The boolean will be false in case less runes can be peeked than
// the requested amount (end of data or invalid UTF8 character).
func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
var runes []rune
var widths []int
offset := 0
for i := 0; i < amount; i++ {
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
r, w, ok := handleRuneError(r, w)
runes = append(runes, r)
widths = append(widths, w)
offset += w
if !ok {
return runes, widths, false
}
}
return runes, widths, true
}
// progress moves the cursor forward in the input, returning one rune
// for every specified pattern. The cursor will only be moved forward when
// all requested patterns can be satisfied.
// Returns true when all patterns were satisfied and the cursor was
// moved forward, false otherwise.
// A callback function can be provided to specify what to do with
// the runes that are encountered in the input.
func (p *P) progress(callback func(rune), patterns ...interface{}) bool {
if runes, widths, ok := p.Match(patterns...); ok {
for i, r := range runes {
callback(r)
p.advanceCursor(r, widths[i])
}
return true
}
return false
}
// advanceCursor advances the rune cursor one position in the
// input data. While doing so, it keeps tracks of newlines,
// so we can report on row + column positions on error.
func (p *P) advanceCursor(r rune, w int) {
p.pos += w
if p.newline {
p.cursorColumn = 0
p.cursorRow++
} else {
p.cursorColumn++
}
p.newline = r == '\n'
}
// handleRuneError is used to normale rune value in case of errors.
// When an error occurs, then utf8.RuneError will be in the rune.
// This can however indicate one of two situations:
@ -84,3 +48,48 @@ func handleRuneError(r rune, w int) (rune, int, bool) {
}
return r, w, true
}
// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1
// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError
// StateFn defines the type of function that can be used to
// handle a parser state.
type StateFn func(*P)
// ItemType represents the type of a parser Item.
type ItemType int
// ItemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached.
const ItemEOF ItemType = -1
// ItemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing.
const ItemError ItemType = -2
// Item represents an item returned from the parser.
type Item struct {
Type ItemType
Value string
}
// Error is used as the error type when parsing errors occur.
// The error includes some extra meta information to allow for useful
// error messages to the user.
type Error struct {
Message string
Row int
Column int
}
func (err *Error) Error() string {
return err.Message
}

View File

@ -1,12 +1,17 @@
package parsekit
import "unicode/utf8"
import (
"unicode"
"unicode/utf8"
)
// Not in need of it myself, but nice to have I guess:
// - NotFollowedBy
// - Discard
// - Separated
// MatchDialog is used by Matcher implementations as a means
// to retrieve data to match against and to report back
// successful matches.
type MatchDialog struct {
p *P
runes []rune
@ -14,44 +19,70 @@ type MatchDialog struct {
offset int
curRune rune
curWidth int
forked bool
parent *MatchDialog
}
// Fork splits off a child MatchDialog, containing the same
// offset as the parent MatchDialog, but with all other data
// in a new state.
// By forking, a Matcher implementation can freely work with
// a MatchDialog, without affecting the parent MatchDialog.
// When the Matcher decides that a match was found, it can
// use the Merge() method on the child to merge the child's
// matching data into the parent MatchDialog.
func (m *MatchDialog) Fork() *MatchDialog {
fork := &MatchDialog{
child := &MatchDialog{
p: m.p,
offset: m.offset,
forked: true,
parent: m,
}
return fork
return child
}
func (m *MatchDialog) Join(fork *MatchDialog) bool {
if !fork.forked {
panic("Cannot join a non-forked MatchDialog")
// Merge merges the data for a a forked child MatchDialog back
// into its parent:
// * the runes that are accumulated in the child are added
// to the parent's runes
// * the parent's offset is set to the child's offset
// After a Merge, the child MatchDialog is reset so it can
// immediately be reused for performing another match.
func (m *MatchDialog) Merge() bool {
if m.parent == nil {
panic("Cannot call Merge a a non-forked MatchDialog")
}
m.runes = append(m.runes, fork.runes...)
m.widths = append(m.widths, fork.widths...)
m.offset = fork.offset
fork.runes = []rune{}
fork.widths = []int{}
m.parent.runes = append(m.parent.runes, m.runes...)
m.parent.widths = append(m.parent.widths, m.widths...)
m.parent.offset = m.offset
m.Clear()
return true
}
// NextRune can be called by a Matcher on a MatchDialog in order
// to receive the next rune from the input.
// The rune is automatically added to the MatchDialog's runes.
// Returns the rune and a boolean. The boolean will be false in
// case an invalid UTF8 rune of the end of the file was encountered.
func (m *MatchDialog) NextRune() (rune, bool) {
if m.curRune == utf8.RuneError {
panic("Matcher must not call NextRune() after it returned false")
}
r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
r, w, ok := m.p.peek(m.offset)
m.offset += w
m.curRune = r
m.curWidth = w
m.runes = append(m.runes, r)
m.widths = append(m.widths, w)
return r, r != EOF && r != INVALID
return r, ok
}
// Matcher is the interface that can be implemented to provide
// Clear empties out the accumulated runes that are stored
// in the MatchDialog.
func (m *MatchDialog) Clear() {
m.runes = []rune{}
m.widths = []int{}
}
// Matcher is the interface that must be implemented to provide
// a matching stategy for the match() function.
// A MatchDialog is provided as input. This implements a
// specific set of methods that a Matcher needs to retrieve data
@ -60,20 +91,28 @@ type Matcher interface {
Match(*MatchDialog) bool
}
type MatcherConstructors struct {
type matcherConstructors struct {
Any func() MatchAny
Rune func(rune rune) MatchRune
RuneRange func(start rune, end rune) MatchRuneRange
Runes func(runes ...rune) MatchAnyOf
AnyOf func(matchers ...Matcher) MatchAnyOf
Repeat func(count int, matcher Matcher) MatchRepeat
Sequence func(matchers ...Matcher) MatchSequence
ZeroOrMore func(matcher Matcher) MatchZeroOrMore
OneOrMore func(matcher Matcher) MatchOneOrMore
Optional func(matcher Matcher) MatchOptional
Rune func(rune) MatchRune
RuneRange func(rune, rune) MatchRuneRange
Runes func(...rune) MatchAnyOf
String func(string) MatchSequence
StringNoCase func(string) MatchSequence
AnyOf func(...Matcher) MatchAnyOf
Repeat func(int, Matcher) MatchRepeat
Sequence func(...Matcher) MatchSequence
ZeroOrMore func(Matcher) MatchZeroOrMore
OneOrMore func(Matcher) MatchOneOrMore
Optional func(Matcher) MatchOptional
Drop func(Matcher) MatchDrop
}
var C = MatcherConstructors{
// C provides access to a wide range of parser/combinator
// constructors that can be used to build matching expressions.
// When using C in your own parser, then it is advised to create
// an alias in your own package for easy reference:
// var c = parsekit.C
var C = matcherConstructors{
Any: func() MatchAny {
return MatchAny{}
},
@ -90,6 +129,22 @@ var C = MatcherConstructors{
}
return MatchAnyOf{m}
},
String: func(s string) MatchSequence {
m := make([]Matcher, len(s))
for i, r := range s {
m[i] = MatchRune{r}
}
return MatchSequence{m}
},
StringNoCase: func(s string) MatchSequence {
m := make([]Matcher, len(s))
for i, r := range s {
u := MatchRune{unicode.ToUpper(r)}
l := MatchRune{unicode.ToLower(r)}
m[i] = MatchAnyOf{[]Matcher{u, l}}
}
return MatchSequence{m}
},
AnyOf: func(matchers ...Matcher) MatchAnyOf {
return MatchAnyOf{matchers}
},
@ -108,6 +163,9 @@ var C = MatcherConstructors{
Optional: func(matcher Matcher) MatchOptional {
return MatchOptional{matcher}
},
Drop: func(matcher Matcher) MatchDrop {
return MatchDrop{matcher}
},
}
type MatchAny struct{}
@ -142,9 +200,9 @@ type MatchAnyOf struct {
func (c MatchAnyOf) Match(m *MatchDialog) bool {
for _, matcher := range c.matcher {
mc := m.Fork()
if matcher.Match(mc) {
return m.Join(mc)
child := m.Fork()
if matcher.Match(child) {
return child.Merge()
}
}
return false
@ -156,13 +214,13 @@ type MatchRepeat struct {
}
func (c MatchRepeat) Match(m *MatchDialog) bool {
mc := m.Fork()
child := m.Fork()
for i := 0; i < c.count; i++ {
if !c.matcher.Match(mc) {
if !c.matcher.Match(child) {
return false
}
}
m.Join(mc)
child.Merge()
return true
}
@ -171,13 +229,13 @@ type MatchSequence struct {
}
func (c MatchSequence) Match(m *MatchDialog) bool {
mPart := m.Fork()
child := m.Fork()
for _, matcher := range c.matchers {
if !matcher.Match(mPart) {
if !matcher.Match(child) {
return false
}
}
m.Join(mPart)
child.Merge()
return true
}
@ -186,9 +244,9 @@ type MatchOneOrMore struct {
}
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
mc := m.Fork()
for c.matcher.Match(mc) {
m.Join(mc)
child := m.Fork()
for c.matcher.Match(child) {
child.Merge()
}
return len(m.runes) > 0
}
@ -198,9 +256,9 @@ type MatchZeroOrMore struct {
}
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
mc := m.Fork()
for c.matcher.Match(mc) {
m.Join(mc)
child := m.Fork()
for c.matcher.Match(child) {
child.Merge()
}
return true
}
@ -210,9 +268,23 @@ type MatchOptional struct {
}
func (c MatchOptional) Match(m *MatchDialog) bool {
mc := m.Fork()
if c.matcher.Match(mc) {
m.Join(mc)
child := m.Fork()
if c.matcher.Match(child) {
child.Merge()
}
return true
}
type MatchDrop struct {
matcher Matcher
}
func (c MatchDrop) Match(m *MatchDialog) bool {
child := m.Fork()
if c.matcher.Match(child) {
child.Clear()
child.Merge()
return true
}
return false
}

View File

@ -12,11 +12,10 @@ const TestItem p.ItemType = 1
func newParser(input string, matcher p.Matcher) *p.P {
stateFn := func(p *p.P) {
p.Expects("MATCH")
if p.On(matcher).Accept() {
p.EmitLiteral(TestItem)
p.Repeat()
} else {
p.UnexpectedInput("MATCH")
}
}
return p.New(input, stateFn)
@ -107,6 +106,35 @@ func TestMatchRuneRange(t *testing.T) {
}
}
func TestMatchString(t *testing.T) {
p := newParser("Hello, world!", c.String("Hello"))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if r.Type != TestItem {
t.Error("Parser item type not expected TestTitem")
}
if r.Value != "Hello" {
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
}
}
// TODO
// func TestMatchStringNoCase(t *testing.T) {
// p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
// r, err, ok := p.Next()
// if !ok {
// t.Fatalf("Parsing failed: %s", err)
// }
// if r.Type != TestItem {
// t.Error("Parser item type not expected TestTitem")
// }
// if r.Value != "Hello" {
// t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
// }
// }
func TestMatchRunes(t *testing.T) {
m := c.Runes('+', '-', '*', '/')
s := "-+/*+++"
@ -243,6 +271,17 @@ func TestMatchOptional(t *testing.T) {
}
}
func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes)))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
}
if r.Value != "X" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
}
}
func TestMixAndMatch(t *testing.T) {
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
backslash := c.Rune('\\')

View File

@ -1,10 +1,18 @@
package parsekit
import (
"fmt"
"strings"
"unicode/utf8"
)
// Expects is used to let a state function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// Also, when defining an expectation inside a StateFn, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between issues:
// * there is valid data on input, but it was not accepted by the function
// * there is an invalid UTF8 character on input
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}
// AtEndOfFile returns true when there is no more data available in the input.
func (p *P) AtEndOfFile() bool {
@ -16,8 +24,8 @@ func (p *P) AtEndOfFile() bool {
// by this method.
func (p *P) AtEndOfLine() bool {
return p.AtEndOfFile() ||
p.Upcoming("\r", "\n") ||
p.Upcoming("\n")
p.On(C.String("\r\n")).Stay() ||
p.On(C.Rune('\n')).Stay()
}
// SkipEndOfLine returns true when the cursor is either at the end of the line
@ -25,8 +33,8 @@ func (p *P) AtEndOfLine() bool {
// the cursor is moved forward to beyond the newline.
func (p *P) SkipEndOfLine() bool {
return p.AtEndOfFile() ||
p.SkipMatching("\r", "\n") ||
p.SkipMatching("\n")
p.On(C.String("\r\n")).Skip() ||
p.On(C.Rune('\n')).Skip()
}
// AcceptEndOfLine returns true when the cursor is either at the end of the line
@ -44,65 +52,24 @@ func (p *P) AcceptEndOfLine() bool {
return false
}
func (p *P) Match(patterns ...interface{}) ([]rune, []int, bool) {
return p.match(0, patterns...)
func (p *P) On(m Matcher) *action {
runes, widths, ok := p.Match(m)
return &action{
p: p,
runes: runes,
widths: widths,
ok: ok,
}
}
func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
var runes []rune
var widths []int
func (p *P) Match(matcher Matcher) ([]rune, []int, bool) {
return p.match(0, matcher)
}
addRune := func(r rune, w int) {
offset += w
runes = append(runes, r)
widths = append(widths, w)
}
for _, pattern := range patterns {
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
if r == utf8.RuneError {
return runes, widths, false
}
switch pattern := pattern.(type) {
case Matcher:
func (p *P) match(offset int, matcher Matcher) ([]rune, []int, bool) {
m := &MatchDialog{p: p}
if pattern.Match(m) {
return m.runes, m.widths, true
} else {
return m.runes, m.widths, false
}
case []interface{}:
rs, ws, matched := p.match(offset, pattern...)
for i, r := range rs {
addRune(r, ws[i])
}
if !matched {
return runes, widths, false
}
case string:
if strings.IndexRune(pattern, r) < 0 {
return runes, widths, false
}
addRune(r, w)
case rune:
if pattern != r {
return runes, widths, false
}
addRune(r, w)
default:
panic(fmt.Sprintf("Not rune matching implemented for pattern of type %T", pattern))
}
}
return runes, widths, true
}
// Upcoming checks if the upcoming runes satisfy all provided patterns.
// Returns true if all provided patterns are satisfied.
// This is basically the same as the Match method, but with only
// the boolean return parameter for programmer convenciency.
func (p *P) Upcoming(patterns ...interface{}) bool {
_, _, ok := p.Match(patterns...)
return ok
ok := matcher.Match(m)
return m.runes, m.widths, ok
}
type action struct {
@ -135,6 +102,24 @@ func (a *action) Skip() bool {
return a.ok
}
func (a *action) Stay() bool {
return a.ok
}
// advanceCursor advances the rune cursor one position in the
// input data. While doing so, it keeps tracks of newlines,
// so we can report on row + column positions on error.
func (p *P) advanceCursor(r rune, w int) {
p.pos += w
if p.newline {
p.cursorColumn = 0
p.cursorRow++
} else {
p.cursorColumn++
}
p.newline = r == '\n'
}
func (a *action) RouteTo(state StateFn) bool {
if a.ok {
a.p.RouteTo(state)
@ -142,36 +127,9 @@ func (a *action) RouteTo(state StateFn) bool {
return a.ok
}
func (a *action) Stay() bool {
func (a *action) RouteReturn() bool {
if a.ok {
a.p.RouteReturn()
}
return a.ok
}
func (p *P) On(patterns ...interface{}) *action {
runes, widths, ok := p.Match(patterns...)
return &action{
p: p,
runes: runes,
widths: widths,
ok: ok,
}
}
// AcceptMatching adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided patterns.
// When runes were added then true is returned, false otherwise.
// TODO not needed anymore
// func (p *P) AcceptMatching(patterns ...interface{}) bool {
// return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
// }
// SkipMatching skips runes, but only when all provided patterns are satisfied.
// Returns true when one or more runes were skipped.
func (p *P) SkipMatching(patterns ...interface{}) bool {
if runes, widths, ok := p.Match(patterns...); ok {
for i, r := range runes {
p.advanceCursor(r, widths[i])
}
return true
}
return false
}

View File

@ -1,5 +1,11 @@
package parsekit
import (
"fmt"
"reflect"
"runtime"
)
// New takes an input string and a start state,
// and initializes the parser for it.
func New(input string, startState StateFn) *P {
@ -30,13 +36,25 @@ func (p *P) Next() (Item, *Error, bool) {
return i, nil, true
}
default:
// When implementing a parser, it is mandatory to provide
// a conscious state routing decision for every cycle.
// This helps preventing bugs during implementation.
// When implementing a parser, a state function must provide
// a routing decision in every state function execution.
// When no route is specified, then it is considered a but
// in the parser implementation.
// An exception is when a function specified its expectation
// using the Expects() method. In that case, an unexpected
// input error is emitted.
if p.nextState == nil {
panic("No next state was scheduled for the parser")
if p.expecting != "" {
p.UnexpectedInput()
continue
} else {
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
panic(fmt.Sprintf("StateFn implementation bug: %s did not set next state or input expectation", name))
}
p.state, p.nextState = p.nextState, nil
}
p.state = p.nextState
p.nextState = nil
p.expecting = ""
p.state(p)
}
}

View File

@ -1,67 +0,0 @@
package parsekit
import (
"unicode/utf8"
)
// P holds the internal state of the parser.
type P struct {
state StateFn // the function that handles the current state
nextState StateFn // the function that will handle the next state
stack []StateFn // state function stack, for nested parsing
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorRow int // current row number in the input
cursorColumn int // current column position in the input
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
}
// StateFn defines the type of function that can be used to
// handle a parser state.
type StateFn func(*P)
// ItemType represents the type of a parser Item.
type ItemType int
// ItemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached.
const ItemEOF ItemType = -1
// ItemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing.
const ItemError ItemType = -2
// Item represents an item returned from the parser.
type Item struct {
Type ItemType
Value string
}
// Error is used as the error type when parsing errors occur.
// The error includes some extra meta information to allow for useful
// error messages to the user.
type Error struct {
Message string
Row int
Column int
}
func (err *Error) Error() string {
return err.Message
}
// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1
// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError

View File

@ -27,12 +27,13 @@ var (
any = c.Any()
anyQuote = c.AnyOf(singleQuote, doubleQuote)
backslash = c.Rune('\\')
lower = c.RuneRange('a', 'z')
upper = c.RuneRange('A', 'Z')
asciiLower = c.RuneRange('a', 'z')
asciiUpper = c.RuneRange('A', 'Z')
digit = c.RuneRange('0', '9')
whitespace = c.OneOrMore(c.AnyOf(space, tab))
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
optionalWhitespace = c.Optional(whitespace)
endOfLine = c.AnyOf(lineFeed, c.Rune(parsekit.EOF))
)
// NewParser creates a new parser, using the provided input string

View File

@ -12,13 +12,12 @@ func startComment(p *parsekit.P) {
// All characters up to the end of the line are included in the comment.
func commentContents(p *parsekit.P) {
p.Expects("comment contents")
switch {
case p.AtEndOfLine():
case p.AtEndOfLine() || p.On(endOfLine).Skip(): // TODO drop AtEndOfLine support
p.EmitLiteralTrim(ItemComment)
p.RouteReturn()
case p.On(any).Accept():
p.Repeat()
default:
p.UnexpectedInput("comment contents")
}
}

View File

@ -7,14 +7,14 @@ import (
func TestComments(t *testing.T) {
runStatesTs(t, []statesT{
{"empty comment", "#", "#()", ""},
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
{"basic comment", "#chicken", "#(chicken)", ""},
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
// {"empty comment with spaces", "# \t \r\n", `#()`, ""},
// {"basic comment", "#chicken", "#(chicken)", ""},
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
})
}

View File

@ -3,9 +3,8 @@ package parser
import "github.com/mmakaay/toml/parsekit"
func endOfFile(p *parsekit.P) {
p.Expects("end of file")
if p.AtEndOfFile() {
p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
} else {
p.UnexpectedInput("end of file")
p.Emit(parsekit.ItemEOF, "EOF")
}
}

View File

@ -15,7 +15,7 @@ var (
// contain ASCII letters, ASCII digits, underscores, and dashes
// (A-Za-z0-9_-). Note that bare keys are allowed to be composed of only
// ASCII digits, e.g. 1234, but are always interpreted as strings.
bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
bareKeyRune = c.AnyOf(asciiLower, asciiUpper, digit, underscore, dash)
bareKey = c.OneOrMore(bareKeyRune)
// Quoted keys follow the exact same rules as either basic strings or
@ -44,17 +44,16 @@ func startKeyValuePair(p *parsekit.P) {
}
func startKey(p *parsekit.P) {
switch {
case p.On(bareKeyRune).RouteTo(startBareKey):
default:
p.UnexpectedInput("a valid key name")
}
p.Expects("a key name")
p.On(bareKeyRune).RouteTo(startBareKey)
}
func startBareKey(p *parsekit.P) {
p.On(bareKey).Accept()
p.Expects("a bare key name")
if p.On(bareKey).Accept() {
p.EmitLiteral(ItemKey)
p.RouteTo(endOfKeyOrDot)
}
}
func endOfKeyOrDot(p *parsekit.P) {
@ -62,25 +61,21 @@ func endOfKeyOrDot(p *parsekit.P) {
p.Emit(ItemKeyDot, ".")
p.RouteTo(startKey)
} else {
p.RouteTo(startKeyAssignment)
p.RouteTo(startAssignment)
}
}
func startKeyAssignment(p *parsekit.P) {
func startAssignment(p *parsekit.P) {
p.Expects("a value assignment")
if p.On(keyAssignment).Skip() {
p.Emit(ItemAssignment, "=")
p.RouteTo(startValue)
} else {
p.UnexpectedInput("a value assignment")
}
}
// Values must be of the following types: String, Integer, Float, Boolean,
// Datetime, Array, or Inline Table. Unspecified values are invalid.
func startValue(p *parsekit.P) {
switch {
case p.On(anyQuote).RouteTo(startString):
default:
p.UnexpectedInput("a value")
}
p.Expects("a value")
p.On(anyQuote).RouteTo(startString)
}

View File

@ -8,13 +8,12 @@ var (
// UTF-8 characters. * Multi-line basic strings are surrounded by three
// quotation marks on each side. * Basic strings are surrounded by
// quotation marks.
doubleQuote3 = c.Repeat(3, doubleQuote)
doubleQuote3 = c.String(`"""`)
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to
// U+001F, U+007F).
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'),
c.Rune('\u007F'))
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
// For convenience, some popular characters have a compact escape sequence.
//
@ -36,35 +35,33 @@ var (
)
func startString(p *parsekit.P) {
p.Expects("a string value")
switch {
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
case p.On(doubleQuote).RouteTo(startBasicString):
default:
p.UnexpectedInput("a string value")
}
}
func parseBasicString(p *parsekit.P) {
p.Expects("string contents")
switch {
case p.On(parsekit.EOF).Stay():
p.UnexpectedEndOfFile("basic string token")
case p.On(validEscape).Accept():
p.Repeat()
case p.On(charThatMustBeEscaped).Stay():
r, _, _ := p.Match(charThatMustBeEscaped)
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
case p.On(validEscape).Accept():
p.Repeat()
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
p.RouteReturn()
case p.On(any).Accept():
p.Repeat()
default:
p.UnexpectedInput("string contents")
}
}
func startBasicString(p *parsekit.P) {
p.On(doubleQuote).Skip()
p.Expects("a basic string")
if p.On(doubleQuote).Skip() {
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
}
// Specific handling of input for basic strings.
@ -88,6 +85,8 @@ func basicStringSpecifics(p *parsekit.P) {
}
func startMultiLineBasicString(p *parsekit.P) {
p.On(doubleQuote3).Skip()
p.Expects("a multi-line basic string")
if p.On(doubleQuote3).Skip() {
p.EmitError("Not yet implemented")
}
}

View File

@ -8,7 +8,7 @@ import (
func TestUnterminatedBasicString(t *testing.T) {
runStatesT(t, statesT{
"missing closing quote", `a="value`, "[a]=",
"Unexpected end of file (expected basic string token)"})
"unexpected end of file (expected string contents)"})
}
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {