Backup work on code cleanup now the parser/combinator code is stable.
This commit is contained in:
parent
84ae34fb5f
commit
3677ab18cb
|
@ -3,7 +3,6 @@ package parsekit
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Emit passes a Parser item to the client, including the provided string.
|
||||
|
@ -48,27 +47,23 @@ func (p *P) EmitError(format string, args ...interface{}) {
|
|||
// UnexpectedInput is used by a parser implementation to emit an
|
||||
// error item that tells the client that an unexpected rune was
|
||||
// encountered in the input.
|
||||
// The parameter 'expected' is used to provide some context to the error.
|
||||
func (p *P) UnexpectedInput(expected string) {
|
||||
// next() takes care of error messages in cases where ok == false.
|
||||
// Therefore, we only provide an error message for the ok case here.
|
||||
func (p *P) UnexpectedInput() {
|
||||
r, _, ok := p.peek(0)
|
||||
switch {
|
||||
case ok:
|
||||
p.EmitError("unexpected character %q (expected %s)", r, expected)
|
||||
p.EmitError("unexpected character %q%s", r, p.fmtExpects())
|
||||
case r == EOF:
|
||||
p.EmitError("unexpected end of file (expected %s)", expected)
|
||||
case r == utf8.RuneError:
|
||||
p.EmitError("invalid UTF8 character in input (expected %s)", expected)
|
||||
p.EmitError("unexpected end of file%s", p.fmtExpects())
|
||||
case r == INVALID:
|
||||
p.EmitError("invalid UTF8 character in input%s", p.fmtExpects())
|
||||
default:
|
||||
panic("Unhandled output from peek()")
|
||||
}
|
||||
}
|
||||
|
||||
// UnexpectedEndOfFile is used by a parser implementation to emit an
|
||||
// error item that tells the client that more data was expected from
|
||||
// the input.
|
||||
// The parameter 'expected' is used to provide some context to the error.
|
||||
func (p *P) UnexpectedEndOfFile(expected string) {
|
||||
p.EmitError("Unexpected end of file (expected %s)", expected)
|
||||
func (p *P) fmtExpects() string {
|
||||
if p.expecting == "" {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf(" (expected %s)", p.expecting)
|
||||
}
|
||||
|
|
|
@ -4,6 +4,24 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// P holds the internal state of the parser.
|
||||
type P struct {
|
||||
state StateFn // the function that handles the current state
|
||||
nextState StateFn // the function that will handle the next state
|
||||
stack []StateFn // state function stack, for nested parsing
|
||||
input string // the scanned input
|
||||
len int // the total length of the input in bytes
|
||||
pos int // current byte scanning position in the input
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
cursorRow int // current row number in the input
|
||||
cursorColumn int // current column position in the input
|
||||
expecting string // a description of what the current state expects to find
|
||||
buffer stringBuffer // an efficient buffer, used to build string values
|
||||
items chan Item // channel of resulting Parser items
|
||||
item Item // the current item as reached by Next() and retrieved by Get()
|
||||
err *Error // an error when lexing failed, retrieved by Error()
|
||||
}
|
||||
|
||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
||||
// Returns the rune, its width in bytes and a boolean.
|
||||
// The boolean will be false in case no upcoming rune can be peeked
|
||||
|
@ -13,60 +31,6 @@ func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
|||
return handleRuneError(r, w)
|
||||
}
|
||||
|
||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||
// Returns a slice of runes, a slice containing their respective
|
||||
// widths in bytes and a boolean.
|
||||
// The boolean will be false in case less runes can be peeked than
|
||||
// the requested amount (end of data or invalid UTF8 character).
|
||||
func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
|
||||
var runes []rune
|
||||
var widths []int
|
||||
offset := 0
|
||||
for i := 0; i < amount; i++ {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
||||
r, w, ok := handleRuneError(r, w)
|
||||
runes = append(runes, r)
|
||||
widths = append(widths, w)
|
||||
offset += w
|
||||
if !ok {
|
||||
return runes, widths, false
|
||||
}
|
||||
}
|
||||
return runes, widths, true
|
||||
}
|
||||
|
||||
// progress moves the cursor forward in the input, returning one rune
|
||||
// for every specified pattern. The cursor will only be moved forward when
|
||||
// all requested patterns can be satisfied.
|
||||
// Returns true when all patterns were satisfied and the cursor was
|
||||
// moved forward, false otherwise.
|
||||
// A callback function can be provided to specify what to do with
|
||||
// the runes that are encountered in the input.
|
||||
func (p *P) progress(callback func(rune), patterns ...interface{}) bool {
|
||||
if runes, widths, ok := p.Match(patterns...); ok {
|
||||
for i, r := range runes {
|
||||
callback(r)
|
||||
p.advanceCursor(r, widths[i])
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// advanceCursor advances the rune cursor one position in the
|
||||
// input data. While doing so, it keeps track of newlines,
|
||||
// so we can report on row + column positions on error.
|
||||
func (p *P) advanceCursor(r rune, w int) {
|
||||
p.pos += w
|
||||
if p.newline {
|
||||
p.cursorColumn = 0
|
||||
p.cursorRow++
|
||||
} else {
|
||||
p.cursorColumn++
|
||||
}
|
||||
p.newline = r == '\n'
|
||||
}
|
||||
|
||||
// handleRuneError is used to normalize the rune value in case of errors.
|
||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||
// This can however indicate one of two situations:
|
||||
|
@ -84,3 +48,48 @@ func handleRuneError(r rune, w int) (rune, int, bool) {
|
|||
}
|
||||
return r, w, true
|
||||
}
|
||||
|
||||
// EOF is a special rune, which is used to indicate an end of file when
|
||||
// reading a character from the input.
|
||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||
// say 'I now expect the end of the file' is using something like:
|
||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||
const EOF rune = -1
|
||||
|
||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||
// rune on the input.
|
||||
const INVALID rune = utf8.RuneError
|
||||
|
||||
// StateFn defines the type of function that can be used to
|
||||
// handle a parser state.
|
||||
type StateFn func(*P)
|
||||
|
||||
// ItemType represents the type of a parser Item.
|
||||
type ItemType int
|
||||
|
||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||
// end of the input was reached.
|
||||
const ItemEOF ItemType = -1
|
||||
|
||||
// ItemError is a built-in parser item type that is used for flagging that
|
||||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
||||
// Item represents an item returned from the parser.
|
||||
type Item struct {
|
||||
Type ItemType
|
||||
Value string
|
||||
}
|
||||
|
||||
// Error is used as the error type when parsing errors occur.
|
||||
// The error includes some extra meta information to allow for useful
|
||||
// error messages to the user.
|
||||
type Error struct {
|
||||
Message string
|
||||
Row int
|
||||
Column int
|
||||
}
|
||||
|
||||
// Error returns the message of the parse error, which lets *Error be
// used as a standard Go error. Row and Column are not part of the
// returned string.
func (err *Error) Error() string {
|
||||
return err.Message
|
||||
}
|
||||
|
|
|
@ -1,12 +1,17 @@
|
|||
package parsekit
|
||||
|
||||
import "unicode/utf8"
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Not in need of it myself, but nice to have I guess:
|
||||
// - NotFollowedBy
|
||||
// - Discard
|
||||
// - Separated
|
||||
|
||||
// MatchDialog is used by Matcher implementations as a means
|
||||
// to retrieve data to match against and to report back
|
||||
// successful matches.
|
||||
type MatchDialog struct {
|
||||
p *P
|
||||
runes []rune
|
||||
|
@ -14,44 +19,70 @@ type MatchDialog struct {
|
|||
offset int
|
||||
curRune rune
|
||||
curWidth int
|
||||
forked bool
|
||||
parent *MatchDialog
|
||||
}
|
||||
|
||||
// Fork splits off a child MatchDialog, containing the same
|
||||
// offset as the parent MatchDialog, but with all other data
|
||||
// in a new state.
|
||||
// By forking, a Matcher implementation can freely work with
|
||||
// a MatchDialog, without affecting the parent MatchDialog.
|
||||
// When the Matcher decides that a match was found, it can
|
||||
// use the Merge() method on the child to merge the child's
|
||||
// matching data into the parent MatchDialog.
|
||||
func (m *MatchDialog) Fork() *MatchDialog {
|
||||
fork := &MatchDialog{
|
||||
child := &MatchDialog{
|
||||
p: m.p,
|
||||
offset: m.offset,
|
||||
forked: true,
|
||||
parent: m,
|
||||
}
|
||||
return fork
|
||||
return child
|
||||
}
|
||||
|
||||
func (m *MatchDialog) Join(fork *MatchDialog) bool {
|
||||
if !fork.forked {
|
||||
panic("Cannot join a non-forked MatchDialog")
|
||||
// Merge merges the data for a forked child MatchDialog back
|
||||
// into its parent:
|
||||
// * the runes that are accumulated in the child are added
|
||||
// to the parent's runes
|
||||
// * the parent's offset is set to the child's offset
|
||||
// After a Merge, the child MatchDialog is reset so it can
|
||||
// immediately be reused for performing another match.
|
||||
func (m *MatchDialog) Merge() bool {
|
||||
if m.parent == nil {
|
||||
panic("Cannot call Merge a a non-forked MatchDialog")
|
||||
}
|
||||
m.runes = append(m.runes, fork.runes...)
|
||||
m.widths = append(m.widths, fork.widths...)
|
||||
m.offset = fork.offset
|
||||
fork.runes = []rune{}
|
||||
fork.widths = []int{}
|
||||
m.parent.runes = append(m.parent.runes, m.runes...)
|
||||
m.parent.widths = append(m.parent.widths, m.widths...)
|
||||
m.parent.offset = m.offset
|
||||
m.Clear()
|
||||
return true
|
||||
}
|
||||
|
||||
// NextRune can be called by a Matcher on a MatchDialog in order
|
||||
// to receive the next rune from the input.
|
||||
// The rune is automatically added to the MatchDialog's runes.
|
||||
// Returns the rune and a boolean. The boolean will be false in
|
||||
// case an invalid UTF8 rune or the end of the file was encountered.
|
||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||
if m.curRune == utf8.RuneError {
|
||||
panic("Matcher must not call NextRune() after it returned false")
|
||||
}
|
||||
r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
|
||||
r, w, ok := m.p.peek(m.offset)
|
||||
m.offset += w
|
||||
m.curRune = r
|
||||
m.curWidth = w
|
||||
m.runes = append(m.runes, r)
|
||||
m.widths = append(m.widths, w)
|
||||
return r, r != EOF && r != INVALID
|
||||
return r, ok
|
||||
}
|
||||
|
||||
// Matcher is the interface that can be implemented to provide
|
||||
// Clear empties out the accumulated runes that are stored
|
||||
// in the MatchDialog.
|
||||
func (m *MatchDialog) Clear() {
|
||||
m.runes = []rune{}
|
||||
m.widths = []int{}
|
||||
}
|
||||
|
||||
// Matcher is the interface that must be implemented to provide
|
||||
// a matching strategy for the match() function.
|
||||
// A MatchDialog is provided as input. This implements a
|
||||
// specific set of methods that a Matcher needs to retrieve data
|
||||
|
@ -60,20 +91,28 @@ type Matcher interface {
|
|||
Match(*MatchDialog) bool
|
||||
}
|
||||
|
||||
type MatcherConstructors struct {
|
||||
Any func() MatchAny
|
||||
Rune func(rune rune) MatchRune
|
||||
RuneRange func(start rune, end rune) MatchRuneRange
|
||||
Runes func(runes ...rune) MatchAnyOf
|
||||
AnyOf func(matchers ...Matcher) MatchAnyOf
|
||||
Repeat func(count int, matcher Matcher) MatchRepeat
|
||||
Sequence func(matchers ...Matcher) MatchSequence
|
||||
ZeroOrMore func(matcher Matcher) MatchZeroOrMore
|
||||
OneOrMore func(matcher Matcher) MatchOneOrMore
|
||||
Optional func(matcher Matcher) MatchOptional
|
||||
type matcherConstructors struct {
|
||||
Any func() MatchAny
|
||||
Rune func(rune) MatchRune
|
||||
RuneRange func(rune, rune) MatchRuneRange
|
||||
Runes func(...rune) MatchAnyOf
|
||||
String func(string) MatchSequence
|
||||
StringNoCase func(string) MatchSequence
|
||||
AnyOf func(...Matcher) MatchAnyOf
|
||||
Repeat func(int, Matcher) MatchRepeat
|
||||
Sequence func(...Matcher) MatchSequence
|
||||
ZeroOrMore func(Matcher) MatchZeroOrMore
|
||||
OneOrMore func(Matcher) MatchOneOrMore
|
||||
Optional func(Matcher) MatchOptional
|
||||
Drop func(Matcher) MatchDrop
|
||||
}
|
||||
|
||||
var C = MatcherConstructors{
|
||||
// C provides access to a wide range of parser/combinator
|
||||
// constructors that can be used to build matching expressions.
|
||||
// When using C in your own parser, then it is advised to create
|
||||
// an alias in your own package for easy reference:
|
||||
// var c = parsekit.C
|
||||
var C = matcherConstructors{
|
||||
Any: func() MatchAny {
|
||||
return MatchAny{}
|
||||
},
|
||||
|
@ -90,6 +129,22 @@ var C = MatcherConstructors{
|
|||
}
|
||||
return MatchAnyOf{m}
|
||||
},
|
||||
// String creates a MatchSequence that matches the runes of s, in order.
String: func(s string) MatchSequence {
	// Ranging over a string yields byte offsets, not rune indices.
	// Storing matchers by that index in a slice of len(s) entries would
	// leave nil Matcher gaps behind every multi-byte rune, so the
	// matchers are appended instead. len(s) is only used as a capacity
	// hint (it is an upper bound for the number of runes).
	m := make([]Matcher, 0, len(s))
	for _, r := range s {
		m = append(m, MatchRune{r})
	}
	return MatchSequence{m}
},
|
||||
// StringNoCase creates a MatchSequence that matches the runes of s, in
// order, accepting either the upper case or the lower case form of
// every rune.
StringNoCase: func(s string) MatchSequence {
	// Ranging over a string yields byte offsets, not rune indices.
	// Storing matchers by that index in a slice of len(s) entries would
	// leave nil Matcher gaps behind every multi-byte rune, so the
	// matchers are appended instead. len(s) is only used as a capacity
	// hint (it is an upper bound for the number of runes).
	m := make([]Matcher, 0, len(s))
	for _, r := range s {
		u := MatchRune{unicode.ToUpper(r)}
		l := MatchRune{unicode.ToLower(r)}
		m = append(m, MatchAnyOf{[]Matcher{u, l}})
	}
	return MatchSequence{m}
},
|
||||
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
||||
return MatchAnyOf{matchers}
|
||||
},
|
||||
|
@ -108,6 +163,9 @@ var C = MatcherConstructors{
|
|||
Optional: func(matcher Matcher) MatchOptional {
|
||||
return MatchOptional{matcher}
|
||||
},
|
||||
Drop: func(matcher Matcher) MatchDrop {
|
||||
return MatchDrop{matcher}
|
||||
},
|
||||
}
|
||||
|
||||
type MatchAny struct{}
|
||||
|
@ -142,9 +200,9 @@ type MatchAnyOf struct {
|
|||
|
||||
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
||||
for _, matcher := range c.matcher {
|
||||
mc := m.Fork()
|
||||
if matcher.Match(mc) {
|
||||
return m.Join(mc)
|
||||
child := m.Fork()
|
||||
if matcher.Match(child) {
|
||||
return child.Merge()
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
@ -156,13 +214,13 @@ type MatchRepeat struct {
|
|||
}
|
||||
|
||||
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
child := m.Fork()
|
||||
for i := 0; i < c.count; i++ {
|
||||
if !c.matcher.Match(mc) {
|
||||
if !c.matcher.Match(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
m.Join(mc)
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
|
||||
|
@ -171,13 +229,13 @@ type MatchSequence struct {
|
|||
}
|
||||
|
||||
func (c MatchSequence) Match(m *MatchDialog) bool {
|
||||
mPart := m.Fork()
|
||||
child := m.Fork()
|
||||
for _, matcher := range c.matchers {
|
||||
if !matcher.Match(mPart) {
|
||||
if !matcher.Match(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
m.Join(mPart)
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
|
||||
|
@ -186,9 +244,9 @@ type MatchOneOrMore struct {
|
|||
}
|
||||
|
||||
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
for c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
child := m.Fork()
|
||||
for c.matcher.Match(child) {
|
||||
child.Merge()
|
||||
}
|
||||
return len(m.runes) > 0
|
||||
}
|
||||
|
@ -198,9 +256,9 @@ type MatchZeroOrMore struct {
|
|||
}
|
||||
|
||||
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
for c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
child := m.Fork()
|
||||
for c.matcher.Match(child) {
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
@ -210,9 +268,23 @@ type MatchOptional struct {
|
|||
}
|
||||
|
||||
func (c MatchOptional) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
if c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
child := m.Fork()
|
||||
if c.matcher.Match(child) {
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type MatchDrop struct {
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
func (c MatchDrop) Match(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
if c.matcher.Match(child) {
|
||||
child.Clear()
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -12,11 +12,10 @@ const TestItem p.ItemType = 1
|
|||
|
||||
func newParser(input string, matcher p.Matcher) *p.P {
|
||||
stateFn := func(p *p.P) {
|
||||
p.Expects("MATCH")
|
||||
if p.On(matcher).Accept() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.Repeat()
|
||||
} else {
|
||||
p.UnexpectedInput("MATCH")
|
||||
}
|
||||
}
|
||||
return p.New(input, stateFn)
|
||||
|
@ -107,6 +106,35 @@ func TestMatchRuneRange(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestMatchString(t *testing.T) {
|
||||
p := newParser("Hello, world!", c.String("Hello"))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "Hello" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// TODO
|
||||
// func TestMatchStringNoCase(t *testing.T) {
|
||||
// p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
|
||||
// r, err, ok := p.Next()
|
||||
// if !ok {
|
||||
// t.Fatalf("Parsing failed: %s", err)
|
||||
// }
|
||||
// if r.Type != TestItem {
|
||||
// t.Error("Parser item type not expected TestTitem")
|
||||
// }
|
||||
// if r.Value != "Hello" {
|
||||
// t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
||||
// }
|
||||
// }
|
||||
|
||||
func TestMatchRunes(t *testing.T) {
|
||||
m := c.Runes('+', '-', '*', '/')
|
||||
s := "-+/*+++"
|
||||
|
@ -243,6 +271,17 @@ func TestMatchOptional(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestMatchDrop(t *testing.T) {
|
||||
dashes := c.OneOrMore(c.Rune('-'))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes)))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "X" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
}
|
||||
func TestMixAndMatch(t *testing.T) {
|
||||
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||
backslash := c.Rune('\\')
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
// Expects is used to let a state function describe what input it is expecting.
|
||||
// This expectation is used in error messages to make them more descriptive.
|
||||
//
|
||||
// Also, when defining an expectation inside a StateFn, you do not need
|
||||
// to handle unexpected input yourself. When the end of the function is
|
||||
// reached without setting the next state, an automatic error will be
|
||||
// emitted. This error differentiates between issues:
|
||||
// * there is valid data on input, but it was not accepted by the function
|
||||
// * there is an invalid UTF8 character on input
|
||||
// * the end of the file was reached.
|
||||
func (p *P) Expects(description string) {
|
||||
p.expecting = description
|
||||
}
|
||||
|
||||
// AtEndOfFile returns true when there is no more data available in the input.
|
||||
func (p *P) AtEndOfFile() bool {
|
||||
|
@ -16,8 +24,8 @@ func (p *P) AtEndOfFile() bool {
|
|||
// by this method.
|
||||
func (p *P) AtEndOfLine() bool {
|
||||
return p.AtEndOfFile() ||
|
||||
p.Upcoming("\r", "\n") ||
|
||||
p.Upcoming("\n")
|
||||
p.On(C.String("\r\n")).Stay() ||
|
||||
p.On(C.Rune('\n')).Stay()
|
||||
}
|
||||
|
||||
// SkipEndOfLine returns true when the cursor is either at the end of the line
|
||||
|
@ -25,8 +33,8 @@ func (p *P) AtEndOfLine() bool {
|
|||
// the cursor is moved forward to beyond the newline.
|
||||
func (p *P) SkipEndOfLine() bool {
|
||||
return p.AtEndOfFile() ||
|
||||
p.SkipMatching("\r", "\n") ||
|
||||
p.SkipMatching("\n")
|
||||
p.On(C.String("\r\n")).Skip() ||
|
||||
p.On(C.Rune('\n')).Skip()
|
||||
}
|
||||
|
||||
// AcceptEndOfLine returns true when the cursor is either at the end of the line
|
||||
|
@ -44,65 +52,24 @@ func (p *P) AcceptEndOfLine() bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func (p *P) Match(patterns ...interface{}) ([]rune, []int, bool) {
|
||||
return p.match(0, patterns...)
|
||||
func (p *P) On(m Matcher) *action {
|
||||
runes, widths, ok := p.Match(m)
|
||||
return &action{
|
||||
p: p,
|
||||
runes: runes,
|
||||
widths: widths,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
|
||||
var runes []rune
|
||||
var widths []int
|
||||
|
||||
addRune := func(r rune, w int) {
|
||||
offset += w
|
||||
runes = append(runes, r)
|
||||
widths = append(widths, w)
|
||||
}
|
||||
|
||||
for _, pattern := range patterns {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
||||
if r == utf8.RuneError {
|
||||
return runes, widths, false
|
||||
}
|
||||
switch pattern := pattern.(type) {
|
||||
case Matcher:
|
||||
m := &MatchDialog{p: p}
|
||||
if pattern.Match(m) {
|
||||
return m.runes, m.widths, true
|
||||
} else {
|
||||
return m.runes, m.widths, false
|
||||
}
|
||||
case []interface{}:
|
||||
rs, ws, matched := p.match(offset, pattern...)
|
||||
for i, r := range rs {
|
||||
addRune(r, ws[i])
|
||||
}
|
||||
if !matched {
|
||||
return runes, widths, false
|
||||
}
|
||||
case string:
|
||||
if strings.IndexRune(pattern, r) < 0 {
|
||||
return runes, widths, false
|
||||
}
|
||||
addRune(r, w)
|
||||
case rune:
|
||||
if pattern != r {
|
||||
return runes, widths, false
|
||||
}
|
||||
addRune(r, w)
|
||||
default:
|
||||
panic(fmt.Sprintf("Not rune matching implemented for pattern of type %T", pattern))
|
||||
}
|
||||
}
|
||||
return runes, widths, true
|
||||
func (p *P) Match(matcher Matcher) ([]rune, []int, bool) {
|
||||
return p.match(0, matcher)
|
||||
}
|
||||
|
||||
// Upcoming checks if the upcoming runes satisfy all provided patterns.
|
||||
// Returns true if all provided patterns are satisfied.
|
||||
// This is basically the same as the Match method, but with only
|
||||
// the boolean return parameter for programmer convenience.
|
||||
func (p *P) Upcoming(patterns ...interface{}) bool {
|
||||
_, _, ok := p.Match(patterns...)
|
||||
return ok
|
||||
func (p *P) match(offset int, matcher Matcher) ([]rune, []int, bool) {
|
||||
m := &MatchDialog{p: p}
|
||||
ok := matcher.Match(m)
|
||||
return m.runes, m.widths, ok
|
||||
}
|
||||
|
||||
type action struct {
|
||||
|
@ -135,6 +102,24 @@ func (a *action) Skip() bool {
|
|||
return a.ok
|
||||
}
|
||||
|
||||
// Stay returns whether the match for this action was successful.
// It only reports the result: nothing is done with the matched runes.
func (a *action) Stay() bool {
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// advanceCursor advances the rune cursor one position in the
|
||||
// input data. While doing so, it keeps tracks of newlines,
|
||||
// so we can report on row + column positions on error.
|
||||
func (p *P) advanceCursor(r rune, w int) {
|
||||
p.pos += w
|
||||
if p.newline {
|
||||
p.cursorColumn = 0
|
||||
p.cursorRow++
|
||||
} else {
|
||||
p.cursorColumn++
|
||||
}
|
||||
p.newline = r == '\n'
|
||||
}
|
||||
|
||||
func (a *action) RouteTo(state StateFn) bool {
|
||||
if a.ok {
|
||||
a.p.RouteTo(state)
|
||||
|
@ -142,36 +127,9 @@ func (a *action) RouteTo(state StateFn) bool {
|
|||
return a.ok
|
||||
}
|
||||
|
||||
func (a *action) Stay() bool {
|
||||
func (a *action) RouteReturn() bool {
|
||||
if a.ok {
|
||||
a.p.RouteReturn()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
func (p *P) On(patterns ...interface{}) *action {
|
||||
runes, widths, ok := p.Match(patterns...)
|
||||
return &action{
|
||||
p: p,
|
||||
runes: runes,
|
||||
widths: widths,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
// AcceptMatching adds the next runes to the string buffer, but only
|
||||
// if the upcoming runes satisfy the provided patterns.
|
||||
// When runes were added then true is returned, false otherwise.
|
||||
// TODO not needed anymore
|
||||
// func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
||||
// return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
||||
// }
|
||||
|
||||
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
||||
// Returns true when one or more runes were skipped.
|
||||
func (p *P) SkipMatching(patterns ...interface{}) bool {
|
||||
if runes, widths, ok := p.Match(patterns...); ok {
|
||||
for i, r := range runes {
|
||||
p.advanceCursor(r, widths[i])
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// New takes an input string and a start state,
|
||||
// and initializes the parser for it.
|
||||
func New(input string, startState StateFn) *P {
|
||||
|
@ -30,13 +36,25 @@ func (p *P) Next() (Item, *Error, bool) {
|
|||
return i, nil, true
|
||||
}
|
||||
default:
|
||||
// When implementing a parser, it is mandatory to provide
|
||||
// a conscious state routing decision for every cycle.
|
||||
// This helps preventing bugs during implementation.
|
||||
// When implementing a parser, a state function must provide
|
||||
// a routing decision in every state function execution.
|
||||
// When no route is specified, then it is considered a bug
|
||||
// in the parser implementation.
|
||||
// An exception is when a function specified its expectation
|
||||
// using the Expects() method. In that case, an unexpected
|
||||
// input error is emitted.
|
||||
if p.nextState == nil {
|
||||
panic("No next state was scheduled for the parser")
|
||||
if p.expecting != "" {
|
||||
p.UnexpectedInput()
|
||||
continue
|
||||
} else {
|
||||
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
|
||||
panic(fmt.Sprintf("StateFn implementation bug: %s did not set next state or input expectation", name))
|
||||
}
|
||||
}
|
||||
p.state, p.nextState = p.nextState, nil
|
||||
p.state = p.nextState
|
||||
p.nextState = nil
|
||||
p.expecting = ""
|
||||
p.state(p)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// P holds the internal state of the parser.
|
||||
type P struct {
|
||||
state StateFn // the function that handles the current state
|
||||
nextState StateFn // the function that will handle the next state
|
||||
stack []StateFn // state function stack, for nested parsing
|
||||
input string // the scanned input
|
||||
len int // the total length of the input in bytes
|
||||
pos int // current byte scanning position in the input
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
cursorRow int // current row number in the input
|
||||
cursorColumn int // current column position in the input
|
||||
buffer stringBuffer // an efficient buffer, used to build string values
|
||||
items chan Item // channel of resulting Parser items
|
||||
item Item // the current item as reached by Next() and retrieved by Get()
|
||||
err *Error // an error when lexing failed, retrieved by Error()
|
||||
}
|
||||
|
||||
// StateFn defines the type of function that can be used to
|
||||
// handle a parser state.
|
||||
type StateFn func(*P)
|
||||
|
||||
// ItemType represents the type of a parser Item.
|
||||
type ItemType int
|
||||
|
||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||
// end of the input was reached.
|
||||
const ItemEOF ItemType = -1
|
||||
|
||||
// ItemError is a built-in parser item type that is used for flagging that
|
||||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
||||
// Item represents an item returned from the parser.
|
||||
type Item struct {
|
||||
Type ItemType
|
||||
Value string
|
||||
}
|
||||
|
||||
// Error is used as the error type when parsing errors occur.
|
||||
// The error includes some extra meta information to allow for useful
|
||||
// error messages to the user.
|
||||
type Error struct {
|
||||
Message string
|
||||
Row int
|
||||
Column int
|
||||
}
|
||||
|
||||
func (err *Error) Error() string {
|
||||
return err.Message
|
||||
}
|
||||
|
||||
// EOF is a special rune, which is used to indicate an end of file when
|
||||
// reading a character from the input.
|
||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||
// say 'I now expect the end of the file' is using something like:
|
||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||
const EOF rune = -1
|
||||
|
||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||
// rune on the input.
|
||||
const INVALID rune = utf8.RuneError
|
|
@ -27,12 +27,13 @@ var (
|
|||
any = c.Any()
|
||||
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
||||
backslash = c.Rune('\\')
|
||||
lower = c.RuneRange('a', 'z')
|
||||
upper = c.RuneRange('A', 'Z')
|
||||
asciiLower = c.RuneRange('a', 'z')
|
||||
asciiUpper = c.RuneRange('A', 'Z')
|
||||
digit = c.RuneRange('0', '9')
|
||||
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
||||
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
||||
optionalWhitespace = c.Optional(whitespace)
|
||||
endOfLine = c.AnyOf(lineFeed, c.Rune(parsekit.EOF))
|
||||
)
|
||||
|
||||
// NewParser creates a new parser, using the provided input string
|
||||
|
|
|
@ -12,13 +12,12 @@ func startComment(p *parsekit.P) {
|
|||
|
||||
// All characters up to the end of the line are included in the comment.
|
||||
func commentContents(p *parsekit.P) {
|
||||
p.Expects("comment contents")
|
||||
switch {
|
||||
case p.AtEndOfLine():
|
||||
case p.AtEndOfLine() || p.On(endOfLine).Skip(): // TODO drop AtEndOfLine support
|
||||
p.EmitLiteralTrim(ItemComment)
|
||||
p.RouteReturn()
|
||||
case p.On(any).Accept():
|
||||
p.Repeat()
|
||||
default:
|
||||
p.UnexpectedInput("comment contents")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,14 +7,14 @@ import (
|
|||
func TestComments(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"empty comment", "#", "#()", ""},
|
||||
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||
{"basic comment", "#chicken", "#(chicken)", ""},
|
||||
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
||||
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
||||
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||
// {"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||
// {"basic comment", "#chicken", "#(chicken)", ""},
|
||||
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
||||
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
||||
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||
})
|
||||
}
|
||||
|
|
|
@ -3,9 +3,8 @@ package parser
|
|||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
func endOfFile(p *parsekit.P) {
|
||||
p.Expects("end of file")
|
||||
if p.AtEndOfFile() {
|
||||
p.Emit(parsekit.ItemEOF, "EOF") // todo Automate within parser?
|
||||
} else {
|
||||
p.UnexpectedInput("end of file")
|
||||
p.Emit(parsekit.ItemEOF, "EOF")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ var (
|
|||
// contain ASCII letters, ASCII digits, underscores, and dashes
|
||||
// (A-Za-z0-9_-). Note that bare keys are allowed to be composed of only
|
||||
// ASCII digits, e.g. 1234, but are always interpreted as strings.
|
||||
bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
|
||||
bareKeyRune = c.AnyOf(asciiLower, asciiUpper, digit, underscore, dash)
|
||||
bareKey = c.OneOrMore(bareKeyRune)
|
||||
|
||||
// Quoted keys follow the exact same rules as either basic strings or
|
||||
|
@ -44,17 +44,16 @@ func startKeyValuePair(p *parsekit.P) {
|
|||
}
|
||||
|
||||
func startKey(p *parsekit.P) {
|
||||
switch {
|
||||
case p.On(bareKeyRune).RouteTo(startBareKey):
|
||||
default:
|
||||
p.UnexpectedInput("a valid key name")
|
||||
}
|
||||
p.Expects("a key name")
|
||||
p.On(bareKeyRune).RouteTo(startBareKey)
|
||||
}
|
||||
|
||||
func startBareKey(p *parsekit.P) {
|
||||
p.On(bareKey).Accept()
|
||||
p.EmitLiteral(ItemKey)
|
||||
p.RouteTo(endOfKeyOrDot)
|
||||
p.Expects("a bare key name")
|
||||
if p.On(bareKey).Accept() {
|
||||
p.EmitLiteral(ItemKey)
|
||||
p.RouteTo(endOfKeyOrDot)
|
||||
}
|
||||
}
|
||||
|
||||
func endOfKeyOrDot(p *parsekit.P) {
|
||||
|
@ -62,25 +61,21 @@ func endOfKeyOrDot(p *parsekit.P) {
|
|||
p.Emit(ItemKeyDot, ".")
|
||||
p.RouteTo(startKey)
|
||||
} else {
|
||||
p.RouteTo(startKeyAssignment)
|
||||
p.RouteTo(startAssignment)
|
||||
}
|
||||
}
|
||||
|
||||
func startKeyAssignment(p *parsekit.P) {
|
||||
func startAssignment(p *parsekit.P) {
|
||||
p.Expects("a value assignment")
|
||||
if p.On(keyAssignment).Skip() {
|
||||
p.Emit(ItemAssignment, "=")
|
||||
p.RouteTo(startValue)
|
||||
} else {
|
||||
p.UnexpectedInput("a value assignment")
|
||||
}
|
||||
}
|
||||
|
||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||
func startValue(p *parsekit.P) {
|
||||
switch {
|
||||
case p.On(anyQuote).RouteTo(startString):
|
||||
default:
|
||||
p.UnexpectedInput("a value")
|
||||
}
|
||||
p.Expects("a value")
|
||||
p.On(anyQuote).RouteTo(startString)
|
||||
}
|
||||
|
|
|
@ -8,13 +8,12 @@ var (
|
|||
// UTF-8 characters. * Multi-line basic strings are surrounded by three
|
||||
// quotation marks on each side. * Basic strings are surrounded by
|
||||
// quotation marks.
|
||||
doubleQuote3 = c.Repeat(3, doubleQuote)
|
||||
doubleQuote3 = c.String(`"""`)
|
||||
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to
|
||||
// U+001F, U+007F).
|
||||
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'),
|
||||
c.Rune('\u007F'))
|
||||
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
|
||||
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
//
|
||||
|
@ -36,35 +35,33 @@ var (
|
|||
)
|
||||
|
||||
func startString(p *parsekit.P) {
|
||||
p.Expects("a string value")
|
||||
switch {
|
||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
||||
case p.On(doubleQuote).RouteTo(startBasicString):
|
||||
default:
|
||||
p.UnexpectedInput("a string value")
|
||||
}
|
||||
}
|
||||
|
||||
func parseBasicString(p *parsekit.P) {
|
||||
p.Expects("string contents")
|
||||
switch {
|
||||
case p.On(parsekit.EOF).Stay():
|
||||
p.UnexpectedEndOfFile("basic string token")
|
||||
case p.On(validEscape).Accept():
|
||||
p.Repeat()
|
||||
case p.On(charThatMustBeEscaped).Stay():
|
||||
r, _, _ := p.Match(charThatMustBeEscaped)
|
||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||
case p.On(validEscape).Accept():
|
||||
p.Repeat()
|
||||
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
||||
p.RouteReturn()
|
||||
case p.On(any).Accept():
|
||||
p.Repeat()
|
||||
default:
|
||||
p.UnexpectedInput("string contents")
|
||||
}
|
||||
}
|
||||
|
||||
func startBasicString(p *parsekit.P) {
|
||||
p.On(doubleQuote).Skip()
|
||||
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
||||
p.Expects("a basic string")
|
||||
if p.On(doubleQuote).Skip() {
|
||||
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
||||
}
|
||||
}
|
||||
|
||||
// Specific handling of input for basic strings.
|
||||
|
@ -88,6 +85,8 @@ func basicStringSpecifics(p *parsekit.P) {
|
|||
}
|
||||
|
||||
func startMultiLineBasicString(p *parsekit.P) {
|
||||
p.On(doubleQuote3).Skip()
|
||||
p.EmitError("Not yet implemented")
|
||||
p.Expects("a multi-line basic string")
|
||||
if p.On(doubleQuote3).Skip() {
|
||||
p.EmitError("Not yet implemented")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
func TestUnterminatedBasicString(t *testing.T) {
|
||||
runStatesT(t, statesT{
|
||||
"missing closing quote", `a="value`, "[a]=",
|
||||
"Unexpected end of file (expected basic string token)"})
|
||||
"unexpected end of file (expected string contents)"})
|
||||
}
|
||||
|
||||
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||
|
|
Loading…
Reference in New Issue