Backup work, created a lot of tests for parser combinators and atoms. Pretty solid now!
This commit is contained in:
parent
d9ab7298e7
commit
6ad4499971
114
atoms.go
114
atoms.go
|
@ -1,114 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms that can be used to
|
|
||||||
// build combinators or parsing rules.
|
|
||||||
var A = struct {
|
|
||||||
EndOfFile Matcher
|
|
||||||
AnyRune Matcher
|
|
||||||
Space Matcher
|
|
||||||
Tab Matcher
|
|
||||||
CarriageRet Matcher
|
|
||||||
Newline Matcher
|
|
||||||
Excl Matcher
|
|
||||||
DoubleQuote Matcher
|
|
||||||
Hash Matcher
|
|
||||||
Dollar Matcher
|
|
||||||
Percent Matcher
|
|
||||||
Amp Matcher
|
|
||||||
SingleQuote Matcher
|
|
||||||
RoundOpen Matcher
|
|
||||||
RoundClose Matcher
|
|
||||||
Asterisk Matcher
|
|
||||||
Plus Matcher
|
|
||||||
Comma Matcher
|
|
||||||
Minus Matcher
|
|
||||||
Dot Matcher
|
|
||||||
Slash Matcher
|
|
||||||
Colon Matcher
|
|
||||||
Semicolon Matcher
|
|
||||||
AngleOpen Matcher
|
|
||||||
Equal Matcher
|
|
||||||
AngleClose Matcher
|
|
||||||
Question Matcher
|
|
||||||
At Matcher
|
|
||||||
SquareOpen Matcher
|
|
||||||
Backslash Matcher
|
|
||||||
SquareClose Matcher
|
|
||||||
Caret Matcher
|
|
||||||
Underscore Matcher
|
|
||||||
Backquote Matcher
|
|
||||||
CurlyOpen Matcher
|
|
||||||
Pipe Matcher
|
|
||||||
CurlyClose Matcher
|
|
||||||
Tilde Matcher
|
|
||||||
Whitespace Matcher
|
|
||||||
WhitespaceAndNewlines Matcher
|
|
||||||
EndOfLine Matcher
|
|
||||||
Digit Matcher
|
|
||||||
ASCII Matcher
|
|
||||||
ASCIILower Matcher
|
|
||||||
ASCIIUpper Matcher
|
|
||||||
HexDigit Matcher
|
|
||||||
}{
|
|
||||||
EndOfFile: MatchEndOfFile(),
|
|
||||||
AnyRune: MatchAnyRune(),
|
|
||||||
Space: C.Rune(' '),
|
|
||||||
Tab: C.Rune('\t'),
|
|
||||||
CarriageRet: C.Rune('\r'),
|
|
||||||
Newline: C.Rune('\n'),
|
|
||||||
Excl: C.Rune('!'),
|
|
||||||
DoubleQuote: C.Rune('"'),
|
|
||||||
Hash: C.Rune('#'),
|
|
||||||
Dollar: C.Rune('$'),
|
|
||||||
Percent: C.Rune('%'),
|
|
||||||
Amp: C.Rune('&'),
|
|
||||||
SingleQuote: C.Rune('\''),
|
|
||||||
RoundOpen: C.Rune('('),
|
|
||||||
RoundClose: C.Rune(')'),
|
|
||||||
Asterisk: C.Rune('*'),
|
|
||||||
Plus: C.Rune('+'),
|
|
||||||
Comma: C.Rune(','),
|
|
||||||
Minus: C.Rune('-'),
|
|
||||||
Dot: C.Rune('.'),
|
|
||||||
Slash: C.Rune('/'),
|
|
||||||
Colon: C.Rune(':'),
|
|
||||||
Semicolon: C.Rune(';'),
|
|
||||||
AngleOpen: C.Rune('<'),
|
|
||||||
Equal: C.Rune('='),
|
|
||||||
AngleClose: C.Rune('>'),
|
|
||||||
Question: C.Rune('?'),
|
|
||||||
At: C.Rune('@'),
|
|
||||||
SquareOpen: C.Rune('['),
|
|
||||||
Backslash: C.Rune('\\'),
|
|
||||||
SquareClose: C.Rune(']'),
|
|
||||||
Caret: C.Rune('^'),
|
|
||||||
Underscore: C.Rune('_'),
|
|
||||||
Backquote: C.Rune('`'),
|
|
||||||
CurlyOpen: C.Rune('{'),
|
|
||||||
Pipe: C.Rune('|'),
|
|
||||||
CurlyClose: C.Rune('}'),
|
|
||||||
Tilde: C.Rune('~'),
|
|
||||||
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
|
|
||||||
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.Rune('\r'), C.Rune('\n'))),
|
|
||||||
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
|
||||||
Digit: C.RuneRange('0', '9'),
|
|
||||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
|
||||||
ASCIILower: C.RuneRange('a', 'z'),
|
|
||||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
|
||||||
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchEndOfFile() Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
fork := m.Fork()
|
|
||||||
input, ok := fork.NextRune()
|
|
||||||
return !ok && input == EOF
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchAnyRune() Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
_, ok := m.NextRune()
|
|
||||||
return ok
|
|
||||||
}
|
|
||||||
}
|
|
128
atoms_test.go
128
atoms_test.go
|
@ -1,128 +0,0 @@
|
||||||
package parsekit_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestAtoms(t *testing.T) {
|
|
||||||
for i, c := range []struct {
|
|
||||||
input string
|
|
||||||
matcher parsekit.Matcher
|
|
||||||
mustMatch bool
|
|
||||||
}{
|
|
||||||
{"", a.EndOfFile, true},
|
|
||||||
{"⌘", a.AnyRune, true},
|
|
||||||
{"\xbc", a.AnyRune, false}, // invalid UTF8 rune
|
|
||||||
{"", a.AnyRune, false}, // end of file
|
|
||||||
{" ", a.Space, true},
|
|
||||||
{"X", a.Space, false},
|
|
||||||
{"\t", a.Tab, true},
|
|
||||||
{"\r", a.CarriageRet, true},
|
|
||||||
{"\n", a.Newline, true},
|
|
||||||
{"!", a.Excl, true},
|
|
||||||
{"\"", a.DoubleQuote, true},
|
|
||||||
{"#", a.Hash, true},
|
|
||||||
{"$", a.Dollar, true},
|
|
||||||
{"%", a.Percent, true},
|
|
||||||
{"&", a.Amp, true},
|
|
||||||
{"'", a.SingleQuote, true},
|
|
||||||
{"(", a.RoundOpen, true},
|
|
||||||
{")", a.RoundClose, true},
|
|
||||||
{"*", a.Asterisk, true},
|
|
||||||
{"+", a.Plus, true},
|
|
||||||
{",", a.Comma, true},
|
|
||||||
{"-", a.Minus, true},
|
|
||||||
{".", a.Dot, true},
|
|
||||||
{"/", a.Slash, true},
|
|
||||||
{":", a.Colon, true},
|
|
||||||
{";", a.Semicolon, true},
|
|
||||||
{"<", a.AngleOpen, true},
|
|
||||||
{"=", a.Equal, true},
|
|
||||||
{">", a.AngleClose, true},
|
|
||||||
{"?", a.Question, true},
|
|
||||||
{"@", a.At, true},
|
|
||||||
{"[", a.SquareOpen, true},
|
|
||||||
{"\\", a.Backslash, true},
|
|
||||||
{"]", a.SquareClose, true},
|
|
||||||
{"^", a.Caret, true},
|
|
||||||
{"_", a.Underscore, true},
|
|
||||||
{"`", a.Backquote, true},
|
|
||||||
{"{", a.CurlyOpen, true},
|
|
||||||
{"|", a.Pipe, true},
|
|
||||||
{"}", a.CurlyClose, true},
|
|
||||||
{"~", a.Tilde, true},
|
|
||||||
{" \t \t ", a.Whitespace, true},
|
|
||||||
{" \t\r\n ", a.WhitespaceAndNewlines, true},
|
|
||||||
{"", a.EndOfLine, true},
|
|
||||||
{"\r\n", a.EndOfLine, true},
|
|
||||||
{"\n", a.EndOfLine, true},
|
|
||||||
{"0", a.Digit, true},
|
|
||||||
{"1", a.Digit, true},
|
|
||||||
{"2", a.Digit, true},
|
|
||||||
{"3", a.Digit, true},
|
|
||||||
{"4", a.Digit, true},
|
|
||||||
{"5", a.Digit, true},
|
|
||||||
{"6", a.Digit, true},
|
|
||||||
{"7", a.Digit, true},
|
|
||||||
{"8", a.Digit, true},
|
|
||||||
{"9", a.Digit, true},
|
|
||||||
{"X", a.Digit, false},
|
|
||||||
{"a", a.ASCIILower, true},
|
|
||||||
{"z", a.ASCIILower, true},
|
|
||||||
{"A", a.ASCIILower, false},
|
|
||||||
{"Z", a.ASCIILower, false},
|
|
||||||
{"A", a.ASCIIUpper, true},
|
|
||||||
{"Z", a.ASCIIUpper, true},
|
|
||||||
{"a", a.ASCIIUpper, false},
|
|
||||||
{"z", a.ASCIIUpper, false},
|
|
||||||
{"0", a.HexDigit, true},
|
|
||||||
{"9", a.HexDigit, true},
|
|
||||||
{"a", a.HexDigit, true},
|
|
||||||
{"f", a.HexDigit, true},
|
|
||||||
{"A", a.HexDigit, true},
|
|
||||||
{"F", a.HexDigit, true},
|
|
||||||
{"g", a.HexDigit, false},
|
|
||||||
{"G", a.HexDigit, false},
|
|
||||||
} {
|
|
||||||
parser := parsekit.New(c.matcher).Parse(c.input)
|
|
||||||
item, err, ok := parser.Next()
|
|
||||||
if c.mustMatch {
|
|
||||||
if !ok {
|
|
||||||
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
|
||||||
}
|
|
||||||
if item.Type != parsekit.MatchedItem {
|
|
||||||
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if ok {
|
|
||||||
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSequenceOfRunes(t *testing.T) {
|
|
||||||
sequence := c.Sequence(
|
|
||||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
|
||||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
|
||||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
|
||||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
|
||||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
|
||||||
)
|
|
||||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
|
||||||
parser := parsekit.New(func(p *parsekit.P) {
|
|
||||||
p.Expects("Sequence of runes")
|
|
||||||
if p.On(sequence).Accept().End() {
|
|
||||||
p.EmitLiteral(TestItem)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
item, err, ok := parser.Parse(input).Next()
|
|
||||||
if !ok {
|
|
||||||
t.Fatalf("Parsing failed: %s", err)
|
|
||||||
}
|
|
||||||
if item.Value != input {
|
|
||||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
|
||||||
}
|
|
||||||
}
|
|
296
combinators.go
296
combinators.go
|
@ -1,296 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unicode"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Nice to have I guess:
|
|
||||||
// - LookAhead
|
|
||||||
// - Ready to go combinators for various number notations
|
|
||||||
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
|
|
||||||
|
|
||||||
// Matcher is the function type for all matching functions in this package.
// A Matcher receives a MatchDialog, through which it reads runes from the
// input, and returns a boolean that tells whether or not a match was found.
type Matcher func(m *MatchDialog) bool
|
|
||||||
|
|
||||||
// MatchDialog is used by Matcher functions to retrieve data from the parser
// input to match against and to report back successful matches.
type MatchDialog struct {
	p        *P           // the parser that NextRune() peeks into for input
	runes    []rune       // the runes read through NextRune(), plus runes merged in from forked children
	widths   []int        // for every entry in runes, the width that was returned along with it
	offset   int          // the offset that is passed to the parser for reading the next rune
	curRune  rune         // the rune as returned by the last call to NextRune()
	curWidth int          // the width as returned by the last call to NextRune()
	parent   *MatchDialog // the MatchDialog that this one was forked from; nil when not forked
}
|
|
||||||
|
|
||||||
// NextRune can be called by a Matcher on a MatchDialog in order
|
|
||||||
// to receive the next rune from the input.
|
|
||||||
// The rune is automatically added to the MatchDialog's slice of runes.
|
|
||||||
// Returns the rune and a boolean. The boolean will be false in
|
|
||||||
// case an invalid UTF8 rune of the end of the file was encountered.
|
|
||||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
|
||||||
if m.curRune == utf8.RuneError {
|
|
||||||
panic("internal parser error: Matcher must not call NextRune() after it returned false")
|
|
||||||
}
|
|
||||||
r, w, ok := m.p.peek(m.offset)
|
|
||||||
m.offset += w
|
|
||||||
m.curRune = r
|
|
||||||
m.curWidth = w
|
|
||||||
m.runes = append(m.runes, r)
|
|
||||||
m.widths = append(m.widths, w)
|
|
||||||
return r, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fork splits off a child MatchDialog, containing the same offset as the
|
|
||||||
// parent MatchDialog, but with all other data in a new state.
|
|
||||||
//
|
|
||||||
// By forking, a Matcher implementation can freely work with a MatchDialog,
|
|
||||||
// without affecting the parent MatchDialog. This is for example useful when
|
|
||||||
// the Matcher is checking for a sequence of runes. When there are first
|
|
||||||
// 3 runes returned from NextRune() which match the expectations, then the
|
|
||||||
// slice of runes inside the MatchDialog will contain these 3 runes.
|
|
||||||
// When after this the 4th rune turns out to be a mismatch, the forked
|
|
||||||
// MatchDialog can simply be discarded, and the state in the parent will be
|
|
||||||
// kept as-is.
|
|
||||||
//
|
|
||||||
// When a forked MatchDialog is in use, and the Matcher decides that a
|
|
||||||
// successul match was found, then the Merge() method can be called in
|
|
||||||
// order to transport the collected runes to the parent MatchDialog.
|
|
||||||
func (m *MatchDialog) Fork() *MatchDialog {
|
|
||||||
child := &MatchDialog{
|
|
||||||
p: m.p,
|
|
||||||
offset: m.offset,
|
|
||||||
parent: m,
|
|
||||||
}
|
|
||||||
return child
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge merges the data from a forked child MatchDialog back into its parent:
|
|
||||||
// * the runes that are accumulated in the child are added to the parent runes
|
|
||||||
// * the parent's offset is set to the child's offset
|
|
||||||
// After a Merge, the child MatchDialog is reset so it can immediately be
|
|
||||||
// reused for performing another match.
|
|
||||||
func (m *MatchDialog) Merge() bool {
|
|
||||||
if m.parent == nil {
|
|
||||||
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
|
||||||
}
|
|
||||||
m.parent.runes = append(m.parent.runes, m.runes...)
|
|
||||||
m.parent.widths = append(m.parent.widths, m.widths...)
|
|
||||||
m.parent.offset = m.offset
|
|
||||||
m.Clear()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear empties out the accumulated runes that are stored in the MatchDialog.
|
|
||||||
// The offset is kept as-is.
|
|
||||||
func (m *MatchDialog) Clear() {
|
|
||||||
m.runes = []rune{}
|
|
||||||
m.widths = []int{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// C provides convenient access to a range of parser/combinator
|
|
||||||
// constructors that can be used to build matching expressions.
|
|
||||||
//
|
|
||||||
// When using C in your own parser, then it is advised to create
|
|
||||||
// a variable in your own package to reference it (var c = parsekit.C).
|
|
||||||
// This saves a lot of typing, and it makes your code a lot cleaner.
|
|
||||||
var C = struct {
|
|
||||||
Rune func(rune) Matcher
|
|
||||||
Runes func(...rune) Matcher
|
|
||||||
RuneRange func(rune, rune) Matcher
|
|
||||||
String func(string) Matcher
|
|
||||||
StringNoCase func(string) Matcher
|
|
||||||
AnyOf func(...Matcher) Matcher
|
|
||||||
Not func(Matcher) Matcher
|
|
||||||
Optional func(Matcher) Matcher
|
|
||||||
Sequence func(...Matcher) Matcher
|
|
||||||
Repeat func(int, Matcher) Matcher
|
|
||||||
Min func(int, Matcher) Matcher
|
|
||||||
Max func(int, Matcher) Matcher
|
|
||||||
ZeroOrMore func(Matcher) Matcher
|
|
||||||
OneOrMore func(Matcher) Matcher
|
|
||||||
MinMax func(int, int, Matcher) Matcher
|
|
||||||
Separated func(Matcher, Matcher) Matcher
|
|
||||||
Drop func(Matcher) Matcher
|
|
||||||
}{
|
|
||||||
Rune: MatchRune,
|
|
||||||
Runes: MatchRunes,
|
|
||||||
RuneRange: MatchRuneRange,
|
|
||||||
String: MatchString,
|
|
||||||
StringNoCase: MatchStringNoCase,
|
|
||||||
Optional: MatchOptional,
|
|
||||||
AnyOf: MatchAnyOf,
|
|
||||||
Not: MatchNot,
|
|
||||||
Sequence: MatchSequence,
|
|
||||||
Repeat: MatchRepeat,
|
|
||||||
Min: MatchMin,
|
|
||||||
Max: MatchMax,
|
|
||||||
ZeroOrMore: MatchZeroOrMore,
|
|
||||||
OneOrMore: MatchOneOrMore,
|
|
||||||
MinMax: MatchMinMax,
|
|
||||||
Separated: MatchSeparated,
|
|
||||||
Drop: MatchDrop,
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchRune(r rune) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
return ok && input == r
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchRunes(runes ...rune) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
if ok {
|
|
||||||
for _, r := range runes {
|
|
||||||
if input == r {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchRuneRange(start rune, end rune) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
return ok && input >= start && input <= end
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchString(s string) Matcher {
|
|
||||||
var matchers = []Matcher{}
|
|
||||||
for _, r := range s {
|
|
||||||
matchers = append(matchers, MatchRune(r))
|
|
||||||
}
|
|
||||||
return MatchSequence(matchers...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchStringNoCase(s string) Matcher {
|
|
||||||
var matchers = []Matcher{}
|
|
||||||
for _, r := range s {
|
|
||||||
u := unicode.ToUpper(r)
|
|
||||||
l := unicode.ToLower(r)
|
|
||||||
matchers = append(matchers, MatchRunes(u, l))
|
|
||||||
}
|
|
||||||
return MatchSequence(matchers...)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchOptional(matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchSequence(matchers ...Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
for _, matcher := range matchers {
|
|
||||||
if !matcher(child) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
child.Merge()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchAnyOf(matchers ...Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
for _, matcher := range matchers {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
return child.Merge()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchNot(matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if !matcher(child) {
|
|
||||||
return child.Merge()
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchRepeat(count int, matcher Matcher) Matcher {
|
|
||||||
return MatchMinMax(count, count, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchMin(min int, matcher Matcher) Matcher {
|
|
||||||
return MatchMinMax(min, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchMax(max int, matcher Matcher) Matcher {
|
|
||||||
return MatchMinMax(-1, max, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchZeroOrMore(matcher Matcher) Matcher {
|
|
||||||
return MatchMinMax(0, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchOneOrMore(matcher Matcher) Matcher {
|
|
||||||
return MatchMinMax(1, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if min >= 0 && max >= 0 && min > max {
|
|
||||||
panic("internal parser error: MatchRepeat definition error: max must not be < min")
|
|
||||||
}
|
|
||||||
total := 0
|
|
||||||
// Specified min: check for the minimum required amount of matches.
|
|
||||||
for min > 0 && total < min {
|
|
||||||
total++
|
|
||||||
if !matcher(child) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// No specified max: include the rest of the available matches.
|
|
||||||
if max < 0 {
|
|
||||||
child.Merge()
|
|
||||||
for matcher(child) {
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// Specified max: include the rest of the availble matches, up to the max.
|
|
||||||
child.Merge()
|
|
||||||
for total < max {
|
|
||||||
total++
|
|
||||||
if !matcher(child) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
|
|
||||||
return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchDrop(matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
child.Clear()
|
|
||||||
child.Merge()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,112 +0,0 @@
|
||||||
package parsekit_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
|
||||||
)
|
|
||||||
|
|
||||||
func ExampleMatchAnyRune(t *testing.T) {
|
|
||||||
parser := parsekit.New(
|
|
||||||
func(p *parsekit.P) {
|
|
||||||
p.Expects("Any valid rune")
|
|
||||||
if p.On(a.AnyRune).Accept().End() {
|
|
||||||
p.EmitLiteral(TestItem)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
run := parser.Parse("¡Any / valid / character will dö!")
|
|
||||||
match, _, ok := run.Next()
|
|
||||||
if ok {
|
|
||||||
fmt.Printf("Match = %q\n", match)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCombinators(t *testing.T) {
|
|
||||||
for i, c := range []struct {
|
|
||||||
input string
|
|
||||||
matcher parsekit.Matcher
|
|
||||||
mustMatch bool
|
|
||||||
expected string
|
|
||||||
}{
|
|
||||||
{"xxx", c.Rune('x'), true, "x"},
|
|
||||||
{"x ", c.Rune(' '), false, ""},
|
|
||||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
|
||||||
{"bb", c.RuneRange('b', 'e'), true, "b"},
|
|
||||||
{"cc", c.RuneRange('b', 'e'), true, "c"},
|
|
||||||
{"dd", c.RuneRange('b', 'e'), true, "d"},
|
|
||||||
{"ee", c.RuneRange('b', 'e'), true, "e"},
|
|
||||||
{"ff", c.RuneRange('b', 'e'), false, ""},
|
|
||||||
{"Hello, world!", c.String("Hello"), true, "Hello"},
|
|
||||||
{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
|
|
||||||
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
|
|
||||||
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
|
|
||||||
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
|
|
||||||
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
|
|
||||||
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
|
|
||||||
{"abc", c.Not(c.Rune('b')), true, "a"},
|
|
||||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
|
||||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
|
||||||
{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
|
|
||||||
{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
|
|
||||||
{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
|
|
||||||
{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
|
|
||||||
{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
|
|
||||||
{"", c.Min(0, c.Rune('a')), true, ""},
|
|
||||||
{"a", c.Min(0, c.Rune('a')), true, "a"},
|
|
||||||
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
|
|
||||||
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
|
|
||||||
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
|
|
||||||
{"", c.Max(4, c.Rune('b')), true, ""},
|
|
||||||
{"X", c.Max(4, c.Rune('b')), true, ""},
|
|
||||||
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
|
|
||||||
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
|
|
||||||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
|
||||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
|
||||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
|
||||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
|
||||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
|
||||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
|
||||||
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
|
|
||||||
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
|
|
||||||
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
|
||||||
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
|
||||||
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
|
|
||||||
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
|
|
||||||
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
|
|
||||||
{"", c.OneOrMore(c.Rune('d')), false, ""},
|
|
||||||
{"X", c.OneOrMore(c.Rune('d')), false, ""},
|
|
||||||
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
|
|
||||||
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
|
|
||||||
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
|
|
||||||
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
|
|
||||||
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
|
|
||||||
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
|
|
||||||
{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
|
|
||||||
{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
|
|
||||||
{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
|
|
||||||
{"ghijkl", c.Optional(c.Rune('h')), true, ""},
|
|
||||||
{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
|
|
||||||
{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
|
|
||||||
{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
|
|
||||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
|
||||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
|
||||||
} {
|
|
||||||
parser := parsekit.New(c.matcher).Parse(c.input)
|
|
||||||
item, err, ok := parser.Next()
|
|
||||||
|
|
||||||
if c.mustMatch {
|
|
||||||
if !ok {
|
|
||||||
t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
|
||||||
} else if item.Type != parsekit.MatchedItem {
|
|
||||||
t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
|
||||||
} else if item.Value != c.expected {
|
|
||||||
t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i, c.input, c.expected, item.Value)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if ok {
|
|
||||||
t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,187 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Matcher is the function type that must be implemented to create a function
// that can be used in conjunction with parsekit.P.On() or parsekit.New().
// Its purpose is to check if input data match some kind of pattern and to
// report back the match.
//
// A Matcher function gets a MatchDialog as its input and returns a boolean to
// indicate whether or not the Matcher found a match on the input.
// The MatchDialog is used both for retrieving the input data to match against
// and for reporting back the results.
type Matcher func(m *MatchDialog) bool
|
||||||
|
|
||||||
|
// MatchDialog is used by Matcher functions to retrieve runes from the
|
||||||
|
// input to match against and to report back results.
|
||||||
|
//
|
||||||
|
// Basic operation:
|
||||||
|
//
|
||||||
|
// To retrieve the next rune from the input, the Matcher function can call
|
||||||
|
// the MatchDialog.NextRune() method.
|
||||||
|
//
|
||||||
|
// The Matcher function can then evaluate the retrieved rune and either
|
||||||
|
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
|
||||||
|
// the rune is added to the output of the MatchDialog. When using
|
||||||
|
// MatchDialog.Skip(), the rune will not be added to the output. It is
|
||||||
|
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
|
||||||
|
// a rune, before calling NextRune() again.
|
||||||
|
//
|
||||||
|
// Eventually, the Matcher function must return a boolean value, indicating
|
||||||
|
// whether or not a match was found. When true, then the calling code will
|
||||||
|
// use the runes that were accepted into the MatchDialog's resulting output.
|
||||||
|
//
|
||||||
|
// Forking operation for easy lookahead support:
|
||||||
|
//
|
||||||
|
// Sometimes, a Matcher function must be able to perform a lookahead, which
|
||||||
|
// might either succeed or fail. In case of a failing lookahead, the state
|
||||||
|
// of the MatchDialog must be brought back to the original state.
|
||||||
|
//
|
||||||
|
// The way in which this is supported, is by forking a MatchDialog by calling
|
||||||
|
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
|
||||||
|
// output buffer, but using the same input offset as the forked parent.
|
||||||
|
//
|
||||||
|
// The Matcher function can then use the same interface as described for
|
||||||
|
// normal operation to retrieve runes from the input and to fill the output
|
||||||
|
// buffer. When the Matcher function decides that the lookahead was successful,
|
||||||
|
// then the method MatchDialog.Merge() can be called on the forked child to
|
||||||
|
// append the resulting output from the child to the parent's resulting output,
|
||||||
|
// and to update the parent input offset to that of the child.
|
||||||
|
//
|
||||||
|
// When the Matcher function decides that the lookahead was unsuccessful, then
|
||||||
|
// it can simply discard the forked child. The parent MatchDialog was never
|
||||||
|
// modified, so a new match can be safely started using that parent, as if the
|
||||||
|
// lookahead never happened.
|
||||||
|
type MatchDialog struct {
	p           *P           // parser state, used by NextRune() to peek at input data to match against (TODO should be interface)
	inputOffset int          // the byte offset into the input; advanced by Accept() and Skip()
	input       []rune       // the runes that were successfully retrieved from the input by NextRune()
	output      []rune       // the runes that were accepted as output for the Matcher using Accept()
	currRune    *runeToken   // holds the last rune that was read from the input; nil when no read is pending
	parent      *MatchDialog // the parent MatchDialog, in case this one was forked; nil otherwise
}
|
||||||
|
|
||||||
|
// runeToken holds the result of a single read from the input, as stored by
// MatchDialog.NextRune() until the rune is accepted or skipped.
type runeToken struct {
	Rune     rune // the rune that was returned by the read
	ByteSize int  // the size that was returned along with the rune; added to the input offset on Accept() or Skip()
	OK       bool // whether the read succeeded; false means that the rune cannot be accepted or skipped
}
|
||||||
|
|
||||||
|
// NextRune retrieves the next rune from the input.
|
||||||
|
//
|
||||||
|
// It returns the rune and a boolean. The boolean will be false in case an
|
||||||
|
// invalid UTF8 rune or the end of the file was encountered.
|
||||||
|
//
|
||||||
|
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
||||||
|
// to respectively add the rune to the MatchDialog's resulting output or to
|
||||||
|
// fully ignore it. This way, a Matcher has full control over what runes are
|
||||||
|
// significant for the resulting output of that matcher.
|
||||||
|
//
|
||||||
|
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||||
|
// rune is explicitly accepted or skipped as described above.
|
||||||
|
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||||
|
if m.currRune != nil {
|
||||||
|
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
||||||
|
}
|
||||||
|
r, w, ok := m.p.peek(m.inputOffset)
|
||||||
|
m.currRune = &runeToken{r, w, ok}
|
||||||
|
if ok {
|
||||||
|
m.input = append(m.input, r)
|
||||||
|
}
|
||||||
|
return r, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fork splits off a child MatchDialog, containing the same offset as the
|
||||||
|
// parent MatchDialog, but with all other data in a fresh state.
|
||||||
|
//
|
||||||
|
// By forking, a Matcher function can freely work with a MatchDialog, without
|
||||||
|
// affecting the parent MatchDialog. This is for example useful when the
|
||||||
|
// Matcher function must perform some form of lookahead.
|
||||||
|
//
|
||||||
|
// When a successful match was found, the Matcher function can call
|
||||||
|
// child.Merge() to have the resulting output added to the parent MatchDialog.
|
||||||
|
// When no match was found, the forked child can simply be discarded.
|
||||||
|
//
|
||||||
|
// Example case: A Matcher checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
||||||
|
// This is done in 4 steps and only after finishing all steps, the Matcher
|
||||||
|
// function can confirm a successful match. The Matcher function for this
|
||||||
|
// case could look like this (yes, it's naive, but it shows the point):
|
||||||
|
//
|
||||||
|
// func MatchAbcd(m *MatchDialog) bool {
|
||||||
|
// child := m.Fork() // fork to keep m from input untouched
|
||||||
|
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||||
|
// if r, ok := m.NextRune(); !ok || r != letter {
|
||||||
|
// return false // report mismatch, m is left untouched
|
||||||
|
// }
|
||||||
|
// child.Accept() // add rune to child output
|
||||||
|
// }
|
||||||
|
// child.Merge() // we have a match, add resulting output to parent
|
||||||
|
// return true // and report the successful match
|
||||||
|
// }
|
||||||
|
func (m *MatchDialog) Fork() *MatchDialog {
|
||||||
|
child := &MatchDialog{
|
||||||
|
p: m.p,
|
||||||
|
inputOffset: m.inputOffset,
|
||||||
|
parent: m,
|
||||||
|
}
|
||||||
|
return child
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept will add the last rune as read by NextRune() to the resulting
|
||||||
|
// output of the MatchDialog.
|
||||||
|
func (m *MatchDialog) Accept() {
|
||||||
|
m.checkAllowedCall("Accept()")
|
||||||
|
m.output = append(m.output, m.currRune.Rune)
|
||||||
|
m.inputOffset += m.currRune.ByteSize
|
||||||
|
m.currRune = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip will ignore the last rune as read by NextRune().
|
||||||
|
func (m *MatchDialog) Skip() {
|
||||||
|
m.checkAllowedCall("Skip()")
|
||||||
|
m.inputOffset += m.currRune.ByteSize
|
||||||
|
m.currRune = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MatchDialog) checkAllowedCall(name string) {
|
||||||
|
if m.currRune == nil {
|
||||||
|
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
||||||
|
}
|
||||||
|
if !m.currRune.OK {
|
||||||
|
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge merges the resulting output from a forked child MatchDialog back into
|
||||||
|
// its parent: The runes that are accepted in the child are added to the parent
|
||||||
|
// runes and the parent's offset is advanced to the child's offset.
|
||||||
|
//
|
||||||
|
// After the merge, the child MatchDialog is reset so it can immediately be
|
||||||
|
// reused for performing another match (all data are cleared, except for the
|
||||||
|
// input offset which is kept at its current position).
|
||||||
|
func (m *MatchDialog) Merge() bool {
|
||||||
|
if m.parent == nil {
|
||||||
|
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
||||||
|
}
|
||||||
|
m.parent.input = append(m.parent.input, m.input...)
|
||||||
|
m.parent.output = append(m.parent.output, m.output...)
|
||||||
|
m.parent.inputOffset = m.inputOffset
|
||||||
|
m.ClearOutput()
|
||||||
|
m.ClearInput()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearOutput clears the resulting output for the MatchDialog, but it keeps
// the input and input offset as-is.
//
// The output is replaced by a fresh, empty rune slice, so slices that were
// derived from the old output are not affected.
func (m *MatchDialog) ClearOutput() {
	m.output = []rune{}
}
|
||||||
|
|
||||||
|
// ClearInput clears the input for the MatchDialog, but it keeps the output
// and input offset as-is.
//
// The input is replaced by a fresh, empty rune slice, so slices that were
// derived from the old input are not affected.
func (m *MatchDialog) ClearInput() {
	m.input = []rune{}
}
|
|
@ -0,0 +1,477 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// C provides convenient access to a range of parser/combinators
// that can be used to build Matcher functions.
//
// Every field is a constructor function: calling it returns a new Matcher.
// Each field is bound to the Match* function of this package that carries
// the same name (e.g. C.Rune is MatchRune and C.AnyOf is MatchAnyOf), so the
// documentation of those functions describes the behavior of the fields.
//
// When using C in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
	Rune         func(rune) Matcher
	Runes        func(...rune) Matcher
	RuneRange    func(rune, rune) Matcher
	String       func(string) Matcher
	StringNoCase func(string) Matcher
	AnyOf        func(...Matcher) Matcher
	Not          func(Matcher) Matcher
	Optional     func(Matcher) Matcher
	Sequence     func(...Matcher) Matcher
	Repeat       func(int, Matcher) Matcher
	Min          func(int, Matcher) Matcher
	Max          func(int, Matcher) Matcher
	ZeroOrMore   func(Matcher) Matcher
	OneOrMore    func(Matcher) Matcher
	MinMax       func(int, int, Matcher) Matcher
	Separated    func(Matcher, Matcher) Matcher
	Drop         func(Matcher) Matcher
	Trim         func(Matcher, string) Matcher
	TrimLeft     func(Matcher, string) Matcher
	TrimRight    func(Matcher, string) Matcher
}{
	Rune:         MatchRune,
	Runes:        MatchRunes,
	RuneRange:    MatchRuneRange,
	String:       MatchString,
	StringNoCase: MatchStringNoCase,
	Optional:     MatchOptional,
	AnyOf:        MatchAnyOf,
	Not:          MatchNot,
	Sequence:     MatchSequence,
	Repeat:       MatchRepeat,
	Min:          MatchMin,
	Max:          MatchMax,
	ZeroOrMore:   MatchZeroOrMore,
	OneOrMore:    MatchOneOrMore,
	MinMax:       MatchMinMax,
	Separated:    MatchSeparated,
	Drop:         MatchDrop,
	Trim:         MatchTrim,
	TrimLeft:     MatchTrimLeft,
	TrimRight:    MatchTrimRight,
}
|
||||||
|
|
||||||
|
// A provides convenient access to a range of atoms that can be used to
|
||||||
|
// build combinators or parsing rules.
|
||||||
|
//
|
||||||
|
// In parsekit, an atom is defined as a ready to go Matcher function.
|
||||||
|
var A = struct {
|
||||||
|
EndOfFile Matcher
|
||||||
|
AnyRune Matcher
|
||||||
|
Space Matcher
|
||||||
|
Tab Matcher
|
||||||
|
CR Matcher
|
||||||
|
LF Matcher
|
||||||
|
CRLF Matcher
|
||||||
|
Excl Matcher
|
||||||
|
DoubleQuote Matcher
|
||||||
|
Hash Matcher
|
||||||
|
Dollar Matcher
|
||||||
|
Percent Matcher
|
||||||
|
Amp Matcher
|
||||||
|
SingleQuote Matcher
|
||||||
|
RoundOpen Matcher
|
||||||
|
RoundClose Matcher
|
||||||
|
Asterisk Matcher
|
||||||
|
Plus Matcher
|
||||||
|
Comma Matcher
|
||||||
|
Minus Matcher
|
||||||
|
Dot Matcher
|
||||||
|
Slash Matcher
|
||||||
|
Colon Matcher
|
||||||
|
Semicolon Matcher
|
||||||
|
AngleOpen Matcher
|
||||||
|
Equal Matcher
|
||||||
|
AngleClose Matcher
|
||||||
|
Question Matcher
|
||||||
|
At Matcher
|
||||||
|
SquareOpen Matcher
|
||||||
|
Backslash Matcher
|
||||||
|
SquareClose Matcher
|
||||||
|
Caret Matcher
|
||||||
|
Underscore Matcher
|
||||||
|
Backquote Matcher
|
||||||
|
CurlyOpen Matcher
|
||||||
|
Pipe Matcher
|
||||||
|
CurlyClose Matcher
|
||||||
|
Tilde Matcher
|
||||||
|
Newline Matcher
|
||||||
|
Whitespace Matcher
|
||||||
|
WhitespaceAndNewlines Matcher
|
||||||
|
EndOfLine Matcher
|
||||||
|
Digit Matcher
|
||||||
|
ASCII Matcher
|
||||||
|
ASCIILower Matcher
|
||||||
|
ASCIIUpper Matcher
|
||||||
|
HexDigit Matcher
|
||||||
|
}{
|
||||||
|
EndOfFile: MatchEndOfFile(),
|
||||||
|
AnyRune: MatchAnyRune(),
|
||||||
|
Space: C.Rune(' '),
|
||||||
|
Tab: C.Rune('\t'),
|
||||||
|
CR: C.Rune('\r'),
|
||||||
|
LF: C.Rune('\n'),
|
||||||
|
CRLF: C.String("\r\n"),
|
||||||
|
Excl: C.Rune('!'),
|
||||||
|
DoubleQuote: C.Rune('"'),
|
||||||
|
Hash: C.Rune('#'),
|
||||||
|
Dollar: C.Rune('$'),
|
||||||
|
Percent: C.Rune('%'),
|
||||||
|
Amp: C.Rune('&'),
|
||||||
|
SingleQuote: C.Rune('\''),
|
||||||
|
RoundOpen: C.Rune('('),
|
||||||
|
RoundClose: C.Rune(')'),
|
||||||
|
Asterisk: C.Rune('*'),
|
||||||
|
Plus: C.Rune('+'),
|
||||||
|
Comma: C.Rune(','),
|
||||||
|
Minus: C.Rune('-'),
|
||||||
|
Dot: C.Rune('.'),
|
||||||
|
Slash: C.Rune('/'),
|
||||||
|
Colon: C.Rune(':'),
|
||||||
|
Semicolon: C.Rune(';'),
|
||||||
|
AngleOpen: C.Rune('<'),
|
||||||
|
Equal: C.Rune('='),
|
||||||
|
AngleClose: C.Rune('>'),
|
||||||
|
Question: C.Rune('?'),
|
||||||
|
At: C.Rune('@'),
|
||||||
|
SquareOpen: C.Rune('['),
|
||||||
|
Backslash: C.Rune('\\'),
|
||||||
|
SquareClose: C.Rune(']'),
|
||||||
|
Caret: C.Rune('^'),
|
||||||
|
Underscore: C.Rune('_'),
|
||||||
|
Backquote: C.Rune('`'),
|
||||||
|
CurlyOpen: C.Rune('{'),
|
||||||
|
Pipe: C.Rune('|'),
|
||||||
|
CurlyClose: C.Rune('}'),
|
||||||
|
Tilde: C.Rune('~'),
|
||||||
|
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
|
||||||
|
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.String("\r\n"), C.Rune('\n'))),
|
||||||
|
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||||
|
Digit: C.RuneRange('0', '9'),
|
||||||
|
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||||
|
ASCIILower: C.RuneRange('a', 'z'),
|
||||||
|
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||||
|
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRune creates a Matcher function that checks if the next rune from
|
||||||
|
// the input matches the provided rune.
|
||||||
|
func MatchRune(expected rune) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
input, ok := m.NextRune()
|
||||||
|
if ok && input == expected {
|
||||||
|
m.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRunes creates a Matcher function that that checks if the next rune
|
||||||
|
// from the input is one of the provided runes.
|
||||||
|
func MatchRunes(expected ...rune) Matcher {
|
||||||
|
s := string(expected)
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
input, ok := m.NextRune()
|
||||||
|
if ok {
|
||||||
|
if strings.ContainsRune(s, input) {
|
||||||
|
m.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRuneRange creates a Matcher function that that checks if the next rune
|
||||||
|
// from the input is contained by the provided rune range.
|
||||||
|
//
|
||||||
|
// The rune range is defined by a start and an end rune, inclusive, so:
|
||||||
|
//
|
||||||
|
// MatchRuneRange('g', 'k')
|
||||||
|
//
|
||||||
|
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||||
|
func MatchRuneRange(start rune, end rune) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
if end < start {
|
||||||
|
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||||
|
}
|
||||||
|
input, ok := m.NextRune()
|
||||||
|
if ok && input >= start && input <= end {
|
||||||
|
m.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchString creater a Matcher that will check if the upcoming runes on the
|
||||||
|
// input match the provided string.
|
||||||
|
// TODO make this a more efficient string-level match?
|
||||||
|
func MatchString(expected string) Matcher {
|
||||||
|
var matchers = []Matcher{}
|
||||||
|
for _, r := range expected {
|
||||||
|
matchers = append(matchers, MatchRune(r))
|
||||||
|
}
|
||||||
|
return MatchSequence(matchers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchStringNoCase creater a Matcher that will check if the upcoming runes
|
||||||
|
// on the input match the provided string in a case-insensitive manner.
|
||||||
|
// TODO make this a more efficient string-level match?
|
||||||
|
func MatchStringNoCase(expected string) Matcher {
|
||||||
|
var matchers = []Matcher{}
|
||||||
|
for _, r := range expected {
|
||||||
|
u := unicode.ToUpper(r)
|
||||||
|
l := unicode.ToLower(r)
|
||||||
|
matchers = append(matchers, MatchRunes(u, l))
|
||||||
|
}
|
||||||
|
return MatchSequence(matchers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchOptional creates a Matcher that makes the provided Matcher optional.
|
||||||
|
// When the provided Matcher applies, then its output is used, otherwise
|
||||||
|
// no output is generated but still a successful match is reported.
|
||||||
|
func MatchOptional(matcher Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if matcher(child) {
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchSequence creates a Matcher that checks if the provided Matchers can be
|
||||||
|
// applied in their exact order. Only if all matcher apply, the sequence
|
||||||
|
// reports successful match.
|
||||||
|
func MatchSequence(matchers ...Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
for _, matcher := range matchers {
|
||||||
|
if !matcher(child) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAnyOf creates a Matcher that checks if any of the provided Matchers
|
||||||
|
// can be applied. They are applied in their provided order. The first Matcher
|
||||||
|
// that applies is used for reporting back a match.
|
||||||
|
func MatchAnyOf(matchers ...Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
for _, matcher := range matchers {
|
||||||
|
child := m.Fork()
|
||||||
|
if matcher(child) {
|
||||||
|
return child.Merge()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchNot creates a Matcher that checks if the provided Matcher applies to
|
||||||
|
// the current input. If it does, then a failed match will be reported. If it
|
||||||
|
// does not, then the next rune from the input will be reported as a match.
|
||||||
|
func MatchNot(matcher Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
probe := m.Fork()
|
||||||
|
if matcher(probe) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
_, ok := m.NextRune()
|
||||||
|
if ok {
|
||||||
|
m.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRepeat creates a Matcher that checks if the provided Matcher can be
// applied exactly the provided amount of times. It uses the provided amount
// as both the minimum and the maximum number of matches.
//
// Note that the input can contain more Matches for the provided matcher, e.g.:
//
//	MatchRepeat(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" of the input.
func MatchRepeat(times int, matcher Matcher) Matcher {
	return matchMinMax(times, times, matcher)
}
|
||||||
|
|
||||||
|
// MatchMin creates a Matcher that checks if the provided Matcher can be
// applied at least the provided minimum number of times.
// When more matches are possible, these will be included in the output
// (no maximum is applied).
func MatchMin(min int, matcher Matcher) Matcher {
	// A max of -1 tells matchMinMax that there is no upper limit.
	return matchMinMax(min, -1, matcher)
}
|
||||||
|
|
||||||
|
// MatchMax creates a Matcher that applies the provided Matcher at most the
// provided maximum number of times.
// When more matches are possible, matching stops at the maximum and the
// remaining input is left as-is.
// Zero matches are considered a successful match.
//
// Note that a negative max is not rejected here; matchMinMax treats a
// negative max as "no upper limit".
func MatchMax(max int, matcher Matcher) Matcher {
	return matchMinMax(0, max, matcher)
}
|
||||||
|
|
||||||
|
// MatchZeroOrMore creates a Matcher that checks if the provided Matcher can
// be applied zero or more times. All matches will be included in the output.
// Zero matches are considered a successful match.
func MatchZeroOrMore(matcher Matcher) Matcher {
	// min 0, and a max of -1 to tell matchMinMax that there is no upper limit.
	return matchMinMax(0, -1, matcher)
}
|
||||||
|
|
||||||
|
// MatchOneOrMore creates a Matcher that checks if the provided Matcher can
// be applied one or more times. All matches will be included in the output.
func MatchOneOrMore(matcher Matcher) Matcher {
	// min 1, and a max of -1 to tell matchMinMax that there is no upper limit.
	return matchMinMax(1, -1, matcher)
}
|
||||||
|
|
||||||
|
// MatchMinMax creates a Matcher that checks if the provided Matcher can
|
||||||
|
// be applied between the provided minimum and maximum number of times,
|
||||||
|
// inclusive. All matches will be included in the output.
|
||||||
|
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
||||||
|
if max < 0 {
|
||||||
|
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
||||||
|
}
|
||||||
|
if min < 0 {
|
||||||
|
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
||||||
|
}
|
||||||
|
return matchMinMax(min, max, matcher)
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchMinMax(min int, max int, matcher Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if max >= 0 && min > max {
|
||||||
|
panic(fmt.Sprintf("internal parser error: MatchRepeat definition error: max %d must not be < min %d", max, min))
|
||||||
|
}
|
||||||
|
total := 0
|
||||||
|
// Check for the minimum required amount of matches.
|
||||||
|
for total < min {
|
||||||
|
total++
|
||||||
|
if !matcher(child) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// No specified max: include the rest of the available matches.
|
||||||
|
// Specified max: include the rest of the availble matches, up to the max.
|
||||||
|
child.Merge()
|
||||||
|
for max < 0 || total < max {
|
||||||
|
total++
|
||||||
|
if !matcher(child) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchSeparated creates a Matcher that checks for a pattern of one or more
|
||||||
|
// Matchers of one type (the separated), separated by Matches of another type
|
||||||
|
// (the separator). All matches (separated + separator) are included in the
|
||||||
|
// output.
|
||||||
|
func MatchSeparated(separated Matcher, separator Matcher) Matcher {
|
||||||
|
return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchDrop creates a Matcher that checks if the provided Matcher applies.
|
||||||
|
// If it does, then a successful match is reported, but its output is not used.
|
||||||
|
// If the Matcher does not apply, a successful match is reported as well.
|
||||||
|
func MatchDrop(matcher Matcher) Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if matcher(child) {
|
||||||
|
child.ClearOutput()
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchTrim creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from both the left and the right of the output.
// The trimmed output is reported back as the match output.
// If the provided Matcher does not apply, a failed match is reported.
func MatchTrim(matcher Matcher, cutset string) Matcher {
	return func(m *MatchDialog) bool {
		// The two booleans are matchTrim's trimLeft and trimRight flags.
		return matchTrim(m, cutset, matcher, true, true)
	}
}
|
||||||
|
|
||||||
|
// MatchTrimLeft creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from the left of the output.
// The trimmed output is reported back as the match output.
// If the provided Matcher does not apply, a failed match is reported.
func MatchTrimLeft(matcher Matcher, cutset string) Matcher {
	return func(m *MatchDialog) bool {
		// The two booleans are matchTrim's trimLeft and trimRight flags.
		return matchTrim(m, cutset, matcher, true, false)
	}
}
|
||||||
|
|
||||||
|
// MatchTrimRight creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and characters from the provided
// cutset are trimmed from the right of the output.
// The trimmed output is reported back as the match output.
// If the provided Matcher does not apply, a failed match is reported.
func MatchTrimRight(matcher Matcher, cutset string) Matcher {
	return func(m *MatchDialog) bool {
		// The two booleans are matchTrim's trimLeft and trimRight flags.
		return matchTrim(m, cutset, matcher, false, true)
	}
}
|
||||||
|
|
||||||
|
func matchTrim(m *MatchDialog, cutset string, matcher Matcher, trimLeft bool, trimRight bool) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if matcher(child) {
|
||||||
|
child.Merge()
|
||||||
|
s := string(m.output)
|
||||||
|
if trimLeft {
|
||||||
|
s = strings.TrimLeft(s, cutset)
|
||||||
|
}
|
||||||
|
if trimRight {
|
||||||
|
s = strings.TrimRight(s, cutset)
|
||||||
|
}
|
||||||
|
m.output = []rune(s)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchEndOfFile creates a Matcher that checks if the end of the input data
|
||||||
|
// has been reached. This Matcher will never produce output. It only reports
|
||||||
|
// a successful or a failing match through its boolean return value.
|
||||||
|
func MatchEndOfFile() Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
fork := m.Fork()
|
||||||
|
input, ok := fork.NextRune()
|
||||||
|
return !ok && input == EOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAnyRune creates a Matcher function that checks if a valid rune can be
|
||||||
|
// read from the input. It reports back a successful match if the end of the
|
||||||
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||||
|
func MatchAnyRune() Matcher {
|
||||||
|
return func(m *MatchDialog) bool {
|
||||||
|
_, ok := m.NextRune()
|
||||||
|
if ok {
|
||||||
|
m.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,203 @@
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ExampleMatchAnyRune builds a parser with a single state handler that uses
// the a.AnyRune atom, runs it on a string that starts with a multi-byte
// rune, and prints the first item that the parse run yields.
//
// NOTE(review): the function body carries no expected-output comment, so
// presumably the go tool only compiles this example and does not execute or
// verify it. Confirm whether that is intended.
func ExampleMatchAnyRune() {
	parser := parsekit.New(
		func(p *parsekit.P) {
			// Description of what this state expects to find in the input.
			p.Expects("Any valid rune")
			if p.On(a.AnyRune).Accept().End() {
				p.EmitLiteral(TestItem)
			}
		})
	run := parser.Parse("¡Any / valid / character will dö!")
	match, _, ok := run.Next()
	if ok {
		fmt.Printf("Match = %q\n", match)
	}
}
|
||||||
|
|
||||||
|
// TestCombinators runs a table of test cases for the combinators from
// parsekit.C through RunMatcherTests.
//
// Each row holds, in order: the input string, the Matcher under test, a
// boolean, and a string.
// NOTE(review): the boolean and the string look like "must the Matcher
// apply" and "the expected output"; RunMatcherTests and MatcherTest are
// defined elsewhere, verify against their definitions.
func TestCombinators(t *testing.T) {
	RunMatcherTests(t, []MatcherTest{
		{"xxx", c.Rune('x'), true, "x"},
		{"x ", c.Rune(' '), false, ""},
		{"aa", c.RuneRange('b', 'e'), false, ""},
		{"bb", c.RuneRange('b', 'e'), true, "b"},
		{"cc", c.RuneRange('b', 'e'), true, "c"},
		{"dd", c.RuneRange('b', 'e'), true, "d"},
		{"ee", c.RuneRange('b', 'e'), true, "e"},
		{"ff", c.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", c.String("Hello"), true, "Hello"},
		{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
		{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", c.Runes('+', '-', '*', '/'), false, ""},
		{"abc", c.Not(c.Rune('b')), true, "a"},
		{"bcd", c.Not(c.Rune('b')), false, ""},
		{"bcd", c.Not(c.Rune('b')), false, ""},
		{"1010", c.Not(c.Sequence(c.Rune('2'), c.Rune('0'))), true, "1"},
		{"2020", c.Not(c.Sequence(c.Rune('2'), c.Rune('0'))), false, ""},
		{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
		{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
		{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
		{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
		{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
		{"", c.Min(0, c.Rune('a')), true, ""},
		{"a", c.Min(0, c.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
		{"", c.Max(4, c.Rune('b')), true, ""},
		{"X", c.Max(4, c.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(c.Rune('d')), false, ""},
		{"X", c.OneOrMore(c.Rune('d')), false, ""},
		{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
		{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(c.Rune('h')), true, ""},
		{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
		{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", c.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", c.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", c.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" trim ", c.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", c.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", c.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
		{" trim ", c.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", c.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
	})
}
|
||||||
|
|
||||||
|
// TestAtoms runs a table of test cases for the atoms from parsekit.A
// through RunMatcherTests.
//
// Each row holds, in order: the input string, the Matcher under test, a
// boolean, and a string.
// NOTE(review): the boolean and the string look like "must the Matcher
// apply" and "the expected output"; RunMatcherTests and MatcherTest are
// defined elsewhere, verify against their definitions.
// NOTE(review): the last two HexDigit rows pair false with a non-empty
// string, unlike every other false row in this table; confirm whether
// that is intended.
func TestAtoms(t *testing.T) {
	RunMatcherTests(t, []MatcherTest{
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
		{"", a.AnyRune, false, ""}, // end of file
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.RoundOpen, true, "("},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"+", a.Plus, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},
		{" \t \t \r\n", a.Whitespace, true, " \t \t "},
		{"\r", a.WhitespaceAndNewlines, false, ""},
		{" \t\r\n \r", a.WhitespaceAndNewlines, true, " \t\r\n "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"0", a.HexDigit, true, "0"},
		{"9", a.HexDigit, true, "9"},
		{"a", a.HexDigit, true, "a"},
		{"f", a.HexDigit, true, "f"},
		{"A", a.HexDigit, true, "A"},
		{"F", a.HexDigit, true, "F"},
		{"g", a.HexDigit, false, "g"},
		{"G", a.HexDigit, false, "G"},
	})
}
|
||||||
|
|
||||||
|
func TestSequenceOfRunes(t *testing.T) {
|
||||||
|
sequence := c.Sequence(
|
||||||
|
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
||||||
|
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||||
|
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||||
|
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||||
|
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||||
|
)
|
||||||
|
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||||
|
parser := parsekit.New(func(p *parsekit.P) {
|
||||||
|
p.Expects("Sequence of runes")
|
||||||
|
if p.On(sequence).Accept().End() {
|
||||||
|
p.EmitLiteral(TestItem)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
item, err, ok := parser.Parse(input).Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if item.Value != input {
|
||||||
|
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||||
|
}
|
||||||
|
}
|
98
parsekit.go
98
parsekit.go
|
@ -6,6 +6,14 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.New() method.
//
// To start parsing input data, use the method Parser.Parse(), which returns
// a Run for the provided input.
type Parser struct {
	startState StateHandler // the function that handles the very first state
}
|
||||||
|
|
||||||
// New instantiates a new Parser.
|
// New instantiates a new Parser.
|
||||||
// The logic parameter provides the parsing logic to apply. This can be:
|
// The logic parameter provides the parsing logic to apply. This can be:
|
||||||
//
|
//
|
||||||
|
@ -55,12 +63,13 @@ func makeParserForMatcher(matcher Matcher) *Parser {
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parser is the top-level parser.
|
// Run represents a single parse run for a Parser.
|
||||||
type Parser struct {
|
type Run struct {
|
||||||
startState StateHandler // the function that handles the very first state
|
p *P // a struct holding the internal state of a parse run
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse starts a parse run on the provided input data.
|
// Parse starts a parse run on the provided input data.
|
||||||
|
// To retrieve parse items from the run, make use of the Run.Next() method.
|
||||||
func (p *Parser) Parse(input string) *Run {
|
func (p *Parser) Parse(input string) *Run {
|
||||||
return &Run{
|
return &Run{
|
||||||
p: &P{
|
p: &P{
|
||||||
|
@ -74,69 +83,59 @@ func (p *Parser) Parse(input string) *Run {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run represents a single parse run for a Parser.
|
|
||||||
type Run struct {
|
|
||||||
p *P // a struct holding the internal state of a parse run
|
|
||||||
}
|
|
||||||
|
|
||||||
// P holds the internal state of a parse run.
|
|
||||||
type P struct {
|
|
||||||
state StateHandler // the function that handles the current state
|
|
||||||
nextState StateHandler // the function that will handle the next state
|
|
||||||
routeStack []StateHandler // route stack, for handling nested parsing
|
|
||||||
input string // the scanned input
|
|
||||||
len int // the total length of the input in bytes
|
|
||||||
pos int // current byte scanning position in the input
|
|
||||||
newline bool // keep track of when we have scanned a newline
|
|
||||||
cursorLine int // current row number in the input
|
|
||||||
cursorColumn int // current column position in the input
|
|
||||||
expecting string // a description of what the current state expects to find
|
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values
|
|
||||||
items chan Item // channel of resulting Parser items
|
|
||||||
item Item // the current item as reached by Next() and retrieved by Get()
|
|
||||||
err *Error // an error when lexing failed, retrieved by Error()
|
|
||||||
|
|
||||||
LastMatch string // a string representation of the last matched input data
|
|
||||||
}
|
|
||||||
|
|
||||||
// Next retrieves the next parsed item for a parse run.
|
// Next retrieves the next parsed item for a parse run.
|
||||||
|
//
|
||||||
// When a valid item was found, then the boolean return parameter will be true.
|
// When a valid item was found, then the boolean return parameter will be true.
|
||||||
// On error or when successfully reaching the end of the input, false is returned.
|
// On error or when successfully reaching the end of the input, false is returned.
|
||||||
// When an error occurred, it will be set in the error return value, nil otherwise.
|
// When an error occurred, false will be returned and the error return value will
|
||||||
|
// be set (default is nil).
|
||||||
func (run *Run) Next() (Item, *Error, bool) {
|
func (run *Run) Next() (Item, *Error, bool) {
|
||||||
|
// State handling loop: we handle states, until an Item is ready to be returned.
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
// If a state handler has emitted an (error) Item, then the state handling
|
||||||
|
// loop is stopped and the Item is returned to the caller.
|
||||||
case i := <-run.p.items:
|
case i := <-run.p.items:
|
||||||
return run.makeReturnValues(i)
|
return run.makeReturnValues(i)
|
||||||
|
// Otherwise, the next state handler is looked up and invoked.
|
||||||
default:
|
default:
|
||||||
run.runStatusHandler()
|
run.runNextStateHandler()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// StateHandler defines the type of function that can be used to
|
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
// handle a parser state.
|
switch {
|
||||||
type StateHandler func(*P)
|
case i.Type == ItemEOF:
|
||||||
|
return i, nil, false
|
||||||
|
case i.Type == ItemError:
|
||||||
|
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
||||||
|
return i, run.p.err, false
|
||||||
|
default:
|
||||||
|
run.p.item = i
|
||||||
|
return i, nil, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// runStatusHandler moves the parser, which is bascially a state machine,
|
// runNextStateHandler moves the parser, which is basically a state machine,
|
||||||
// to its next status. It does so by invoking a function of the
|
// to its next status. It does so by invoking a function of the
|
||||||
// type StateHandler. This function represents the current status and
|
// type StateHandler. This function represents the current status and
|
||||||
// is responsible for moving the parser to its next status, depending
|
// is responsible for moving the parser to its next status, depending
|
||||||
// on the parsed input data.
|
// on the parsed input data.
|
||||||
func (run *Run) runStatusHandler() {
|
func (run *Run) runNextStateHandler() {
|
||||||
if state, ok := run.getNextStateHandler(); ok {
|
if state, ok := run.getNextStateHandler(); ok {
|
||||||
run.invokeNextStatusHandler(state)
|
run.invokeNextStateHandler(state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getNextStateHandler determines the next StatusHandler to invoke in order
|
// getNextStateHandler determines the next StateHandler to invoke in order
|
||||||
// to move the parsing state machine one step further.
|
// to move the parsing state machine one step further.
|
||||||
//
|
//
|
||||||
// When implementing a parser, the StateHandler functions must provide
|
// When implementing a parser, the StateHandler functions must provide
|
||||||
// a routing decision in every invocation. A routing decision is one
|
// a routing decision in every invocation. A routing decision is one
|
||||||
// of the following:
|
// of the following:
|
||||||
//
|
//
|
||||||
// * A route is specified explicitly, which means that the next StatusHandler
|
// * A route is specified explicitly, which means that the next StateHandler
|
||||||
// function to invoke is registered during the StateHandler function
|
// function to invoke is registered during the StateHandler function
|
||||||
// invocation. For example: p.RouteTo(nextStatus)
|
// invocation. For example: p.RouteTo(nextStatus)
|
||||||
//
|
//
|
||||||
|
@ -147,9 +146,9 @@ func (run *Run) runStatusHandler() {
|
||||||
// a route explicitly, but otherStatus will be used implicitly after
|
// a route explicitly, but otherStatus will be used implicitly after
|
||||||
// the nextStatus function has returned.
|
// the nextStatus function has returned.
|
||||||
//
|
//
|
||||||
// * An expectation is registered by the StatusHandler.
|
// * An expectation is registered by the StateHandler.
|
||||||
// For example: p.Expects("a cool thing")
|
// For example: p.Expects("a cool thing")
|
||||||
// When the StatusHandler returns without having specified a route, this
|
// When the StateHandler returns without having specified a route, this
|
||||||
// expectation is used to generate an "unexpected input" error message.
|
// expectation is used to generate an "unexpected input" error message.
|
||||||
//
|
//
|
||||||
// When no routing decision is provided by a StateHandler, then this is
|
// When no routing decision is provided by a StateHandler, then this is
|
||||||
|
@ -169,24 +168,11 @@ func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// invokeNextStatusHandler moves the parser state to the provided state
|
// invokeNextStateHandler moves the parser state to the provided state
|
||||||
// and invokes the StatusHandler function.
|
// and invokes the StateHandler function.
|
||||||
func (run *Run) invokeNextStatusHandler(state StateHandler) {
|
func (run *Run) invokeNextStateHandler(state StateHandler) {
|
||||||
run.p.state = state
|
run.p.state = state
|
||||||
run.p.nextState = nil
|
run.p.nextState = nil
|
||||||
run.p.expecting = ""
|
run.p.expecting = ""
|
||||||
run.p.state(run.p)
|
run.p.state(run.p)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
|
||||||
switch {
|
|
||||||
case i.Type == ItemEOF:
|
|
||||||
return i, nil, false
|
|
||||||
case i.Type == ItemError:
|
|
||||||
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
|
||||||
return i, run.p.err, false
|
|
||||||
default:
|
|
||||||
run.p.item = i
|
|
||||||
return i, nil, true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,7 +1,46 @@
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
import "git.makaay.nl/mauricem/go-parsekit"
|
// This file only provides building blocks for writing tests.
|
||||||
|
// No actual tests belong in this file.
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
const TestItem parsekit.ItemType = 1
|
const TestItem parsekit.ItemType = 1
|
||||||
|
|
||||||
var c, a = parsekit.C, parsekit.A
|
var c, a = parsekit.C, parsekit.A
|
||||||
|
|
||||||
|
type MatcherTest struct {
|
||||||
|
input string
|
||||||
|
matcher parsekit.Matcher
|
||||||
|
mustMatch bool
|
||||||
|
expected string
|
||||||
|
}
|
||||||
|
|
||||||
|
func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
|
||||||
|
for _, test := range testSet {
|
||||||
|
RunMatcherTest(t, test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func RunMatcherTest(t *testing.T, test MatcherTest) {
|
||||||
|
parser := parsekit.New(test.matcher).Parse(test.input)
|
||||||
|
item, err, ok := parser.Next()
|
||||||
|
|
||||||
|
if test.mustMatch {
|
||||||
|
if !ok {
|
||||||
|
t.Errorf("Test %q failed with error: %s", test.input, err)
|
||||||
|
} else if item.Type != parsekit.MatchedItem {
|
||||||
|
t.Errorf("Test %q failed: should match, but it didn't", test.input)
|
||||||
|
} else if item.Value != test.expected {
|
||||||
|
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ok {
|
||||||
|
t.Errorf("Test %q failed: should not match, but it did", test.input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
43
peek.go
43
peek.go
|
@ -1,43 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
|
||||||
// (end of data or invalid UTF8 character).
|
|
||||||
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
|
||||||
return handleRuneError(r, w)
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleRuneError is used to normale rune value in case of errors.
|
|
||||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
|
||||||
// This can however indicate one of two situations:
|
|
||||||
// * w == 0: end of file is reached
|
|
||||||
// * w == 1: invalid UTF character on input
|
|
||||||
// This function lets these two cases return respectively the
|
|
||||||
// package's own EOF or INVALID runes, to make it easy for client
|
|
||||||
// code to distinct between these two cases.
|
|
||||||
func handleRuneError(r rune, w int) (rune, int, bool) {
|
|
||||||
if r == utf8.RuneError {
|
|
||||||
if w == 0 {
|
|
||||||
return EOF, 0, false
|
|
||||||
}
|
|
||||||
return INVALID, w, false
|
|
||||||
}
|
|
||||||
return r, w, true
|
|
||||||
}
|
|
||||||
|
|
||||||
// EOF is a special rune, which is used to indicate an end of file when
|
|
||||||
// reading a character from the input.
|
|
||||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
|
||||||
// say 'I now expect the end of the file' is using something like:
|
|
||||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
|
||||||
const EOF rune = -1
|
|
||||||
|
|
||||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
|
||||||
// rune on the input.
|
|
||||||
const INVALID rune = utf8.RuneError
|
|
|
@ -0,0 +1,128 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import "unicode/utf8"
|
||||||
|
|
||||||
|
// StateHandler defines the type of function that must be implemented to
|
||||||
|
// handle a parsing state.
|
||||||
|
//
|
||||||
|
// A StateHandler function gets a P struct as its input. This struct holds
|
||||||
|
// all the internal state for the parsing state machine and provides the
|
||||||
|
// interface that the StateHandler must use to interact with the parser.
|
||||||
|
type StateHandler func(*P)
|
||||||
|
|
||||||
|
// P holds the internal state of a parse run and provides an API to
|
||||||
|
// StateHandler methods to communicate with the parser.
|
||||||
|
type P struct {
|
||||||
|
state StateHandler // the function that handles the current state
|
||||||
|
nextState StateHandler // the function that will handle the next state
|
||||||
|
routeStack []StateHandler // route stack, for handling nested parsing
|
||||||
|
input string // the scanned input
|
||||||
|
inputPos int // current byte cursor position in the input
|
||||||
|
cursorLine int // current rune cursor row number in the input
|
||||||
|
cursorColumn int // current rune cursor column position in the input
|
||||||
|
len int // the total length of the input in bytes
|
||||||
|
newline bool // keep track of when we have scanned a newline
|
||||||
|
expecting string // a description of what the current state expects to find
|
||||||
|
buffer stringBuffer // an efficient buffer, used to build string values
|
||||||
|
items chan Item // channel of resulting Parser items
|
||||||
|
item Item // the current item as reached by Next() and retrieved by Get()
|
||||||
|
err *Error // an error when lexing failed, retrieved by Error()
|
||||||
|
|
||||||
|
LastMatch string // a string representation of the last matched input data
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expects is used to let a state function describe what input it is expecting.
|
||||||
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
|
//
|
||||||
|
// When defining an expectation inside a StateHandler, you do not need to
|
||||||
|
// handle unexpected input yourself. When the end of the function is reached
|
||||||
|
// without setting the next state, an automatic error will be emitted.
|
||||||
|
// This error can differentiate between the following issues:
|
||||||
|
//
|
||||||
|
// * there is valid data on input, but it was not accepted by the function
|
||||||
|
//
|
||||||
|
// * there is an invalid UTF8 character on input
|
||||||
|
//
|
||||||
|
// * the end of the file was reached.
|
||||||
|
func (p *P) Expects(description string) {
|
||||||
|
p.expecting = description
|
||||||
|
}
|
||||||
|
|
||||||
|
// peek returns the rune at the given byte offset from the cursor, without advancing the cursor.
|
||||||
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
|
// (end of data or invalid UTF8 character).
|
||||||
|
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
||||||
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
||||||
|
return handleRuneError(r, w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EOF is a special rune, which is used to indicate an end of file when
|
||||||
|
// reading a character from the input.
|
||||||
|
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||||
|
// say 'I now expect the end of the file' is using something like:
|
||||||
|
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||||
|
const EOF rune = -1
|
||||||
|
|
||||||
|
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||||
|
// rune on the input.
|
||||||
|
const INVALID rune = utf8.RuneError
|
||||||
|
|
||||||
|
// handleRuneError is used to normalize the rune value in case of errors.
|
||||||
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||||
|
// This can however indicate one of two situations:
|
||||||
|
// * w == 0: end of file is reached
|
||||||
|
// * w == 1: invalid UTF8 character on input
|
||||||
|
// This function lets these two cases return respectively the
|
||||||
|
// package's own EOF or INVALID runes, to make it easy for client
|
||||||
|
// code to distinguish between these two cases.
|
||||||
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||||
|
if r == utf8.RuneError {
|
||||||
|
if w == 0 {
|
||||||
|
return EOF, 0, false
|
||||||
|
}
|
||||||
|
return INVALID, w, false
|
||||||
|
}
|
||||||
|
return r, w, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteTo tells the parser what StateHandler function to invoke
|
||||||
|
// in the next parsing cycle.
|
||||||
|
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
||||||
|
p.nextState = state
|
||||||
|
return &routeFollowupAction{chainAction: chainAction{p, true}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteRepeat indicates that on the next parsing cycle, the current
|
||||||
|
// StateHandler must be reinvoked.
|
||||||
|
func (p *P) RouteRepeat() *chainAction {
|
||||||
|
p.RouteTo(p.state)
|
||||||
|
return &chainAction{nil, true}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteReturn tells the parser that on the next cycle the last
|
||||||
|
// StateHandler that was pushed on the route stack must be invoked.
|
||||||
|
//
|
||||||
|
// Using this method is optional. When implementing a StateHandler that
|
||||||
|
// is used as a sort of subroutine (using constructions like
|
||||||
|
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||||
|
// providing an explicit routing decision from that handler. The parser will
|
||||||
|
// automatically assume a RouteReturn() in that case.
|
||||||
|
func (p *P) RouteReturn() *chainAction {
|
||||||
|
p.nextState = p.popRoute()
|
||||||
|
return &chainAction{nil, true}
|
||||||
|
}
|
||||||
|
|
||||||
|
// pushRoute adds the StateHandler to the route stack.
|
||||||
|
// This is used for implementing nested parsing.
|
||||||
|
func (p *P) pushRoute(state StateHandler) {
|
||||||
|
p.routeStack = append(p.routeStack, state)
|
||||||
|
}
|
||||||
|
|
||||||
|
// popRoute pops the last pushed StateHandler from the route stack.
|
||||||
|
func (p *P) popRoute() StateHandler {
|
||||||
|
last := len(p.routeStack) - 1
|
||||||
|
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||||
|
p.routeStack = head
|
||||||
|
return tail
|
||||||
|
}
|
|
@ -2,7 +2,6 @@ package parsekit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ItemType represents the type of a parser Item.
|
// ItemType represents the type of a parser Item.
|
||||||
|
@ -16,7 +15,7 @@ const ItemEOF ItemType = -1
|
||||||
// an error has occurred during parsing.
|
// an error has occurred during parsing.
|
||||||
const ItemError ItemType = -2
|
const ItemError ItemType = -2
|
||||||
|
|
||||||
// Item is a built-in parser item type that is used for indicating a
|
// MatchedItem is a built-in parser item type that is used for indicating a
|
||||||
// successful match when using a parser that is based on a Matcher.
|
// successful match when using a parser that is based on a Matcher.
|
||||||
const MatchedItem ItemType = -3
|
const MatchedItem ItemType = -3
|
||||||
|
|
||||||
|
@ -27,8 +26,8 @@ type Item struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit passes a Parser item to the client, including the provided string.
|
// Emit passes a Parser item to the client, including the provided string.
|
||||||
func (p *P) Emit(t ItemType, s string) {
|
func (p *P) Emit(t ItemType, v string) {
|
||||||
p.items <- Item{t, s}
|
p.items <- Item{t, v}
|
||||||
p.buffer.reset()
|
p.buffer.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,28 +37,22 @@ func (p *P) EmitLiteral(t ItemType) {
|
||||||
p.Emit(t, p.buffer.asLiteralString())
|
p.Emit(t, p.buffer.asLiteralString())
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitLiteralTrim passes a Parser item to the client, including
|
// EmitInterpreted passes a Parser item to the client, including accumulated
|
||||||
// accumulated string buffer data as a literal string with whitespace
|
// string buffer data as a Go double quoted interpreted string (handling escape
|
||||||
// trimmed from it.
|
// codes like \n, \t, \uXXXX, etc.)
|
||||||
func (p *P) EmitLiteralTrim(t ItemType) {
|
// This method returns a boolean value, indicating whether or not the string
|
||||||
p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
|
// interpretation was successful. On invalid string data, an error will
|
||||||
}
|
// automatically be emitted and false will be returned.
|
||||||
|
func (p *P) EmitInterpreted(t ItemType) bool {
|
||||||
// EmitInterpreted passes a Parser item to the client, including
|
|
||||||
// accumulated string buffer data a Go doubled quoted interpreted string
|
|
||||||
// (handling escape codes like \n, \t, \uXXXX, etc.)
|
|
||||||
// This method might return an error, in case there is data in the
|
|
||||||
// string buffer that is not valid for string interpretation.
|
|
||||||
func (p *P) EmitInterpreted(t ItemType) error {
|
|
||||||
s, err := p.buffer.asInterpretedString()
|
s, err := p.buffer.asInterpretedString()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.EmitError(
|
p.EmitError(
|
||||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
||||||
p.buffer.asLiteralString(), err)
|
p.buffer.asLiteralString(), err)
|
||||||
return err
|
return false
|
||||||
}
|
}
|
||||||
p.Emit(t, s)
|
p.Emit(t, s)
|
||||||
return nil
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error is used as the error type when parsing errors occur.
|
// Error is used as the error type when parsing errors occur.
|
||||||
|
@ -78,6 +71,8 @@ func (err *Error) Error() string {
|
||||||
return err.Message
|
return err.Message
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrorFull returns the current error message, including information about
|
||||||
|
// the position in the input where the error occurred.
|
||||||
func (err *Error) ErrorFull() string {
|
func (err *Error) ErrorFull() string {
|
||||||
message := err.Error()
|
message := err.Error()
|
||||||
return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column)
|
return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column)
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// Expects is used to let a state function describe what input it is expecting.
|
|
||||||
// This expectation is used in error messages to make them more descriptive.
|
|
||||||
//
|
|
||||||
// Also, when defining an expectation inside a StateHandler, you do not need
|
|
||||||
// to handle unexpected input yourself. When the end of the function is
|
|
||||||
// reached without setting the next state, an automatic error will be
|
|
||||||
// emitted. This error differentiates between issues:
|
|
||||||
// * there is valid data on input, but it was not accepted by the function
|
|
||||||
// * there is an invalid UTF8 character on input
|
|
||||||
// * the end of the file was reached.
|
|
||||||
func (p *P) Expects(description string) {
|
|
||||||
p.expecting = description
|
|
||||||
}
|
|
|
@ -26,11 +26,15 @@ package parsekit
|
||||||
//
|
//
|
||||||
// You can omit "what to do with the match" and go straight into a routing
|
// You can omit "what to do with the match" and go straight into a routing
|
||||||
// method, e.g.
|
// method, e.g.
|
||||||
|
//
|
||||||
// On(...).RouteTo(...)
|
// On(...).RouteTo(...)
|
||||||
|
//
|
||||||
// This is functionally the same as using
|
// This is functionally the same as using
|
||||||
|
//
|
||||||
// On(...).Stay().RouteTo(...).
|
// On(...).Stay().RouteTo(...).
|
||||||
//
|
//
|
||||||
// Here's a complete example chain:
|
// Here's a complete example chain:
|
||||||
|
//
|
||||||
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
||||||
func (p *P) On(matcher Matcher) *matchAction {
|
func (p *P) On(matcher Matcher) *matchAction {
|
||||||
m := &MatchDialog{p: p}
|
m := &MatchDialog{p: p}
|
||||||
|
@ -44,16 +48,18 @@ func (p *P) On(matcher Matcher) *matchAction {
|
||||||
// if p.On(somethingBad).End() {
|
// if p.On(somethingBad).End() {
|
||||||
// p.Errorf("This was bad: %s", p.LastMatch)
|
// p.Errorf("This was bad: %s", p.LastMatch)
|
||||||
// }
|
// }
|
||||||
p.LastMatch = string(m.runes)
|
p.LastMatch = string(m.input)
|
||||||
|
|
||||||
return &matchAction{
|
return &matchAction{
|
||||||
routeAction: routeAction{chainAction{p, ok}},
|
routeAction: routeAction{chainAction{p, ok}},
|
||||||
runes: m.runes,
|
input: m.input,
|
||||||
widths: m.widths,
|
output: m.output,
|
||||||
|
inputPos: p.inputPos + m.inputOffset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// chainAction is used for building method chains for the On() method.
|
// chainAction is used for building method chains for the On() method.
|
||||||
|
// Every element of the method chain embeds this struct.
|
||||||
type chainAction struct {
|
type chainAction struct {
|
||||||
p *P
|
p *P
|
||||||
ok bool
|
ok bool
|
||||||
|
@ -64,3 +70,119 @@ type chainAction struct {
|
||||||
func (a *chainAction) End() bool {
|
func (a *chainAction) End() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// matchAction is a struct that is used for building On()-method chains.
|
||||||
|
//
|
||||||
|
// It embeds the routeAction struct, to make it possible to go right into
|
||||||
|
// a route action, which is basically a simple way of aliasing a chain
|
||||||
|
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
|
||||||
|
type matchAction struct {
|
||||||
|
routeAction
|
||||||
|
input []rune
|
||||||
|
output []rune
|
||||||
|
inputPos int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept tells the parser to move the cursor past a match that was found,
|
||||||
|
// and to store the input that matched in the string buffer.
|
||||||
|
// When no match was found, then no action is taken.
|
||||||
|
// It returns a routeAction struct, which provides methods that can be used
|
||||||
|
// to tell the parser what state to go to next.
|
||||||
|
func (a *matchAction) Accept() *routeAction {
|
||||||
|
if a.ok {
|
||||||
|
a.p.buffer.writeString(string(a.output))
|
||||||
|
a.advanceCursor()
|
||||||
|
}
|
||||||
|
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip tells the parser to move the cursor past a match that was found,
|
||||||
|
// without storing the actual match in the string buffer.
|
||||||
|
// It returns a routeAction struct, which carries whether or not a match was found.
|
||||||
|
// When no match was found, then no action is taken.
|
||||||
|
func (a *matchAction) Skip() *routeAction {
|
||||||
|
if a.ok {
|
||||||
|
a.advanceCursor()
|
||||||
|
}
|
||||||
|
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// advanceCursor advances the rune cursor past the matched input data.
|
||||||
|
// While doing so, it keeps track of newlines, so we can report on
|
||||||
|
// row + column positions on error.
|
||||||
|
func (a *matchAction) advanceCursor() {
|
||||||
|
a.p.inputPos = a.inputPos
|
||||||
|
for _, r := range a.input {
|
||||||
|
if a.p.newline {
|
||||||
|
a.p.cursorLine++
|
||||||
|
a.p.cursorColumn = 1
|
||||||
|
} else {
|
||||||
|
a.p.cursorColumn++
|
||||||
|
}
|
||||||
|
a.p.newline = r == '\n'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stay tells the parser to not move the cursor after finding a match.
|
||||||
|
// It returns a routeAction struct, which carries whether or not a match was found.
|
||||||
|
func (a *matchAction) Stay() *routeAction {
|
||||||
|
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// routeAction is a struct that is used for building On() method chains.
|
||||||
|
type routeAction struct {
|
||||||
|
chainAction
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteRepeat indicates that on the next parsing cycle,
|
||||||
|
// the current StateHandler must be reinvoked.
|
||||||
|
func (a *routeAction) RouteRepeat() *chainAction {
|
||||||
|
if a.ok {
|
||||||
|
return a.p.RouteRepeat()
|
||||||
|
}
|
||||||
|
return &chainAction{nil, false}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteTo tells the parser what StateHandler function to invoke
|
||||||
|
// in the next parsing cycle.
|
||||||
|
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
|
||||||
|
if a.ok {
|
||||||
|
return a.p.RouteTo(state)
|
||||||
|
}
|
||||||
|
return &routeFollowupAction{chainAction: chainAction{nil, false}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteReturn tells the parser that on the next cycle the next scheduled
|
||||||
|
// route must be invoked.
|
||||||
|
func (a *routeAction) RouteReturn() *chainAction {
|
||||||
|
if a.ok {
|
||||||
|
return a.p.RouteReturn()
|
||||||
|
}
|
||||||
|
return &chainAction{nil, false}
|
||||||
|
}
|
||||||
|
|
||||||
|
// routeFollowupAction chains parsing routes.
|
||||||
|
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||||
|
type routeFollowupAction struct {
|
||||||
|
chainAction
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
||||||
|
// StateHandler has been completed.
|
||||||
|
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
||||||
|
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
|
||||||
|
if a.ok {
|
||||||
|
a.p.pushRoute(state)
|
||||||
|
}
|
||||||
|
return &chainAction{nil, a.ok}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ThenReturnHere schedules the current StateHandler to be invoked after
|
||||||
|
// the RouteTo StateHandler has been completed.
|
||||||
|
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
||||||
|
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
|
||||||
|
if a.ok {
|
||||||
|
a.p.pushRoute(a.p.state)
|
||||||
|
}
|
||||||
|
return &chainAction{nil, a.ok}
|
||||||
|
}
|
||||||
|
|
|
@ -1,60 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// matchAction is a struct that is used for building On()-method chains.
|
|
||||||
//
|
|
||||||
// It embeds the routeAction struct, to make it possible to go right into
|
|
||||||
// a route action, which is basically a simple way of aliasing a chain
|
|
||||||
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
|
|
||||||
type matchAction struct {
|
|
||||||
routeAction
|
|
||||||
runes []rune
|
|
||||||
widths []int
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accept tells the parser to move the cursor past a match that was found,
|
|
||||||
// and to store the input that matched in the string buffer.
|
|
||||||
// When no match was found, then no action is taken.
|
|
||||||
// It returns a routeAction struct, which provides methods that can be used
|
|
||||||
// to tell the parser what state to go to next.
|
|
||||||
func (a *matchAction) Accept() *routeAction {
|
|
||||||
if a.ok {
|
|
||||||
for i, r := range a.runes {
|
|
||||||
a.p.buffer.writeRune(r)
|
|
||||||
a.p.advanceCursor(r, a.widths[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip tells the parser to move the cursor past a match that was found,
|
|
||||||
// without storing the actual match in the string buffer.
|
|
||||||
// Returns true in case a match was found.
|
|
||||||
// When no match was found, then no action is taken and false is returned.
|
|
||||||
func (a *matchAction) Skip() *routeAction {
|
|
||||||
if a.ok {
|
|
||||||
for i, r := range a.runes {
|
|
||||||
a.p.advanceCursor(r, a.widths[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stay tells the parser to not move the cursor after finding a match.
|
|
||||||
// Returns true in case a match was found, false otherwise.
|
|
||||||
func (a *matchAction) Stay() *routeAction {
|
|
||||||
return &routeAction{chainAction: chainAction{a.p, a.ok}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the input data.
|
|
||||||
// While doing so, it keeps tracks of newlines, so we can report on
|
|
||||||
// row + column positions on error.
|
|
||||||
func (p *P) advanceCursor(r rune, w int) {
|
|
||||||
p.pos += w
|
|
||||||
if p.newline {
|
|
||||||
p.cursorLine++
|
|
||||||
p.cursorColumn = 1
|
|
||||||
} else {
|
|
||||||
p.cursorColumn++
|
|
||||||
}
|
|
||||||
p.newline = r == '\n'
|
|
||||||
}
|
|
|
@ -1,59 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// routeAction is a struct that is used for building On() method chains.
|
|
||||||
type routeAction struct {
|
|
||||||
chainAction
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteRepeat indicates that on the next parsing cycle,
|
|
||||||
// the current StateHandler must be reinvoked.
|
|
||||||
func (a *routeAction) RouteRepeat() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteRepeat()
|
|
||||||
}
|
|
||||||
return &chainAction{nil, false}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke
|
|
||||||
// in the next parsing cycle.
|
|
||||||
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteTo(state)
|
|
||||||
}
|
|
||||||
return &routeFollowupAction{chainAction: chainAction{nil, false}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the next scheduled
|
|
||||||
// route must be invoked.
|
|
||||||
func (a *routeAction) RouteReturn() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
return a.p.RouteReturn()
|
|
||||||
}
|
|
||||||
return &chainAction{nil, false}
|
|
||||||
}
|
|
||||||
|
|
||||||
// routeFollowupAction chains parsing routes.
|
|
||||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
|
||||||
type routeFollowupAction struct {
|
|
||||||
chainAction
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
|
||||||
// StateHandler has been completed.
|
|
||||||
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
|
||||||
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
a.p.pushRoute(state)
|
|
||||||
}
|
|
||||||
return &chainAction{nil, a.ok}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ThenReturnHere schedules the current StateHandler to be invoked after
|
|
||||||
// the RouteTo StateHandler has been completed.
|
|
||||||
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
|
||||||
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
|
|
||||||
if a.ok {
|
|
||||||
a.p.pushRoute(a.p.state)
|
|
||||||
}
|
|
||||||
return &chainAction{nil, a.ok}
|
|
||||||
}
|
|
|
@ -1,42 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke
|
|
||||||
// in the next parsing cycle.
|
|
||||||
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
|
||||||
p.nextState = state
|
|
||||||
return &routeFollowupAction{chainAction: chainAction{p, true}}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteRepeat indicates that on the next parsing cycle, the current
|
|
||||||
// StateHandler must be reinvoked.
|
|
||||||
func (p *P) RouteRepeat() *chainAction {
|
|
||||||
p.RouteTo(p.state)
|
|
||||||
return &chainAction{nil, true}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the last
|
|
||||||
// StateHandler that was pushed on the route stack must be invoked.
|
|
||||||
//
|
|
||||||
// Using this method is optional. When implementating a StateHandler that
|
|
||||||
// is used as a sort of subroutine (using constructions like
|
|
||||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
|
||||||
// providing an explicit routing decision from that handler. The parser will
|
|
||||||
// automatically assume a RouteReturn() in that case.
|
|
||||||
func (p *P) RouteReturn() *chainAction {
|
|
||||||
p.nextState = p.popRoute()
|
|
||||||
return &chainAction{nil, true}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pushRoute adds the StateHandler to the route stack.
|
|
||||||
// This is used for implementing nested parsing.
|
|
||||||
func (p *P) pushRoute(state StateHandler) {
|
|
||||||
p.routeStack = append(p.routeStack, state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// popRoute pops the last pushed StateHandler from the route stack.
|
|
||||||
func (p *P) popRoute() StateHandler {
|
|
||||||
last := len(p.routeStack) - 1
|
|
||||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
|
||||||
p.routeStack = head
|
|
||||||
return tail
|
|
||||||
}
|
|
Loading…
Reference in New Issue