Backup work, created a lot of tests for parser combinators and atoms. Pretty solid now!

This commit is contained in:
Maurice Makaay 2019-05-24 12:41:34 +00:00
parent d9ab7298e7
commit 6ad4499971
17 changed files with 1216 additions and 948 deletions

114
atoms.go
View File

@ -1,114 +0,0 @@
package parsekit
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// Every field holds a ready-made Matcher. The matchers are constructed once,
// when this package-level variable is initialized.
var A = struct {
EndOfFile Matcher
AnyRune Matcher
Space Matcher
Tab Matcher
CarriageRet Matcher
Newline Matcher
Excl Matcher
DoubleQuote Matcher
Hash Matcher
Dollar Matcher
Percent Matcher
Amp Matcher
SingleQuote Matcher
RoundOpen Matcher
RoundClose Matcher
Asterisk Matcher
Plus Matcher
Comma Matcher
Minus Matcher
Dot Matcher
Slash Matcher
Colon Matcher
Semicolon Matcher
AngleOpen Matcher
Equal Matcher
AngleClose Matcher
Question Matcher
At Matcher
SquareOpen Matcher
Backslash Matcher
SquareClose Matcher
Caret Matcher
Underscore Matcher
Backquote Matcher
CurlyOpen Matcher
Pipe Matcher
CurlyClose Matcher
Tilde Matcher
Whitespace Matcher
WhitespaceAndNewlines Matcher
EndOfLine Matcher
Digit Matcher
ASCII Matcher
ASCIILower Matcher
ASCIIUpper Matcher
HexDigit Matcher
}{
// Atoms with a dedicated constructor function.
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
// Atoms that match exactly one specific rune.
Space: C.Rune(' '),
Tab: C.Rune('\t'),
CarriageRet: C.Rune('\r'),
Newline: C.Rune('\n'),
Excl: C.Rune('!'),
DoubleQuote: C.Rune('"'),
Hash: C.Rune('#'),
Dollar: C.Rune('$'),
Percent: C.Rune('%'),
Amp: C.Rune('&'),
SingleQuote: C.Rune('\''),
RoundOpen: C.Rune('('),
RoundClose: C.Rune(')'),
Asterisk: C.Rune('*'),
Plus: C.Rune('+'),
Comma: C.Rune(','),
Minus: C.Rune('-'),
Dot: C.Rune('.'),
Slash: C.Rune('/'),
Colon: C.Rune(':'),
Semicolon: C.Rune(';'),
AngleOpen: C.Rune('<'),
Equal: C.Rune('='),
AngleClose: C.Rune('>'),
Question: C.Rune('?'),
At: C.Rune('@'),
SquareOpen: C.Rune('['),
Backslash: C.Rune('\\'),
SquareClose: C.Rune(']'),
Caret: C.Rune('^'),
Underscore: C.Rune('_'),
Backquote: C.Rune('`'),
CurlyOpen: C.Rune('{'),
Pipe: C.Rune('|'),
CurlyClose: C.Rune('}'),
Tilde: C.Rune('~'),
// Composite atoms: one or more blanks, optionally including line endings.
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.Rune('\r'), C.Rune('\n'))),
// EndOfLine accepts "\r\n", "\n" or the end of the file.
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
// Atoms that match a single rune out of an inclusive range.
Digit: C.RuneRange('0', '9'),
ASCII: C.RuneRange('\x00', '\x7F'),
ASCIILower: C.RuneRange('a', 'z'),
ASCIIUpper: C.RuneRange('A', 'Z'),
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that reports a match when the next read
// from the input fails and the returned rune equals EOF.
//
// The read is performed on a fork, so the MatchDialog that is passed to the
// Matcher is not modified.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		r, ok := m.Fork().NextRune()
		if ok {
			return false
		}
		return r == EOF
	}
}
// MatchAnyRune creates a Matcher that reads one rune from the input and
// reports a match when NextRune() returned it successfully.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, ok := m.NextRune(); !ok {
			return false
		}
		return true
	}
}

View File

@ -1,128 +0,0 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestAtoms feeds a small input to every atom and checks whether the atom
// matches (or refuses to match) as expected.
func TestAtoms(t *testing.T) {
	for i, c := range []struct {
		input     string
		matcher   parsekit.Matcher
		mustMatch bool
	}{
		{"", a.EndOfFile, true},
		{"⌘", a.AnyRune, true},
		{"\xbc", a.AnyRune, false}, // invalid UTF8 rune
		{"", a.AnyRune, false},     // end of file
		{" ", a.Space, true},
		{"X", a.Space, false},
		{"\t", a.Tab, true},
		{"\r", a.CarriageRet, true},
		{"\n", a.Newline, true},
		{"!", a.Excl, true},
		{"\"", a.DoubleQuote, true},
		{"#", a.Hash, true},
		{"$", a.Dollar, true},
		{"%", a.Percent, true},
		{"&", a.Amp, true},
		{"'", a.SingleQuote, true},
		{"(", a.RoundOpen, true},
		{")", a.RoundClose, true},
		{"*", a.Asterisk, true},
		{"+", a.Plus, true},
		{",", a.Comma, true},
		{"-", a.Minus, true},
		{".", a.Dot, true},
		{"/", a.Slash, true},
		{":", a.Colon, true},
		{";", a.Semicolon, true},
		{"<", a.AngleOpen, true},
		{"=", a.Equal, true},
		{">", a.AngleClose, true},
		{"?", a.Question, true},
		{"@", a.At, true},
		{"[", a.SquareOpen, true},
		{"\\", a.Backslash, true},
		{"]", a.SquareClose, true},
		{"^", a.Caret, true},
		{"_", a.Underscore, true},
		{"`", a.Backquote, true},
		{"{", a.CurlyOpen, true},
		{"|", a.Pipe, true},
		{"}", a.CurlyClose, true},
		{"~", a.Tilde, true},
		{" \t \t ", a.Whitespace, true},
		{" \t\r\n ", a.WhitespaceAndNewlines, true},
		{"", a.EndOfLine, true},
		{"\r\n", a.EndOfLine, true},
		{"\n", a.EndOfLine, true},
		{"0", a.Digit, true},
		{"1", a.Digit, true},
		{"2", a.Digit, true},
		{"3", a.Digit, true},
		{"4", a.Digit, true},
		{"5", a.Digit, true},
		{"6", a.Digit, true},
		{"7", a.Digit, true},
		{"8", a.Digit, true},
		{"9", a.Digit, true},
		{"X", a.Digit, false},
		{"a", a.ASCIILower, true},
		{"z", a.ASCIILower, true},
		{"A", a.ASCIILower, false},
		{"Z", a.ASCIILower, false},
		{"A", a.ASCIIUpper, true},
		{"Z", a.ASCIIUpper, true},
		{"a", a.ASCIIUpper, false},
		{"z", a.ASCIIUpper, false},
		{"0", a.HexDigit, true},
		{"9", a.HexDigit, true},
		{"a", a.HexDigit, true},
		{"f", a.HexDigit, true},
		{"A", a.HexDigit, true},
		{"F", a.HexDigit, true},
		{"g", a.HexDigit, false},
		{"G", a.HexDigit, false},
	} {
		parser := parsekit.New(c.matcher).Parse(c.input)
		item, err, ok := parser.Next()
		if c.mustMatch {
			// Only inspect the item when parsing succeeded; after a failure
			// the item carries no meaningful data and would merely trigger a
			// redundant second error for the same test case.
			if !ok {
				t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
			} else if item.Type != parsekit.MatchedItem {
				t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
			}
		} else if ok {
			t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
		}
	}
}
// TestSequenceOfRunes chains the punctuation atoms into one sequence and
// checks that the parser emits the complete input as a single item.
func TestSequenceOfRunes(t *testing.T) {
	input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
	punctuation := c.Sequence(
		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
		a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
		a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
		a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
	)

	parser := parsekit.New(func(p *parsekit.P) {
		p.Expects("Sequence of runes")
		if p.On(punctuation).Accept().End() {
			p.EmitLiteral(TestItem)
		}
	})

	result, err, ok := parser.Parse(input).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if got := result.Value; got != input {
		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, got)
	}
}

View File

@ -1,296 +0,0 @@
package parsekit
import (
"unicode"
"unicode/utf8"
)
// Nice to have I guess:
// - LookAhead
// - Ready to go combinators for various number notations
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
// Matcher is the function type for input matchers. A Matcher receives a
// MatchDialog to read runes from and returns a boolean that reports whether
// or not it found a match.
type Matcher func(m *MatchDialog) bool
// MatchDialog is used by Matcher functions to retrieve data from the parser
// input to match against and to report back successful matches.
type MatchDialog struct {
// p is the parser whose peek() method supplies the input runes.
p *P
// runes collects every rune that was returned by NextRune().
runes []rune
// widths collects the width that peek() reported for each collected rune.
widths []int
// offset is the position that is passed to peek() for the next read.
offset int
// curRune is the rune that was returned by the last NextRune() call.
curRune rune
// curWidth is the width that peek() reported for curRune.
curWidth int
// parent is the MatchDialog that this one was forked from (nil when not forked).
parent *MatchDialog
}
// NextRune reads the next rune from the input on behalf of a Matcher.
//
// The rune and its width are always recorded in the MatchDialog and the
// offset is advanced by that width. The returned boolean is the status that
// was reported by the parser's peek(); it will be false in case an invalid
// UTF8 rune or the end of the file was encountered.
//
// The method panics when the previously read rune was utf8.RuneError.
func (m *MatchDialog) NextRune() (rune, bool) {
	if m.curRune == utf8.RuneError {
		panic("internal parser error: Matcher must not call NextRune() after it returned false")
	}

	next, width, ok := m.p.peek(m.offset)

	m.curRune, m.curWidth = next, width
	m.runes = append(m.runes, next)
	m.widths = append(m.widths, width)
	m.offset += width

	return next, ok
}
// Fork creates a child MatchDialog that shares the parser and the current
// offset with m, but that starts out with no collected runes.
//
// A Matcher can work on the child without touching m. This is useful for
// matching a sequence of runes: when a mismatch is found halfway, the child
// can simply be discarded and m is still in its original state. When the
// match is successful, Merge() can be called on the child to transport the
// collected runes and the new offset to m.
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{
		parent: m,
		p:      m.p,
		offset: m.offset,
	}
}
// Merge merges the data from a forked child MatchDialog back into its parent:
//   - the runes (and their widths) that were accumulated in the child are
//     appended to those of the parent
//   - the parent's offset is set to the child's offset
//
// After a Merge, the accumulated runes of the child are cleared, so the child
// can immediately be reused for performing another match.
//
// Merge always returns true, which allows a Matcher to end with
// "return child.Merge()". It panics when m was not created through Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("internal parser error: Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.runes = append(m.parent.runes, m.runes...)
	m.parent.widths = append(m.parent.widths, m.widths...)
	m.parent.offset = m.offset
	m.Clear()
	return true
}
// Clear drops the runes and widths that were accumulated in the MatchDialog
// by replacing them with fresh, empty slices. The offset is left untouched.
func (m *MatchDialog) Clear() {
	m.runes, m.widths = make([]rune, 0), make([]int, 0)
}
// C bundles the parser/combinator constructors of this package, so matching
// expressions can be written compactly.
//
// When using C in your own parser, it is advised to reference it from a
// variable in your own package:
//
//	var c = parsekit.C
//
// This saves a lot of typing and keeps the parser code readable.
var C = struct {
	Rune         func(rune) Matcher
	Runes        func(...rune) Matcher
	RuneRange    func(rune, rune) Matcher
	String       func(string) Matcher
	StringNoCase func(string) Matcher
	AnyOf        func(...Matcher) Matcher
	Not          func(Matcher) Matcher
	Optional     func(Matcher) Matcher
	Sequence     func(...Matcher) Matcher
	Repeat       func(int, Matcher) Matcher
	Min          func(int, Matcher) Matcher
	Max          func(int, Matcher) Matcher
	ZeroOrMore   func(Matcher) Matcher
	OneOrMore    func(Matcher) Matcher
	MinMax       func(int, int, Matcher) Matcher
	Separated    func(Matcher, Matcher) Matcher
	Drop         func(Matcher) Matcher
}{
	// The entries are listed in the same order as the fields above.
	Rune:         MatchRune,
	Runes:        MatchRunes,
	RuneRange:    MatchRuneRange,
	String:       MatchString,
	StringNoCase: MatchStringNoCase,
	AnyOf:        MatchAnyOf,
	Not:          MatchNot,
	Optional:     MatchOptional,
	Sequence:     MatchSequence,
	Repeat:       MatchRepeat,
	Min:          MatchMin,
	Max:          MatchMax,
	ZeroOrMore:   MatchZeroOrMore,
	OneOrMore:    MatchOneOrMore,
	MinMax:       MatchMinMax,
	Separated:    MatchSeparated,
	Drop:         MatchDrop,
}
// MatchRune creates a Matcher that reads one rune from the input and reports
// a match when that rune was read successfully and equals r.
func MatchRune(r rune) Matcher {
	return func(m *MatchDialog) bool {
		if got, ok := m.NextRune(); ok {
			return got == r
		}
		return false
	}
}
// MatchRunes creates a Matcher that reads one rune from the input and reports
// a match when that rune was read successfully and equals one of the
// provided runes.
func MatchRunes(runes ...rune) Matcher {
	return func(m *MatchDialog) bool {
		got, ok := m.NextRune()
		if !ok {
			return false
		}
		for i := range runes {
			if runes[i] == got {
				return true
			}
		}
		return false
	}
}
// MatchRuneRange creates a Matcher that reads one rune from the input and
// reports a match when that rune was read successfully and lies within the
// range from start up to and including end.
func MatchRuneRange(start rune, end rune) Matcher {
	return func(m *MatchDialog) bool {
		got, ok := m.NextRune()
		if !ok {
			return false
		}
		return start <= got && got <= end
	}
}
// MatchString creates a Matcher that matches the runes of s, in order.
// It is built as a MatchSequence of one MatchRune per rune of s.
func MatchString(s string) Matcher {
	perRune := make([]Matcher, 0, len(s))
	for _, r := range s {
		perRune = append(perRune, MatchRune(r))
	}
	return MatchSequence(perRune...)
}
// MatchStringNoCase creates a Matcher that matches the runes of s, in order,
// accepting both the upper case and the lower case form of every rune.
// It is built as a MatchSequence of one MatchRunes per rune of s.
func MatchStringNoCase(s string) Matcher {
	perRune := make([]Matcher, 0, len(s))
	for _, r := range s {
		perRune = append(perRune, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSequence(perRune...)
}
// MatchOptional creates a Matcher that always reports a match. The provided
// matcher is tried on a fork; only when it matches, its result is merged.
func MatchOptional(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if fork := m.Fork(); matcher(fork) {
			fork.Merge()
		}
		return true
	}
}
// MatchSequence creates a Matcher that applies the provided matchers one
// after another on a single fork. The first matcher that fails makes the
// whole sequence fail; when all of them match, the fork is merged.
func MatchSequence(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		for i := range matchers {
			if matched := matchers[i](fork); !matched {
				return false
			}
		}
		// Merge() always returns true.
		return fork.Merge()
	}
}
// MatchAnyOf creates a Matcher that tries the provided matchers in order,
// each one on a fresh fork. The first matcher that matches is merged and
// ends the search; when none matches, no match is reported.
func MatchAnyOf(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := range matchers {
			fork := m.Fork()
			if !matchers[i](fork) {
				continue
			}
			fork.Merge()
			return true
		}
		return false
	}
}
// MatchNot creates a Matcher that reports a match when the provided matcher
// does not match.
//
// The provided matcher runs on a fork. When it fails, the fork is merged, so
// whatever the failed matcher read through NextRune() ends up in m.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		if matcher(fork) {
			return false
		}
		return fork.Merge()
	}
}
// MatchRepeat creates a Matcher that applies the provided matcher exactly
// count times. It delegates to MatchMinMax, using count for both the minimum
// and the maximum.
func MatchRepeat(count int, matcher Matcher) Matcher {
return MatchMinMax(count, count, matcher)
}
// MatchMin creates a Matcher that requires the provided matcher to match at
// least min times. It delegates to MatchMinMax with a maximum of -1, which
// MatchMinMax treats as "no maximum".
func MatchMin(min int, matcher Matcher) Matcher {
return MatchMinMax(min, -1, matcher)
}
// MatchMax creates a Matcher that applies the provided matcher at most max
// times. It delegates to MatchMinMax with a minimum of -1, which MatchMinMax
// treats as "no minimum", so zero matches are a successful match as well.
func MatchMax(max int, matcher Matcher) Matcher {
return MatchMinMax(-1, max, matcher)
}
// MatchZeroOrMore creates a Matcher that applies the provided matcher as
// often as it matches, without requiring any match. It delegates to
// MatchMinMax with a minimum of 0 and no maximum (-1).
func MatchZeroOrMore(matcher Matcher) Matcher {
return MatchMinMax(0, -1, matcher)
}
// MatchOneOrMore creates a Matcher that applies the provided matcher as
// often as it matches, requiring at least one match. It delegates to
// MatchMinMax with a minimum of 1 and no maximum (-1).
func MatchOneOrMore(matcher Matcher) Matcher {
return MatchMinMax(1, -1, matcher)
}
// MatchMinMax creates a Matcher that applies the provided matcher repeatedly
// on a fork, bounded by min and max.
//
// A min that is not positive means that no matches are required. A negative
// max means that there is no upper bound. When both bounds are given (>= 0)
// and min exceeds max, the returned Matcher panics when it is invoked.
//
// The Matcher reports a failure when fewer than min matches are found. In all
// other cases it reports success, after merging the required matches and
// every further match, up to max in total when max is given.
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if min >= 0 && max >= 0 && max < min {
			panic("internal parser error: MatchRepeat definition error: max must not be < min")
		}

		fork := m.Fork()
		count := 0

		// The required part: every one of these matches must succeed.
		for ; count < min; count++ {
			if !matcher(fork) {
				return false
			}
		}
		fork.Merge()

		// Unbounded: take every further match that is available.
		if max < 0 {
			for matcher(fork) {
				fork.Merge()
			}
			return true
		}

		// Bounded: take further matches until the max is reached. A failed
		// attempt is not merged, so it leaves m untouched.
		for ; count < max; count++ {
			if !matcher(fork) {
				break
			}
			fork.Merge()
		}
		return true
	}
}
// MatchSeparated creates a Matcher for one or more occurrences of separated,
// with separator in between. It is composed as: separated, followed by zero
// or more repetitions of the sequence (separator, separated).
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
return MatchSequence(separated, MatchZeroOrMore(MatchSequence(separator, separated)))
}
// MatchDrop creates a Matcher that applies the provided matcher on a fork
// and, when it matches, throws away the accumulated runes before merging.
// The merge still moves the parent's offset past the matched input.
func MatchDrop(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		if !matcher(fork) {
			return false
		}
		fork.Clear()
		return fork.Merge()
	}
}

View File

@ -1,112 +0,0 @@
package parsekit_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
func ExampleMatchAnyRune(t *testing.T) {
parser := parsekit.New(
func(p *parsekit.P) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept().End() {
p.EmitLiteral(TestItem)
}
})
run := parser.Parse("¡Any / valid / character will dö!")
match, _, ok := run.Next()
if ok {
fmt.Printf("Match = %q\n", match)
}
}
// TestCombinators runs every combinator against a number of inputs and
// checks both the match result and the output that the match produced.
func TestCombinators(t *testing.T) {
	for i, c := range []struct {
		input     string
		matcher   parsekit.Matcher
		mustMatch bool
		expected  string
	}{
		{"xxx", c.Rune('x'), true, "x"},
		{"x ", c.Rune(' '), false, ""},
		{"aa", c.RuneRange('b', 'e'), false, ""},
		{"bb", c.RuneRange('b', 'e'), true, "b"},
		{"cc", c.RuneRange('b', 'e'), true, "c"},
		{"dd", c.RuneRange('b', 'e'), true, "d"},
		{"ee", c.RuneRange('b', 'e'), true, "e"},
		{"ff", c.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", c.String("Hello"), true, "Hello"},
		{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
		{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", c.Runes('+', '-', '*', '/'), false, ""},
		{"abc", c.Not(c.Rune('b')), true, "a"},
		{"bcd", c.Not(c.Rune('b')), false, ""},
		{"bcd", c.Not(c.Rune('b')), false, ""},
		{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
		{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
		{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
		{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
		{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
		{"", c.Min(0, c.Rune('a')), true, ""},
		{"a", c.Min(0, c.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
		{"", c.Max(4, c.Rune('b')), true, ""},
		{"X", c.Max(4, c.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(c.Rune('d')), false, ""},
		{"X", c.OneOrMore(c.Rune('d')), false, ""},
		{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
		{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(c.Rune('h')), true, ""},
		{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
		{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
	} {
		parser := parsekit.New(c.matcher).Parse(c.input)
		item, err, ok := parser.Next()
		if c.mustMatch {
			if !ok {
				t.Errorf("Test [%d] %q failed with error: %s", i+1, c.input, err)
			} else if item.Type != parsekit.MatchedItem {
				t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
			} else if item.Value != c.expected {
				// Report i+1, like all other messages, so that the test
				// numbers are consistent across the different failures.
				t.Errorf("Test [%d] %q failed: not expected output:\nexpected: %s\nactual: %s\n", i+1, c.input, c.expected, item.Value)
			}
		} else if ok {
			t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
		}
	}
}

187
matcher.go Normal file
View File

@ -0,0 +1,187 @@
package parsekit
import (
"fmt"
)
// Matcher is the function type that must be implemented to create a function
// that can be used in conjunction with parsekit.P.On() or parsekit.New().
// Its purpose is to check if input data matches some kind of pattern and to
// report back the match.
//
// A Matcher function gets a MatchDialog as its input and returns a boolean to
// indicate whether or not the Matcher found a match on the input.
// The MatchDialog is used both for retrieving the input data to match against
// and for reporting back the results of the match.
type Matcher func(m *MatchDialog) bool
// MatchDialog is used by Matcher functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the Matcher function can call
// the MatchDialog.NextRune() method.
//
// The Matcher function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
// the rune is added to the output of the MatchDialog. When using
// MatchDialog.Skip(), the rune will not be added to the output. It is
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
// a rune, before calling NextRune() again.
//
// Eventually, the Matcher function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the MatchDialog's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a Matcher function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the MatchDialog must be brought back to the original state.
//
// The way in which this is supported, is by forking a MatchDialog by calling
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
// output buffer, but using the same input offset as the forked parent.
//
// The Matcher function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the output
// buffer. When the Matcher function decides that the lookahead was successful,
// then the method MatchDialog.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input offset to that of the child.
//
// When the Matcher function decides that the lookahead was unsuccessful, then
// it can simply discard the forked child. The parent MatchDialog was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type MatchDialog struct {
p *P // parser state, used to retrieve input data to match against (TODO should be interface)
inputOffset int // the byte offset into the input, advanced by Accept() and Skip()
input []rune // a slice of runes that represents the retrieved input runes for the Matcher
output []rune // a slice of runes that represents the accepted output runes for the Matcher
currRune *runeToken // holds the last rune that was read from the input, nil after Accept() or Skip()
parent *MatchDialog // the parent MatchDialog, in case this one was forked
}
// runeToken holds the result of a single read from the input, as stored by
// MatchDialog.NextRune().
type runeToken struct {
// Rune is the rune that was read.
Rune rune
// ByteSize is the amount that the input offset is advanced by when the rune
// is accepted or skipped.
ByteSize int
// OK is the read status; false means EOF or an invalid rune.
OK bool
}
// NextRune reads the next rune from the input, at the current input offset.
//
// It returns the rune and a boolean. The boolean will be false in case an
// invalid UTF8 rune or the end of the file was encountered. Only a rune that
// was read successfully is added to the input of the MatchDialog.
//
// After a successful read, the Matcher must call either Accept() (to add the
// rune to the resulting output) or Skip() (to ignore it). This gives the
// Matcher full control over what runes are significant for its output.
//
// Calling NextRune() again before the last read rune was accepted or skipped
// results in a panic.
func (m *MatchDialog) NextRune() (rune, bool) {
	if m.currRune != nil {
		panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
	}

	r, size, ok := m.p.peek(m.inputOffset)
	m.currRune = &runeToken{Rune: r, ByteSize: size, OK: ok}
	if !ok {
		return r, false
	}

	m.input = append(m.input, r)
	return r, true
}
// Fork creates a child MatchDialog that shares the parser state and the
// current input offset with m, but that has no input, no output and no
// pending rune of its own.
//
// A Matcher function can work on the child freely, without affecting m.
// This is for example useful for performing some form of lookahead.
//
// When a successful match was found, the Matcher function can call
// child.Merge() to have the resulting output added to m. When no match was
// found, the forked child can simply be discarded.
//
// Example case: A Matcher checks for a sequence of runes: 'a', 'b', 'c', 'd'.
// Only after checking all four runes, the Matcher function can confirm a
// successful match. A (naive) Matcher function for this case could be:
//
//	func MatchAbcd(m *MatchDialog) bool {
//		child := m.Fork() // fork to keep m untouched
//		for _, letter := range []rune{'a', 'b', 'c', 'd'} {
//			if r, ok := child.NextRune(); !ok || r != letter {
//				return false // report mismatch, m is left untouched
//			}
//			child.Accept() // add rune to child output
//		}
//		child.Merge() // we have a match, add resulting output to parent
//		return true   // and report the successful match
//	}
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{
		parent:      m,
		p:           m.p,
		inputOffset: m.inputOffset,
	}
}
// Accept appends the rune that was last read by NextRune() to the resulting
// output of the MatchDialog and moves the input offset past that rune.
// Afterwards, NextRune() can be called again.
func (m *MatchDialog) Accept() {
	m.checkAllowedCall("Accept()")

	accepted := m.currRune
	m.currRune = nil
	m.inputOffset += accepted.ByteSize
	m.output = append(m.output, accepted.Rune)
}
// Skip moves the input offset past the rune that was last read by
// NextRune(), without adding that rune to the resulting output.
// Afterwards, NextRune() can be called again.
func (m *MatchDialog) Skip() {
	m.checkAllowedCall("Skip()")

	skipped := m.currRune
	m.currRune = nil
	m.inputOffset += skipped.ByteSize
}
// checkAllowedCall guards Accept() and Skip(): these may only be called when
// there is a pending rune that was read successfully by NextRune(). The
// method panics otherwise, using the provided name to identify the caller in
// the panic message.
func (m *MatchDialog) checkAllowedCall(name string) {
	if m.currRune == nil {
		panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
	}
	if !m.currRune.OK {
		panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
	}
}
// Merge merges the data from a forked child MatchDialog back into its
// parent: the input and output runes of the child are appended to those of
// the parent and the parent's input offset is set to the child's offset.
//
// After the merge, the child MatchDialog is reset so it can immediately be
// reused for performing another match (input and output are cleared, the
// input offset is kept at its current position).
//
// Merge always returns true, which allows a Matcher to end with
// "return child.Merge()". It panics when m was not created through Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("internal parser error: Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.input = append(m.parent.input, m.input...)
	m.parent.output = append(m.parent.output, m.output...)
	m.parent.inputOffset = m.inputOffset
	m.ClearOutput()
	m.ClearInput()
	return true
}
// ClearOutput replaces the resulting output of the MatchDialog with a fresh,
// empty slice. The input and the input offset are left untouched.
func (m *MatchDialog) ClearOutput() {
	m.output = make([]rune, 0)
}
// ClearInput replaces the input of the MatchDialog with a fresh, empty
// slice. The output and the input offset are left untouched.
func (m *MatchDialog) ClearInput() {
	m.input = make([]rune, 0)
}

477
matcher_builtin.go Normal file
View File

@ -0,0 +1,477 @@
package parsekit
import (
"fmt"
"strings"
"unicode"
)
// C bundles the parser/combinators of this package, which can be used to
// build Matcher functions.
//
// When using C in your own parser, it is advised to reference it from a
// variable in your own package:
//
//	var c = parsekit.C
//
// This saves a lot of typing and keeps the parser code readable.
var C = struct {
	Rune         func(rune) Matcher
	Runes        func(...rune) Matcher
	RuneRange    func(rune, rune) Matcher
	String       func(string) Matcher
	StringNoCase func(string) Matcher
	AnyOf        func(...Matcher) Matcher
	Not          func(Matcher) Matcher
	Optional     func(Matcher) Matcher
	Sequence     func(...Matcher) Matcher
	Repeat       func(int, Matcher) Matcher
	Min          func(int, Matcher) Matcher
	Max          func(int, Matcher) Matcher
	ZeroOrMore   func(Matcher) Matcher
	OneOrMore    func(Matcher) Matcher
	MinMax       func(int, int, Matcher) Matcher
	Separated    func(Matcher, Matcher) Matcher
	Drop         func(Matcher) Matcher
	Trim         func(Matcher, string) Matcher
	TrimLeft     func(Matcher, string) Matcher
	TrimRight    func(Matcher, string) Matcher
}{
	// The entries are listed in the same order as the fields above.
	Rune:         MatchRune,
	Runes:        MatchRunes,
	RuneRange:    MatchRuneRange,
	String:       MatchString,
	StringNoCase: MatchStringNoCase,
	AnyOf:        MatchAnyOf,
	Not:          MatchNot,
	Optional:     MatchOptional,
	Sequence:     MatchSequence,
	Repeat:       MatchRepeat,
	Min:          MatchMin,
	Max:          MatchMax,
	ZeroOrMore:   MatchZeroOrMore,
	OneOrMore:    MatchOneOrMore,
	MinMax:       MatchMinMax,
	Separated:    MatchSeparated,
	Drop:         MatchDrop,
	Trim:         MatchTrim,
	TrimLeft:     MatchTrimLeft,
	TrimRight:    MatchTrimRight,
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// In parsekit, an atom is defined as a ready to go Matcher function.
// All atoms are constructed once, when this package-level variable is
// initialized.
var A = struct {
	EndOfFile             Matcher
	AnyRune               Matcher
	Space                 Matcher
	Tab                   Matcher
	CR                    Matcher
	LF                    Matcher
	CRLF                  Matcher
	Excl                  Matcher
	DoubleQuote           Matcher
	Hash                  Matcher
	Dollar                Matcher
	Percent               Matcher
	Amp                   Matcher
	SingleQuote           Matcher
	RoundOpen             Matcher
	RoundClose            Matcher
	Asterisk              Matcher
	Plus                  Matcher
	Comma                 Matcher
	Minus                 Matcher
	Dot                   Matcher
	Slash                 Matcher
	Colon                 Matcher
	Semicolon             Matcher
	AngleOpen             Matcher
	Equal                 Matcher
	AngleClose            Matcher
	Question              Matcher
	At                    Matcher
	SquareOpen            Matcher
	Backslash             Matcher
	SquareClose           Matcher
	Caret                 Matcher
	Underscore            Matcher
	Backquote             Matcher
	CurlyOpen             Matcher
	Pipe                  Matcher
	CurlyClose            Matcher
	Tilde                 Matcher
	Newline               Matcher
	Whitespace            Matcher
	WhitespaceAndNewlines Matcher
	EndOfLine             Matcher
	Digit                 Matcher
	ASCII                 Matcher
	ASCIILower            Matcher
	ASCIIUpper            Matcher
	HexDigit              Matcher
}{
	EndOfFile:   MatchEndOfFile(),
	AnyRune:     MatchAnyRune(),
	Space:       C.Rune(' '),
	Tab:         C.Rune('\t'),
	CR:          C.Rune('\r'),
	LF:          C.Rune('\n'),
	CRLF:        C.String("\r\n"),
	Excl:        C.Rune('!'),
	DoubleQuote: C.Rune('"'),
	Hash:        C.Rune('#'),
	Dollar:      C.Rune('$'),
	Percent:     C.Rune('%'),
	Amp:         C.Rune('&'),
	SingleQuote: C.Rune('\''),
	RoundOpen:   C.Rune('('),
	RoundClose:  C.Rune(')'),
	Asterisk:    C.Rune('*'),
	Plus:        C.Rune('+'),
	Comma:       C.Rune(','),
	Minus:       C.Rune('-'),
	Dot:         C.Rune('.'),
	Slash:       C.Rune('/'),
	Colon:       C.Rune(':'),
	Semicolon:   C.Rune(';'),
	AngleOpen:   C.Rune('<'),
	Equal:       C.Rune('='),
	AngleClose:  C.Rune('>'),
	Question:    C.Rune('?'),
	At:          C.Rune('@'),
	SquareOpen:  C.Rune('['),
	Backslash:   C.Rune('\\'),
	SquareClose: C.Rune(']'),
	Caret:       C.Rune('^'),
	Underscore:  C.Rune('_'),
	Backquote:   C.Rune('`'),
	CurlyOpen:   C.Rune('{'),
	Pipe:        C.Rune('|'),
	CurlyClose:  C.Rune('}'),
	Tilde:       C.Rune('~'),
	// Newline was declared but never initialized, which left it a nil Matcher
	// that panics when invoked. It accepts the same two line endings that
	// WhitespaceAndNewlines and EndOfLine recognize: "\r\n" and "\n".
	Newline:               C.AnyOf(C.String("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.String("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchRune creates a Matcher function that reads the next rune from the
// input and accepts it when it equals the expected rune. Any other rune, a
// read failure included, is reported as a failed match.
func MatchRune(expected rune) Matcher {
	return func(m *MatchDialog) bool {
		if input, ok := m.NextRune(); !ok || input != expected {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRunes creates a Matcher function that reads the next rune from the
// input and accepts it when it is one of the provided runes.
//
// The provided runes are converted to a string once, at construction time;
// every match is a strings.ContainsRune() lookup in that string.
func MatchRunes(expected ...rune) Matcher {
	candidates := string(expected)
	return func(m *MatchDialog) bool {
		input, ok := m.NextRune()
		if !ok || !strings.ContainsRune(candidates, input) {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRuneRange creates a Matcher function that checks if the next rune
// from the input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
//
//	MatchRuneRange('g', 'k')
//
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// When the end rune lies before the start rune, the returned Matcher panics
// when it is invoked.
func MatchRuneRange(start rune, end rune) Matcher {
	return func(m *MatchDialog) bool {
		if end < start {
			// The message states the violated rule: start lies beyond end.
			panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be > end %q", start, end))
		}
		input, ok := m.NextRune()
		if ok && input >= start && input <= end {
			m.Accept()
			return true
		}
		return false
	}
}
// MatchString creates a Matcher that will check if the upcoming runes on the
// input match the provided string. It is built as a MatchSequence of one
// MatchRune per rune of the string.
// TODO make this a more efficient string-level match?
func MatchString(expected string) Matcher {
	perRune := make([]Matcher, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRune(r))
	}
	return MatchSequence(perRune...)
}
// MatchStringNoCase creates a Matcher that will check if the upcoming runes
// on the input match the provided string in a case-insensitive manner. It is
// built as a MatchSequence of one MatchRunes per rune of the string, which
// accepts both the upper case and the lower case form of that rune.
// TODO make this a more efficient string-level match?
func MatchStringNoCase(expected string) Matcher {
	perRune := make([]Matcher, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSequence(perRune...)
}
// MatchOptional creates a Matcher that makes the provided Matcher optional.
// The provided Matcher is tried on a fork. When it applies, its output is
// merged; when it does not, no output is generated. In both cases, a
// successful match is reported.
func MatchOptional(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if fork := m.Fork(); matcher(fork) {
			fork.Merge()
		}
		return true
	}
}
// MatchSequence creates a Matcher that checks if the provided Matchers can
// be applied in their exact order. The Matchers are applied on a single
// fork, which is only merged when all of them apply. As soon as one Matcher
// does not apply, a failed match is reported.
func MatchSequence(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		for i := range matchers {
			if matched := matchers[i](fork); !matched {
				return false
			}
		}
		// Merge() always returns true.
		return fork.Merge()
	}
}
// MatchAnyOf creates a Matcher that checks if any of the provided Matchers
// can be applied. They are tried in their provided order, each one on a
// fresh fork. The first Matcher that applies is merged and ends the search.
// When no Matcher applies, a failed match is reported.
func MatchAnyOf(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := range matchers {
			fork := m.Fork()
			if !matchers[i](fork) {
				continue
			}
			fork.Merge()
			return true
		}
		return false
	}
}
// MatchNot creates a Matcher that inverts the provided Matcher. The provided
// Matcher is probed on a fork, which is never merged. When the probe
// matches, a failed match is reported. When it does not match, the next
// rune from the input is accepted and reported as the match. A failed match
// is reported as well when no next rune can be read.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if probe := m.Fork(); matcher(probe) {
			return false
		}
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRepeat creates a Matcher that checks if the provided Matcher can be
// applied exactly the provided amount of times.
//
// Note that the input can contain more Matches for the provided matcher, e.g.:
//
//	MatchRepeat(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" of the input.
//
// The times parameter must be >= 0. A negative value is a definition error
// in the parser code and results in a panic.
func MatchRepeat(times int, matcher Matcher) Matcher {
	// matchMinMax interprets a negative max as "no upper limit", so a
	// negative repeat count must be rejected here instead of passed along.
	if times < 0 {
		panic("internal parser error: MatchRepeat definition error: times must be >= 0")
	}
	return matchMinMax(times, times, matcher)
}
// MatchMin creates a Matcher that requires the provided Matcher to apply
// at least min times. There is no upper limit: all further matches that
// are available are included in the output as well.
func MatchMin(min int, matcher Matcher) Matcher {
	const noUpperLimit = -1
	return matchMinMax(min, noUpperLimit, matcher)
}
// MatchMax creates a Matcher that checks if the provided Matcher can be
// applied at most the provided maximum number of times.
// When more matches are possible, these are not included in the output.
// Zero matches are considered a successful match.
//
// The max parameter must be >= 0. A negative value is a definition error
// in the parser code and results in a panic.
func MatchMax(max int, matcher Matcher) Matcher {
	// matchMinMax interprets a negative max as "no upper limit", which is
	// the opposite of what a maximum is for. Reject it here.
	if max < 0 {
		panic("internal parser error: MatchMax definition error: max must be >= 0")
	}
	return matchMinMax(0, max, matcher)
}
// MatchZeroOrMore creates a Matcher that applies the provided Matcher as
// often as it matches, without a lower or an upper limit. All matches are
// included in the output. Finding no match at all is a successful match.
func MatchZeroOrMore(matcher Matcher) Matcher {
	const (
		atLeast      = 0
		noUpperLimit = -1
	)
	return matchMinMax(atLeast, noUpperLimit, matcher)
}
// MatchOneOrMore creates a Matcher that requires the provided Matcher to
// apply at least once. There is no upper limit: all matches that are
// available are included in the output.
func MatchOneOrMore(matcher Matcher) Matcher {
	const (
		atLeast      = 1
		noUpperLimit = -1
	)
	return matchMinMax(atLeast, noUpperLimit, matcher)
}
// MatchMinMax creates a Matcher that requires the provided Matcher to apply
// at least min times and that includes at most max matches in the output.
// Both bounds are inclusive. A negative min or max is a definition error
// in the parser code and results in a panic.
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
	switch {
	case max < 0:
		panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
	case min < 0:
		panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
	}
	return matchMinMax(min, max, matcher)
}
// matchMinMax is the shared implementation behind the repetition Matchers.
// The returned Matcher applies the provided matcher at least min times and
// then continues until it either stops matching or max matches are reached.
// A negative max means that there is no upper limit.
//
// The Matcher reports false when fewer than min matches are found.
// It panics when a non-negative max is smaller than min.
func matchMinMax(min int, max int, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		// Validate the bounds before doing any work. Of the exported
		// wrappers, only MatchMinMax can pass in a max that is below min,
		// hence that name is used in the message.
		if max >= 0 && min > max {
			panic(fmt.Sprintf("internal parser error: MatchMinMax definition error: max %d must not be < min %d", max, min))
		}
		child := m.Fork()
		total := 0
		// Check for the minimum required amount of matches.
		for total < min {
			total++
			if !matcher(child) {
				return false
			}
		}
		// The minimum is satisfied: merge what has been matched so far.
		child.Merge()
		// No specified max: include the rest of the available matches.
		// Specified max: include the rest of the available matches, up to the max.
		for max < 0 || total < max {
			total++
			if !matcher(child) {
				break
			}
			// Merge after every successful match, so a later failing
			// attempt cannot undo the matches that were already found.
			child.Merge()
		}
		return true
	}
}
// MatchSeparated creates a Matcher for a list of one or more items (the
// separated Matcher), with a separator (the separator Matcher) between every
// two items. Both the items and the separators are included in the output.
func MatchSeparated(separated Matcher, separator Matcher) Matcher {
	// One follow-up element of the list: a separator and then an item.
	followUp := MatchSequence(separator, separated)
	rest := MatchZeroOrMore(followUp)
	return MatchSequence(separated, rest)
}
// MatchDrop creates a Matcher that applies the provided Matcher, but that
// throws away its output. When the provided Matcher applies, its output is
// cleared before the result is merged. A successful match is always
// reported, also when the provided Matcher does not apply.
func MatchDrop(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if child := m.Fork(); matcher(child) {
			child.ClearOutput()
			child.Merge()
		}
		return true
	}
}
// MatchTrim creates a Matcher that applies the provided Matcher and, when
// it matches, strips all characters that are in the provided cutset from
// both the start and the end of the output. When the provided Matcher does
// not match, a failed match is reported.
func MatchTrim(matcher Matcher, cutset string) Matcher {
	const left, right = true, true
	return func(m *MatchDialog) bool {
		return matchTrim(m, cutset, matcher, left, right)
	}
}
// MatchTrimLeft creates a Matcher that applies the provided Matcher and,
// when it matches, strips all characters that are in the provided cutset
// from the start of the output. When the provided Matcher does not match,
// a failed match is reported.
func MatchTrimLeft(matcher Matcher, cutset string) Matcher {
	const left, right = true, false
	return func(m *MatchDialog) bool {
		return matchTrim(m, cutset, matcher, left, right)
	}
}
// MatchTrimRight creates a Matcher that applies the provided Matcher and,
// when it matches, strips all characters that are in the provided cutset
// from the end of the output. When the provided Matcher does not match,
// a failed match is reported.
func MatchTrimRight(matcher Matcher, cutset string) Matcher {
	const left, right = false, true
	return func(m *MatchDialog) bool {
		return matchTrim(m, cutset, matcher, left, right)
	}
}
// matchTrim is the shared implementation behind MatchTrim, MatchTrimLeft
// and MatchTrimRight. It applies the matcher on a fork of m and, when the
// matcher applies, strips the characters from cutset from the left and/or
// right side of the matched output (as selected by trimLeft and trimRight).
// It returns false, without merging anything, when the matcher does not apply.
func matchTrim(m *MatchDialog, cutset string, matcher Matcher, trimLeft bool, trimRight bool) bool {
	child := m.Fork()
	if !matcher(child) {
		return false
	}
	// Trim the output of the child, before it is merged into the parent.
	// Trimming the parent's output after the merge would also strip
	// characters from output that was produced by earlier matchers.
	// NOTE(review): assumes that a fork starts out with its own output and
	// that Merge() hands that output over to the parent -- confirm against
	// the MatchDialog implementation.
	s := string(child.output)
	if trimLeft {
		s = strings.TrimLeft(s, cutset)
	}
	if trimRight {
		s = strings.TrimRight(s, cutset)
	}
	child.output = []rune(s)
	child.Merge()
	return true
}
// MatchEndOfFile creates a Matcher that reports a successful match when no
// more rune can be read from the input because the end of the input data
// has been reached. The check is done on a fork that is never merged, so
// this Matcher only reports its verdict through its boolean return value.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		input, ok := m.Fork().NextRune()
		if ok {
			return false
		}
		return input == EOF
	}
}
// MatchAnyRune creates a Matcher that accepts whatever rune comes next on
// the input. A failed match is reported when no next rune can be read.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}

203
matcher_builtin_test.go Normal file
View File

@ -0,0 +1,203 @@
package parsekit_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// ExampleMatchAnyRune shows how the AnyRune atom can be used from within a
// state handler: the first rune of the input is accepted and then emitted
// as a parser item.
func ExampleMatchAnyRune() {
	handler := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if p.On(a.AnyRune).Accept().End() {
			p.EmitLiteral(TestItem)
		}
	}
	run := parsekit.New(handler).Parse("¡Any / valid / character will dö!")
	if match, _, ok := run.Next(); ok {
		fmt.Printf("Match = %q\n", match)
	}
}
// TestCombinators runs a table of test cases against the parser combinators.
// Every case holds the input, the Matcher under test, whether or not a match
// is expected and the output that is expected for a match.
func TestCombinators(t *testing.T) {
	RunMatcherTests(t, []MatcherTest{
		{"xxx", c.Rune('x'), true, "x"},
		{"x ", c.Rune(' '), false, ""},
		{"aa", c.RuneRange('b', 'e'), false, ""},
		{"bb", c.RuneRange('b', 'e'), true, "b"},
		{"cc", c.RuneRange('b', 'e'), true, "c"},
		{"dd", c.RuneRange('b', 'e'), true, "d"},
		{"ee", c.RuneRange('b', 'e'), true, "e"},
		{"ff", c.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", c.String("Hello"), true, "Hello"},
		{"HellÖ, world!", c.StringNoCase("hellö"), true, "HellÖ"},
		{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", c.Runes('+', '-', '*', '/'), false, ""},
		{"abc", c.Not(c.Rune('b')), true, "a"},
		{"bcd", c.Not(c.Rune('b')), false, ""},
		{"1010", c.Not(c.Sequence(c.Rune('2'), c.Rune('0'))), true, "1"},
		{"2020", c.Not(c.Sequence(c.Rune('2'), c.Rune('0'))), false, ""},
		{"abc", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "a"},
		{"bcd", c.AnyOf(c.Rune('a'), c.Rune('b')), true, "b"},
		{"cde", c.AnyOf(c.Rune('a'), c.Rune('b')), false, ""},
		{"ababc", c.Repeat(4, c.Runes('a', 'b')), true, "abab"},
		{"ababc", c.Repeat(5, c.Runes('a', 'b')), false, ""},
		{"", c.Min(0, c.Rune('a')), true, ""},
		{"a", c.Min(0, c.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
		{"", c.Max(4, c.Rune('b')), true, ""},
		{"X", c.Max(4, c.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(c.Rune('d')), false, ""},
		{"X", c.OneOrMore(c.Rune('d')), false, ""},
		{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
		{"Hello, world!X", c.Sequence(c.String("Hello"), a.Comma, a.Space, c.String("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(c.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(c.Rune('h')), true, ""},
		{"ghijkl", c.Optional(c.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(c.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
		{"--cool", c.Sequence(c.Drop(c.OneOrMore(a.Minus)), c.String("cool")), true, "cool"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Sequence(a.Backslash, c.Rune('x'), c.Repeat(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", c.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", c.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", c.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" trim ", c.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", c.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", c.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
		{" trim ", c.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", c.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
	})
}
// TestAtoms runs a table of test cases against the atoms from parsekit.A.
// Every case holds the input, the atom under test, whether or not a match
// is expected and the output that is expected for a match. Cases that must
// not match use an empty expected output.
func TestAtoms(t *testing.T) {
	RunMatcherTests(t, []MatcherTest{
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
		{"", a.AnyRune, false, ""},     // end of file
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.RoundOpen, true, "("},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"+", a.Plus, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},
		{" \t \t \r\n", a.Whitespace, true, " \t \t "},
		{"\r", a.WhitespaceAndNewlines, false, ""},
		{" \t\r\n \r", a.WhitespaceAndNewlines, true, " \t\r\n "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"0", a.HexDigit, true, "0"},
		{"9", a.HexDigit, true, "9"},
		{"a", a.HexDigit, true, "a"},
		{"f", a.HexDigit, true, "f"},
		{"A", a.HexDigit, true, "A"},
		{"F", a.HexDigit, true, "F"},
		{"g", a.HexDigit, false, ""},
		{"G", a.HexDigit, false, ""},
	})
}
func TestSequenceOfRunes(t *testing.T) {
sequence := c.Sequence(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.New(func(p *parsekit.P) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() {
p.EmitLiteral(TestItem)
}
})
item, err, ok := parser.Parse(input).Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
if item.Value != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
}
}

View File

@ -6,6 +6,14 @@ import (
"runtime"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.New() method.
//
// To start parsing input data, use the method Parser.Parse().
type Parser struct {
startState StateHandler // the function that handles the very first state
}
// New instantiates a new Parser.
// The logic parameter provides the parsing logic to apply. This can be:
//
@ -55,12 +63,13 @@ func makeParserForMatcher(matcher Matcher) *Parser {
}))
}
// Parser is the top-level parser.
type Parser struct {
startState StateHandler // the function that handles the very first state
// Run represents a single parse run for a Parser.
type Run struct {
p *P // a struct holding the internal state of a parse run
}
// Parse starts a parse run on the provided input data.
// To retrieve parse items from the run, make use of the Run.Next() method.
func (p *Parser) Parse(input string) *Run {
return &Run{
p: &P{
@ -74,69 +83,59 @@ func (p *Parser) Parse(input string) *Run {
}
}
// Run represents a single parse run for a Parser.
type Run struct {
p *P // a struct holding the internal state of a parse run
}
// P holds the internal state of a parse run.
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorLine int // current row number in the input
cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
}
// Next retrieves the next parsed item for a parse run.
//
// When a valid item was found, then the boolean return parameter will be true.
// On error or when successfully reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise.
// When an error occurred, false will be returned and the error return value will
// be set (default is nil).
func (run *Run) Next() (Item, *Error, bool) {
// State handling loop: we handle states, until an Item is ready to be returned.
for {
select {
// If a state handler has emitted an (error) Item, then the state handling
// loop is stopped and the Item is returned to the caller.
case i := <-run.p.items:
return run.makeReturnValues(i)
// Otherwise, the next state handler is looked up and invoked.
default:
run.runStatusHandler()
run.runNextStateHandler()
}
}
}
// StateHandler defines the type of function that can be used to
// handle a parser state.
type StateHandler func(*P)
// makeReturnValues translates an Item that was received from the items
// channel into the return values for Next():
//
// * for an EOF item: the item, no error and false
//
// * for an error item: the item, a new Error (which is stored in the parse
// run state as well) holding the item value and the cursor position, and false
//
// * for any other item: the item (which is stored in the parse run state
// as well), no error and true
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
	switch i.Type {
	case ItemEOF:
		return i, nil, false
	case ItemError:
		parseErr := &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
		run.p.err = parseErr
		return i, parseErr, false
	}
	run.p.item = i
	return i, nil, true
}
// runStatusHandler moves the parser, which is bascially a state machine,
// runNextStateHandler moves the parser, which is basically a state machine,
// to its next status. It does so by invoking a function of the
// type StateHandler. This function represents the current status and
// is responsible for moving the parser to its next status, depending
// on the parsed input data.
func (run *Run) runStatusHandler() {
func (run *Run) runNextStateHandler() {
if state, ok := run.getNextStateHandler(); ok {
run.invokeNextStatusHandler(state)
run.invokeNextStateHandler(state)
}
}
// getNextStateHandler determines the next StatusHandler to invoke in order
// getNextStateHandler determines the next StateHandler to invoke in order
// to move the parsing state machine one step further.
//
// When implementing a parser, the StateHandler functions must provide
// a routing decision in every invocation. A routing decision is one
// of the following:
//
// * A route is specified explicitly, which means that the next StatusHandler
// * A route is specified explicitly, which means that the next StateHandler
// function to invoke is registered during the StateHandler function
// invocation. For example: p.RouteTo(nextStatus)
//
@ -147,9 +146,9 @@ func (run *Run) runStatusHandler() {
// a route explicitly, but otherStatus will be used implicitly after
// the nextStatus function has returned.
//
// * An expectation is registered by the StatusHandler.
// * An expectation is registered by the StateHandler.
// For example: p.Expects("a cool thing")
// When the StatusHandler returns without having specified a route, this
// When the StateHandler returns without having specified a route, this
// expectation is used to generate an "unexpected input" error message.
//
// When no routing decision is provided by a StateHandler, then this is
@ -169,24 +168,11 @@ func (run *Run) getNextStateHandler() (StateHandler, bool) {
}
}
// invokeNextStatusHandler moves the parser state to the provided state
// and invokes the StatusHandler function.
func (run *Run) invokeNextStatusHandler(state StateHandler) {
// invokeNextStateHandler moves the parser state to the provided state
// and invokes the StateHandler function.
func (run *Run) invokeNextStateHandler(state StateHandler) {
run.p.state = state
run.p.nextState = nil
run.p.expecting = ""
run.p.state(run.p)
}
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
switch {
case i.Type == ItemEOF:
return i, nil, false
case i.Type == ItemError:
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
return i, run.p.err, false
default:
run.p.item = i
return i, nil, true
}
}

View File

@ -1,7 +1,46 @@
package parsekit_test
import "git.makaay.nl/mauricem/go-parsekit"
// This file only provides building blocks for writing tests.
// No actual tests belong in this file.
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
const TestItem parsekit.ItemType = 1
var c, a = parsekit.C, parsekit.A
// MatcherTest describes a single test case for RunMatcherTest.
type MatcherTest struct {
	input     string           // the input data to run the matcher on
	matcher   parsekit.Matcher // the Matcher under test
	mustMatch bool             // whether or not the matcher is expected to match the input
	expected  string           // the expected item value (only checked when mustMatch is true)
}
// RunMatcherTests runs every test case from the provided test set through
// RunMatcherTest, in the order in which the cases appear in the set.
func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
	for i := range testSet {
		RunMatcherTest(t, testSet[i])
	}
}
// RunMatcherTest builds a parser for the Matcher from the test case, runs
// it on the test input and checks the first item that is produced.
//
// When the test case must match, an item of type MatchedItem holding the
// expected value is required. When the test case must not match, the
// parser must not produce an item.
func RunMatcherTest(t *testing.T, test MatcherTest) {
	run := parsekit.New(test.matcher).Parse(test.input)
	item, err, ok := run.Next()
	switch {
	case !test.mustMatch:
		if ok {
			t.Errorf("Test %q failed: should not match, but it did", test.input)
		}
	case !ok:
		t.Errorf("Test %q failed with error: %s", test.input, err)
	case item.Type != parsekit.MatchedItem:
		t.Errorf("Test %q failed: should match, but it didn't", test.input)
	case item.Value != test.expected:
		t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
	}
}

43
peek.go
View File

@ -1,43 +0,0 @@
package parsekit
import (
"unicode/utf8"
)
// peek returns but does not advance the cursor to the next rune(s) in the input.
// Returns the rune, its width in bytes and a boolean.
// The boolean will be false in case no upcoming rune can be peeked
// (end of data or invalid UTF8 character).
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
return handleRuneError(r, w)
}
// handleRuneError is used to normale rune value in case of errors.
// When an error occurs, then utf8.RuneError will be in the rune.
// This can however indicate one of two situations:
// * w == 0: end of file is reached
// * w == 1: invalid UTF character on input
// This function lets these two cases return respectively the
// package's own EOF or INVALID runes, to make it easy for client
// code to distinct between these two cases.
func handleRuneError(r rune, w int) (rune, int, bool) {
if r == utf8.RuneError {
if w == 0 {
return EOF, 0, false
}
return INVALID, w, false
}
return r, w, true
}
// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1
// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError

128
statehandler.go Normal file
View File

@ -0,0 +1,128 @@
package parsekit
import "unicode/utf8"
// StateHandler defines the type of function that must be implemented to
// handle a parsing state.
//
// A StateHandler function gets a P struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the StateHandler must use to interact with the parser.
type StateHandler func(*P)
// P holds the internal state of a parse run and provides an API to
// StateHandler methods to communicate with the parser.
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
inputPos int // current byte cursor position in the input
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error()
LastMatch string // a string representation of the last matched input data
}
// Expects is used to let a state function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
// handle unexpected input yourself. When the end of the function is reached
// without setting the next state, an automatic error will be emitted.
// This error can differentiate between the following issues:
//
// * there is valid data on input, but it was not accepted by the function
//
// * there is an invalid UTF8 character on input
//
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}
// peek decodes the rune that starts at the provided byte offset from the
// current input position, without moving the cursor.
// It returns the rune, its width in bytes and a boolean. The rune and the
// boolean are normalized by handleRuneError, so the boolean is false when
// no rune could be peeked (end of data or invalid UTF8 character).
func (p *P) peek(byteOffset int) (rune, int, bool) {
	remaining := p.input[p.inputPos+byteOffset:]
	return handleRuneError(utf8.DecodeRuneInString(remaining))
}
// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if p.On(c.Rune(EOF)).Skip().End() { ... }
// NOTE(review): confirm that matching on c.Rune(EOF) is still supported.
const EOF rune = -1

// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
const INVALID rune = utf8.RuneError

// handleRuneError is used to normalize the rune value in case of errors.
// When decoding fails, then utf8.RuneError will be in the rune.
// This can indicate one of two error situations:
// * w == 0: end of file is reached
// * w == 1: invalid UTF8 character on input
// This function lets these two cases return respectively the
// package's own EOF or INVALID runes, to make it easy for client
// code to distinguish between these two cases.
//
// A utf8.RuneError with any other width is not an error: it is a correctly
// encoded U+FFFD replacement character that is literally present in the
// input. That rune is returned as a valid rune.
//
// The boolean return value is true for a valid rune, false for EOF/INVALID.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return EOF, 0, false
	case 1:
		return INVALID, w, false
	default:
		return r, w, true
	}
}
// RouteTo registers the provided StateHandler as the function that the
// parser must invoke in the next parsing cycle. The returned action can be
// used to continue the method chain.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	followup := routeFollowupAction{chainAction: chainAction{p, true}}
	return &followup
}
// RouteRepeat routes to the StateHandler that is currently active, so that
// same StateHandler is invoked again on the next parsing cycle.
func (p *P) RouteRepeat() *chainAction {
	current := p.state
	p.RouteTo(current)
	return &chainAction{nil, true}
}
// RouteReturn pops the StateHandler that was pushed last from the route
// stack and registers it as the function to invoke on the next parsing cycle.
//
// Calling this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() *chainAction {
	returnTo := p.popRoute()
	p.nextState = returnTo
	return &chainAction{nil, true}
}
// pushRoute puts the provided StateHandler on top of the route stack.
// The route stack is what makes nested parsing possible.
func (p *P) pushRoute(state StateHandler) {
	stack := append(p.routeStack, state)
	p.routeStack = stack
}
// popRoute removes the StateHandler that was pushed last from the route
// stack and returns it. The route stack must not be empty.
func (p *P) popRoute() StateHandler {
	top := len(p.routeStack) - 1
	remaining := p.routeStack[:top]
	state := p.routeStack[top]
	p.routeStack = remaining
	return state
}

View File

@ -2,7 +2,6 @@ package parsekit
import (
"fmt"
"strings"
)
// ItemType represents the type of a parser Item.
@ -16,7 +15,7 @@ const ItemEOF ItemType = -1
// an error has occurred during parsing.
const ItemError ItemType = -2
// Item is a built-in parser item type that is used for indicating a
// MatchedItem is a built-in parser item type that is used for indicating a
// successful match when using a parser that is based on a Matcher.
const MatchedItem ItemType = -3
@ -27,8 +26,8 @@ type Item struct {
}
// Emit passes a Parser item to the client, including the provided string.
func (p *P) Emit(t ItemType, s string) {
p.items <- Item{t, s}
func (p *P) Emit(t ItemType, v string) {
p.items <- Item{t, v}
p.buffer.reset()
}
@ -38,28 +37,22 @@ func (p *P) EmitLiteral(t ItemType) {
p.Emit(t, p.buffer.asLiteralString())
}
// EmitLiteralTrim passes a Parser item to the client, including
// accumulated string buffer data as a literal string with whitespace
// trimmed from it.
func (p *P) EmitLiteralTrim(t ItemType) {
p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
}
// EmitInterpreted passes a Parser item to the client, including
// accumulated string buffer data a Go doubled quoted interpreted string
// (handling escape codes like \n, \t, \uXXXX, etc.)
// This method might return an error, in case there is data in the
// string buffer that is not valid for string interpretation.
func (p *P) EmitInterpreted(t ItemType) error {
// EmitInterpreted passes a Parser item to the client, including accumulated
// string buffer data a Go double quoted interpreted string (handling escape
// codes like \n, \t, \uXXXX, etc.)
// This method returns a boolean value, indicating whether or not the string
// interpretation was successful. On invalid string data, an error will
// automatically be emitted and false will be returned.
func (p *P) EmitInterpreted(t ItemType) bool {
s, err := p.buffer.asInterpretedString()
if err != nil {
p.EmitError(
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
p.buffer.asLiteralString(), err)
return err
return false
}
p.Emit(t, s)
return nil
return true
}
// Error is used as the error type when parsing errors occur.
@ -78,6 +71,8 @@ func (err *Error) Error() string {
return err.Message
}
// ErrorFull returns the current error message, including information about
// the position in the input where the error occurred.
func (err *Error) ErrorFull() string {
message := err.Error()
return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column)

View File

@ -1,15 +0,0 @@
package parsekit
// Expects is used to let a state function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// Also, when defining an expectation inside a StateHandler, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between issues:
// * there is valid data on input, but it was not accepted by the function
// * there is an invalid UTF8 character on input
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}

View File

@ -26,11 +26,15 @@ package parsekit
//
// You can omit "what to do with the match" and go straight into a routing
// method, e.g.
//
// On(...).RouteTo(...)
//
// This is functionally the same as using
//
// On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
//
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p}
@ -44,16 +48,18 @@ func (p *P) On(matcher Matcher) *matchAction {
// if p.On(somethingBad).End() {
// p.Errorf("This was bad: %s", p.LastMatch)
// }
p.LastMatch = string(m.runes)
p.LastMatch = string(m.input)
return &matchAction{
routeAction: routeAction{chainAction{p, ok}},
runes: m.runes,
widths: m.widths,
input: m.input,
output: m.output,
inputPos: p.inputPos + m.inputOffset,
}
}
// chainAction is used for building method chains for the On() method.
// Every element of the method chain embeds this struct.
type chainAction struct {
p *P
ok bool
@ -64,3 +70,119 @@ type chainAction struct {
// End closes a method chain and returns the ok flag that the chain carries,
// which makes a chain usable as a condition, e.g. if p.On(...).End() { ... }.
func (a *chainAction) End() bool {
return a.ok
}
// matchAction is the first link of an On()-method chain: it carries the
// outcome of the match and lets the caller decide what to do with it.
//
// Because routeAction is embedded, a routing method can be called on it
// directly, which makes p.On(...).RouteTo(...) a shorthand for
// p.On(...).Stay().RouteTo(...).
type matchAction struct {
	routeAction

	// input holds the matched runes that advanceCursor walks over to keep
	// line and column up to date; output holds the runes that Accept
	// writes to the string buffer.
	input, output []rune

	// inputPos is the value that advanceCursor stores in the parser's
	// inputPos when the match is consumed.
	inputPos int
}
// Accept consumes the match: the matcher output is appended to the parser's
// string buffer and the cursor is moved past the matched input.
// Without a match, the parser is left untouched.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Accept() *routeAction {
	next := &routeAction{chainAction: chainAction{p: a.p, ok: a.ok}}
	if !a.ok {
		return next
	}
	a.p.buffer.writeString(string(a.output))
	a.advanceCursor()
	return next
}
// Skip moves the cursor past the matched input without adding anything to
// the string buffer. Without a match, the parser is left untouched.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Skip() *routeAction {
	next := &routeAction{chainAction: chainAction{p: a.p, ok: a.ok}}
	if !a.ok {
		return next
	}
	a.advanceCursor()
	return next
}
// advanceCursor moves the parser past the complete match: the input
// position jumps to the position recorded for the match, and the line and
// column counters are updated for every matched rune, so errors can be
// reported with a row + column position.
//
// A newline only takes effect on the rune that follows it: the rune after
// a '\n' is the one that starts the new line at column 1.
func (a *matchAction) advanceCursor() {
	p := a.p
	p.inputPos = a.inputPos
	for i := 0; i < len(a.input); i++ {
		switch {
		case p.newline:
			p.cursorLine++
			p.cursorColumn = 1
		default:
			p.cursorColumn++
		}
		p.newline = a.input[i] == '\n'
	}
}
// Stay leaves the cursor where it is, whether or not a match was found.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Stay() *routeAction {
	chain := chainAction{p: a.p, ok: a.ok}
	return &routeAction{chainAction: chain}
}
// routeAction is a struct that is used for building On() method chains.
// It embeds chainAction and adds the routing methods RouteRepeat, RouteTo
// and RouteReturn, which only route when the chain's ok flag is set.
type routeAction struct {
chainAction
}
// RouteRepeat schedules the current StateHandler to run again on the next
// parsing cycle, but only when the chain is still ok. Otherwise nothing is
// scheduled and a failed chainAction is returned.
func (a *routeAction) RouteRepeat() *chainAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &chainAction{}
	}
	return a.p.RouteRepeat()
}
// RouteTo schedules the given StateHandler for the next parsing cycle, but
// only when the chain is still ok. Otherwise nothing is scheduled and a
// failed routeFollowupAction is returned.
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &routeFollowupAction{}
	}
	return a.p.RouteTo(state)
}
// RouteReturn makes the next scheduled route the one to invoke on the next
// parsing cycle, but only when the chain is still ok. Otherwise nothing is
// changed and a failed chainAction is returned.
func (a *routeAction) RouteReturn() *chainAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &chainAction{}
	}
	return a.p.RouteReturn()
}
// routeFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// It embeds chainAction and adds the follow-up methods ThenTo and
// ThenReturnHere, which push a StateHandler on the parser's route stack.
type routeFollowupAction struct {
chainAction
}
// ThenTo pushes the given StateHandler on the route stack, so it is
// scheduled to run after the StateHandler that RouteTo selected.
// Nothing is pushed when the chain is not ok.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
	if !a.ok {
		return &chainAction{}
	}
	a.p.pushRoute(state)
	return &chainAction{ok: true}
}
// ThenReturnHere pushes the parser's current StateHandler on the route
// stack, so it is scheduled to run again after the StateHandler that
// RouteTo selected. Nothing is pushed when the chain is not ok.
// For example: p.RouteTo(handlerA).ThenReturnHere()
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
	if !a.ok {
		return &chainAction{}
	}
	a.p.pushRoute(a.p.state)
	return &chainAction{ok: true}
}

View File

@ -1,60 +0,0 @@
package parsekit
// matchAction is a struct that is used for building On()-method chains.
//
// It embeds the routeAction struct, to make it possible to go right into
// a route action, which is basically a simple way of aliasing a chain
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
type matchAction struct {
routeAction
// runes holds the matched runes; Accept writes them to the string buffer.
runes []rune
// widths holds, for every rune in runes, the width that is added to
// p.pos when the cursor is moved past that rune.
widths []int
}
// Accept consumes the match rune by rune: every matched rune is written to
// the parser's string buffer and the cursor is moved past it.
// Without a match, the parser is left untouched.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Accept() *routeAction {
	next := &routeAction{chainAction: chainAction{p: a.p, ok: a.ok}}
	if !a.ok {
		return next
	}
	for i := 0; i < len(a.runes); i++ {
		a.p.buffer.writeRune(a.runes[i])
		a.p.advanceCursor(a.runes[i], a.widths[i])
	}
	return next
}
// Skip moves the cursor past every matched rune without adding anything to
// the string buffer. Without a match, the parser is left untouched.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Skip() *routeAction {
	next := &routeAction{chainAction: chainAction{p: a.p, ok: a.ok}}
	if !a.ok {
		return next
	}
	for i := 0; i < len(a.runes); i++ {
		a.p.advanceCursor(a.runes[i], a.widths[i])
	}
	return next
}
// Stay leaves the cursor where it is, whether or not a match was found.
// The returned routeAction carries the match result onward, so the chain
// can continue with a routing method.
func (a *matchAction) Stay() *routeAction {
	chain := chainAction{p: a.p, ok: a.ok}
	return &routeAction{chainAction: chain}
}
// advanceCursor moves the cursor past a single rune r that is w wide in
// the input data. It keeps track of newlines while doing so, so errors can
// be reported with a row + column position.
//
// A newline only takes effect on the rune that follows it: the rune after
// a '\n' is the one that starts the new line at column 1.
func (p *P) advanceCursor(r rune, w int) {
	p.pos += w
	switch {
	case p.newline:
		p.cursorLine++
		p.cursorColumn = 1
	default:
		p.cursorColumn++
	}
	p.newline = r == '\n'
}

View File

@ -1,59 +0,0 @@
package parsekit
// routeAction is a struct that is used for building On() method chains.
// It embeds chainAction and adds the routing methods RouteRepeat, RouteTo
// and RouteReturn, which only route when the chain's ok flag is set.
type routeAction struct {
chainAction
}
// RouteRepeat schedules the current StateHandler to run again on the next
// parsing cycle, but only when the chain is still ok. Otherwise nothing is
// scheduled and a failed chainAction is returned.
func (a *routeAction) RouteRepeat() *chainAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &chainAction{}
	}
	return a.p.RouteRepeat()
}
// RouteTo schedules the given StateHandler for the next parsing cycle, but
// only when the chain is still ok. Otherwise nothing is scheduled and a
// failed routeFollowupAction is returned.
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &routeFollowupAction{}
	}
	return a.p.RouteTo(state)
}
// RouteReturn makes the next scheduled route the one to invoke on the next
// parsing cycle, but only when the chain is still ok. Otherwise nothing is
// changed and a failed chainAction is returned.
func (a *routeAction) RouteReturn() *chainAction {
	if !a.ok {
		// Zero value: no parser, ok == false.
		return &chainAction{}
	}
	return a.p.RouteReturn()
}
// routeFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// It embeds chainAction and adds the follow-up methods ThenTo and
// ThenReturnHere, which push a StateHandler on the parser's route stack.
type routeFollowupAction struct {
chainAction
}
// ThenTo pushes the given StateHandler on the route stack, so it is
// scheduled to run after the StateHandler that RouteTo selected.
// Nothing is pushed when the chain is not ok.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
	if !a.ok {
		return &chainAction{}
	}
	a.p.pushRoute(state)
	return &chainAction{ok: true}
}
// ThenReturnHere pushes the parser's current StateHandler on the route
// stack, so it is scheduled to run again after the StateHandler that
// RouteTo selected. Nothing is pushed when the chain is not ok.
// For example: p.RouteTo(handlerA).ThenReturnHere()
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
	if !a.ok {
		return &chainAction{}
	}
	a.p.pushRoute(a.p.state)
	return &chainAction{ok: true}
}

View File

@ -1,42 +0,0 @@
package parsekit
// RouteTo selects the StateHandler function that must be invoked in the
// next parsing cycle. The returned routeFollowupAction can be used to
// schedule what has to happen after that handler, e.g. with ThenTo.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	chain := chainAction{p: p, ok: true}
	return &routeFollowupAction{chainAction: chain}
}
// RouteRepeat routes to the StateHandler that is currently active, so it
// is invoked once more on the next parsing cycle.
func (p *P) RouteRepeat() *chainAction {
	current := p.state
	p.RouteTo(current)
	return &chainAction{ok: true}
}
// RouteReturn pops the StateHandler that was pushed last on the route stack
// and selects it for invocation on the next parsing cycle.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() *chainAction {
	scheduled := p.popRoute()
	p.nextState = scheduled
	return &chainAction{ok: true}
}
// pushRoute adds the StateHandler to the route stack.
// This is used for implementing nested parsing.
// The handler is appended at the end of p.routeStack, which is the side
// that popRoute takes its result from.
func (p *P) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute pops the last pushed StateHandler from the route stack and
// returns it.
//
// When the route stack is empty, nil is returned instead of panicking on an
// out-of-range slice expression. For RouteReturn this means that no next
// state gets selected.
func (p *P) popRoute() StateHandler {
	last := len(p.routeStack) - 1
	if last < 0 {
		return nil
	}
	handler := p.routeStack[last]
	// Clear the vacated slot, so the backing array does not keep the
	// popped handler (and whatever it closes over) alive.
	p.routeStack[last] = nil
	p.routeStack = p.routeStack[:last]
	return handler
}