Implemented a lot of atoms in the parsekit library, ready for use by a parser implementation.

This commit is contained in:
Maurice Makaay 2019-05-22 12:44:29 +00:00
parent 355f995388
commit 8a09b7ca49
7 changed files with 292 additions and 45 deletions

114
atoms.go Normal file
View File

@ -0,0 +1,114 @@
package parsekit
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// Every field is a ready-made Matcher value, so an atom is used as-is
// (A.Space), whereas the fields of C are constructor functions that
// have to be called to obtain a Matcher (C.Rune(' ')).
var A = struct {
// Input boundary and wildcard atoms.
EndOfFile Matcher
AnyRune Matcher
// Single whitespace / control runes.
Space Matcher
Tab Matcher
CarriageRet Matcher
Newline Matcher
// Single ASCII punctuation runes, listed in ASCII order.
Excl Matcher
DoubleQuote Matcher
Hash Matcher
Dollar Matcher
Percent Matcher
Amp Matcher
SingleQuote Matcher
RoundOpen Matcher
RoundClose Matcher
Asterisk Matcher
Plus Matcher
Comma Matcher
Minus Matcher
Dot Matcher
Slash Matcher
Colon Matcher
Semicolon Matcher
AngleOpen Matcher
Equal Matcher
AngleClose Matcher
Question Matcher
At Matcher
SquareOpen Matcher
Backslash Matcher
SquareClose Matcher
Caret Matcher
Underscore Matcher
Backquote Matcher
CurlyOpen Matcher
Pipe Matcher
CurlyClose Matcher
Tilde Matcher
// Composite atoms, assembled from the combinators in C.
Whitespace Matcher
WhitespaceAndNewlines Matcher
EndOfLine Matcher
Digit Matcher
ASCII Matcher
ASCIILower Matcher
ASCIIUpper Matcher
HexDigit Matcher
}{
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
Space: C.Rune(' '),
Tab: C.Rune('\t'),
CarriageRet: C.Rune('\r'),
Newline: C.Rune('\n'),
Excl: C.Rune('!'),
DoubleQuote: C.Rune('"'),
Hash: C.Rune('#'),
Dollar: C.Rune('$'),
Percent: C.Rune('%'),
Amp: C.Rune('&'),
SingleQuote: C.Rune('\''),
RoundOpen: C.Rune('('),
RoundClose: C.Rune(')'),
Asterisk: C.Rune('*'),
Plus: C.Rune('+'),
Comma: C.Rune(','),
Minus: C.Rune('-'),
Dot: C.Rune('.'),
Slash: C.Rune('/'),
Colon: C.Rune(':'),
Semicolon: C.Rune(';'),
AngleOpen: C.Rune('<'),
Equal: C.Rune('='),
AngleClose: C.Rune('>'),
Question: C.Rune('?'),
At: C.Rune('@'),
SquareOpen: C.Rune('['),
Backslash: C.Rune('\\'),
SquareClose: C.Rune(']'),
Caret: C.Rune('^'),
Underscore: C.Rune('_'),
Backquote: C.Rune('`'),
CurlyOpen: C.Rune('{'),
Pipe: C.Rune('|'),
CurlyClose: C.Rune('}'),
Tilde: C.Rune('~'),
// Whitespace: C.OneOrMore over the alternatives space and tab.
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
// WhitespaceAndNewlines: like Whitespace, with '\r' and '\n' added to
// the set of alternatives.
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.Rune('\r'), C.Rune('\n'))),
// EndOfLine: the alternatives are "\r\n", a bare '\n', or end of file.
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
Digit: C.RuneRange('0', '9'),
// ASCII: the rune range 0x00 up to 0x7F.
ASCII: C.RuneRange('\x00', '\x7F'),
ASCIILower: C.RuneRange('a', 'z'),
ASCIIUpper: C.RuneRange('A', 'Z'),
// HexDigit: a decimal digit or a letter a-f in either case.
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that succeeds only when the input is
// exhausted: reading the next rune must fail and the rune that comes
// back must be EOF.
//
// The rune is read from a fork of the MatchDialog. The fork is dropped
// afterwards instead of being merged back, so the probe is performed on
// the fork rather than on m itself.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		lookahead := m.Fork()
		r, gotRune := lookahead.NextRune()
		if gotRune {
			// A real rune is available, so this cannot be the end.
			return false
		}
		return r == EOF
	}
}
// MatchAnyRune creates a Matcher that accepts whatever rune NextRune
// delivers. It reports false only when NextRune signals that no rune
// could be read.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, gotRune := m.NextRune(); !gotRune {
			return false
		}
		return true
	}
}

131
atoms_test.go Normal file
View File

@ -0,0 +1,131 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestAtoms runs the atoms from parsekit.A against small inputs and checks
// that each one matches, or refuses to match, as the table says.
//
// For every case a one-state parser is built that emits SuccessItem when the
// matcher accepts the input and FailItem otherwise. Problems are reported
// with t.Errorf, so that one broken atom does not hide failures in the cases
// that come after it. The loop variable is called tc to avoid shadowing the
// package-level alias c (parsekit.C).
func TestAtoms(t *testing.T) {
	for i, tc := range []struct {
		input     string
		matcher   parsekit.Matcher
		mustMatch bool
	}{
		{"", a.EndOfFile, true},
		{"⌘", a.AnyRune, true},
		{"\xbc", a.AnyRune, false}, // invalid UTF8 rune
		{"", a.AnyRune, false},     // end of file
		{" ", a.Space, true},
		{"X", a.Space, false},
		{"\t", a.Tab, true},
		{"\r", a.CarriageRet, true},
		{"\n", a.Newline, true},
		{"!", a.Excl, true},
		{"\"", a.DoubleQuote, true},
		{"#", a.Hash, true},
		{"$", a.Dollar, true},
		{"%", a.Percent, true},
		{"&", a.Amp, true},
		{"'", a.SingleQuote, true},
		{"(", a.RoundOpen, true},
		{")", a.RoundClose, true},
		{"*", a.Asterisk, true},
		{"+", a.Plus, true},
		{",", a.Comma, true},
		{"-", a.Minus, true},
		{".", a.Dot, true},
		{"/", a.Slash, true},
		{":", a.Colon, true},
		{";", a.Semicolon, true},
		{"<", a.AngleOpen, true},
		{"=", a.Equal, true},
		{">", a.AngleClose, true},
		{"?", a.Question, true},
		{"@", a.At, true},
		{"[", a.SquareOpen, true},
		{"\\", a.Backslash, true},
		{"]", a.SquareClose, true},
		{"^", a.Caret, true},
		{"_", a.Underscore, true},
		{"`", a.Backquote, true},
		{"{", a.CurlyOpen, true},
		{"|", a.Pipe, true},
		{"}", a.CurlyClose, true},
		{"~", a.Tilde, true},
		{" \t \t ", a.Whitespace, true},
		{" \t\r\n ", a.WhitespaceAndNewlines, true},
		{"", a.EndOfLine, true},
		{"\r\n", a.EndOfLine, true},
		{"\n", a.EndOfLine, true},
		{"0", a.Digit, true},
		{"1", a.Digit, true},
		{"2", a.Digit, true},
		{"3", a.Digit, true},
		{"4", a.Digit, true},
		{"5", a.Digit, true},
		{"6", a.Digit, true},
		{"7", a.Digit, true},
		{"8", a.Digit, true},
		{"9", a.Digit, true},
		{"X", a.Digit, false},
		{"a", a.ASCIILower, true},
		{"z", a.ASCIILower, true},
		{"A", a.ASCIILower, false},
		{"Z", a.ASCIILower, false},
		{"A", a.ASCIIUpper, true},
		{"Z", a.ASCIIUpper, true},
		{"a", a.ASCIIUpper, false},
		{"z", a.ASCIIUpper, false},
		{"0", a.HexDigit, true},
		{"9", a.HexDigit, true},
		{"a", a.HexDigit, true},
		{"f", a.HexDigit, true},
		{"A", a.HexDigit, true},
		{"F", a.HexDigit, true},
		{"g", a.HexDigit, false},
		{"G", a.HexDigit, false},
	} {
		parser := parsekit.New(tc.input, func(p *parsekit.P) {
			if p.On(tc.matcher).Accept().End() {
				p.EmitLiteral(SuccessItem)
			} else {
				p.EmitLiteral(FailItem)
			}
		})
		item, err, ok := parser.Next()
		if !ok {
			t.Errorf("Test [%d] %q failed with error: %s", i+1, tc.input, err)
			// Without an item there is nothing left to check for this case.
			continue
		}
		switch {
		case tc.mustMatch && item.Type != SuccessItem:
			t.Errorf("Test [%d] %q failed: should match, but it didn't", i+1, tc.input)
		case !tc.mustMatch && item.Type != FailItem:
			t.Errorf("Test [%d] %q failed: should not match, but it did", i+1, tc.input)
		}
	}
}
// TestSequenceOfRunes chains the punctuation atoms from Hash up to and
// including Tilde into one c.Sequence matcher and feeds it the matching
// string of punctuation characters. The literal emitted by the parser is
// expected to be that same string.
func TestSequenceOfRunes(t *testing.T) {
	want := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

	punctuation := c.Sequence(
		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
		a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
		a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
		a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
	)

	// Single parser state: accept the full sequence and emit it as a literal.
	handler := func(p *parsekit.P) {
		p.Expects("Sequence of runes")
		if matched := p.On(punctuation).Accept().End(); matched {
			p.EmitLiteral(TestItem)
		}
	}

	got, err, ok := parsekit.New(want, handler).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if got.Value != want {
		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", want, got.Value)
	}
}

View File

@ -5,8 +5,10 @@ import (
"unicode/utf8"
)
// Not in need of it myself, but nice to have I guess:
// Nice to have I guess:
// - LookAhead
// - Ready to go combinators for various number notations
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
type Matcher func(m *MatchDialog) bool
@ -49,7 +51,7 @@ func (m *MatchDialog) NextRune() (rune, bool) {
// 3 runes returned from NextRune() which match the expectations, then the
// slice of runes inside the MatchDialog will contain these 3 runes.
// When after this the 4th rune turns out to be a mismatch, the forked
// MatchDialog can simply be disarded, and the state in the parent will be
// MatchDialog can simply be discarded, and the state in the parent will be
// kept as-is.
//
// When a forked MatchDialog is in use, and the Matcher decides that a
@ -87,15 +89,13 @@ func (m *MatchDialog) Clear() {
m.widths = []int{}
}
// C provides convenient access to a wide range of parser/combinator
// C provides convenient access to a range of parser/combinator
// constructors that can be used to build matching expressions.
//
// When using C in your own parser, then it is advised to create
// a variable in your own package to reference it (var c = parsekit.C).
// This saves a lot of typing, and it makes your code a lot cleaner.
var C = struct {
EndOfFile func() Matcher
AnyRune func() Matcher
Rune func(rune) Matcher
Runes func(...rune) Matcher
RuneRange func(rune, rune) Matcher
@ -114,8 +114,6 @@ var C = struct {
Separated func(Matcher, Matcher) Matcher
Drop func(Matcher) Matcher
}{
EndOfFile: MatchEndOfFile,
AnyRune: MatchAnyRune,
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
@ -135,20 +133,6 @@ var C = struct {
Drop: MatchDrop,
}
// MatchEndOfFile creates a Matcher that succeeds only when the input is
// exhausted: reading the next rune from m must fail and the rune that
// comes back must be EOF.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		r, gotRune := m.NextRune()
		if gotRune {
			// A real rune is available, so this cannot be the end.
			return false
		}
		return r == EOF
	}
}
// MatchAnyRune creates a Matcher that accepts whatever rune NextRune
// delivers. It reports false only when NextRune signals that no rune
// could be read.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, gotRune := m.NextRune(); !gotRune {
			return false
		}
		return true
	}
}
func MatchRune(r rune) Matcher {
return func(m *MatchDialog) bool {
input, ok := m.NextRune()

View File

@ -5,29 +5,24 @@ import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
p "git.makaay.nl/mauricem/go-parsekit"
)
var c = p.C
const TestItem p.ItemType = 1
func newParser(input string, Matcher p.Matcher) *p.P {
stateFn := func(p *p.P) {
func newParser(input string, Matcher parsekit.Matcher) *parsekit.P {
stateFn := func(p *parsekit.P) {
p.Expects("MATCH")
if p.On(Matcher).Accept().End() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}
}
return p.New(input, stateFn)
return parsekit.New(input, stateFn)
}
func ExampleTestMatchAny(t *testing.T) {
parser := parsekit.New(
"¡Any / valid / character will dö!",
func(p *parsekit.P) {
p.On(parsekit.MatchAnyRune()).Accept()
p.On(a.AnyRune).Accept()
p.EmitLiteral(TestItem)
})
match, _, ok := parser.Next()
@ -37,7 +32,7 @@ func ExampleTestMatchAny(t *testing.T) {
}
func TestMatchAnyRune(t *testing.T) {
p := newParser("o", c.AnyRune())
p := newParser("o", a.AnyRune)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
@ -51,7 +46,7 @@ func TestMatchAnyRune(t *testing.T) {
}
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
p := newParser("", c.AnyRune())
p := newParser("", a.AnyRune)
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
@ -63,7 +58,7 @@ func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
}
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", c.AnyRune())
p := newParser("\xcd", a.AnyRune)
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
@ -335,7 +330,7 @@ func TestMatchOptional(t *testing.T) {
func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)

11
parsekit_test.go Normal file
View File

@ -0,0 +1,11 @@
package parsekit_test
import "git.makaay.nl/mauricem/go-parsekit"
// Item types that the test parsers in this package emit.
const (
	// TestItem (1) is emitted by tests that only care about the literal value.
	TestItem parsekit.ItemType = iota + 1
	// SuccessItem (2) signals that the matcher under test accepted the input.
	SuccessItem
	// FailItem (3) signals that the matcher under test rejected the input.
	FailItem
)

// Short aliases for the combinator constructors (c) and the atoms (a), to
// keep the test code compact.
var (
	c = parsekit.C
	a = parsekit.A
)

View File

@ -49,6 +49,9 @@ func (p *P) EmitLiteralTrim(t ItemType) {
func (p *P) EmitInterpreted(t ItemType) error {
s, err := p.buffer.asInterpretedString()
if err != nil {
p.EmitError(
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
p.buffer.asLiteralString(), err)
return err
}
p.Emit(t, s)

View File

@ -5,14 +5,17 @@ package parsekit
// This method is the start of a chain method in which multiple things can
// be arranged in one go:
//
// * Checking whether or not there is a match (this is what On does)
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move
// the cursor forward, Accept(): move cursor forward and add the match in
// the parser string buffer)
// * Dedicing where to route to (e.g. using RouteTo() to route to a
// StateHandler by name)
// * Followup routing after that, when applicable (.e.g using something like
// RouteTo(...).ThenTo(...))
// 1) Checking whether or not there is a match (this is what On does)
//
// 2) Deciding what to do with the match (Stay(): do nothing, Skip(): only move
// the cursor forward, Accept(): move cursor forward and add the match in
// the parser string buffer)
//
// 3) Deciding where to route to (e.g. using RouteTo() to route to a
// StateHandler by name)
//
// 4) Follow-up routing after that, when applicable (e.g. using something like
// RouteTo(...).ThenTo(...))
//
// For every step of this chain, you can end the chain using the
// End() method. This will return a boolean value, indicating whether or
@ -22,13 +25,19 @@ package parsekit
// require a boolean expression).
//
// You can omit "what to do with the match" and go straight into a routing
// method, e.g. On(...).RouteTo(...). This is functionally the same as
// using On(...).Stay().RouteTo(...).
// method, e.g.
// On(...).RouteTo(...)
// This is functionally the same as using
// On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p}
if matcher == nil {
p.EmitError("internal parser error: matcher argument for On() is nil")
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
}
ok := matcher(m)
// Keep track of the last match, to allow parser implementations