Implemented a lot of atoms in the parsekit library, ready for use by a parser implementation.

This commit is contained in:
Maurice Makaay 2019-05-22 12:44:29 +00:00
parent 355f995388
commit 8a09b7ca49
7 changed files with 292 additions and 45 deletions

114
atoms.go Normal file
View File

@ -0,0 +1,114 @@
package parsekit
// A provides convenient access to a range of ready-made atoms (Matcher
// values) that can be used directly, or as building blocks for combinators
// and parsing rules.
//
// Unlike the constructors in C, the fields of A are Matcher values, not
// functions: use A.Space, not A.Space().
//
// As with C, it can be practical to reference A through a short variable in
// your own package (e.g. var a = parsekit.A).
var A = struct {
// Input boundary and wildcard atoms.
EndOfFile Matcher
AnyRune Matcher
// Single-rune atoms for the space and the control characters \t, \r and \n.
Space Matcher
Tab Matcher
CarriageRet Matcher
Newline Matcher
// Single-rune atoms for the ASCII punctuation characters, listed in
// ASCII code point order ('!' through '~').
Excl Matcher
DoubleQuote Matcher
Hash Matcher
Dollar Matcher
Percent Matcher
Amp Matcher
SingleQuote Matcher
RoundOpen Matcher
RoundClose Matcher
Asterisk Matcher
Plus Matcher
Comma Matcher
Minus Matcher
Dot Matcher
Slash Matcher
Colon Matcher
Semicolon Matcher
AngleOpen Matcher
Equal Matcher
AngleClose Matcher
Question Matcher
At Matcher
SquareOpen Matcher
Backslash Matcher
SquareClose Matcher
Caret Matcher
Underscore Matcher
Backquote Matcher
CurlyOpen Matcher
Pipe Matcher
CurlyClose Matcher
Tilde Matcher
// Compound atoms, composed from the combinators in C.
Whitespace Matcher
WhitespaceAndNewlines Matcher
EndOfLine Matcher
Digit Matcher
ASCII Matcher
ASCIILower Matcher
ASCIIUpper Matcher
HexDigit Matcher
}{
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
Space: C.Rune(' '),
Tab: C.Rune('\t'),
CarriageRet: C.Rune('\r'),
Newline: C.Rune('\n'),
Excl: C.Rune('!'),
DoubleQuote: C.Rune('"'),
Hash: C.Rune('#'),
Dollar: C.Rune('$'),
Percent: C.Rune('%'),
Amp: C.Rune('&'),
SingleQuote: C.Rune('\''),
RoundOpen: C.Rune('('),
RoundClose: C.Rune(')'),
Asterisk: C.Rune('*'),
Plus: C.Rune('+'),
Comma: C.Rune(','),
Minus: C.Rune('-'),
Dot: C.Rune('.'),
Slash: C.Rune('/'),
Colon: C.Rune(':'),
Semicolon: C.Rune(';'),
AngleOpen: C.Rune('<'),
Equal: C.Rune('='),
AngleClose: C.Rune('>'),
Question: C.Rune('?'),
At: C.Rune('@'),
SquareOpen: C.Rune('['),
Backslash: C.Rune('\\'),
SquareClose: C.Rune(']'),
Caret: C.Rune('^'),
Underscore: C.Rune('_'),
Backquote: C.Rune('`'),
CurlyOpen: C.Rune('{'),
Pipe: C.Rune('|'),
CurlyClose: C.Rune('}'),
Tilde: C.Rune('~'),
// One or more spaces and/or tabs.
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
// One or more spaces, tabs, carriage returns and/or newlines.
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.Rune('\r'), C.Rune('\n'))),
// Either "\r\n", a lone "\n", or the end of the input.
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
// A single rune in the range '0' - '9'.
Digit: C.RuneRange('0', '9'),
// A single rune in the 7-bit ASCII range 0x00 - 0x7F.
ASCII: C.RuneRange('\x00', '\x7F'),
// A single rune in the range 'a' - 'z'.
ASCIILower: C.RuneRange('a', 'z'),
// A single rune in the range 'A' - 'Z'.
ASCIIUpper: C.RuneRange('A', 'Z'),
// A single rune in one of the ranges '0' - '9', 'a' - 'f' or 'A' - 'F'.
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that checks whether the end of the input
// has been reached.
//
// The next rune is requested from a fork of the provided MatchDialog rather
// than from the MatchDialog itself. The Matcher reports a match only when
// that read is unsuccessful and the value that came back is EOF; an
// unsuccessful read that yields any other value is not a match.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		probe := m.Fork()
		r, ok := probe.NextRune()
		if ok {
			// A rune could be read, so there is still input left.
			return false
		}
		return r == EOF
	}
}
// MatchAnyRune creates a Matcher that requests the next rune from the
// MatchDialog and matches whenever that request succeeds, regardless of
// which rune was read. It does not match when NextRune() reports failure.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, ok := m.NextRune(); !ok {
			return false
		}
		return true
	}
}

131
atoms_test.go Normal file
View File

@ -0,0 +1,131 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// TestAtoms is a table-driven test that feeds an input string to a parser
// that tries to accept a single atom from parsekit.A. The parser emits a
// SuccessItem when the atom matched and a FailItem when it did not, after
// which the emitted item type is compared against the expectation.
func TestAtoms(t *testing.T) {
	type atomCase struct {
		input     string
		matcher   parsekit.Matcher
		mustMatch bool
	}

	cases := []atomCase{
		{"", a.EndOfFile, true},
		{"⌘", a.AnyRune, true},
		{"\xbc", a.AnyRune, false}, // invalid UTF8 rune
		{"", a.AnyRune, false},     // end of file
		{" ", a.Space, true},
		{"X", a.Space, false},
		{"\t", a.Tab, true},
		{"\r", a.CarriageRet, true},
		{"\n", a.Newline, true},
		{"!", a.Excl, true},
		{"\"", a.DoubleQuote, true},
		{"#", a.Hash, true},
		{"$", a.Dollar, true},
		{"%", a.Percent, true},
		{"&", a.Amp, true},
		{"'", a.SingleQuote, true},
		{"(", a.RoundOpen, true},
		{")", a.RoundClose, true},
		{"*", a.Asterisk, true},
		{"+", a.Plus, true},
		{",", a.Comma, true},
		{"-", a.Minus, true},
		{".", a.Dot, true},
		{"/", a.Slash, true},
		{":", a.Colon, true},
		{";", a.Semicolon, true},
		{"<", a.AngleOpen, true},
		{"=", a.Equal, true},
		{">", a.AngleClose, true},
		{"?", a.Question, true},
		{"@", a.At, true},
		{"[", a.SquareOpen, true},
		{"\\", a.Backslash, true},
		{"]", a.SquareClose, true},
		{"^", a.Caret, true},
		{"_", a.Underscore, true},
		{"`", a.Backquote, true},
		{"{", a.CurlyOpen, true},
		{"|", a.Pipe, true},
		{"}", a.CurlyClose, true},
		{"~", a.Tilde, true},
		{" \t \t ", a.Whitespace, true},
		{" \t\r\n ", a.WhitespaceAndNewlines, true},
		{"", a.EndOfLine, true},
		{"\r\n", a.EndOfLine, true},
		{"\n", a.EndOfLine, true},
		{"0", a.Digit, true},
		{"1", a.Digit, true},
		{"2", a.Digit, true},
		{"3", a.Digit, true},
		{"4", a.Digit, true},
		{"5", a.Digit, true},
		{"6", a.Digit, true},
		{"7", a.Digit, true},
		{"8", a.Digit, true},
		{"9", a.Digit, true},
		{"X", a.Digit, false},
		{"a", a.ASCIILower, true},
		{"z", a.ASCIILower, true},
		{"A", a.ASCIILower, false},
		{"Z", a.ASCIILower, false},
		{"A", a.ASCIIUpper, true},
		{"Z", a.ASCIIUpper, true},
		{"a", a.ASCIIUpper, false},
		{"z", a.ASCIIUpper, false},
		{"0", a.HexDigit, true},
		{"9", a.HexDigit, true},
		{"a", a.HexDigit, true},
		{"f", a.HexDigit, true},
		{"A", a.HexDigit, true},
		{"F", a.HexDigit, true},
		{"g", a.HexDigit, false},
		{"G", a.HexDigit, false},
	}

	for idx, tc := range cases {
		// The handler reports the match result through the emitted item type.
		handler := func(p *parsekit.P) {
			if !p.On(tc.matcher).Accept().End() {
				p.EmitLiteral(FailItem)
				return
			}
			p.EmitLiteral(SuccessItem)
		}

		item, err, ok := parsekit.New(tc.input, handler).Next()

		// Test numbers in the failure messages are 1-based.
		nr := idx + 1
		switch {
		case !ok:
			t.Fatalf("Test [%d] %q failed with error: %s", nr, tc.input, err)
		case tc.mustMatch && item.Type != SuccessItem:
			t.Fatalf("Test [%d] %q failed: should match, but it didn't", nr, tc.input)
		case !tc.mustMatch && item.Type != FailItem:
			t.Fatalf("Test [%d] %q failed: should not match, but it did", nr, tc.input)
		}
	}
}
// TestSequenceOfRunes combines the single-rune punctuation atoms from Hash
// up to and including Tilde into one sequence, and checks that parsing an
// input made up of exactly those runes yields an item whose value equals
// the full input.
func TestSequenceOfRunes(t *testing.T) {
	input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

	sequence := c.Sequence(
		a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
		a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
		a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
		a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
		a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
	)

	handler := func(p *parsekit.P) {
		p.Expects("Sequence of runes")
		if !p.On(sequence).Accept().End() {
			// Nothing is emitted when the sequence does not match.
			return
		}
		p.EmitLiteral(TestItem)
	}

	item, err, ok := parsekit.New(input, handler).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if got := item.Value; got != input {
		t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, got)
	}
}

View File

@ -5,8 +5,10 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// Not in need of it myself, but nice to have I guess: // Nice to have I guess:
// - LookAhead // - LookAhead
// - Ready to go combinators for various number notations
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
type Matcher func(m *MatchDialog) bool type Matcher func(m *MatchDialog) bool
@ -49,7 +51,7 @@ func (m *MatchDialog) NextRune() (rune, bool) {
// 3 runes returned from NextRune() which match the expectations, then the // 3 runes returned from NextRune() which match the expectations, then the
// slice of runes inside the MatchDialog will contain these 3 runes. // slice of runes inside the MatchDialog will contain these 3 runes.
// When after this the 4th rune turns out to be a mismatch, the forked // When after this the 4th rune turns out to be a mismatch, the forked
// MatchDialog can simply be disarded, and the state in the parent will be // MatchDialog can simply be discarded, and the state in the parent will be
// kept as-is. // kept as-is.
// //
// When a forked MatchDialog is in use, and the Matcher decides that a // When a forked MatchDialog is in use, and the Matcher decides that a
@ -87,15 +89,13 @@ func (m *MatchDialog) Clear() {
m.widths = []int{} m.widths = []int{}
} }
// C provides convenient access to a wide range of parser/combinator // C provides convenient access to a range of parser/combinator
// constructors that can be used to build matching expressions. // constructors that can be used to build matching expressions.
// //
// When using C in your own parser, then it is advised to create // When using C in your own parser, then it is advised to create
// a variable in your own package to reference it (var c = parsekit.C). // a variable in your own package to reference it (var c = parsekit.C).
// This saves a lot of typing, and it makes your code a lot cleaner. // This saves a lot of typing, and it makes your code a lot cleaner.
var C = struct { var C = struct {
EndOfFile func() Matcher
AnyRune func() Matcher
Rune func(rune) Matcher Rune func(rune) Matcher
Runes func(...rune) Matcher Runes func(...rune) Matcher
RuneRange func(rune, rune) Matcher RuneRange func(rune, rune) Matcher
@ -114,8 +114,6 @@ var C = struct {
Separated func(Matcher, Matcher) Matcher Separated func(Matcher, Matcher) Matcher
Drop func(Matcher) Matcher Drop func(Matcher) Matcher
}{ }{
EndOfFile: MatchEndOfFile,
AnyRune: MatchAnyRune,
Rune: MatchRune, Rune: MatchRune,
Runes: MatchRunes, Runes: MatchRunes,
RuneRange: MatchRuneRange, RuneRange: MatchRuneRange,
@ -135,20 +133,6 @@ var C = struct {
Drop: MatchDrop, Drop: MatchDrop,
} }
func MatchEndOfFile() Matcher {
return func(m *MatchDialog) bool {
input, ok := m.NextRune()
return !ok && input == EOF
}
}
func MatchAnyRune() Matcher {
return func(m *MatchDialog) bool {
_, ok := m.NextRune()
return ok
}
}
func MatchRune(r rune) Matcher { func MatchRune(r rune) Matcher {
return func(m *MatchDialog) bool { return func(m *MatchDialog) bool {
input, ok := m.NextRune() input, ok := m.NextRune()

View File

@ -5,29 +5,24 @@ import (
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
p "git.makaay.nl/mauricem/go-parsekit"
) )
var c = p.C func newParser(input string, Matcher parsekit.Matcher) *parsekit.P {
stateFn := func(p *parsekit.P) {
const TestItem p.ItemType = 1
func newParser(input string, Matcher p.Matcher) *p.P {
stateFn := func(p *p.P) {
p.Expects("MATCH") p.Expects("MATCH")
if p.On(Matcher).Accept().End() { if p.On(Matcher).Accept().End() {
p.EmitLiteral(TestItem) p.EmitLiteral(TestItem)
p.RouteRepeat() p.RouteRepeat()
} }
} }
return p.New(input, stateFn) return parsekit.New(input, stateFn)
} }
func ExampleTestMatchAny(t *testing.T) { func ExampleTestMatchAny(t *testing.T) {
parser := parsekit.New( parser := parsekit.New(
"¡Any / valid / character will dö!", "¡Any / valid / character will dö!",
func(p *parsekit.P) { func(p *parsekit.P) {
p.On(parsekit.MatchAnyRune()).Accept() p.On(a.AnyRune).Accept()
p.EmitLiteral(TestItem) p.EmitLiteral(TestItem)
}) })
match, _, ok := parser.Next() match, _, ok := parser.Next()
@ -37,7 +32,7 @@ func ExampleTestMatchAny(t *testing.T) {
} }
func TestMatchAnyRune(t *testing.T) { func TestMatchAnyRune(t *testing.T) {
p := newParser("o", c.AnyRune()) p := newParser("o", a.AnyRune)
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s", err) t.Fatalf("Parsing failed: %s", err)
@ -51,7 +46,7 @@ func TestMatchAnyRune(t *testing.T) {
} }
func TestMatchAnyRune_AtEndOfFile(t *testing.T) { func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
p := newParser("", c.AnyRune()) p := newParser("", a.AnyRune)
_, err, ok := p.Next() _, err, ok := p.Next()
if ok { if ok {
t.Fatalf("Parsing unexpectedly succeeded") t.Fatalf("Parsing unexpectedly succeeded")
@ -63,7 +58,7 @@ func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
} }
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) { func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", c.AnyRune()) p := newParser("\xcd", a.AnyRune)
_, err, ok := p.Next() _, err, ok := p.Next()
if ok { if ok {
t.Fatalf("Parsing unexpectedly succeeded") t.Fatalf("Parsing unexpectedly succeeded")
@ -335,7 +330,7 @@ func TestMatchOptional(t *testing.T) {
func TestMatchDrop(t *testing.T) { func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-')) dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes))) p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)

11
parsekit_test.go Normal file
View File

@ -0,0 +1,11 @@
package parsekit_test
import "git.makaay.nl/mauricem/go-parsekit"
// Item types that are shared by the tests in this package.
const (
	// TestItem (1) is a general purpose item type.
	TestItem parsekit.ItemType = iota + 1
	// SuccessItem (2) is emitted by a test parser when a matcher matched.
	SuccessItem
	// FailItem (3) is emitted by a test parser when a matcher did not match.
	FailItem
)

// Short aliases for the parsekit combinators (C) and atoms (A), to keep
// the test code compact.
var (
	c = parsekit.C
	a = parsekit.A
)

View File

@ -49,6 +49,9 @@ func (p *P) EmitLiteralTrim(t ItemType) {
func (p *P) EmitInterpreted(t ItemType) error { func (p *P) EmitInterpreted(t ItemType) error {
s, err := p.buffer.asInterpretedString() s, err := p.buffer.asInterpretedString()
if err != nil { if err != nil {
p.EmitError(
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
p.buffer.asLiteralString(), err)
return err return err
} }
p.Emit(t, s) p.Emit(t, s)

View File

@ -5,14 +5,17 @@ package parsekit
// This method is the start of a chain method in which multiple things can // This method is the start of a chain method in which multiple things can
// be arranged in one go: // be arranged in one go:
// //
// * Checking whether or not there is a match (this is what On does) // 1) Checking whether or not there is a match (this is what On does)
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move //
// the cursor forward, Accept(): move cursor forward and add the match in // 2) Deciding what to do with the match (Stay(): do nothing, Skip(): only move
// the parser string buffer) // the cursor forward, Accept(): move cursor forward and add the match in
// * Dedicing where to route to (e.g. using RouteTo() to route to a // the parser string buffer)
// StateHandler by name) //
// * Followup routing after that, when applicable (.e.g using something like // 3) Deciding where to route to (e.g. using RouteTo() to route to a
// RouteTo(...).ThenTo(...)) // StateHandler by name)
//
// 4) Followup routing after that, when applicable (e.g. using something like
// RouteTo(...).ThenTo(...))
// //
// For every step of this chain, you can end the chain using the // For every step of this chain, you can end the chain using the
// End() method. This will return a boolean value, indicating whether or // End() method. This will return a boolean value, indicating whether or
@ -22,13 +25,19 @@ package parsekit
// require a boolean expression). // require a boolean expression).
// //
// You can omit "what to do with the match" and go straight into a routing // You can omit "what to do with the match" and go straight into a routing
// method, e.g. On(...).RouteTo(...). This is functionally the same as // method, e.g.
// using On(...).Stay().RouteTo(...). // On(...).RouteTo(...)
// This is functionally the same as using
// On(...).Stay().RouteTo(...).
// //
// Here's a complete example chain: // Here's a complete example chain:
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End() // p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(matcher Matcher) *matchAction { func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p} m := &MatchDialog{p: p}
if matcher == nil {
p.EmitError("internal parser error: matcher argument for On() is nil")
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
}
ok := matcher(m) ok := matcher(m)
// Keep track of the last match, to allow parser implementations // Keep track of the last match, to allow parser implementations