Implemented a lot of atoms in the parsekit library, ready for use by a parser implementation.
This commit is contained in:
parent
355f995388
commit
8a09b7ca49
|
@ -0,0 +1,114 @@
|
|||
package parsekit
|
||||
|
||||
// A provides convenient access to a range of atoms that can be used to
|
||||
// build combinators or parsing rules.
|
||||
var A = struct {
|
||||
EndOfFile Matcher
|
||||
AnyRune Matcher
|
||||
Space Matcher
|
||||
Tab Matcher
|
||||
CarriageRet Matcher
|
||||
Newline Matcher
|
||||
Excl Matcher
|
||||
DoubleQuote Matcher
|
||||
Hash Matcher
|
||||
Dollar Matcher
|
||||
Percent Matcher
|
||||
Amp Matcher
|
||||
SingleQuote Matcher
|
||||
RoundOpen Matcher
|
||||
RoundClose Matcher
|
||||
Asterisk Matcher
|
||||
Plus Matcher
|
||||
Comma Matcher
|
||||
Minus Matcher
|
||||
Dot Matcher
|
||||
Slash Matcher
|
||||
Colon Matcher
|
||||
Semicolon Matcher
|
||||
AngleOpen Matcher
|
||||
Equal Matcher
|
||||
AngleClose Matcher
|
||||
Question Matcher
|
||||
At Matcher
|
||||
SquareOpen Matcher
|
||||
Backslash Matcher
|
||||
SquareClose Matcher
|
||||
Caret Matcher
|
||||
Underscore Matcher
|
||||
Backquote Matcher
|
||||
CurlyOpen Matcher
|
||||
Pipe Matcher
|
||||
CurlyClose Matcher
|
||||
Tilde Matcher
|
||||
Whitespace Matcher
|
||||
WhitespaceAndNewlines Matcher
|
||||
EndOfLine Matcher
|
||||
Digit Matcher
|
||||
ASCII Matcher
|
||||
ASCIILower Matcher
|
||||
ASCIIUpper Matcher
|
||||
HexDigit Matcher
|
||||
}{
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
Space: C.Rune(' '),
|
||||
Tab: C.Rune('\t'),
|
||||
CarriageRet: C.Rune('\r'),
|
||||
Newline: C.Rune('\n'),
|
||||
Excl: C.Rune('!'),
|
||||
DoubleQuote: C.Rune('"'),
|
||||
Hash: C.Rune('#'),
|
||||
Dollar: C.Rune('$'),
|
||||
Percent: C.Rune('%'),
|
||||
Amp: C.Rune('&'),
|
||||
SingleQuote: C.Rune('\''),
|
||||
RoundOpen: C.Rune('('),
|
||||
RoundClose: C.Rune(')'),
|
||||
Asterisk: C.Rune('*'),
|
||||
Plus: C.Rune('+'),
|
||||
Comma: C.Rune(','),
|
||||
Minus: C.Rune('-'),
|
||||
Dot: C.Rune('.'),
|
||||
Slash: C.Rune('/'),
|
||||
Colon: C.Rune(':'),
|
||||
Semicolon: C.Rune(';'),
|
||||
AngleOpen: C.Rune('<'),
|
||||
Equal: C.Rune('='),
|
||||
AngleClose: C.Rune('>'),
|
||||
Question: C.Rune('?'),
|
||||
At: C.Rune('@'),
|
||||
SquareOpen: C.Rune('['),
|
||||
Backslash: C.Rune('\\'),
|
||||
SquareClose: C.Rune(']'),
|
||||
Caret: C.Rune('^'),
|
||||
Underscore: C.Rune('_'),
|
||||
Backquote: C.Rune('`'),
|
||||
CurlyOpen: C.Rune('{'),
|
||||
Pipe: C.Rune('|'),
|
||||
CurlyClose: C.Rune('}'),
|
||||
Tilde: C.Rune('~'),
|
||||
Whitespace: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'))),
|
||||
WhitespaceAndNewlines: C.OneOrMore(C.AnyOf(C.Rune(' '), C.Rune('\t'), C.Rune('\r'), C.Rune('\n'))),
|
||||
EndOfLine: C.AnyOf(C.String("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||
Digit: C.RuneRange('0', '9'),
|
||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||
ASCIILower: C.RuneRange('a', 'z'),
|
||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||
HexDigit: C.AnyOf(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||
}
|
||||
|
||||
func MatchEndOfFile() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
fork := m.Fork()
|
||||
input, ok := fork.NextRune()
|
||||
return !ok && input == EOF
|
||||
}
|
||||
}
|
||||
|
||||
func MatchAnyRune() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
_, ok := m.NextRune()
|
||||
return ok
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
for i, c := range []struct {
|
||||
input string
|
||||
matcher parsekit.Matcher
|
||||
mustMatch bool
|
||||
}{
|
||||
{"", a.EndOfFile, true},
|
||||
{"⌘", a.AnyRune, true},
|
||||
{"\xbc", a.AnyRune, false}, // invalid UTF8 rune
|
||||
{"", a.AnyRune, false}, // end of file
|
||||
{" ", a.Space, true},
|
||||
{"X", a.Space, false},
|
||||
{"\t", a.Tab, true},
|
||||
{"\r", a.CarriageRet, true},
|
||||
{"\n", a.Newline, true},
|
||||
{"!", a.Excl, true},
|
||||
{"\"", a.DoubleQuote, true},
|
||||
{"#", a.Hash, true},
|
||||
{"$", a.Dollar, true},
|
||||
{"%", a.Percent, true},
|
||||
{"&", a.Amp, true},
|
||||
{"'", a.SingleQuote, true},
|
||||
{"(", a.RoundOpen, true},
|
||||
{")", a.RoundClose, true},
|
||||
{"*", a.Asterisk, true},
|
||||
{"+", a.Plus, true},
|
||||
{",", a.Comma, true},
|
||||
{"-", a.Minus, true},
|
||||
{".", a.Dot, true},
|
||||
{"/", a.Slash, true},
|
||||
{":", a.Colon, true},
|
||||
{";", a.Semicolon, true},
|
||||
{"<", a.AngleOpen, true},
|
||||
{"=", a.Equal, true},
|
||||
{">", a.AngleClose, true},
|
||||
{"?", a.Question, true},
|
||||
{"@", a.At, true},
|
||||
{"[", a.SquareOpen, true},
|
||||
{"\\", a.Backslash, true},
|
||||
{"]", a.SquareClose, true},
|
||||
{"^", a.Caret, true},
|
||||
{"_", a.Underscore, true},
|
||||
{"`", a.Backquote, true},
|
||||
{"{", a.CurlyOpen, true},
|
||||
{"|", a.Pipe, true},
|
||||
{"}", a.CurlyClose, true},
|
||||
{"~", a.Tilde, true},
|
||||
{" \t \t ", a.Whitespace, true},
|
||||
{" \t\r\n ", a.WhitespaceAndNewlines, true},
|
||||
{"", a.EndOfLine, true},
|
||||
{"\r\n", a.EndOfLine, true},
|
||||
{"\n", a.EndOfLine, true},
|
||||
{"0", a.Digit, true},
|
||||
{"1", a.Digit, true},
|
||||
{"2", a.Digit, true},
|
||||
{"3", a.Digit, true},
|
||||
{"4", a.Digit, true},
|
||||
{"5", a.Digit, true},
|
||||
{"6", a.Digit, true},
|
||||
{"7", a.Digit, true},
|
||||
{"8", a.Digit, true},
|
||||
{"9", a.Digit, true},
|
||||
{"X", a.Digit, false},
|
||||
{"a", a.ASCIILower, true},
|
||||
{"z", a.ASCIILower, true},
|
||||
{"A", a.ASCIILower, false},
|
||||
{"Z", a.ASCIILower, false},
|
||||
{"A", a.ASCIIUpper, true},
|
||||
{"Z", a.ASCIIUpper, true},
|
||||
{"a", a.ASCIIUpper, false},
|
||||
{"z", a.ASCIIUpper, false},
|
||||
{"0", a.HexDigit, true},
|
||||
{"9", a.HexDigit, true},
|
||||
{"a", a.HexDigit, true},
|
||||
{"f", a.HexDigit, true},
|
||||
{"A", a.HexDigit, true},
|
||||
{"F", a.HexDigit, true},
|
||||
{"g", a.HexDigit, false},
|
||||
{"G", a.HexDigit, false},
|
||||
} {
|
||||
parser := parsekit.New(c.input, func(p *parsekit.P) {
|
||||
if p.On(c.matcher).Accept().End() {
|
||||
p.EmitLiteral(SuccessItem)
|
||||
} else {
|
||||
p.EmitLiteral(FailItem)
|
||||
}
|
||||
})
|
||||
item, err, ok := parser.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Test [%d] %q failed with error: %s", i+1, c.input, err)
|
||||
}
|
||||
if c.mustMatch && item.Type != SuccessItem {
|
||||
t.Fatalf("Test [%d] %q failed: should match, but it didn't", i+1, c.input)
|
||||
}
|
||||
if !c.mustMatch && item.Type != FailItem {
|
||||
t.Fatalf("Test [%d] %q failed: should not match, but it did", i+1, c.input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSequenceOfRunes(t *testing.T) {
|
||||
sequence := c.Sequence(
|
||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
parser := parsekit.New(input, func(p *parsekit.P) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
})
|
||||
item, err, ok := parser.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if item.Value != input {
|
||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||
}
|
||||
}
|
|
@ -5,8 +5,10 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Not in need of it myself, but nice to have I guess:
|
||||
// Nice to have I guess:
|
||||
// - LookAhead
|
||||
// - Ready to go combinators for various number notations
|
||||
// - Ready to go atoms (C.space, C.tab, C.digits, C.asciiUpper, etc...)
|
||||
|
||||
type Matcher func(m *MatchDialog) bool
|
||||
|
||||
|
@ -49,7 +51,7 @@ func (m *MatchDialog) NextRune() (rune, bool) {
|
|||
// 3 runes returned from NextRune() which match the expectations, then the
|
||||
// slice of runes inside the MatchDialog will contain these 3 runes.
|
||||
// When after this the 4th rune turns out to be a mismatch, the forked
|
||||
// MatchDialog can simply be disarded, and the state in the parent will be
|
||||
// MatchDialog can simply be discarded, and the state in the parent will be
|
||||
// kept as-is.
|
||||
//
|
||||
// When a forked MatchDialog is in use, and the Matcher decides that a
|
||||
|
@ -87,15 +89,13 @@ func (m *MatchDialog) Clear() {
|
|||
m.widths = []int{}
|
||||
}
|
||||
|
||||
// C provides convenient access to a wide range of parser/combinator
|
||||
// C provides convenient access to a range of parser/combinator
|
||||
// constructors that can be used to build matching expressions.
|
||||
//
|
||||
// When using C in your own parser, then it is advised to create
|
||||
// a variable in your own package to reference it (var c = parsekit.C).
|
||||
// This saves a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
EndOfFile func() Matcher
|
||||
AnyRune func() Matcher
|
||||
Rune func(rune) Matcher
|
||||
Runes func(...rune) Matcher
|
||||
RuneRange func(rune, rune) Matcher
|
||||
|
@ -114,8 +114,6 @@ var C = struct {
|
|||
Separated func(Matcher, Matcher) Matcher
|
||||
Drop func(Matcher) Matcher
|
||||
}{
|
||||
EndOfFile: MatchEndOfFile,
|
||||
AnyRune: MatchAnyRune,
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
|
@ -135,20 +133,6 @@ var C = struct {
|
|||
Drop: MatchDrop,
|
||||
}
|
||||
|
||||
func MatchEndOfFile() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
input, ok := m.NextRune()
|
||||
return !ok && input == EOF
|
||||
}
|
||||
}
|
||||
|
||||
func MatchAnyRune() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
_, ok := m.NextRune()
|
||||
return ok
|
||||
}
|
||||
}
|
||||
|
||||
func MatchRune(r rune) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
input, ok := m.NextRune()
|
|
@ -5,29 +5,24 @@ import (
|
|||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
p "git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
var c = p.C
|
||||
|
||||
const TestItem p.ItemType = 1
|
||||
|
||||
func newParser(input string, Matcher p.Matcher) *p.P {
|
||||
stateFn := func(p *p.P) {
|
||||
func newParser(input string, Matcher parsekit.Matcher) *parsekit.P {
|
||||
stateFn := func(p *parsekit.P) {
|
||||
p.Expects("MATCH")
|
||||
if p.On(Matcher).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.RouteRepeat()
|
||||
}
|
||||
}
|
||||
return p.New(input, stateFn)
|
||||
return parsekit.New(input, stateFn)
|
||||
}
|
||||
|
||||
func ExampleTestMatchAny(t *testing.T) {
|
||||
parser := parsekit.New(
|
||||
"¡Any / valid / character will dö!",
|
||||
func(p *parsekit.P) {
|
||||
p.On(parsekit.MatchAnyRune()).Accept()
|
||||
p.On(a.AnyRune).Accept()
|
||||
p.EmitLiteral(TestItem)
|
||||
})
|
||||
match, _, ok := parser.Next()
|
||||
|
@ -37,7 +32,7 @@ func ExampleTestMatchAny(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMatchAnyRune(t *testing.T) {
|
||||
p := newParser("o", c.AnyRune())
|
||||
p := newParser("o", a.AnyRune)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
|
@ -51,7 +46,7 @@ func TestMatchAnyRune(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
|
||||
p := newParser("", c.AnyRune())
|
||||
p := newParser("", a.AnyRune)
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
|
@ -63,7 +58,7 @@ func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
|
||||
p := newParser("\xcd", c.AnyRune())
|
||||
p := newParser("\xcd", a.AnyRune)
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
|
@ -335,7 +330,7 @@ func TestMatchOptional(t *testing.T) {
|
|||
|
||||
func TestMatchDrop(t *testing.T) {
|
||||
dashes := c.OneOrMore(c.Rune('-'))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), a.AnyRune, c.Drop(dashes)))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
|
@ -0,0 +1,11 @@
|
|||
package parsekit_test
|
||||
|
||||
import "git.makaay.nl/mauricem/go-parsekit"
|
||||
|
||||
const (
|
||||
TestItem parsekit.ItemType = 1
|
||||
SuccessItem parsekit.ItemType = 2
|
||||
FailItem parsekit.ItemType = 3
|
||||
)
|
||||
|
||||
var c, a = parsekit.C, parsekit.A
|
|
@ -49,6 +49,9 @@ func (p *P) EmitLiteralTrim(t ItemType) {
|
|||
func (p *P) EmitInterpreted(t ItemType) error {
|
||||
s, err := p.buffer.asInterpretedString()
|
||||
if err != nil {
|
||||
p.EmitError(
|
||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
||||
p.buffer.asLiteralString(), err)
|
||||
return err
|
||||
}
|
||||
p.Emit(t, s)
|
||||
|
|
|
@ -5,14 +5,17 @@ package parsekit
|
|||
// This method is the start of a chain method in which multiple things can
|
||||
// be arranged in one go:
|
||||
//
|
||||
// * Checking whether or not there is a match (this is what On does)
|
||||
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move
|
||||
// the cursor forward, Accept(): move cursor forward and add the match in
|
||||
// the parser string buffer)
|
||||
// * Dedicing where to route to (e.g. using RouteTo() to route to a
|
||||
// StateHandler by name)
|
||||
// * Followup routing after that, when applicable (.e.g using something like
|
||||
// RouteTo(...).ThenTo(...))
|
||||
// 1) Checking whether or not there is a match (this is what On does)
|
||||
//
|
||||
// 2) Deciding what to do with the match (Stay(): do nothing, Skip(): only move
|
||||
// the cursor forward, Accept(): move cursor forward and add the match in
|
||||
// the parser string buffer)
|
||||
//
|
||||
// 3) Deciding where to route to (e.g. using RouteTo() to route to a
|
||||
// StateHandler by name)
|
||||
//
|
||||
// 4) Followup routing after that, when applicable (e.g. using something like
|
||||
// RouteTo(...).ThenTo(...))
|
||||
//
|
||||
// For every step of this chain, you can end the chain using the
|
||||
// End() method. This will return a boolean value, indicating whether or
|
||||
|
@ -22,13 +25,19 @@ package parsekit
|
|||
// require a boolean expression).
|
||||
//
|
||||
// You can omit "what to do with the match" and go straight into a routing
|
||||
// method, e.g. On(...).RouteTo(...). This is functionally the same as
|
||||
// using On(...).Stay().RouteTo(...).
|
||||
// method, e.g.
|
||||
// On(...).RouteTo(...)
|
||||
// This is functionally the same as using
|
||||
// On(...).Stay().RouteTo(...).
|
||||
//
|
||||
// Here's a complete example chain:
|
||||
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
||||
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
||||
func (p *P) On(matcher Matcher) *matchAction {
|
||||
m := &MatchDialog{p: p}
|
||||
if matcher == nil {
|
||||
p.EmitError("internal parser error: matcher argument for On() is nil")
|
||||
return &matchAction{routeAction: routeAction{chainAction: chainAction{nil, false}}}
|
||||
}
|
||||
ok := matcher(m)
|
||||
|
||||
// Keep track of the last match, to allow parser implementations
|
||||
|
|
Loading…
Reference in New Issue