Banged some sense into the constructors. Instead of one convoluted parsekit.New(), we now have parsekit.NewParser() and parsekit.NewMatcherWrapper(). Also playing with adding examples to the documentation.

This commit is contained in:
Maurice Makaay 2019-05-24 20:50:31 +00:00
parent 6fe3c16a6d
commit 3e87e010fb
7 changed files with 170 additions and 113 deletions

91
examples_test.go Normal file
View File

@ -0,0 +1,91 @@
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// ExampleItemType shows how a client of parsekit can declare its own
// ItemType constants.
func ExampleItemType() {
// Make use of positive values. Ideally, define your ItemTypes using
// iota for easy automatic value management like this:
const (
// The first constant carries the explicit type; the following ones
// repeat it implicitly and receive consecutive values from iota.
ItemWord parsekit.ItemType = iota
ItemNumber
ItemBlob
// ...
)
}
// ExampleError prints both string representations of a parsekit.Error:
// the bare message and the message extended with the line and column.
func ExampleError() {
	// Named err rather than error, so the builtin error type is not shadowed.
	err := &parsekit.Error{
		Message: "it broke down",
		Line:    10,
		Column:  42,
	}

	fmt.Println(err.Error())
	fmt.Println(err.ErrorFull())

	// Output:
	// it broke down
	// it broke down after line 10, column 42
}
// ExampleMatchAnyRune runs a parser whose handler accepts one rune per
// invocation, and prints the first five items that the parser emits.
func ExampleMatchAnyRune() {
	// Easy access to the parsekit definitions.
	a := parsekit.A

	// Accept a single rune, emit it as an item and request a repeat of
	// this same handler.
	acceptRune := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if !p.On(a.AnyRune).Accept().End() {
			return
		}
		p.EmitLiteral(TestItem)
		p.RouteRepeat()
	}

	run := parsekit.NewParser(acceptRune).Parse("¡Any / valid / character will dö!")
	for n := 0; n < 5; n++ {
		// The error and ok results are deliberately ignored here; only the
		// emitted item value is of interest for this example.
		item, _, _ := run.Next()
		fmt.Printf("Match = %q\n", item.Value)
	}

	// Output:
	// Match = "¡"
	// Match = "A"
	// Match = "n"
	// Match = "y"
	// Match = " "
}
// ExampleModifyToUpper builds a Matcher for Dutch postcodes out of smaller
// building blocks and uses modifiers to normalize every accepted spelling
// to the canonical "1234 AB" form.
func ExampleModifyToUpper() {
	// Easy access to the parsekit definitions.
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
	// The numbers never start with a zero.
	digitNotZero := c.Except(c.Rune('0'), a.Digit)
	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))

	// It is good form to write the letters in upper case.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// It is good form to use a single space between letters and numbers,
	// but it is not mandatory.
	space := m.Replace(c.Opt(a.Whitespace), " ")

	// With all the building blocks, we can now build the postcode parser.
	postcode := c.Seq(numbers, space, letters)

	// Create a parser and let it parse some postcode inputs.
	// This will print "1234 AB" for every input, because of the built-in normalization.
	p := parsekit.NewMatcherWrapper(postcode)
	for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} {
		// Match the loop variable, not a fixed literal; otherwise the
		// example never exercises the normalization it demonstrates.
		// The error and ok results are ignored: all inputs are valid postcodes.
		output, _, _ := p.Match(input)
		fmt.Printf("Input: %q, output: %q\n", input, output)
	}

	// Output:
	// Input: "1234 AB", output: "1234 AB"
	// Input: "1234Ab", output: "1234 AB"
	// Input: "1234 ab", output: "1234 AB"
	// Input: "1234ab", output: "1234 AB"
}

View File

@ -31,7 +31,8 @@ var C = struct {
ZeroOrMore func(Matcher) Matcher ZeroOrMore func(Matcher) Matcher
OneOrMore func(Matcher) Matcher OneOrMore func(Matcher) Matcher
MinMax func(int, int, Matcher) Matcher MinMax func(int, int, Matcher) Matcher
Separated func(Matcher, Matcher) Matcher Separated func(separated Matcher, separator Matcher) Matcher
Except func(except Matcher, matcher Matcher) Matcher
}{ }{
Rune: MatchRune, Rune: MatchRune,
Runes: MatchRunes, Runes: MatchRunes,
@ -49,6 +50,7 @@ var C = struct {
OneOrMore: MatchOneOrMore, OneOrMore: MatchOneOrMore,
MinMax: MatchMinMax, MinMax: MatchMinMax,
Separated: MatchSeparated, Separated: MatchSeparated,
Except: MatchExcept,
} }
// MatchRune creates a Matcher function that checks if the next rune from // MatchRune creates a Matcher function that checks if the next rune from
@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
// Matchers of one type (the separated), separated by Matches of another type // Matchers of one type (the separated), separated by Matches of another type
// (the separator). All matches (separated + separator) are included in the // (the separator). All matches (separated + separator) are included in the
// output. // output.
func MatchSeparated(separated Matcher, separator Matcher) Matcher { func MatchSeparated(separator Matcher, separated Matcher) Matcher {
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated))) return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
} }
// MatchExcept creates a Matcher that applies the provided matcher to the
// upcoming input, unless the except Matcher applies to that same input.
//
// The except Matcher is tried first, on a fork of the MatchDialog. When it
// matches, the result is a mismatch and the matcher is not invoked at all.
// Otherwise, the result is whatever the matcher reports for the MatchDialog.
func MatchExcept(except Matcher, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		excluded := except(m.Fork())
		// Short-circuit evaluation keeps matcher from running when excluded.
		return !excluded && matcher(m)
	}
}
// A provides convenient access to a range of atoms that can be used to // A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules. // build combinators or parsing rules.
// //

View File

@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
{"ghijkl", c.Opt(c.Rune('h')), true, ""}, {"ghijkl", c.Opt(c.Rune('h')), true, ""},
{"ghijkl", c.Opt(c.Rune('g')), true, "g"}, {"ghijkl", c.Opt(c.Rune('g')), true, "g"},
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"}, {"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"}, {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`}, {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""}, {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""}, {" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string { m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s)) return fmt.Sprintf("%d", len(s))
}), }),
m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "), m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
m.ToUpper(c.Min(1, a.ASCIILower)), m.ToUpper(c.Min(1, a.ASCIILower)),
m.Drop(a.Excl), m.Drop(a.Excl),
c.Rep(3, a.AngleOpen), c.Rep(3, a.AngleOpen),
@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde, a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
) )
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.New(func(p *parsekit.P) { parser := parsekit.NewParser(func(p *parsekit.P) {
p.Expects("Sequence of runes") p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() { if p.On(sequence).Accept().End() {
p.EmitLiteral(TestItem) p.EmitLiteral(TestItem)
@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value) t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
} }
} }
// ExampleMatchAnyRune parses a string with a handler that accepts a single
// rune, and prints the first item that the parser emits.
//
// NOTE(review): there is no "// Output:" comment, so go test compiles this
// example but does not run or verify it.
func ExampleMatchAnyRune() {
handler := func(p *parsekit.P) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept().End() {
p.EmitLiteral(TestItem)
}
}
parser := parsekit.New(handler)
run := parser.Parse("¡Any / valid / character will dö!")
match, _, ok := run.Next()
// This will output '¡', since a.AnyRune matches exactly 1 rune.
// NOTE(review): the whole item is formatted here, not match.Value;
// confirm the printed form is the intended one.
if ok {
fmt.Printf("Match = %q\n", match)
}
}
// ExampleModifyToUpper builds a Matcher for Dutch postcodes out of smaller
// building blocks, and uses modifiers to normalize the matched output.
func ExampleModifyToUpper() {
// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
// The numbers never start with a zero.
digitNotZero := c.RuneRange('1', '9')
numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
// It is good form to write the letters in upper case.
letter := c.Any(a.ASCIILower, a.ASCIIUpper)
letters := m.ToUpper(c.Seq(letter, letter))
// It is good form to use a single space between letters and numbers,
// but it is not mandatory.
space := m.Replace(c.Opt(a.Whitespace), " ")
// With all the building blocks, we can now build the postcode parser.
postcode := c.Seq(numbers, space, letters)
// Create a parser and let it parse some postcode inputs.
// This will print "1234 AB" for every input, because of the built-in normalization.
p := parsekit.New(postcode)
for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
// NOTE(review): the literal "1234 AB" is parsed on every iteration; the
// loop variable input is only used for printing. Also, the format string
// has no trailing newline, so all results end up on a single line.
r, _, _ := p.Parse("1234 AB").Next()
fmt.Printf("Input: %q, output: %q", input, r.Value)
}
}

View File

@ -7,63 +7,25 @@ import (
) )
// Parser is the top-level struct that holds the configuration for a parser. // Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.New() method. // The Parser can be instantiated using the parsekit.NewParser() method.
//
// To start parsing input data, use the method Parser.Parse().
type Parser struct { type Parser struct {
startState StateHandler // the function that handles the very first state startState StateHandler // the function that handles the very first state
} }
// New instantiates a new Parser. // NewParser instantiates a new Parser.
// The logic parameter provides the parsing logic to apply. This can be:
// //
// 1) A StateHandler function: in this case, a state machine-style // The Parser is a state machine-style recursive descent parser, in which
// recursive descent parser is created, in which StateHandler functions // StateHandler functions are used to move the state machine forward during
// are used to move the state machine forward during parsing. // parsing. This style of parser is typically used for parsing languages and
// This type of parser offers a lot of flexibility and it is possible to
// emit multiple items from the parse flow.
//
// This style of parser is typically used for parsing languages and
// structured data formats (like json, toml, etc.) // structured data formats (like json, toml, etc.)
// //
// 2) A Matcher function: in this case, a parser/combinator-style parser // To start parsing input data, use the method Parser.Parse().
// is created, which can be used to match against the provided logic. func NewParser(startState StateHandler) *Parser {
// The parser can only check input against the Matcher function, and return &Parser{startState: startState}
// reports back a successful match or a failure.
//
// This style of parser can typically be used for validation and normalization
// of input data. However, when you are about to use parsekit for that
// task, consider using regular expressions instead. They might serve
// you better.
func New(logic interface{}) *Parser {
switch logic := logic.(type) {
case func(*P):
return makeParserForStateHandler(logic)
case StateHandler:
return makeParserForStateHandler(logic)
case func(m *MatchDialog) bool:
return makeParserForMatcher(logic)
case Matcher:
return makeParserForMatcher(logic)
default:
panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
}
}
func makeParserForStateHandler(handler StateHandler) *Parser {
return &Parser{startState: handler}
}
func makeParserForMatcher(matcher Matcher) *Parser {
return New(StateHandler(func(p *P) {
p.Expects("match")
if p.On(matcher).Accept().RouteRepeat().End() {
p.EmitLiteral(MatchedItem)
}
}))
} }
// Run represents a single parse run for a Parser. // Run represents a single parse run for a Parser.
// TODO rename to ParseRun
type Run struct { type Run struct {
p *P // a struct holding the internal state of a parse run p *P // a struct holding the internal state of a parse run
} }
@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) { func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
switch { switch {
case i.Type == ItemEOF: case i.Type == itemEOF:
return i, nil, false return i, nil, false
case i.Type == ItemError: case i.Type == itemError:
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn} run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
return i, run.p.err, false return i, run.p.err, false
default: default:
@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
run.p.expecting = "" run.p.expecting = ""
run.p.state(run.p) run.p.state(run.p)
} }
// MatcherWrapper is the top-level struct that holds the configuration for
// a parser that is based solely on a Matcher function.
// The MatcherWrapper can be instantiated using the
// parsekit.NewMatcherWrapper() method.
//
// To match input data against the wrapped Matcher function, use the method
// MatcherWrapper.Match().
type MatcherWrapper struct {
// parser is the Parser that is used to run the wrapped Matcher function.
parser *Parser
}
// NewMatcherWrapper instantiates a new MatcherWrapper.
//
// The MatcherWrapper is a thin layer around a single Matcher function. It
// lets you match an input string against that Matcher function and retrieve
// the result in a straightforward way, using MatcherWrapper.Match().
func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
	// The start state accepts the input that the Matcher matches and emits
	// it as one literal item. On a mismatch, nothing is emitted here.
	matchOnce := func(p *P) {
		p.Expects("match")
		if !p.On(matcher).Accept().End() {
			return
		}
		p.EmitLiteral(0) // ItemType is irrelevant
	}

	wrapper := new(MatcherWrapper)
	wrapper.parser = NewParser(matchOnce)
	return wrapper
}
// Match runs the wrapped Matcher function against the provided input data.
//
// On a successful match, it returns the matched output, a nil error and true.
// Otherwise, it returns an empty string, the error as reported by the parse
// run and false.
func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
	run := w.parser.Parse(input)
	item, parseErr, matched := run.Next()
	if matched {
		return item.Value, nil, true
	}
	return "", parseErr, false
}

View File

@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
} }
func RunMatcherTest(t *testing.T, test MatcherTest) { func RunMatcherTest(t *testing.T, test MatcherTest) {
parser := parsekit.New(test.matcher).Parse(test.input) output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
item, err, ok := parser.Next()
if test.mustMatch { if test.mustMatch {
if !ok { if !ok {
t.Errorf("Test %q failed with error: %s", test.input, err) t.Errorf("Test %q failed with error: %s", test.input, err)
} else if item.Type != parsekit.MatchedItem { } else if output != test.expected {
t.Errorf("Test %q failed: should match, but it didn't", test.input) t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
} else if item.Value != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
} }
} else { } else {
if ok { if ok {

View File

@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
p.RouteTo(func(p *P) { p.RouteTo(func(p *P) {
p.Expects("end of file") p.Expects("end of file")
if p.On(A.EndOfFile).Stay().End() { if p.On(A.EndOfFile).Stay().End() {
p.Emit(ItemEOF, "EOF") p.Emit(itemEOF, "EOF")
} }
}) })
} }

View File

@ -5,19 +5,19 @@ import (
) )
// ItemType represents the type of a parser Item. // ItemType represents the type of a parser Item.
//
// When creating your own ItemType values, then make use of positive integer
// values. Negative values are possible, but they are reserved for internal
// use by parsekit.
type ItemType int type ItemType int
// ItemEOF is a built-in parser item type that is used for flagging that the // itemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached. // end of the input was reached.
const ItemEOF ItemType = -1 const itemEOF ItemType = -1
// ItemError is a built-in parser item type that is used for flagging that // itemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing. // an error has occurred during parsing.
const ItemError ItemType = -2 const itemError ItemType = -2
// MatchedItem is a built-in parser item type that is used for indicating a
// successful match when using a parser that is based on a Matcher.
const MatchedItem ItemType = -3
// Item represents an item that can be emitted from the parser. // Item represents an item that can be emitted from the parser.
type Item struct { type Item struct {
@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
// EmitError emits a Parser error item to the client. // EmitError emits a Parser error item to the client.
func (p *P) EmitError(format string, args ...interface{}) { func (p *P) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...) message := fmt.Sprintf(format, args...)
p.Emit(ItemError, message) p.Emit(itemError, message)
} }
// UnexpectedInput is used by a StateHandler function to emit an error item // UnexpectedInput is used by a StateHandler function to emit an error item