Banged some sense into the constructors. Instead of one convoluted parsekit.New(), we now have parsekit.NewParser() and parsekit.NewMatcherWrapper(). Also playing with adding examples to the documentation.

This commit is contained in:
Maurice Makaay 2019-05-24 20:50:31 +00:00
parent 6fe3c16a6d
commit 3e87e010fb
7 changed files with 170 additions and 113 deletions

91
examples_test.go Normal file
View File

@ -0,0 +1,91 @@
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// ExampleItemType shows how a client of parsekit can declare its own
// ItemType values.
func ExampleItemType() {
	// Let iota number the item types automatically. Each constant is
	// explicitly converted to parsekit.ItemType, so the values count up
	// from zero and never become negative.
	const (
		ItemWord = parsekit.ItemType(iota)
		ItemNumber
		ItemBlob
		// ...
	)
}
// ExampleError shows the two ways in which a parsekit.Error can be rendered:
// Error() for the message and ErrorFull() for the message with its position.
func ExampleError() {
	// The variable is deliberately not called "error", since that would
	// shadow Go's predeclared error type within this function.
	err := parsekit.Error{
		Message: "it broke down",
		Line:    10,
		Column:  42,
	}

	short := err.Error()
	full := err.ErrorFull()
	fmt.Println(short)
	fmt.Println(full)

	// Output:
	// it broke down
	// it broke down after line 10, column 42
}
// ExampleMatchAnyRune shows a state handler that uses the AnyRune atom to
// emit the input as a stream of items, one item per rune.
func ExampleMatchAnyRune() {
	// Shorthand for the parsekit atom definitions.
	a := parsekit.A

	// Accept a single rune, emit it as an item and request a repeat of
	// this handler (RouteRepeat) for the rune that follows.
	anyRuneHandler := func(p *parsekit.P) {
		p.Expects("Any valid rune")
		if !p.On(a.AnyRune).Accept().End() {
			return
		}
		p.EmitLiteral(TestItem)
		p.RouteRepeat()
	}

	run := parsekit.NewParser(anyRuneHandler).Parse("¡Any / valid / character will dö!")

	// Only the first five items are retrieved. The error and ok results of
	// Next() are ignored to keep the example short.
	for count := 0; count < 5; count++ {
		item, _, _ := run.Next()
		fmt.Printf("Match = %q\n", item.Value)
	}

	// Output:
	// Match = "¡"
	// Match = "A"
	// Match = "n"
	// Match = "y"
	// Match = " "
}
// ExampleModifyToUpper builds a Matcher for Dutch postcodes out of small
// building blocks, and uses modifiers (ToUpper, Replace) to normalize the
// matched input to the form "1234 AB".
func ExampleModifyToUpper() {
	// Easy access to the parsekit definitions.
	var c, a, m = parsekit.C, parsekit.A, parsekit.M

	// A Dutch postcode consists of 4 numbers and 2 letters (1234XX).
	// The numbers never start with a zero.
	digitNotZero := c.Except(c.Rune('0'), a.Digit)
	numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))

	// It is good form to write the letters in upper case.
	letter := c.Any(a.ASCIILower, a.ASCIIUpper)
	letters := m.ToUpper(c.Seq(letter, letter))

	// It is good form to use a single space between letters and numbers,
	// but it is not mandatory.
	space := m.Replace(c.Opt(a.Whitespace), " ")

	// With all the building blocks, we can now build the postcode parser.
	postcode := c.Seq(numbers, space, letters)

	// Create a parser and let it parse some postcode inputs.
	// This will print "1234 AB" for every input, because of the built-in normalization.
	p := parsekit.NewMatcherWrapper(postcode)
	for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} {
		// Match the actual input of this iteration (not a fixed literal), so
		// the example really demonstrates the normalization. The error and ok
		// results are ignored, because every input in this example is
		// expected to be a matching postcode.
		output, _, _ := p.Match(input)
		fmt.Printf("Input: %q, output: %q\n", input, output)
	}

	// Output:
	// Input: "1234 AB", output: "1234 AB"
	// Input: "1234Ab", output: "1234 AB"
	// Input: "1234 ab", output: "1234 AB"
	// Input: "1234ab", output: "1234 AB"
}

View File

@ -31,7 +31,8 @@ var C = struct {
ZeroOrMore func(Matcher) Matcher
OneOrMore func(Matcher) Matcher
MinMax func(int, int, Matcher) Matcher
Separated func(Matcher, Matcher) Matcher
Separated func(separated Matcher, separator Matcher) Matcher
Except func(except Matcher, matcher Matcher) Matcher
}{
Rune: MatchRune,
Runes: MatchRunes,
@ -49,6 +50,7 @@ var C = struct {
OneOrMore: MatchOneOrMore,
MinMax: MatchMinMax,
Separated: MatchSeparated,
Except: MatchExcept,
}
// MatchRune creates a Matcher function that checks if the next rune from
@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
// Matchers of one type (the separated), separated by Matches of another type
// (the separator). All matches (separated + separator) are included in the
// output.
func MatchSeparated(separated Matcher, separator Matcher) Matcher {
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
}
// MatchExcept creates a Matcher that applies the provided matcher, unless
// the except Matcher applies to the upcoming input as well.
//
// The except Matcher is always tried first, on a fork of the MatchDialog
// (presumably so that this attempt does not affect the dialog that is handed
// to matcher). When except matches, the result is a mismatch and matcher is
// not invoked at all. Otherwise, the result is whatever matcher reports.
//
// Mind the parameter order: the exception comes first, the Matcher that it
// restricts comes second.
func MatchExcept(except Matcher, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		excluded := except(m.Fork())
		// Short-circuit evaluation makes sure that matcher only runs when
		// the exception did not match.
		return !excluded && matcher(m)
	}
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//

View File

@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
{"ghijkl", c.Opt(c.Rune('h')), true, ""},
{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s))
}),
m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
m.ToUpper(c.Min(1, a.ASCIILower)),
m.Drop(a.Excl),
c.Rep(3, a.AngleOpen),
@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.New(func(p *parsekit.P) {
parser := parsekit.NewParser(func(p *parsekit.P) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept().End() {
p.EmitLiteral(TestItem)
@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
}
}
func ExampleMatchAnyRune() {
handler := func(p *parsekit.P) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept().End() {
p.EmitLiteral(TestItem)
}
}
parser := parsekit.New(handler)
run := parser.Parse("¡Any / valid / character will dö!")
match, _, ok := run.Next()
// This will output '¡', since a.AnyRune matches exactly 1 rune.
if ok {
fmt.Printf("Match = %q\n", match)
}
}
func ExampleModifyToUpper() {
// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
// The numbers never start with a zero.
digitNotZero := c.RuneRange('1', '9')
numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
// It is good form to write the letters in upper case.
letter := c.Any(a.ASCIILower, a.ASCIIUpper)
letters := m.ToUpper(c.Seq(letter, letter))
// It is good form to use a single space between letters and numbers,
// but it is not mandatory.
space := m.Replace(c.Opt(a.Whitespace), " ")
// With all the building blocks, we can now build the postcode parser.
postcode := c.Seq(numbers, space, letters)
// Create a parser and let is parse some postcode inputs.
// This will print "1234 AB" for every input, because of the built-in normalization.
p := parsekit.New(postcode)
for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
r, _, _ := p.Parse("1234 AB").Next()
fmt.Printf("Input: %q, output: %q", input, r.Value)
}
}

View File

@ -7,63 +7,25 @@ import (
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.New() method.
//
// To start parsing input data, use the method Parser.Parse().
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
startState StateHandler // the function that handles the very first state
}
// New instantiates a new Parser.
// The logic parameter provides the parsing logic to apply. This can be:
// NewParser instantiates a new Parser.
//
// 1) A StateHandler function: in this case, a state machine-style
// recursive descent parser is created, in which StateHandler functions
// are used to move the state machine forward during parsing.
// This type of parser offers a lot of flexibility and it is possible to
// emit multiple items from the parse flow.
//
// This style of parser is typically used for parsing languages and
// The Parser is a state machine-style recursive descent parser, in which
// StateHandler functions are used to move the state machine forward during
// parsing. This style of parser is typically used for parsing languages and
// structured data formats (like json, toml, etc.)
//
// 2) A Matcher function: in this case, a parser/combinator-style parser
// is created, which can be used to match against the provided logic.
// The parser can only check input against the Matcher function, and
// reports back a successful match or a failure.
//
// This style of parser can typically be used for validation and normalization
// of input data. However, when you are about to use parsekit for that
// task, consider using regular expressions instead. They might serve
// you better.
func New(logic interface{}) *Parser {
switch logic := logic.(type) {
case func(*P):
return makeParserForStateHandler(logic)
case StateHandler:
return makeParserForStateHandler(logic)
case func(m *MatchDialog) bool:
return makeParserForMatcher(logic)
case Matcher:
return makeParserForMatcher(logic)
default:
panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
}
}
func makeParserForStateHandler(handler StateHandler) *Parser {
return &Parser{startState: handler}
}
func makeParserForMatcher(matcher Matcher) *Parser {
return New(StateHandler(func(p *P) {
p.Expects("match")
if p.On(matcher).Accept().RouteRepeat().End() {
p.EmitLiteral(MatchedItem)
}
}))
// To start parsing input data, use the method Parser.Parse().
func NewParser(startState StateHandler) *Parser {
	// The start state is the only configuration that a Parser holds.
	parser := Parser{startState: startState}
	return &parser
}
// Run represents a single parse run for a Parser.
// The items that are produced by the run are retrieved one at a time by
// calling Run.Next().
// TODO rename to ParseRun
type Run struct {
p *P // a struct holding the internal state of a parse run
}
@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
switch {
case i.Type == ItemEOF:
case i.Type == itemEOF:
return i, nil, false
case i.Type == ItemError:
case i.Type == itemError:
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
return i, run.p.err, false
default:
@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
run.p.expecting = ""
run.p.state(run.p)
}
// MatcherWrapper is the top-level struct that holds the configuration for
// a parser that is based solely on a Matcher function.
// The MatcherWrapper can be instantiated using the parsekit.NewMatcherWrapper()
// function.
//
// To match input data against the wrapped Matcher function, use the method
// MatcherWrapper.Match().
type MatcherWrapper struct {
parser *Parser // the Parser that applies the wrapped Matcher to the input
}
// NewMatcherWrapper instantiates a new MatcherWrapper.
//
// The MatcherWrapper is a thin layer around a single Matcher function. It
// makes it possible to match an input string against that Matcher and to
// retrieve the outcome in a straightforward way, using
// MatcherWrapper.Match().
//
// Internally, a Parser is created with a state handler that tries the
// Matcher on the input and, on success, emits the accepted input as a
// single item.
func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
	matchOnce := func(p *P) {
		p.Expects("match")
		if !p.On(matcher).Accept().End() {
			return
		}
		p.EmitLiteral(0) // ItemType is irrelevant
	}
	wrapper := &MatcherWrapper{parser: NewParser(matchOnce)}
	return wrapper
}
// Match runs the wrapped Matcher function against the provided input data.
//
// On success, it returns the value of the first item that the parse run
// produces, a nil error and true. When the parse run does not produce an
// item, it returns an empty string, the error as reported by the parse run
// (which may be nil) and false.
func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
	run := w.parser.Parse(input)
	item, err, ok := run.Next()
	if ok {
		return item.Value, nil, true
	}
	return "", err, false
}

View File

@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
}
func RunMatcherTest(t *testing.T, test MatcherTest) {
parser := parsekit.New(test.matcher).Parse(test.input)
item, err, ok := parser.Next()
output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
if test.mustMatch {
if !ok {
t.Errorf("Test %q failed with error: %s", test.input, err)
} else if item.Type != parsekit.MatchedItem {
t.Errorf("Test %q failed: should match, but it didn't", test.input)
} else if item.Value != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
} else if output != test.expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
}
} else {
if ok {

View File

@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
p.RouteTo(func(p *P) {
p.Expects("end of file")
if p.On(A.EndOfFile).Stay().End() {
p.Emit(ItemEOF, "EOF")
p.Emit(itemEOF, "EOF")
}
})
}

View File

@ -5,19 +5,19 @@ import (
)
// ItemType represents the type of a parser Item.
//
// When creating your own ItemType values, then make use of positive integer
// values. Negative values are possible, but they are reserved for internal
// use by parsekit.
type ItemType int
// ItemEOF is a built-in parser item type that is used for flagging that the
// itemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached.
const ItemEOF ItemType = -1
const itemEOF ItemType = -1
// ItemError is a built-in parser item type that is used for flagging that
// itemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing.
const ItemError ItemType = -2
// MatchedItem is a built-in parser item type that is used for indicating a
// successful match when using a parser that is based on a Matcher.
const MatchedItem ItemType = -3
const itemError ItemType = -2
// Item represents an item that can be emitted from the parser.
type Item struct {
@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
// EmitError emits a Parser error item to the client.
func (p *P) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...)
p.Emit(ItemError, message)
p.Emit(itemError, message)
}
// UnexpectedInput is used by a StateHandler function to emit an error item