Banged some sense into the constructors. Instead of one convoluted parsekit.New(), we now have parsekit.NewParser() and parsekit.NewMatcherWrapper(). Also playing with adding examples to the documentation.
This commit is contained in:
parent
6fe3c16a6d
commit
3e87e010fb
|
@ -0,0 +1,91 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleItemType() {
|
||||
// Make use of positive values. Ideally, define your ItemTypes using
|
||||
// iota for easy automatic value management like this:
|
||||
const (
|
||||
ItemWord parsekit.ItemType = iota
|
||||
ItemNumber
|
||||
ItemBlob
|
||||
// ...
|
||||
)
|
||||
}
|
||||
|
||||
func ExampleError() {
|
||||
error := parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Line: 10,
|
||||
Column: 42}
|
||||
|
||||
fmt.Println(error.Error())
|
||||
fmt.Println(error.ErrorFull())
|
||||
// Output:
|
||||
// it broke down
|
||||
// it broke down after line 10, column 42
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune() {
|
||||
// Easy access to the parsekit definitions.
|
||||
var a = parsekit.A
|
||||
|
||||
handler := func(p *parsekit.P) {
|
||||
p.Expects("Any valid rune")
|
||||
if p.On(a.AnyRune).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.RouteRepeat()
|
||||
}
|
||||
}
|
||||
parser := parsekit.NewParser(handler)
|
||||
run := parser.Parse("¡Any / valid / character will dö!")
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
match, _, _ := run.Next()
|
||||
fmt.Printf("Match = %q\n", match.Value)
|
||||
}
|
||||
// Output:
|
||||
// Match = "¡"
|
||||
// Match = "A"
|
||||
// Match = "n"
|
||||
// Match = "y"
|
||||
// Match = " "
|
||||
}
|
||||
|
||||
func ExampleModifyToUpper() {
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
|
||||
// The numbers never start with a zero.
|
||||
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
||||
numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
|
||||
// It is good form to write the letters in upper case.
|
||||
letter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
letters := m.ToUpper(c.Seq(letter, letter))
|
||||
|
||||
// It is good form to use a single space between letters and numbers,
|
||||
// but it is not mandatory.
|
||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||
|
||||
// With all the building blocks, we can now build the postcode parser.
|
||||
postcode := c.Seq(numbers, space, letters)
|
||||
|
||||
// Create a parser and let is parse some postcode inputs.
|
||||
// This will print "1234 AB" for every input, because of the built-in normalization.
|
||||
p := parsekit.NewMatcherWrapper(postcode)
|
||||
for _, input := range []string{"1234 AB", "1234Ab", "1234 ab", "1234ab"} {
|
||||
output, _, _ := p.Match("1234 AB")
|
||||
fmt.Printf("Input: %q, output: %q\n", input, output)
|
||||
}
|
||||
// Output:
|
||||
// Input: "1234 AB", output: "1234 AB"
|
||||
// Input: "1234Ab", output: "1234 AB"
|
||||
// Input: "1234 ab", output: "1234 AB"
|
||||
// Input: "1234ab", output: "1234 AB"
|
||||
}
|
|
@ -31,7 +31,8 @@ var C = struct {
|
|||
ZeroOrMore func(Matcher) Matcher
|
||||
OneOrMore func(Matcher) Matcher
|
||||
MinMax func(int, int, Matcher) Matcher
|
||||
Separated func(Matcher, Matcher) Matcher
|
||||
Separated func(separated Matcher, separator Matcher) Matcher
|
||||
Except func(except Matcher, matcher Matcher) Matcher
|
||||
}{
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
|
@ -49,6 +50,7 @@ var C = struct {
|
|||
OneOrMore: MatchOneOrMore,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
}
|
||||
|
||||
// MatchRune creates a Matcher function that checks if the next rune from
|
||||
|
@ -274,10 +276,23 @@ func matchMinMax(min int, max int, matcher Matcher) Matcher {
|
|||
// Matchers of one type (the separated), separated by Matches of another type
|
||||
// (the separator). All matches (separated + separator) are included in the
|
||||
// output.
|
||||
func MatchSeparated(separated Matcher, separator Matcher) Matcher {
|
||||
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
|
||||
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
||||
}
|
||||
|
||||
// MatchExcept creates a Matcher that checks if the provided matcher can be
|
||||
// applied to the upcoming input. It also checks if the except Matcher can be
|
||||
// applied. If the matcher applies, but the except Matcher too, then the match
|
||||
// as a whole will be treated as a mismatch.
|
||||
func MatchExcept(except Matcher, matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
if except(m.Fork()) {
|
||||
return false
|
||||
}
|
||||
return matcher(m)
|
||||
}
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms that can be used to
|
||||
// build combinators or parsing rules.
|
||||
//
|
||||
|
|
|
@ -70,7 +70,7 @@ func TestCombinators(t *testing.T) {
|
|||
{"ghijkl", c.Opt(c.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
|
||||
{"1,2,3,b,c", c.Separated(a.Digit, a.Comma), true, "1,2,3"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
|
@ -183,7 +183,7 @@ func TestCombination(t *testing.T) {
|
|||
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
|
||||
return fmt.Sprintf("%d", len(s))
|
||||
}),
|
||||
m.Replace(c.Separated(c.Opt(a.Whitespace), a.Comma), ", "),
|
||||
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
|
||||
m.ToUpper(c.Min(1, a.ASCIILower)),
|
||||
m.Drop(a.Excl),
|
||||
c.Rep(3, a.AngleOpen),
|
||||
|
@ -211,7 +211,7 @@ func TestSequenceOfRunes(t *testing.T) {
|
|||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
parser := parsekit.New(func(p *parsekit.P) {
|
||||
parser := parsekit.NewParser(func(p *parsekit.P) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
|
@ -225,46 +225,3 @@ func TestSequenceOfRunes(t *testing.T) {
|
|||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune() {
|
||||
handler := func(p *parsekit.P) {
|
||||
p.Expects("Any valid rune")
|
||||
if p.On(a.AnyRune).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
}
|
||||
parser := parsekit.New(handler)
|
||||
run := parser.Parse("¡Any / valid / character will dö!")
|
||||
match, _, ok := run.Next()
|
||||
|
||||
// This will output '¡', since a.AnyRune matches exactly 1 rune.
|
||||
if ok {
|
||||
fmt.Printf("Match = %q\n", match)
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleModifyToUpper() {
|
||||
// A Dutch poscode consists of 4 numbers and 2 letters (1234XX).
|
||||
// The numbers never start with a zero.
|
||||
digitNotZero := c.RuneRange('1', '9')
|
||||
numbers := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
|
||||
// It is good form to write the letters in upper case.
|
||||
letter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
letters := m.ToUpper(c.Seq(letter, letter))
|
||||
|
||||
// It is good form to use a single space between letters and numbers,
|
||||
// but it is not mandatory.
|
||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||
|
||||
// With all the building blocks, we can now build the postcode parser.
|
||||
postcode := c.Seq(numbers, space, letters)
|
||||
|
||||
// Create a parser and let is parse some postcode inputs.
|
||||
// This will print "1234 AB" for every input, because of the built-in normalization.
|
||||
p := parsekit.New(postcode)
|
||||
for _, input := range []string{"1234 AB", "1234AB", "1234 ab", "1234ab"} {
|
||||
r, _, _ := p.Parse("1234 AB").Next()
|
||||
fmt.Printf("Input: %q, output: %q", input, r.Value)
|
||||
}
|
||||
}
|
||||
|
|
95
parsekit.go
95
parsekit.go
|
@ -7,63 +7,25 @@ import (
|
|||
)
|
||||
|
||||
// Parser is the top-level struct that holds the configuration for a parser.
|
||||
// The Parser can be instantiated using the parsekit.New() method.
|
||||
//
|
||||
// To start parsing input data, use the method Parser.Parse().
|
||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||
type Parser struct {
|
||||
startState StateHandler // the function that handles the very first state
|
||||
}
|
||||
|
||||
// New instantiates a new Parser.
|
||||
// The logic parameter provides the parsing logic to apply. This can be:
|
||||
// NewParser instantiates a new Parser.
|
||||
//
|
||||
// 1) A StateHandler function: in this case, a state machine-style
|
||||
// recursive descent parser is created, in which StateHandler functions
|
||||
// are used to move the state machine forward during parsing.
|
||||
// This type of parser offers a lot of flexibility and it is possible to
|
||||
// emit multiple items from the parse flow.
|
||||
//
|
||||
// This style of parser is typically used for parsing languages and
|
||||
// The Parser is a state machine-style recursive descent parser, in which
|
||||
// StateHandler functions are used to move the state machine forward during
|
||||
// parsing. This style of parser is typically used for parsing languages and
|
||||
// structured data formats (like json, toml, etc.)
|
||||
//
|
||||
// 2) A Matcher function: in this case, a parser/combinator-style parser
|
||||
// is created, which can be used to match against the provided logic.
|
||||
// The parser can only check input against the Matcher function, and
|
||||
// reports back a successful match or a failure.
|
||||
//
|
||||
// This style of parser can typically be used for validation and normalization
|
||||
// of input data. However, when you are about to use parsekit for that
|
||||
// task, consider using regular expressions instead. They might serve
|
||||
// you better.
|
||||
func New(logic interface{}) *Parser {
|
||||
switch logic := logic.(type) {
|
||||
case func(*P):
|
||||
return makeParserForStateHandler(logic)
|
||||
case StateHandler:
|
||||
return makeParserForStateHandler(logic)
|
||||
case func(m *MatchDialog) bool:
|
||||
return makeParserForMatcher(logic)
|
||||
case Matcher:
|
||||
return makeParserForMatcher(logic)
|
||||
default:
|
||||
panic(fmt.Sprintf("internal parser error: unsupported logic parameter of type %T used for parsekit.New()", logic))
|
||||
}
|
||||
}
|
||||
|
||||
func makeParserForStateHandler(handler StateHandler) *Parser {
|
||||
return &Parser{startState: handler}
|
||||
}
|
||||
|
||||
func makeParserForMatcher(matcher Matcher) *Parser {
|
||||
return New(StateHandler(func(p *P) {
|
||||
p.Expects("match")
|
||||
if p.On(matcher).Accept().RouteRepeat().End() {
|
||||
p.EmitLiteral(MatchedItem)
|
||||
}
|
||||
}))
|
||||
// To start parsing input data, use the method Parser.Parse().
|
||||
func NewParser(startState StateHandler) *Parser {
|
||||
return &Parser{startState: startState}
|
||||
}
|
||||
|
||||
// Run represents a single parse run for a Parser.
|
||||
// TODO rename to ParseRun
|
||||
type Run struct {
|
||||
p *P // a struct holding the internal state of a parse run
|
||||
}
|
||||
|
@ -106,9 +68,9 @@ func (run *Run) Next() (Item, *Error, bool) {
|
|||
|
||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||
switch {
|
||||
case i.Type == ItemEOF:
|
||||
case i.Type == itemEOF:
|
||||
return i, nil, false
|
||||
case i.Type == ItemError:
|
||||
case i.Type == itemError:
|
||||
run.p.err = &Error{i.Value, run.p.cursorLine, run.p.cursorColumn}
|
||||
return i, run.p.err, false
|
||||
default:
|
||||
|
@ -176,3 +138,38 @@ func (run *Run) invokeNextStateHandler(state StateHandler) {
|
|||
run.p.expecting = ""
|
||||
run.p.state(run.p)
|
||||
}
|
||||
|
||||
// MatcherWrapper is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a Wrapper function.
|
||||
// The MatcherWrapper can be instantiated using the parsekit.NewMatcher()
|
||||
// method.
|
||||
//
|
||||
// To match input data against the wrapped Matcher function, use the method
|
||||
// MatcherWrapper.Match().
|
||||
type MatcherWrapper struct {
|
||||
parser *Parser
|
||||
}
|
||||
|
||||
// NewMatcherWrapper instantiates a new MatcherWrapper.
|
||||
//
|
||||
// This is a simple wrapper around a Matcher function. It can be used to
|
||||
// match an input string against that Matcher function and retrieve the
|
||||
// results in a straight forward way.
|
||||
func NewMatcherWrapper(matcher Matcher) *MatcherWrapper {
|
||||
handler := func(p *P) {
|
||||
p.Expects("match")
|
||||
if p.On(matcher).Accept().End() {
|
||||
p.EmitLiteral(0) // ItemType is irrelevant
|
||||
}
|
||||
}
|
||||
return &MatcherWrapper{parser: NewParser(handler)}
|
||||
}
|
||||
|
||||
// Match runs the wrapped Matcher function against the provided input data.
|
||||
func (w *MatcherWrapper) Match(input string) (string, *Error, bool) {
|
||||
item, err, ok := w.parser.Parse(input).Next()
|
||||
if !ok {
|
||||
return "", err, false
|
||||
}
|
||||
return item.Value, nil, true
|
||||
}
|
||||
|
|
|
@ -27,16 +27,13 @@ func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
|
|||
}
|
||||
|
||||
func RunMatcherTest(t *testing.T, test MatcherTest) {
|
||||
parser := parsekit.New(test.matcher).Parse(test.input)
|
||||
item, err, ok := parser.Next()
|
||||
output, err, ok := parsekit.NewMatcherWrapper(test.matcher).Match(test.input)
|
||||
|
||||
if test.mustMatch {
|
||||
if !ok {
|
||||
t.Errorf("Test %q failed with error: %s", test.input, err)
|
||||
} else if item.Type != parsekit.MatchedItem {
|
||||
t.Errorf("Test %q failed: should match, but it didn't", test.input)
|
||||
} else if item.Value != test.expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, item.Value)
|
||||
} else if output != test.expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.input, test.expected, output)
|
||||
}
|
||||
} else {
|
||||
if ok {
|
||||
|
|
|
@ -134,7 +134,7 @@ func (p *P) ExpectEndOfFile() {
|
|||
p.RouteTo(func(p *P) {
|
||||
p.Expects("end of file")
|
||||
if p.On(A.EndOfFile).Stay().End() {
|
||||
p.Emit(ItemEOF, "EOF")
|
||||
p.Emit(itemEOF, "EOF")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
@ -5,19 +5,19 @@ import (
|
|||
)
|
||||
|
||||
// ItemType represents the type of a parser Item.
|
||||
//
|
||||
// When creating your own ItemType values, then make use of positive integer
|
||||
// values. Negative values are possible, but they are reserved for internal
|
||||
// use by parsekit.
|
||||
type ItemType int
|
||||
|
||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||
// itemEOF is a built-in parser item type that is used for flagging that the
|
||||
// end of the input was reached.
|
||||
const ItemEOF ItemType = -1
|
||||
const itemEOF ItemType = -1
|
||||
|
||||
// ItemError is a built-in parser item type that is used for flagging that
|
||||
// itemError is a built-in parser item type that is used for flagging that
|
||||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
||||
// MatchedItem is a built-in parser item type that is used for indicating a
|
||||
// successful match when using a parser that is based on a Matcher.
|
||||
const MatchedItem ItemType = -3
|
||||
const itemError ItemType = -2
|
||||
|
||||
// Item represents an item that can be emitted from the parser.
|
||||
type Item struct {
|
||||
|
@ -81,7 +81,7 @@ func (err *Error) ErrorFull() string {
|
|||
// EmitError emits a Parser error item to the client.
|
||||
func (p *P) EmitError(format string, args ...interface{}) {
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.Emit(ItemError, message)
|
||||
p.Emit(itemError, message)
|
||||
}
|
||||
|
||||
// UnexpectedInput is used by a StateHandler function to emit an error item
|
||||
|
|
Loading…
Reference in New Issue