Added a few syntactic sugar methods for TokenHandler.

This commit is contained in:
Maurice Makaay 2019-06-11 09:09:41 +00:00
parent 65895ac502
commit 0f7b4e0d26
13 changed files with 106 additions and 59 deletions

View File

@ -94,9 +94,9 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
func (calc *calculator) expr(p *parsekit.ParseAPI) {
calc.interpreter.push()
var C, A = parsekit.C, parsekit.A
var A = parsekit.A
if p.Handle(calc.term) {
for p.Accept(C.Any(A.Add, A.Subtract)) {
for p.Accept(A.Add.Or(A.Subtract)) {
op := p.Result().Rune(0)
if !p.Handle(calc.term) {
return
@ -112,9 +112,9 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
func (calc *calculator) term(p *parsekit.ParseAPI) {
calc.interpreter.push()
var C, A = parsekit.C, parsekit.A
var A = parsekit.A
if p.Handle(calc.factor) {
for p.Accept(C.Any(A.Multiply, A.Divide)) {
for p.Accept(A.Multiply.Or(A.Divide)) {
op := p.Result().Rune(0)
if !p.Handle(calc.factor) {
return

View File

@ -62,9 +62,9 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
// - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters.
digitNotZero := C.Except(A.Rune('0'), A.Digit)
pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
pcDigits := C.Seq(digitNotZero, A.Digit.Times(3))
pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
pcLetters := M.ToUpper(pcLetter.Times(2))
space := M.Replace(C.Opt(A.Blanks), " ")
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)

View File

@ -57,7 +57,11 @@ func createHelloTokenizer() *parsekit.Tokenizer {
comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
separator := c.Any(comma, a.Blank)
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)
greeting := m.Drop(hello).
Then(m.Drop(separator)).
Then(name).
Then(m.Drop(a.Excl)).
Then(a.EndOfFile)
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.

View File

@ -183,7 +183,7 @@ func (p *ParseAPI) ExpectEndOfFile() {
}
// Expected is used to set an error that tells the user that some
// unexpected input was encountered, and that input was expected.
// unexpected input was encountered, and what input was expected.
//
// The 'expected' argument can be an empty string. In that case the error
// message will not contain a description of the expected input.

View File

@ -21,6 +21,9 @@ type ParseHandler func(*ParseAPI)
// parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// The startHandler argument points the Parser to the ParseHandler function
// that must be executed at the start of the parsing process.
//
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
if startHandler == nil {

View File

@ -36,7 +36,7 @@
// 0 6 9
//
// So after a flush, the first upcoming rune after the flushed runes
// will always have index 0.
// will always be at offset 0.
package reader
import (

View File

@ -20,7 +20,7 @@ import (
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.

View File

@ -7,40 +7,40 @@ import (
)
func ExampleTokenAPI_Fork() {
// This custom TokenHandler checks for a sequence of runes: "abcd"
// This is done in 4 steps and only after finishing all steps,
// the TokenHandler will confirm a successful match.
abcdSequence := func(t *parsekit.TokenAPI) bool {
// This custom TokenHandler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *parsekit.TokenAPI) bool {
a := parsekit.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
for _, checkRune := range "abcd" {
readRune, err := child.NextRune()
if err != nil || readRune != checkRune {
return false // report mismatch, parent t is left untouched
if a.Rune(r)(child) {
child.Merge() // accept results into parent t
return true // and report a successful match
}
child.Accept() // add rune to child output
}
child.Merge() // we have a match, add resulting output to parent
return true // and report the successful match
// If we get here, then no match was found. Return false to communicate
// this to the caller.
return false
}
// Note: a custom TokenHandler is normally not what you need.
// You can make use of the parser/combinator tooling to do things
// a lot simpler. The handler from above can be replaced with:
simpler := parsekit.A.Str("abcd")
// a lot simpler and take care of forking at the appropriate places.
// The handler from above can be replaced with:
simpler := parsekit.A.RuneRange('a', 'c')
result, err := parsekit.NewTokenizer(abcdSequence).Execute("abcdefgh")
result, err := parsekit.NewTokenizer(abcHandler).Execute("another test")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(simpler).Execute("abcdefgh")
result, err = parsekit.NewTokenizer(simpler).Execute("curious")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence).Execute("abcx")
result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence).Execute("xyz")
result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match")
fmt.Println(result, err)
// Output:
// abcd <nil>
// abcd <nil>
// <nil> unexpected input at start of file
// a <nil>
// c <nil>
// b <nil>
// <nil> unexpected input at start of file
}

View File

@ -75,15 +75,12 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
fooToken := c.Seq(
m.Drop(c.ZeroOrMore(a.Asterisk)),
tok.Str("COMBI", c.Seq(
tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
)),
m.Drop(c.ZeroOrMore(a.Asterisk)),
)
ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII)))
utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune))))
stars := m.Drop(c.ZeroOrMore(a.Asterisk))
fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars)
parser := parsekit.NewTokenizer(fooToken)
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
output := "This is fine ASCIIÅltho hère öt endĩt!"
result, err := parser.Execute(input)

View File

@ -5,8 +5,9 @@ import (
"strings"
)
// TokenHandlerResult is a struct that is used for holding and managing tokenizing results as
// produced by a TokenHandler.
// TokenHandlerResult is a struct that is used for holding tokenizing results
// as produced by a TokenHandler. It also provides the API that TokenHandlers
// and Parsers can use to respectively store and access the results.
type TokenHandlerResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune

View File

@ -36,7 +36,7 @@ var C = struct {
ZeroOrMore func(TokenHandler) TokenHandler
OneOrMore func(TokenHandler) TokenHandler
MinMax func(min int, max int, handler TokenHandler) TokenHandler
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string?
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler
Except func(except TokenHandler, handler TokenHandler) TokenHandler
}{
Opt: MatchOpt,
@ -241,13 +241,13 @@ var A = struct {
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
Trim func(handler TokenHandler, cutset string) TokenHandler
TrimLeft func(handler TokenHandler, cutset string) TokenHandler
TrimRight func(handler TokenHandler, cutset string) TokenHandler
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
Replace func(handler TokenHandler, replaceWith string) TokenHandler
ByCallback func(TokenHandler, func(string) string) TokenHandler
}{
Drop: ModifyDrop,
@ -409,13 +409,7 @@ func MatchStrNoCase(expected string) TokenHandler {
// no output is generated but still a successful match is reported (but the
// result will be empty).
func MatchOpt(handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool {
child := t.Fork()
if handler(child) {
child.Merge()
}
return true
}
return MatchMinMax(0, 1, handler)
}
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
@ -457,8 +451,7 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
// does not, then the next rune from the input will be reported as a match.
func MatchNot(handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool {
probe := t.Fork()
if handler(probe) {
if handler(t.Fork()) {
return false
}
_, err := t.NextRune()
@ -479,6 +472,10 @@ func MatchNot(handler TokenHandler) TokenHandler {
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input.
//
// Another way to use this method is by applying the following syntactic sugar:
//
// MatchRune('X').Times(4)
func MatchRep(times int, handler TokenHandler) TokenHandler {
	// An exact repetition count is expressed as a min/max match in which
	// the lower and upper bound are the same value.
	lower, upper := times, times
	// The name is handed to matchMinMax, which includes it in the panic
	// message that it raises for an invalid min/max definition.
	const name = "MatchRep"
	return matchMinMax(lower, upper, handler, name)
}
@ -495,7 +492,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
// applied at most the provided maximum number of times.
// When more matches are possible, these will be included in the output.
// When more matches are possible, these will be included in the output.
// Zero matches are considered a successful match.
func MatchMax(max int, handler TokenHandler) TokenHandler {
if max < 0 {
@ -535,20 +532,22 @@ func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandl
callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
}
return func(t *TokenAPI) bool {
child := t.Fork()
total := 0
// Check for the minimum required amount of matches.
for total < min {
total++
child := t.Fork()
if !handler(child) {
return false
}
child.Merge()
}
// No specified max: include the rest of the available matches.
// Specified max: include the rest of the available matches, up to the max.
child.Merge()
//child.Merge()
for max < 0 || total < max {
total++
child := t.Fork()
if !handler(child) {
break
}

View File

@ -378,6 +378,19 @@ func TestTokenMakers(t *testing.T) {
})
}
// TestSyntacticSugar exercises the chainable TokenHandler helper methods
// (Times, Or, Then, Optional and SeparatedBy) against fixed inputs.
func TestSyntacticSugar(t *testing.T) {
	a := parsekit.A

	// Each case presumably lists: the input, the handler under test, whether
	// a match is expected and the expected output (verify against the
	// definition of parsekit.TokenHandlerT).
	cases := []parsekit.TokenHandlerT{
		// Times
		{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
		{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
		// Then
		{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
		{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
		// Optional
		{"cccccc", a.Rune('c').Optional(), true, "c"},
		{"dddddd", a.Rune('c').Optional(), true, ""},
		// SeparatedBy
		{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
	}

	parsekit.AssertTokenHandlers(t, cases)
}
func TestSequenceOfRunes(t *testing.T) {
var c, a = parsekit.C, parsekit.A
sequence := c.Seq(

View File

@ -18,6 +18,36 @@ type Tokenizer struct {
// for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool
// Or is syntactic sugar for MatchAny. It lets you write
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
// The receiver is passed to MatchAny first, followed by otherHandler.
func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler {
	alternatives := MatchAny(handler, otherHandler)
	return alternatives
}
// Times is syntactic sugar for MatchRep. It lets you write
// MatchRep(3, handler) as handler.Times(3).
// The count n is passed to MatchRep unchanged.
func (handler TokenHandler) Times(n int) TokenHandler {
	repeated := MatchRep(n, handler)
	return repeated
}
// Then is syntactic sugar for MatchSeq. It lets you write
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
// Every call to Then wraps the receiver and otherHandler, in that order,
// in a new two-handler MatchSeq.
func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler {
	sequence := MatchSeq(handler, otherHandler)
	return sequence
}
// SeparatedBy is syntactic sugar for MatchSeparated. It lets you write
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
// Note the argument order: the separator is handed to MatchSeparated first
// and the receiver second.
func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler {
	separated := MatchSeparated(separatorHandler, handler)
	return separated
}
// Optional is syntactic sugar for MatchOpt. It lets you write
// MatchOpt(handler) as handler.Optional().
func (handler TokenHandler) Optional() TokenHandler {
	optional := MatchOpt(handler)
	return optional
}
// NewTokenizer instantiates a new Tokenizer.
//
// This is a simple wrapper around a TokenHandler function. It can be used to