Added a few syntactic sugar methods for TokenHandler.

This commit is contained in:
Maurice Makaay 2019-06-11 09:09:41 +00:00
parent 65895ac502
commit 0f7b4e0d26
13 changed files with 106 additions and 59 deletions

View File

@ -94,9 +94,9 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
func (calc *calculator) expr(p *parsekit.ParseAPI) { func (calc *calculator) expr(p *parsekit.ParseAPI) {
calc.interpreter.push() calc.interpreter.push()
var C, A = parsekit.C, parsekit.A var A = parsekit.A
if p.Handle(calc.term) { if p.Handle(calc.term) {
for p.Accept(C.Any(A.Add, A.Subtract)) { for p.Accept(A.Add.Or(A.Subtract)) {
op := p.Result().Rune(0) op := p.Result().Rune(0)
if !p.Handle(calc.term) { if !p.Handle(calc.term) {
return return
@ -112,9 +112,9 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
func (calc *calculator) term(p *parsekit.ParseAPI) { func (calc *calculator) term(p *parsekit.ParseAPI) {
calc.interpreter.push() calc.interpreter.push()
var C, A = parsekit.C, parsekit.A var A = parsekit.A
if p.Handle(calc.factor) { if p.Handle(calc.factor) {
for p.Accept(C.Any(A.Multiply, A.Divide)) { for p.Accept(A.Multiply.Or(A.Divide)) {
op := p.Result().Rune(0) op := p.Result().Rune(0)
if !p.Handle(calc.factor) { if !p.Handle(calc.factor) {
return return

View File

@ -62,9 +62,9 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
// - It is good form to write the letters in upper case. // - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters. // - It is good form to use a single space between digits and letters.
digitNotZero := C.Except(A.Rune('0'), A.Digit) digitNotZero := C.Except(A.Rune('0'), A.Digit)
pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit)) pcDigits := C.Seq(digitNotZero, A.Digit.Times(3))
pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper) pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter)) pcLetters := M.ToUpper(pcLetter.Times(2))
space := M.Replace(C.Opt(A.Blanks), " ") space := M.Replace(C.Opt(A.Blanks), " ")
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile) postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)

View File

@ -57,7 +57,11 @@ func createHelloTokenizer() *parsekit.Tokenizer {
comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank)) comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
separator := c.Any(comma, a.Blank) separator := c.Any(comma, a.Blank)
name := c.OneOrMore(c.Not(a.Excl)) name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile) greeting := m.Drop(hello).
Then(m.Drop(separator)).
Then(name).
Then(m.Drop(a.Excl)).
Then(a.EndOfFile)
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler. // us to match some input against that handler.

View File

@ -183,7 +183,7 @@ func (p *ParseAPI) ExpectEndOfFile() {
} }
// Expected is used to set an error that tells the user that some // Expected is used to set an error that tells the user that some
// unexpected input was encountered, and that input was expected. // unexpected input was encountered, and what input was expected.
// //
// The 'expected' argument can be an empty string. In that case the error // The 'expected' argument can be an empty string. In that case the error
// message will not contain a description of the expected input. // message will not contain a description of the expected input.

View File

@ -21,6 +21,9 @@ type ParseHandler func(*ParseAPI)
// parsing. This style of parser is typically used for parsing programming // parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.) // languages and structured data formats (like json, xml, toml, etc.)
// //
// The startHandler argument points the Parser to the ParseHandler function
// that must be executed at the start of the parsing process.
//
// To parse input data, use the method Parser.Execute(). // To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser { func NewParser(startHandler ParseHandler) *Parser {
if startHandler == nil { if startHandler == nil {

View File

@ -36,7 +36,7 @@
// 0 6 9 // 0 6 9
// //
// So after a flush, the first upcoming rune after the flushed runes // So after a flush, the first upcoming rune after the flushed runes
// will always have index 0. // will always be at offset 0.
package reader package reader
import ( import (

View File

@ -20,7 +20,7 @@ import (
// //
// By invoking NextRune() + Accept() multiple times, the result can be extended // By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be // with as many runes as needed. Runes collected this way can later on be
// retrieved using the method // retrieved using the method Result().Runes().
// //
// It is mandatory to call Accept() after retrieving a rune, before calling // It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic. // NextRune() again. Failing to do so will result in a panic.

View File

@ -7,40 +7,40 @@ import (
) )
func ExampleTokenAPI_Fork() { func ExampleTokenAPI_Fork() {
// This custom TokenHandler checks for a sequence of runes: "abcd" // This custom TokenHandler checks for input 'a', 'b' or 'c'.
// This is done in 4 steps and only after finishing all steps, abcHandler := func(t *parsekit.TokenAPI) bool {
// the TokenHandler will confirm a successful match. a := parsekit.A
abcdSequence := func(t *parsekit.TokenAPI) bool { for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t child := t.Fork() // fork, so we won't change parent t
for _, checkRune := range "abcd" { if a.Rune(r)(child) {
readRune, err := child.NextRune() child.Merge() // accept results into parent t
if err != nil || readRune != checkRune { return true // and report a successful match
return false // report mismatch, parent t is left untouched
} }
child.Accept() // add rune to child output
} }
child.Merge() // we have a match, add resulting output to parent // If we get here, then no match was found. Return false to communicate
return true // and report the successful match // this to the caller.
return false
} }
// Note: a custom TokenHandler is normally not what you need. // Note: a custom TokenHandler is normally not what you need.
// You can make use of the parser/combinator tooling to do things // You can make use of the parser/combinator tooling to do things
// a lot simpler. The handler from above can be replaced with: // a lot simpler and take care of forking at the appropriate places.
simpler := parsekit.A.Str("abcd") // The handler from above can be replaced with:
simpler := parsekit.A.RuneRange('a', 'c')
result, err := parsekit.NewTokenizer(abcdSequence).Execute("abcdefgh") result, err := parsekit.NewTokenizer(abcHandler).Execute("another test")
fmt.Println(result, err) fmt.Println(result, err)
result, err = parsekit.NewTokenizer(simpler).Execute("abcdefgh") result, err = parsekit.NewTokenizer(simpler).Execute("curious")
fmt.Println(result, err) fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence).Execute("abcx") result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!")
fmt.Println(result, err) fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence).Execute("xyz") result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match")
fmt.Println(result, err) fmt.Println(result, err)
// Output: // Output:
// abcd <nil> // a <nil>
// abcd <nil> // c <nil>
// <nil> unexpected input at start of file // b <nil>
// <nil> unexpected input at start of file // <nil> unexpected input at start of file
} }

View File

@ -75,15 +75,12 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
fooToken := c.Seq( ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII)))
m.Drop(c.ZeroOrMore(a.Asterisk)), utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune))))
tok.Str("COMBI", c.Seq( stars := m.Drop(c.ZeroOrMore(a.Asterisk))
tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))), fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars)
tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
)),
m.Drop(c.ZeroOrMore(a.Asterisk)),
)
parser := parsekit.NewTokenizer(fooToken) parser := parsekit.NewTokenizer(fooToken)
input := "*** This is fine ASCII Åltho hère öt endĩt! ***" input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
output := "This is fine ASCIIÅltho hère öt endĩt!" output := "This is fine ASCIIÅltho hère öt endĩt!"
result, err := parser.Execute(input) result, err := parser.Execute(input)

View File

@ -5,8 +5,9 @@ import (
"strings" "strings"
) )
// TokenHandlerResult is a struct that is used for holding and managing tokenizing results as // TokenHandlerResult is a struct that is used for holding tokenizing results
// produced by a TokenHandler. // as produced by a TokenHandler. It also provides the API that TokenHandlers
// and Parsers can use to respectively store and access the results.
type TokenHandlerResult struct { type TokenHandlerResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune() lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune runes []rune

View File

@ -36,7 +36,7 @@ var C = struct {
ZeroOrMore func(TokenHandler) TokenHandler ZeroOrMore func(TokenHandler) TokenHandler
OneOrMore func(TokenHandler) TokenHandler OneOrMore func(TokenHandler) TokenHandler
MinMax func(min int, max int, handler TokenHandler) TokenHandler MinMax func(min int, max int, handler TokenHandler) TokenHandler
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string? Separated func(separated TokenHandler, separator TokenHandler) TokenHandler
Except func(except TokenHandler, handler TokenHandler) TokenHandler Except func(except TokenHandler, handler TokenHandler) TokenHandler
}{ }{
Opt: MatchOpt, Opt: MatchOpt,
@ -241,13 +241,13 @@ var A = struct {
// Doing so saves you a lot of typing, and it makes your code a lot cleaner. // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct { var M = struct {
Drop func(TokenHandler) TokenHandler Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? Trim func(handler TokenHandler, cutset string) TokenHandler
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimLeft func(handler TokenHandler, cutset string) TokenHandler
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments? TrimRight func(handler TokenHandler, cutset string) TokenHandler
TrimSpace func(handler TokenHandler) TokenHandler TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments? Replace func(handler TokenHandler, replaceWith string) TokenHandler
ByCallback func(TokenHandler, func(string) string) TokenHandler ByCallback func(TokenHandler, func(string) string) TokenHandler
}{ }{
Drop: ModifyDrop, Drop: ModifyDrop,
@ -409,13 +409,7 @@ func MatchStrNoCase(expected string) TokenHandler {
// no output is generated but still a successful match is reported (but the // no output is generated but still a successful match is reported (but the
// result will be empty). // result will be empty).
func MatchOpt(handler TokenHandler) TokenHandler { func MatchOpt(handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool { return MatchMinMax(0, 1, handler)
child := t.Fork()
if handler(child) {
child.Merge()
}
return true
}
} }
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be // MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
@ -457,8 +451,7 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
// does not, then the next rune from the input will be reported as a match. // does not, then the next rune from the input will be reported as a match.
func MatchNot(handler TokenHandler) TokenHandler { func MatchNot(handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
probe := t.Fork() if handler(t.Fork()) {
if handler(probe) {
return false return false
} }
_, err := t.NextRune() _, err := t.NextRune()
@ -479,6 +472,10 @@ func MatchNot(handler TokenHandler) TokenHandler {
// //
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX". // will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input. // In that last case, there will be a remainder "XX" on the input.
//
// Another way to use this method, is by applying the following syntactic sugar:
//
// MatchRune('X').Times(4)
func MatchRep(times int, handler TokenHandler) TokenHandler { func MatchRep(times int, handler TokenHandler) TokenHandler {
return matchMinMax(times, times, handler, "MatchRep") return matchMinMax(times, times, handler, "MatchRep")
} }
@ -495,7 +492,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be // MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
// applied at maximum the provided maximum number of times. // applied at maximum the provided maximum number of times.
// When more matches are possible, these will be included in the output. // When more matches are possible, these will be included in the output.
// Zero matches are considered a successful match. // Zero matches are considered a successful match.
func MatchMax(max int, handler TokenHandler) TokenHandler { func MatchMax(max int, handler TokenHandler) TokenHandler {
if max < 0 { if max < 0 {
@ -535,20 +532,22 @@ func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandl
callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min) callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
} }
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
child := t.Fork()
total := 0 total := 0
// Check for the minimum required amount of matches. // Check for the minimum required amount of matches.
for total < min { for total < min {
total++ total++
child := t.Fork()
if !handler(child) { if !handler(child) {
return false return false
} }
child.Merge()
} }
// No specified max: include the rest of the available matches. // No specified max: include the rest of the available matches.
// Specified max: include the rest of the available matches, up to the max. // Specified max: include the rest of the available matches, up to the max.
child.Merge() //child.Merge()
for max < 0 || total < max { for max < 0 || total < max {
total++ total++
child := t.Fork()
if !handler(child) { if !handler(child) {
break break
} }

View File

@ -378,6 +378,19 @@ func TestTokenMakers(t *testing.T) {
}) })
} }
// TestSyntacticSugar exercises the TokenHandler convenience methods Times,
// Or, Then, Optional and SeparatedBy by feeding a table of cases to
// parsekit.AssertTokenHandlers.
//
// NOTE(review): each table row appears to be {input, handler, whether a match
// is expected, expected output} - confirm against the TokenHandlerT definition.
func TestSyntacticSugar(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
// Times: only four of the six available runes end up in the output.
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
// Or combined with Times: either alternative may satisfy each repetition.
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
// Then: the handlers must match in the given order.
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
// Optional: a match is reported both with and without matching input.
{"cccccc", a.Rune('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""},
// SeparatedBy: ASCII items separated by a comma with optional surrounding space.
{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
})
}
func TestSequenceOfRunes(t *testing.T) { func TestSequenceOfRunes(t *testing.T) {
var c, a = parsekit.C, parsekit.A var c, a = parsekit.C, parsekit.A
sequence := c.Seq( sequence := c.Seq(

View File

@ -18,6 +18,36 @@ type Tokenizer struct {
// for retrieving input data to match against and for reporting back results. // for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool type TokenHandler func(t *TokenAPI) bool
// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
//
// It returns the TokenHandler produced by MatchAny, with the receiver passed
// as the first alternative and otherHandler as the second.
func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler {
return MatchAny(handler, otherHandler)
}
// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
//
// The argument n is forwarded unchanged as the repetition count of MatchRep,
// and the receiver is the handler that gets repeated.
func (handler TokenHandler) Times(n int) TokenHandler {
return MatchRep(n, handler)
}
// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
//
// Each call wraps exactly two handlers: the receiver followed by otherHandler.
// A chain of Then calls therefore builds nested two-argument MatchSeq calls,
// MatchSeq(MatchSeq(handler1, handler2), handler3), rather than one flat call.
func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler {
return MatchSeq(handler, otherHandler)
}
// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
//
// Note the argument order: the separator handler is passed to MatchSeparated
// first and the receiver (the handler for the separated items) second.
func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler {
return MatchSeparated(separatorHandler, handler)
}
// Optional is syntactic sugar that allows you to write a construction like
// MatchOpt(handler) as handler.Optional().
//
// It returns the TokenHandler produced by MatchOpt for the receiver.
func (handler TokenHandler) Optional() TokenHandler {
return MatchOpt(handler)
}
// NewTokenizer instantiates a new Tokenizer. // NewTokenizer instantiates a new Tokenizer.
// //
// This is a simple wrapper around a TokenHandler function. It can be used to // This is a simple wrapper around a TokenHandler function. It can be used to