Added a few syntactic sugar methods for TokenHandler.
This commit is contained in:
parent 65895ac502
commit 0f7b4e0d26
@ -94,9 +94,9 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
 func (calc *calculator) expr(p *parsekit.ParseAPI) {
     calc.interpreter.push()
 
-    var C, A = parsekit.C, parsekit.A
+    var A = parsekit.A
     if p.Handle(calc.term) {
-        for p.Accept(C.Any(A.Add, A.Subtract)) {
+        for p.Accept(A.Add.Or(A.Subtract)) {
             op := p.Result().Rune(0)
             if !p.Handle(calc.term) {
                 return
@ -112,9 +112,9 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
 func (calc *calculator) term(p *parsekit.ParseAPI) {
     calc.interpreter.push()
 
-    var C, A = parsekit.C, parsekit.A
+    var A = parsekit.A
     if p.Handle(calc.factor) {
-        for p.Accept(C.Any(A.Multiply, A.Divide)) {
+        for p.Accept(A.Multiply.Or(A.Divide)) {
             op := p.Result().Rune(0)
             if !p.Handle(calc.factor) {
                 return
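The two calculator hunks above only swap C.Any(...) for the new .Or(...) sugar; the matching behavior is meant to stay the same. Below is a minimal, hedged sketch of that equivalence run through a Tokenizer. The import path is an assumption (it is not shown in this diff); everything else only uses names that appear in the hunks.

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path, not part of this diff
)

func main() {
    var C, A = parsekit.C, parsekit.A

    // Pre-sugar and post-sugar forms of the operator matcher from the hunks above.
    oldStyle := C.Any(A.Add, A.Subtract)
    newStyle := A.Add.Or(A.Subtract)

    // Both handlers should accept a leading '+' or '-' in the same way.
    for _, handler := range []parsekit.TokenHandler{oldStyle, newStyle} {
        result, err := parsekit.NewTokenizer(handler).Execute("-42")
        fmt.Println(result, err)
    }
}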
@ -62,9 +62,9 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
     // - It is good form to write the letters in upper case.
     // - It is good form to use a single space between digits and letters.
     digitNotZero := C.Except(A.Rune('0'), A.Digit)
-    pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
-    pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
-    pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
+    pcDigits := C.Seq(digitNotZero, A.Digit.Times(3))
+    pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
+    pcLetters := M.ToUpper(pcLetter.Times(2))
     space := M.Replace(C.Opt(A.Blanks), " ")
     postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
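For the .Times(n) calls introduced above: Times(n) is method-call sugar for C.Rep(n, handler), as documented later in this commit. A hedged sketch of that equivalence (same assumed import path as before):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    var C, A = parsekit.C, parsekit.A

    // C.Rep(3, A.Digit) and A.Digit.Times(3) are intended to be interchangeable.
    oldStyle := C.Rep(3, A.Digit)
    newStyle := A.Digit.Times(3)

    for _, handler := range []parsekit.TokenHandler{oldStyle, newStyle} {
        result, err := parsekit.NewTokenizer(handler).Execute("12345")
        fmt.Println(result, err) // both should match the first three digits: "123"
    }
}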
@ -57,7 +57,11 @@ func createHelloTokenizer() *parsekit.Tokenizer {
     comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
     separator := c.Any(comma, a.Blank)
     name := c.OneOrMore(c.Not(a.Excl))
-    greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)
+    greeting := m.Drop(hello).
+        Then(m.Drop(separator)).
+        Then(name).
+        Then(m.Drop(a.Excl)).
+        Then(a.EndOfFile)
 
     // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
     // us to match some input against that handler.
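The .Then(...) chain above is sugar for wrapping the handlers in c.Seq(...). A small sketch of the equivalence, using only matchers that appear elsewhere in this diff (import path assumed):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    var c, a, m = parsekit.C, parsekit.A, parsekit.M

    // The same sequence written with the combinator and with the new sugar.
    oldStyle := c.Seq(m.Drop(a.Excl), a.ASCII, a.ASCII)
    newStyle := m.Drop(a.Excl).Then(a.ASCII).Then(a.ASCII)

    for _, handler := range []parsekit.TokenHandler{oldStyle, newStyle} {
        result, err := parsekit.NewTokenizer(handler).Execute("!hi there")
        fmt.Println(result, err) // both should drop the '!' and match "hi"
    }
}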
@ -183,7 +183,7 @@ func (p *ParseAPI) ExpectEndOfFile() {
 }
 
 // Expected is used to set an error that tells the user that some
-// unexpected input was encountered, and that input was expected.
+// unexpected input was encountered, and what input was expected.
 //
 // The 'expected' argument can be an empty string. In that case the error
 // message will not contain a description of the expected input.
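For context on the corrected comment above, here is a hedged sketch of a ParseHandler that uses Expected(). The package name and the parseDigit handler are hypothetical; only the p.Accept(), p.Expected() and p.Result() calls documented in this diff are assumed.

package myparser // hypothetical package that uses parsekit

import parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path

// parseDigit is a hypothetical ParseHandler that consumes a single digit.
func parseDigit(p *parsekit.ParseAPI) {
    if !p.Accept(parsekit.A.Digit) {
        // A non-empty description makes the error message mention what was
        // expected; an empty string would omit that part (see the comment above).
        p.Expected("a digit")
        return
    }
    // The accepted digit is now available through p.Result().
}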
@ -21,6 +21,9 @@ type ParseHandler func(*ParseAPI)
 // parsing. This style of parser is typically used for parsing programming
 // languages and structured data formats (like json, xml, toml, etc.)
 //
+// The startHandler argument points the Parser to the ParseHandler function
+// that must be executed at the start of the parsing process.
+//
 // To parse input data, use the method Parser.Execute().
 func NewParser(startHandler ParseHandler) *Parser {
     if startHandler == nil {
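A minimal sketch of wiring up the constructor documented above. The exact Parser.Execute() signature is not shown in this hunk, so the call below (input string in, error out) is an assumption; parseDigit is the hypothetical handler from the previous sketch.

package myparser

import parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path

func parseSomething(input string) error {
    // NewParser requires a non-nil start handler (see the nil check above).
    parser := parsekit.NewParser(parseDigit)

    // Assumption: Execute() takes the input to parse and returns an error
    // when the ParseHandler reports one (e.g. via Expected()).
    return parser.Execute(input)
}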
@ -36,7 +36,7 @@
 // 0 6 9
 //
 // So after a flush, the first upcoming rune after the flushed runes
-// will always have index 0.
+// will always be at offset 0.
 package reader
 
 import (
@ -20,7 +20,7 @@ import (
 //
 // By invoking NextRune() + Accept() multiple times, the result can be extended
 // with as many runes as needed. Runes collected this way can later on be
-// retrieved using the method
+// retrieved using the method Result().Runes().
 //
 // It is mandatory to call Accept() after retrieving a rune, before calling
 // NextRune() again. Failing to do so will result in a panic.
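A short sketch of the NextRune()/Accept() contract described above, written as a custom TokenHandler in the style of the Fork example further down (import path assumed):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    // twoRunes reads two runes from the input. Each retrieved rune must be
    // Accept()ed before the next NextRune() call, otherwise the TokenAPI panics.
    twoRunes := func(t *parsekit.TokenAPI) bool {
        for i := 0; i < 2; i++ {
            if _, err := t.NextRune(); err != nil {
                return false
            }
            t.Accept() // mandatory before calling NextRune() again
        }
        return true
    }

    result, err := parsekit.NewTokenizer(twoRunes).Execute("hi!")
    fmt.Println(result, err) // the two accepted runes should form the result
}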
@ -7,40 +7,40 @@ import (
 )
 
 func ExampleTokenAPI_Fork() {
-    // This custom TokenHandler checks for a sequence of runes: "abcd"
-    // This is done in 4 steps and only after finishing all steps,
-    // the TokenHandler will confirm a successful match.
-    abcdSequence := func(t *parsekit.TokenAPI) bool {
-        child := t.Fork() // fork, so we won't change parent t
-        for _, checkRune := range "abcd" {
-            readRune, err := child.NextRune()
-            if err != nil || readRune != checkRune {
-                return false // report mismatch, parent t is left untouched
+    // This custom TokenHandler checks for input 'a', 'b' or 'c'.
+    abcHandler := func(t *parsekit.TokenAPI) bool {
+        a := parsekit.A
+        for _, r := range []rune{'a', 'b', 'c'} {
+            child := t.Fork() // fork, so we won't change parent t
+            if a.Rune(r)(child) {
+                child.Merge() // accept results into parent t
+                return true // and report a successful match
             }
-            child.Accept() // add rune to child output
         }
-        child.Merge() // we have a match, add resulting output to parent
-        return true // and report the successful match
+        // If we get here, then no match was found. Return false to communicate
+        // this to the caller.
+        return false
     }
 
     // Note: a custom TokenHandler is normally not what you need.
     // You can make use of the parser/combinator tooling to do things
-    // a lot simpler. The handler from above can be replaced with:
-    simpler := parsekit.A.Str("abcd")
+    // a lot simpler and take care of forking at the appropriate places.
+    // The handler from above can be replaced with:
+    simpler := parsekit.A.RuneRange('a', 'c')
 
-    result, err := parsekit.NewTokenizer(abcdSequence).Execute("abcdefgh")
+    result, err := parsekit.NewTokenizer(abcHandler).Execute("another test")
     fmt.Println(result, err)
-    result, err = parsekit.NewTokenizer(simpler).Execute("abcdefgh")
+    result, err = parsekit.NewTokenizer(simpler).Execute("curious")
     fmt.Println(result, err)
-    result, err = parsekit.NewTokenizer(abcdSequence).Execute("abcx")
+    result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!")
     fmt.Println(result, err)
-    result, err = parsekit.NewTokenizer(abcdSequence).Execute("xyz")
+    result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match")
     fmt.Println(result, err)
 
     // Output:
-    // abcd <nil>
-    // abcd <nil>
-    // <nil> unexpected input at start of file
+    // a <nil>
+    // c <nil>
+    // b <nil>
     // <nil> unexpected input at start of file
 }
@ -75,15 +75,12 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
 
 func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
     var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
-    fooToken := c.Seq(
-        m.Drop(c.ZeroOrMore(a.Asterisk)),
-        tok.Str("COMBI", c.Seq(
-            tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
-            tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
-        )),
-        m.Drop(c.ZeroOrMore(a.Asterisk)),
-    )
+    ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII)))
+    utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune))))
+    stars := m.Drop(c.ZeroOrMore(a.Asterisk))
+    fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars)
     parser := parsekit.NewTokenizer(fooToken)
 
     input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
     output := "This is fine ASCIIÅltho hère öt endĩt!"
     result, err := parser.Execute(input)
@ -5,8 +5,9 @@ import (
     "strings"
 )
 
-// TokenHandlerResult is a struct that is used for holding and managing tokenizing results as
-// produced by a TokenHandler.
+// TokenHandlerResult is a struct that is used for holding tokenizing results
+// as produced by a TokenHandler. It also provides the API that TokenHandlers
+// and Parsers can use to respectively store and access the results.
 type TokenHandlerResult struct {
     lastRune *runeInfo // Information about the last rune read using NextRune()
     runes    []rune
@ -36,7 +36,7 @@ var C = struct {
     ZeroOrMore func(TokenHandler) TokenHandler
     OneOrMore  func(TokenHandler) TokenHandler
     MinMax     func(min int, max int, handler TokenHandler) TokenHandler
-    Separated  func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string?
+    Separated  func(separated TokenHandler, separator TokenHandler) TokenHandler
     Except     func(except TokenHandler, handler TokenHandler) TokenHandler
 }{
     Opt: MatchOpt,
@ -241,13 +241,13 @@ var A = struct {
 // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
 var M = struct {
     Drop       func(TokenHandler) TokenHandler
-    Trim       func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
-    TrimLeft   func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
-    TrimRight  func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
+    Trim       func(handler TokenHandler, cutset string) TokenHandler
+    TrimLeft   func(handler TokenHandler, cutset string) TokenHandler
+    TrimRight  func(handler TokenHandler, cutset string) TokenHandler
     TrimSpace  func(handler TokenHandler) TokenHandler
     ToLower    func(TokenHandler) TokenHandler
     ToUpper    func(TokenHandler) TokenHandler
-    Replace    func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
+    Replace    func(handler TokenHandler, replaceWith string) TokenHandler
     ByCallback func(TokenHandler, func(string) string) TokenHandler
 }{
     Drop: ModifyDrop,
@ -409,13 +409,7 @@ func MatchStrNoCase(expected string) TokenHandler {
 // no output is generated but still a successful match is reported (but the
 // result will be empty).
 func MatchOpt(handler TokenHandler) TokenHandler {
-    return func(t *TokenAPI) bool {
-        child := t.Fork()
-        if handler(child) {
-            child.Merge()
-        }
-        return true
-    }
+    return MatchMinMax(0, 1, handler)
 }
 
 // MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
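Since MatchOpt() now delegates to MatchMinMax(0, 1, ...), zero matches still count as a successful, empty match; the new Optional() sugar builds on the same behavior. A hedged sketch (assumed import path):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    optX := parsekit.A.Rune('x').Optional() // sugar for MatchOpt(...)

    // 'x' is present: it is consumed and included in the output.
    result, err := parsekit.NewTokenizer(optX).Execute("xyz")
    fmt.Println(result, err)

    // 'x' is absent: still a successful match, with an empty result.
    result, err = parsekit.NewTokenizer(optX).Execute("yz")
    fmt.Println(result, err)
}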
@ -457,8 +451,7 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
 // does not, then the next rune from the input will be reported as a match.
 func MatchNot(handler TokenHandler) TokenHandler {
     return func(t *TokenAPI) bool {
-        probe := t.Fork()
-        if handler(probe) {
+        if handler(t.Fork()) {
             return false
         }
         _, err := t.NextRune()
@ -479,6 +472,10 @@ func MatchNot(handler TokenHandler) TokenHandler {
 //
 // will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
 // In that last case, there will be a remainder "XX" on the input.
+//
+// Another way to use this method is by applying the following syntactic sugar:
+//
+//   MatchRune('X').Times(4)
 func MatchRep(times int, handler TokenHandler) TokenHandler {
     return matchMinMax(times, times, handler, "MatchRep")
 }
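To make the remainder behavior described above concrete, a small sketch using the Times() sugar that the new comment refers to (assumed import path):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    fourX := parsekit.A.Rune('X').Times(4) // sugar for MatchRep(4, ...)

    result, err := parsekit.NewTokenizer(fourX).Execute("XXX")
    fmt.Println(result, err) // too few X's: no match is reported

    result, err = parsekit.NewTokenizer(fourX).Execute("XXXXXX")
    fmt.Println(result, err) // should match "XXXX", leaving "XX" on the input
}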
@ -495,7 +492,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
 
 // MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
 // applied at most the provided maximum number of times.
 // When more matches are possible, these will be included in the output.
 // Zero matches are considered a successful match.
 func MatchMax(max int, handler TokenHandler) TokenHandler {
     if max < 0 {
@ -535,20 +532,22 @@ func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandl
     callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
 }
 return func(t *TokenAPI) bool {
-    child := t.Fork()
     total := 0
     // Check for the minimum required amount of matches.
     for total < min {
         total++
+        child := t.Fork()
         if !handler(child) {
             return false
         }
+        child.Merge()
     }
-    // No specified max: include the rest of the available matches.
-    child.Merge()
+    // Specified max: include the rest of the available matches, up to the max.
+    //child.Merge()
     for max < 0 || total < max {
         total++
+        child := t.Fork()
         if !handler(child) {
             break
         }
@ -378,6 +378,19 @@ func TestTokenMakers(t *testing.T) {
     })
 }
 
+func TestSyntacticSugar(t *testing.T) {
+    var a = parsekit.A
+    parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
+        {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
+        {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
+        {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
+        {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
+        {"cccccc", a.Rune('c').Optional(), true, "c"},
+        {"dddddd", a.Rune('c').Optional(), true, ""},
+        {"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
+    })
+}
+
 func TestSequenceOfRunes(t *testing.T) {
     var c, a = parsekit.C, parsekit.A
     sequence := c.Seq(
tokenizer.go (30 lines added)
@ -18,6 +18,36 @@ type Tokenizer struct {
 // for retrieving input data to match against and for reporting back results.
 type TokenHandler func(t *TokenAPI) bool
 
+// Or is syntactic sugar that allows you to write a construction like
+// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
+func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler {
+    return MatchAny(handler, otherHandler)
+}
+
+// Times is syntactic sugar that allows you to write a construction like
+// MatchRep(3, handler) as handler.Times(3).
+func (handler TokenHandler) Times(n int) TokenHandler {
+    return MatchRep(n, handler)
+}
+
+// Then is syntactic sugar that allows you to write a construction like
+// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
+func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler {
+    return MatchSeq(handler, otherHandler)
+}
+
+// SeparatedBy is syntactic sugar that allows you to write a construction like
+// MatchSeparated(handler, separator) as handler.SeparatedBy(separator).
+func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler {
+    return MatchSeparated(separatorHandler, handler)
+}
+
+// Optional is syntactic sugar that allows you to write a construction like
+// MatchOpt(handler) as handler.Optional().
+func (handler TokenHandler) Optional() TokenHandler {
+    return MatchOpt(handler)
+}
+
 // NewTokenizer instantiates a new Tokenizer.
 //
 // This is a simple wrapper around a TokenHandler function. It can be used to
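The five methods above are thin wrappers around the existing Match* combinators, so they can be mixed freely with the C/A/M/T style used elsewhere in this commit. A hedged usage sketch that combines them; the SeparatedBy() case mirrors the new TestSyntacticSugar case earlier in this diff (import path assumed):

package main

import (
    "fmt"

    parsekit "git.makaay.nl/mauricem/go-parsekit" // assumed import path
)

func main() {
    var a = parsekit.A

    // Comma-separated ASCII runes, with optional spaces around the commas.
    separator := a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())
    list := a.ASCII.SeparatedBy(separator)
    result, err := parsekit.NewTokenizer(list).Execute("a,b ,c, d|")
    fmt.Println(result, err) // per the new test: should match "a,b ,c, d"

    // Four letters in a row, upper or lower case.
    fourLetters := a.ASCIILower.Or(a.ASCIIUpper).Times(4)
    result, err = parsekit.NewTokenizer(fourLetters).Execute("GoLang rocks")
    fmt.Println(result, err) // should match the first four letters: "GoLa"
}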