go-parsekit/tokenizer.go

78 lines
2.9 KiB
Go

package parsekit
// Tokenizer is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Tokenizer can be instantiated using the parsekit.NewTokenizer()
// function.
type Tokenizer struct {
// parser is the Parser that NewTokenizer() builds around the TokenHandler.
parser *Parser
// result is the TokenHandlerResult that the parse handler stores when the
// TokenHandler matched the input. It is the value handed out by Execute().
result *TokenHandlerResult
}
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
//
// The methods Or, Times, Then, SeparatedBy and Optional on this type are
// shorthands for combining TokenHandlers through the Match* functions.
type TokenHandler func(t *TokenAPI) bool
// Or combines this TokenHandler with otherHandler through MatchAny.
//
// It is a shorthand notation: tokenHandler1.Or(tokenHandler2) builds the same
// TokenHandler as MatchAny(tokenHandler1, tokenHandler2).
func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler {
	either := MatchAny(handler, otherHandler)
	return either
}
// Times wraps this TokenHandler in MatchRep, using n as the repetition count.
//
// It is a shorthand notation: handler.Times(3) builds the same TokenHandler
// as MatchRep(3, handler).
func (handler TokenHandler) Times(n int) TokenHandler {
	repeated := MatchRep(n, handler)
	return repeated
}
// Then combines this TokenHandler with otherHandler through MatchSeq, with
// this TokenHandler as the first argument and otherHandler as the second.
//
// It is a shorthand notation that can be chained, for example
// handler1.Then(handler2).Then(handler3). Each call to Then wraps the
// handlers built so far in a new MatchSeq call.
func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler {
	sequence := MatchSeq(handler, otherHandler)
	return sequence
}
// SeparatedBy combines this TokenHandler with separatorHandler through
// MatchSeparated.
//
// It is a shorthand notation: handler.SeparatedBy(separator) builds the same
// TokenHandler as MatchSeparated(separator, handler). Mind the argument
// order: the separator is passed to MatchSeparated first, followed by the
// handler on which SeparatedBy is called.
func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler {
	separated := MatchSeparated(separatorHandler, handler)
	return separated
}
// Optional wraps this TokenHandler in MatchOpt.
//
// It is a shorthand notation: handler.Optional() builds the same TokenHandler
// as MatchOpt(handler).
func (handler TokenHandler) Optional() TokenHandler {
	optional := MatchOpt(handler)
	return optional
}
// NewTokenizer creates a Tokenizer for the provided TokenHandler.
//
// The Tokenizer is a thin wrapper: it sets up a Parser with a single parse
// handler that tries to accept the TokenHandler. This makes it possible to
// match an input against one TokenHandler function and to retrieve the
// results in a straightforward way, by calling Execute() on the Tokenizer.
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
	t := new(Tokenizer)
	t.parser = NewParser(func(p *ParseAPI) {
		// No match: report this through the ParseAPI, using an empty
		// expectation description, and leave the stored result untouched.
		if !p.Accept(tokenHandler) {
			p.Expected("")
			return
		}
		// Match: keep the result around for Execute() and stop the parser.
		t.result = p.Result()
		p.Stop()
	})
	return t
}
// Execute feeds the input to the wrapped TokenHandler function.
// For an overview of allowed inputs, take a look at the documentation for
// parsekit.reader.New().
//
// It returns the TokenHandlerResult that was stored for this run, together
// with the error as returned by the underlying parser. The result is only
// set when the TokenHandler matched the input during this call; it is nil
// otherwise. When an error occurred during parsing, the error will be set,
// nil otherwise.
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *Error) {
	// A Tokenizer can be executed more than once. Drop the result of any
	// earlier run first, so a run in which the TokenHandler does not match
	// cannot hand back a stale result that belongs to a previous input.
	t.result = nil

	err := t.parser.Execute(input)
	return t.result, err
}