A big round of getting-ya-terminology-straight.

This commit is contained in:
Maurice Makaay 2019-05-26 09:25:34 +00:00
parent 2751c78003
commit c6fde2cf4e
17 changed files with 945 additions and 934 deletions

View File

@ -15,15 +15,15 @@ import (
)
// When writing a parser, it's a good start to use the parser/combinator
// functionality of parsekit to create some Matcher functions. These functions
// functionality of parsekit to create some TokenHandler functions. These functions
// can later be used in the parser state machine to check for matching strings
// on the input data.
//
// For the calculator, we only need a definition of "number, surrounded by
// optional whitespace". Skipping whitespace could be a part of the StateHandler
// functions below too, but including it in a Matcher makes things really
// functions below too, but including it in a TokenHandler makes things really
// practical.
func createNumberMatcher() parsekit.Matcher {
func createNumberMatcher() parsekit.TokenHandler {
// Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M
@ -43,17 +43,17 @@ const (
// We also need to define the state machine for parsing the input.
// The state machine is built up from functions that match the StateHandler
// signature: func(*parsekit.P)
// The P struct holds the internal state for the parser and it provides
// signature: func(*parsekit.ParseAPI)
// The ParseAPI struct holds the internal state for the parser and it provides
// some methods that form the API for your StateHandler implementation.
// State: expect a number. When a number is found on the input,
// it is accepted in the output buffer, after which the output buffer is
// it is accepted in the parser's string buffer, after which that buffer is
// emitted as a numberType item. Then we tell the state machine to continue
// with the calcWaitForOperatorOrEndOfInput state.
// When no number is found, the parser will emit an error, explaining that
// "a number" was expected.
func calcWaitForNumber(p *parsekit.P) {
func calcWaitForNumber(p *parsekit.ParseAPI) {
p.Expects("a number")
if p.On(calcNumber).Accept() {
p.EmitLiteral(numberType)
@ -61,13 +61,13 @@ func calcWaitForNumber(p *parsekit.P) {
}
}
// State: expect a plus or minus operator. When one of those
// is found, the appropriate Item is emitted and the parser is sent back
// to the numberHandler to find the next number on the input.
// When no operator is found, then the parser is told to expect the end of
// the input. When more input data is available (which is obviously wrong
// data since it does not match our syntax), the parser will emit an error.
func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
// State: expect a plus or minus operator. When one of those is found, the
// appropriate Item is emitted and the parser is sent back to the
// numberHandler to find the next number on the input. When no operator is
// found, then the parser is told to expect the end of the input. When more
// input data are available (which are obviously wrong data since they do
// not match our syntax), the parser will emit an error.
func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) {
switch {
case p.On(a.Plus).Accept():
p.EmitLiteral(addType)
@ -81,18 +81,20 @@ func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
}
// All is ready for our parser. We now can create a new Parser struct.
// We need to tell it what the start state is. In our case, it is the
// We need to tell it what StateHandler to start with. In our case, it is the
// calcWaitForNumber state, since the calculation must start with a number.
var calcParser = parsekit.NewParser(calcWaitForNumber)
func Example_basicCalculator() {
// Let's feed the parser some input to work with.
// Let's feed the parser some input to work with. This provides us with
// a parse run for that input.
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
// We can now step through the results of the parsing process by repeated
// calls to run.Next(). Next() returns either the next parse item, a parse
// error or an end of file. Let's dump the parse results and handle the
// computation while we're at it.
// TODO this is convoluted for people using the parser code I think. Maybe use three output data types instead?
sum := 0
op := +1
for {

View File

@ -1,5 +1,5 @@
// In this example, a parser is created which can parse and normalize Dutch postcodes
// The implementation uses only a Matcher function and does not implement a
// In this example, a Parser is created which can parse and normalize Dutch postcodes.
// The implementation uses only TokenHandler functions and does not implement a
// full-fledged state-based Parser for it.
package parsekit_test
@ -9,11 +9,11 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func createPostcodeMatcher() *parsekit.MatcherWrapper {
func createPostcodeMatcher() *parsekit.Matcher {
// Easy access to the parsekit definitions.
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Matcher functions are created and combined to satisfy these rules:
// TokenHandler functions are created and combined to satisfy these rules:
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
// - The first digit is never a zero.
// - A space between letters and digits is optional.
@ -26,6 +26,8 @@ func createPostcodeMatcher() *parsekit.MatcherWrapper {
space := m.Replace(c.Opt(a.Whitespace), " ")
postcode := c.Seq(pcDigits, space, pcLetters)
// Create a Matcher, which wraps the 'postcode' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(postcode, "a Dutch postcode")
}

View File

@ -1,9 +1,9 @@
// In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it.
//
// The implementation uses only a Matcher function and does not implement a
// full-fledged state-based Parser for it. If you want to see the same kind of
// functionality, implemented using a Parser, take a look at the
// The implementation uses only parser/combinator TokenHandler functions and does
// not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implemented using a Parser, take a look at the
// HelloWorldUsingParser example.
package parsekit_test
@ -13,12 +13,12 @@ import (
"git.makaay.nl/mauricem/go-parsekit"
)
func createHelloMatcher() *parsekit.MatcherWrapper {
func createHelloMatcher() *parsekit.Matcher {
// Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M
// Using the parser/combinator support of parsekit, we create a Matcher function
// that does all the work. The 'greeting' Matcher matches the whole input and
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := c.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
@ -26,7 +26,8 @@ func createHelloMatcher() *parsekit.MatcherWrapper {
name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
// Using 'greeting' we can now create the Matcher-based parser.
// Create a Matcher, which wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewMatcher(greeting, "a friendly greeting")
}

View File

@ -2,14 +2,14 @@
// like "Hello, <name>!", and that extracts the name from it.
//
// This implementation uses a state-based Parser for it, and it does not implement
// any custom combinator/parser Matcher functions. Note that things are much easier to
// implement using custom Matchers (see the other HelloWorldUsingMatcher example
// for this). Doing this fully parser-based implementation is mainly for your
// any custom parser/combinator TokenHandler functions. Note that things are much
// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher
// example for this). Doing this fully parser-based implementation is mainly for your
// learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that the
// state-based parser reports errors much more fine-grained. This might or might
// not be useful for your specific application.
// not be useful for your specific use case.
package parsekit_test
import (
@ -21,7 +21,7 @@ import (
const greeteeItem parsekit.ItemType = 1
func stateStartOfGreeting(p *parsekit.P) {
func stateStartOfGreeting(p *parsekit.ParseAPI) {
c := parsekit.C
p.Expects("hello")
if p.On(c.StrNoCase("hello")).Skip() {
@ -29,7 +29,7 @@ func stateStartOfGreeting(p *parsekit.P) {
}
}
func stateComma(p *parsekit.P) {
func stateComma(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("comma")
switch {
@ -40,7 +40,7 @@ func stateComma(p *parsekit.P) {
}
}
func stateName(p *parsekit.P) {
func stateName(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("name")
switch {
@ -51,7 +51,7 @@ func stateName(p *parsekit.P) {
}
}
func stateEndOfGreeting(p *parsekit.P) {
func stateEndOfGreeting(p *parsekit.ParseAPI) {
p.Expects("end of greeting")
if p.On(a.EndOfFile).Stay() {
name := strings.TrimSpace(p.BufLiteral())

View File

@ -28,7 +28,7 @@ func ExampleItem() {
// the p.Emit* methods on parsekit.ParseAPI.
// When errors occur, or the end of the file is reached, then the built-in
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
stateHandler := func(p *parsekit.P) {
stateHandler := func(p *parsekit.ParseAPI) {
if p.On(c.Str("question")).Accept() {
p.EmitLiteral(QuestionItem)
}
@ -99,14 +99,14 @@ func ExampleMatchAnyRune() {
// Easy access to the parsekit definitions.
a := parsekit.A
handler := func(p *parsekit.P) {
stateHandler := func(p *parsekit.ParseAPI) {
p.Expects("Any valid rune")
if p.On(a.AnyRune).Accept() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}
}
parser := parsekit.NewParser(handler)
parser := parsekit.NewParser(stateHandler)
run := parser.Parse("¡Any / valid / character will dö!")
for i := 0; i < 5; i++ {

View File

@ -1,187 +0,0 @@
package parsekit
import (
"fmt"
)
// Matcher is the function type that must be implemented to create a function
// that can be used in conjunction with parsekit.P.On() or
// parsekit.NewMatcher(). Its purpose is to check if input data matches some
// kind of pattern and to report back the match.
//
// A Matcher function gets a MatchDialog as its input and returns a boolean to
// indicate whether or not the Matcher found a match on the input.
// The MatchDialog is used for retrieving input data to match against
// and for reporting back results.
type Matcher func(m *MatchDialog) bool
// MatchDialog is used by Matcher functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the Matcher function can call
// the MatchDialog.NextRune() method.
//
// The Matcher function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
// the rune is added to the output of the MatchDialog. When using
// MatchDialog.Skip(), the rune will not be added to the output. It is
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
// a rune, before calling NextRune() again.
//
// Eventually, the Matcher function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the MatchDialog's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a Matcher function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the MatchDialog must be brought back to the original state.
//
// The way in which this is supported, is by forking a MatchDialog by calling
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
// output buffer, but using the same input offset as the forked parent.
//
// The Matcher function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the output
// buffer. When the Matcher function decides that the lookahead was successful,
// then the method MatchDialog.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input offset to that of the child.
//
// When the Matcher function decides that the lookahead was unsuccessful, then
// it can simply discard the forked child. The parent MatchDialog was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type MatchDialog struct {
p *P // parser state, used to retrieve input data to match against (TODO should be interface)
inputOffset int // the byte offset into the input
input []rune // a slice of runes that represents the retrieved input runes for the Matcher
output []rune // a slice of runes that represents the accepted output runes for the Matcher
currRune *runeToken // hold the last rune that was read from the input
parent *MatchDialog // the parent MatchDialog, in case this one was forked
}
// runeToken holds the result of a single read from the input, as stored by
// MatchDialog.NextRune() until the rune is accepted or skipped.
type runeToken struct {
Rune rune // the rune that was read from the input
ByteSize int // the size of the rune; Accept() and Skip() advance the input offset by this amount
OK bool // false when the read did not yield a usable rune (NextRune documents this as invalid UTF8 or end of file)
}
// NextRune reads the rune at the current input offset and returns it,
// together with a boolean that tells whether the read was successful.
// The boolean is false when an invalid UTF8 rune or the end of the file
// was encountered.
//
// A successfully read rune is remembered as the current rune, and it is
// recorded in the MatchDialog's input. The caller must then decide what
// to do with it: Accept() adds the rune to the resulting output, Skip()
// ignores it. This gives a Matcher full control over what runes are
// significant for its resulting output.
//
// NextRune() panics when it is called again before the previously read
// rune has been accepted or skipped.
func (m *MatchDialog) NextRune() (rune, bool) {
	if m.currRune != nil {
		panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
	}

	char, size, valid := m.p.peek(m.inputOffset)
	m.currRune = &runeToken{Rune: char, ByteSize: size, OK: valid}
	if !valid {
		return char, false
	}

	m.input = append(m.input, char)
	return char, true
}
// Fork creates a child MatchDialog that starts at the same input offset as
// its parent, refers to the same parser state, and has no input, output or
// current rune of its own.
//
// A Matcher function can work on the forked child without affecting the
// parent MatchDialog, which is useful when some form of lookahead is
// required. When the lookahead succeeds, calling child.Merge() adds the
// child's results to the parent. When it fails, the child can simply be
// discarded.
//
// Example case: a Matcher checks for the sequence of runes 'a', 'b', 'c', 'd'.
// Only after all four runes have been seen can the Matcher confirm a
// successful match. A naive implementation could look like this:
//
//	func MatchAbcd(m *MatchDialog) bool {
//		child := m.Fork() // fork to keep m untouched
//		for _, letter := range []rune{'a', 'b', 'c', 'd'} {
//			if r, ok := child.NextRune(); !ok || r != letter {
//				return false // report mismatch, m is left untouched
//			}
//			child.Accept() // add rune to child output
//		}
//		child.Merge() // we have a match, add resulting output to parent
//		return true   // and report the successful match
//	}
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{
		parent:      m,
		p:           m.p,
		inputOffset: m.inputOffset,
	}
}
// Accept appends the rune that was last read by NextRune() to the resulting
// output of the MatchDialog and moves the input offset past that rune.
// Afterwards, NextRune() can be called again.
func (m *MatchDialog) Accept() {
	m.checkAllowedCall("Accept()")

	accepted := m.currRune
	m.currRune = nil
	m.inputOffset += accepted.ByteSize
	m.output = append(m.output, accepted.Rune)
}
// Skip moves the input offset past the rune that was last read by NextRune(),
// without adding that rune to the resulting output. Afterwards, NextRune()
// can be called again.
func (m *MatchDialog) Skip() {
	m.checkAllowedCall("Skip()")

	skipped := m.currRune
	m.currRune = nil
	m.inputOffset += skipped.ByteSize
}
// checkAllowedCall guards Accept() and Skip(): it panics when there is no
// current rune (no preceding NextRune() call), or when the preceding
// NextRune() call did not yield a usable rune. The name argument is the
// display name of the calling method, used in the panic message.
func (m *MatchDialog) checkAllowedCall(name string) {
	if m.currRune == nil {
		panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
	}
	if !m.currRune.OK {
		panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
	}
}
// Merge merges the results from a forked child MatchDialog back into its
// parent: the input and output runes of the child are appended to those of
// the parent, and the parent's input offset is set to the child's offset.
//
// After the merge, the child's input and output are cleared, so the child can
// immediately be reused for performing another match. Its input offset is
// kept at its current position.
//
// Merge always returns true, which allows a Matcher to end with
// "return child.Merge()". It panics when called on a MatchDialog that was not
// created through Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("internal parser error: Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.input = append(m.parent.input, m.input...)
	m.parent.output = append(m.parent.output, m.output...)
	m.parent.inputOffset = m.inputOffset
	m.ClearOutput()
	m.ClearInput()
	return true
}
// ClearOutput resets the resulting output of the MatchDialog to an empty
// slice. The input and the input offset are left as they are.
func (m *MatchDialog) ClearOutput() {
	m.output = make([]rune, 0)
}
// ClearInput resets the recorded input of the MatchDialog to an empty slice.
// The output and the input offset are left as they are.
func (m *MatchDialog) ClearInput() {
	m.input = make([]rune, 0)
}

View File

@ -1,559 +0,0 @@
package parsekit
import (
"fmt"
"strings"
"unicode"
)
// C provides convenient access to a range of parser/combinators
// that can be used to construct Matcher functions.
//
// When using C in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Note that Separated takes the separator first and the separated Matcher
// second, just like MatchSeparated, which it refers to.
var C = struct {
	Rune       func(rune) Matcher
	Runes      func(...rune) Matcher
	RuneRange  func(rune, rune) Matcher
	Str        func(string) Matcher
	StrNoCase  func(string) Matcher
	Any        func(...Matcher) Matcher
	Not        func(Matcher) Matcher
	Opt        func(Matcher) Matcher
	Seq        func(...Matcher) Matcher
	Rep        func(int, Matcher) Matcher
	Min        func(int, Matcher) Matcher
	Max        func(int, Matcher) Matcher
	ZeroOrMore func(Matcher) Matcher
	OneOrMore  func(Matcher) Matcher
	MinMax     func(int, int, Matcher) Matcher
	Separated  func(separator Matcher, separated Matcher) Matcher
	Except     func(except Matcher, matcher Matcher) Matcher
}{
	Rune:       MatchRune,
	Runes:      MatchRunes,
	RuneRange:  MatchRuneRange,
	Str:        MatchStr,
	StrNoCase:  MatchStrNoCase,
	Opt:        MatchOpt,
	Any:        MatchAny,
	Not:        MatchNot,
	Seq:        MatchSeq,
	Rep:        MatchRep,
	Min:        MatchMin,
	Max:        MatchMax,
	ZeroOrMore: MatchZeroOrMore,
	OneOrMore:  MatchOneOrMore,
	MinMax:     MatchMinMax,
	Separated:  MatchSeparated,
	Except:     MatchExcept,
}
// MatchRune creates a Matcher function that reads the next rune from the
// input and accepts it when it is equal to the expected rune.
func MatchRune(expected rune) Matcher {
	return func(m *MatchDialog) bool {
		if r, ok := m.NextRune(); !ok || r != expected {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRunes creates a Matcher function that reads the next rune from the
// input and accepts it when it is one of the expected runes.
func MatchRunes(expected ...rune) Matcher {
	allowed := string(expected)
	return func(m *MatchDialog) bool {
		r, ok := m.NextRune()
		if !ok || !strings.ContainsRune(allowed, r) {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRuneRange creates a Matcher function that checks if the next rune
// from the input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
//
//	MatchRuneRange('g', 'k')
//
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// When end is smaller than start, the created Matcher panics when it is
// invoked.
func MatchRuneRange(start rune, end rune) Matcher {
	return func(m *MatchDialog) bool {
		if end < start {
			panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be > end %q", start, end))
		}
		input, ok := m.NextRune()
		if ok && input >= start && input <= end {
			m.Accept()
			return true
		}
		return false
	}
}
// MatchStr creates a Matcher that checks if the upcoming runes on the input
// are exactly the runes of the expected string. It is built as a sequence of
// single rune Matchers, one for every rune in the string.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) Matcher {
	// The byte length is an upper bound for the number of runes.
	steps := make([]Matcher, 0, len(expected))
	for _, r := range expected {
		steps = append(steps, MatchRune(r))
	}
	return MatchSeq(steps...)
}
// MatchStrNoCase creates a Matcher that checks if the upcoming runes on the
// input are the runes of the expected string, ignoring case. It is built as
// a sequence of Matchers that each accept either the upper case or the lower
// case variant of one rune from the string.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) Matcher {
	// The byte length is an upper bound for the number of runes.
	steps := make([]Matcher, 0, len(expected))
	for _, r := range expected {
		steps = append(steps, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSeq(steps...)
}
// MatchOpt creates a Matcher that makes the provided Matcher optional.
// The provided Matcher is tried on a fork of the MatchDialog. When it applies,
// its results are merged, otherwise nothing is added. In both cases, a
// successful match is reported.
func MatchOpt(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if attempt := m.Fork(); matcher(attempt) {
			attempt.Merge()
		}
		return true
	}
}
// MatchSeq creates a Matcher that applies the provided Matchers one after
// the other, in the order in which they were provided. The sequence reports
// a successful match only when every one of the Matchers applies. On a
// mismatch, the MatchDialog that was passed in is left untouched, since all
// work is done on a fork.
func MatchSeq(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		child := m.Fork()
		for i := range matchers {
			if applies := matchers[i](child); !applies {
				return false
			}
		}
		child.Merge()
		return true
	}
}
// MatchAny creates a Matcher that tries the provided Matchers in the order
// in which they were provided, each on a fresh fork of the MatchDialog.
// The results of the first Matcher that applies are merged and reported
// back as the match. When none of them applies, a mismatch is reported.
func MatchAny(matchers ...Matcher) Matcher {
	return func(m *MatchDialog) bool {
		for i := range matchers {
			attempt := m.Fork()
			if !matchers[i](attempt) {
				continue
			}
			attempt.Merge()
			return true
		}
		return false
	}
}
// MatchNot creates a Matcher that inverts the provided Matcher. The provided
// Matcher is probed on a fork of the MatchDialog. When it applies, a mismatch
// is reported. When it does not apply, the next rune is read from the input
// and, if that read is successful, accepted and reported as the match.
func MatchNot(matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		if matcher(m.Fork()) {
			return false
		}
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}
// MatchRep creates a Matcher that checks if the provided Matcher can be
// applied exactly the provided number of times.
//
// Note that the input is allowed to contain more matches for the provided
// Matcher than requested, e.g.:
//
//	MatchRep(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" of the input.
func MatchRep(times int, matcher Matcher) Matcher {
	lower, upper := times, times
	return matchMinMax(lower, upper, matcher)
}
// MatchMin creates a Matcher that checks if the provided Matcher can be
// applied at least min times. There is no upper limit: when more matches
// are possible, these are included in the output as well.
func MatchMin(min int, matcher Matcher) Matcher {
	const unlimited = -1
	return matchMinMax(min, unlimited, matcher)
}
// MatchMax creates a Matcher that applies the provided Matcher at most max
// times. When more matches are possible on the input, these are left
// unmatched and are not included in the output.
// Zero matches are considered a successful match.
func MatchMax(max int, matcher Matcher) Matcher {
	const none = 0
	return matchMinMax(none, max, matcher)
}
// MatchZeroOrMore creates a Matcher that applies the provided Matcher as
// often as possible. All matches are included in the output. Finding no
// matches at all is considered a successful match too.
func MatchZeroOrMore(matcher Matcher) Matcher {
	const (
		none      = 0
		unlimited = -1
	)
	return matchMinMax(none, unlimited, matcher)
}
// MatchOneOrMore creates a Matcher that applies the provided Matcher as
// often as possible, requiring at least one match. All matches are included
// in the output.
func MatchOneOrMore(matcher Matcher) Matcher {
	const (
		once      = 1
		unlimited = -1
	)
	return matchMinMax(once, unlimited, matcher)
}
// MatchMinMax creates a Matcher that checks if the provided Matcher can be
// applied at least min and at most max times (both inclusive). All matches
// are included in the output.
//
// Both min and max must be zero or higher. MatchMinMax panics right away
// when one of them is negative.
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
	switch {
	case max < 0:
		panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
	case min < 0:
		panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
	}
	return matchMinMax(min, max, matcher)
}
// matchMinMax is the shared implementation behind MatchRep, MatchMin,
// MatchMax, MatchZeroOrMore, MatchOneOrMore and MatchMinMax.
//
// The created Matcher first requires min successful applications of the
// provided Matcher. After that, it keeps applying the Matcher for as long as
// it matches, up to max applications in total. A negative max means that
// there is no upper limit.
//
// The created Matcher panics on invocation when a non-negative max is
// smaller than min.
func matchMinMax(min int, max int, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		child := m.Fork()
		if max >= 0 && min > max {
			panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
		}
		total := 0
		// Check for the minimum required amount of matches.
		for total < min {
			total++
			if !matcher(child) {
				return false
			}
		}
		// No specified max: include the rest of the available matches.
		// Specified max: include the rest of the available matches, up to the max.
		child.Merge()
		for max < 0 || total < max {
			total++
			offsetBefore := child.inputOffset
			if !matcher(child) {
				break
			}
			child.Merge()
			// A Matcher that succeeds without consuming any input (e.g. an
			// optional or an end-of-file match) would succeed again at the
			// same offset forever. Without an upper limit, that would make
			// this loop never terminate, so stop after such a match.
			if max < 0 && child.inputOffset == offsetBefore {
				break
			}
		}
		return true
	}
}
// MatchSeparated creates a Matcher that checks for a pattern of one or more
// matches of one Matcher (the separated), with a match of another Matcher
// (the separator) between every two of them. All matches (separated and
// separator) are included in the output.
//
// Note the argument order: the separator comes first.
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
	next := MatchSeq(separator, separated)
	rest := MatchZeroOrMore(next)
	return MatchSeq(separated, rest)
}
// MatchExcept creates a Matcher that applies the provided matcher, unless
// the except Matcher applies to the upcoming input. The except Matcher is
// probed first, on a fork of the MatchDialog. When it applies, the match as
// a whole is treated as a mismatch. Otherwise, the outcome of the provided
// matcher is reported.
func MatchExcept(except Matcher, matcher Matcher) Matcher {
	return func(m *MatchDialog) bool {
		return !except(m.Fork()) && matcher(m)
	}
}
// A provides convenient access to a range of atoms that can be used to
// build combinators or parsing rules.
//
// In parsekit, an atom is defined as a ready to go Matcher function.
//
// When using A in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Newline matches a single "\r\n" or "\n", the same newline forms that are
// recognized by WhitespaceAndNewlines and EndOfLine.
var A = struct {
	EndOfFile             Matcher
	AnyRune               Matcher
	Space                 Matcher
	Tab                   Matcher
	CR                    Matcher
	LF                    Matcher
	CRLF                  Matcher
	Excl                  Matcher
	DoubleQuote           Matcher
	Hash                  Matcher
	Dollar                Matcher
	Percent               Matcher
	Amp                   Matcher
	SingleQuote           Matcher
	RoundOpen             Matcher
	RoundClose            Matcher
	Asterisk              Matcher
	Plus                  Matcher
	Comma                 Matcher
	Minus                 Matcher
	Dot                   Matcher
	Slash                 Matcher
	Colon                 Matcher
	Semicolon             Matcher
	AngleOpen             Matcher
	Equal                 Matcher
	AngleClose            Matcher
	Question              Matcher
	At                    Matcher
	SquareOpen            Matcher
	Backslash             Matcher
	SquareClose           Matcher
	Caret                 Matcher
	Underscore            Matcher
	Backquote             Matcher
	CurlyOpen             Matcher
	Pipe                  Matcher
	CurlyClose            Matcher
	Tilde                 Matcher
	Newline               Matcher
	Whitespace            Matcher
	WhitespaceAndNewlines Matcher
	EndOfLine             Matcher
	Digit                 Matcher
	ASCII                 Matcher
	ASCIILower            Matcher
	ASCIIUpper            Matcher
	HexDigit              Matcher
}{
	EndOfFile:   MatchEndOfFile(),
	AnyRune:     MatchAnyRune(),
	Space:       C.Rune(' '),
	Tab:         C.Rune('\t'),
	CR:          C.Rune('\r'),
	LF:          C.Rune('\n'),
	CRLF:        C.Str("\r\n"),
	Excl:        C.Rune('!'),
	DoubleQuote: C.Rune('"'),
	Hash:        C.Rune('#'),
	Dollar:      C.Rune('$'),
	Percent:     C.Rune('%'),
	Amp:         C.Rune('&'),
	SingleQuote: C.Rune('\''),
	RoundOpen:   C.Rune('('),
	RoundClose:  C.Rune(')'),
	Asterisk:    C.Rune('*'),
	Plus:        C.Rune('+'),
	Comma:       C.Rune(','),
	Minus:       C.Rune('-'),
	Dot:         C.Rune('.'),
	Slash:       C.Rune('/'),
	Colon:       C.Rune(':'),
	Semicolon:   C.Rune(';'),
	AngleOpen:   C.Rune('<'),
	Equal:       C.Rune('='),
	AngleClose:  C.Rune('>'),
	Question:    C.Rune('?'),
	At:          C.Rune('@'),
	SquareOpen:  C.Rune('['),
	Backslash:   C.Rune('\\'),
	SquareClose: C.Rune(']'),
	Caret:       C.Rune('^'),
	Underscore:  C.Rune('_'),
	Backquote:   C.Rune('`'),
	CurlyOpen:   C.Rune('{'),
	Pipe:        C.Rune('|'),
	CurlyClose:  C.Rune('}'),
	Tilde:       C.Rune('~'),
	// Newline was declared without a value before, which left it a nil
	// Matcher that panics when called.
	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile creates a Matcher that checks if the end of the input data
// has been reached. The check is done on a fork of the MatchDialog, so this
// Matcher never produces output. It only reports a successful or a failing
// match through its boolean return value.
func MatchEndOfFile() Matcher {
	return func(m *MatchDialog) bool {
		r, ok := m.Fork().NextRune()
		if ok {
			return false
		}
		return r == eofRune
	}
}
// MatchAnyRune creates a Matcher function that accepts the next rune from
// the input, whatever it is. A mismatch is only reported when no rune could
// be read, which NextRune() documents as the end of the input or invalid
// UTF8.
func MatchAnyRune() Matcher {
	return func(m *MatchDialog) bool {
		if _, ok := m.NextRune(); !ok {
			return false
		}
		m.Accept()
		return true
	}
}
// M provides convenient access to a range of modifiers that can be
// used when creating Matcher functions.
//
// In parsekit, a modifier is defined as a Matcher function that modifies the
// resulting output of another Matcher in some way. It does not do any matching
// against input of its own.
//
// When using M in your own parser, then it is advised to create
// a variable in your own package to reference it:
//
//	var m = parsekit.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
Drop func(Matcher) Matcher // discards the output of the Matcher
Trim func(Matcher, string) Matcher // trims cutset characters from both sides of the output
TrimLeft func(Matcher, string) Matcher // trims cutset characters from the left of the output
TrimRight func(Matcher, string) Matcher // trims cutset characters from the right of the output
ToLower func(Matcher) Matcher // converts the output to lower case
ToUpper func(Matcher) Matcher // converts the output to upper case
Replace func(Matcher, string) Matcher // replaces the output with the provided string
ModifyByCallback func(Matcher, func(string) string) Matcher // replaces the output with the callback's return value
}{
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ModifyByCallback: ModifyByCallback,
}
// ModifyDrop creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is discarded completely.
//
// Note that a mismatch is still reported back when the Matcher does not
// apply, even though its output would have been dropped anyway. So to drop
// optional whitespace, use something like:
//
//	M.Drop(C.Opt(A.Whitespace))
//
// instead of:
//
//	M.Drop(A.Whitespace)
//
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(matcher Matcher) Matcher {
	discard := func(string) string { return "" }
	return ModifyByCallback(matcher, discard)
}
// ModifyTrim creates a Matcher that checks if the provided Matcher applies.
// If it does, then the characters from the provided cutset are trimmed from
// both the left and the right side of its output.
// TODO move cutset to the left arg
func ModifyTrim(matcher Matcher, cutset string) Matcher {
	const left, right = true, true
	return modifyTrim(matcher, cutset, left, right)
}
// ModifyTrimLeft creates a Matcher that checks if the provided Matcher
// applies. If it does, then the characters from the provided cutset are
// trimmed from the left side of its output.
func ModifyTrimLeft(matcher Matcher, cutset string) Matcher {
	const left, right = true, false
	return modifyTrim(matcher, cutset, left, right)
}
// ModifyTrimRight creates a Matcher that checks if the provided Matcher
// applies. If it does, then the characters from the provided cutset are
// trimmed from the right side of its output.
func ModifyTrimRight(matcher Matcher, cutset string) Matcher {
	const left, right = false, true
	return modifyTrim(matcher, cutset, left, right)
}
// modifyTrim is the shared implementation behind ModifyTrim, ModifyTrimLeft
// and ModifyTrimRight. The trimLeft and trimRight flags select the side(s) of
// the matched output from which the characters in cutset are stripped.
func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) Matcher {
	return ModifyByCallback(matcher, func(output string) string {
		trimmed := output
		if trimLeft {
			trimmed = strings.TrimLeft(trimmed, cutset)
		}
		if trimRight {
			trimmed = strings.TrimRight(trimmed, cutset)
		}
		return trimmed
	})
}
// ModifyToUpper creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and converted into upper case
// (using strings.ToUpper).
func ModifyToUpper(matcher Matcher) Matcher {
return ModifyByCallback(matcher, strings.ToUpper)
}
// ModifyToLower creates a Matcher that checks if the provided Matcher applies.
// If it does, then its output is taken and converted into lower case
// (using strings.ToLower).
func ModifyToLower(matcher Matcher) Matcher {
return ModifyByCallback(matcher, strings.ToLower)
}
// ModifyReplace wraps the provided Matcher in a Matcher that, on a successful
// match, substitutes the provided string s for whatever output was matched.
func ModifyReplace(matcher Matcher, s string) Matcher {
	substitute := func(string) string { return s }
	return ModifyByCallback(matcher, substitute)
}
// ModifyByCallback wraps the provided Matcher in a Matcher that rewrites the
// output of a successful match.
//
// The wrapped matcher is run against a fork of the MatchDialog. When it
// applies, its output is passed as a string to modfunc, the return value of
// modfunc replaces that output, and the fork is merged back into its parent.
// When the wrapped matcher does not apply, the fork is dropped and a
// mismatch is reported.
func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
	return func(m *MatchDialog) bool {
		fork := m.Fork()
		if !matcher(fork) {
			return false
		}
		modified := modfunc(string(fork.output))
		fork.output = []rune(modified)
		fork.Merge()
		return true
	}
}

View File

@ -24,17 +24,16 @@ func NewParser(startState StateHandler) *Parser {
return &Parser{startState: startState}
}
// Run represents a single parse run for a Parser.
// TODO rename to ParseRun
type Run struct {
p *P // a struct holding the internal state of a parse run
// ParseRun represents a single parse run for a Parser.
type ParseRun struct {
p *ParseAPI // holds the internal state of a parse run
}
// Parse starts a parse run on the provided input data.
// To retrieve parse items from the run, make use of the Run.Next() method.
func (p *Parser) Parse(input string) *Run {
return &Run{
p: &P{
// To retrieve parser Items from the run, make use of the ParseRun.Next() method.
func (p *Parser) Parse(input string) *ParseRun {
return &ParseRun{
p: &ParseAPI{
input: input,
len: len(input),
cursorLine: 1,
@ -51,7 +50,7 @@ func (p *Parser) Parse(input string) *Run {
// On error or when successfully reaching the end of the input, false is returned.
// When an error occurred, false will be returned and the error return value will
// be set (default is nil).
func (run *Run) Next() (Item, *Error, bool) {
func (run *ParseRun) Next() (Item, *Error, bool) {
// State handling loop: we handle states, until an Item is ready to be returned.
for {
select {
@ -66,7 +65,7 @@ func (run *Run) Next() (Item, *Error, bool) {
}
}
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
switch {
case i.Type == ItemEOF:
return i, nil, false
@ -84,7 +83,7 @@ func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
// type StateHandler. This function represents the current status and
// is responsible for moving the parser to its next status, depending
// on the parsed input data.
func (run *Run) runNextStateHandler() {
func (run *ParseRun) runNextStateHandler() {
if state, ok := run.getNextStateHandler(); ok {
run.invokeNextStateHandler(state)
}
@ -115,7 +114,7 @@ func (run *Run) runNextStateHandler() {
//
// When no routing decision is provided by a StateHandler, then this is
// considered a bug in the state handler, and the parser will panic.
func (run *Run) getNextStateHandler() (StateHandler, bool) {
func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
switch {
case run.p.nextState != nil:
return run.p.nextState, true
@ -132,42 +131,45 @@ func (run *Run) getNextStateHandler() (StateHandler, bool) {
// invokeNextStateHandler moves the parser state to the provided state
// and invokes the StateHandler function.
func (run *Run) invokeNextStateHandler(state StateHandler) {
func (run *ParseRun) invokeNextStateHandler(state StateHandler) {
run.p.state = state
run.p.nextState = nil
run.p.expecting = ""
run.p.state(run.p)
}
// MatcherWrapper is the top-level struct that holds the configuration for
// a parser that is based solely on a Wrapper function.
// The MatcherWrapper can be instantiated using the parsekit.NewMatcher()
// Matcher is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Matcher can be instantiated using the parsekit.NewMatcher()
// method.
//
// To match input data against the wrapped Matcher function, use the method
// MatcherWrapper.Parse().
type MatcherWrapper struct {
// Matcher.Parse().
type Matcher struct {
parser *Parser
}
// NewMatcher instantiates a new MatcherWrapper.
// NewMatcher instantiates a new Matcher.
//
// This is a simple wrapper around a Matcher function. It can be used to
// match an input string against that Matcher function and retrieve the
// This is a simple wrapper around a TokenHandler function. It can be used to
// match an input string against that TokenHandler function and retrieve the
// results in a straight forward way.
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
handler := func(p *P) {
//
// The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
stateHandler := func(p *ParseAPI) {
p.Expects(expects)
if p.On(matcher).Accept() {
if p.On(tokenHandler).Accept() {
p.EmitLiteral(0) // ItemType is irrelevant
}
}
return &MatcherWrapper{parser: NewParser(handler)}
return &Matcher{parser: NewParser(stateHandler)}
}
// Parse runs the wrapped Matcher function against the provided input data.
func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
item, err, ok := w.parser.Parse(input).Next()
// Parse checks for a match on the provided input data.
func (m *Matcher) Parse(input string) (string, *Error, bool) {
item, err, ok := m.parser.Parse(input).Next()
if !ok {
return "", err, false
}

View File

@ -14,21 +14,21 @@ const TestItem parsekit.ItemType = 1
// Easy access to the parsekit definitions.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
type MatcherTest struct {
input string
matcher parsekit.Matcher
mustMatch bool
expected string
type TokenHandlerTest struct {
input string
tokenHandler parsekit.TokenHandler
mustMatch bool
expected string
}
func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
for _, test := range testSet {
RunMatcherTest(t, test)
RunTokenHandlerTest(t, test)
}
}
func RunMatcherTest(t *testing.T, test MatcherTest) {
output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input)
if test.mustMatch {
if !ok {

View File

@ -2,17 +2,17 @@ package parsekit
import "unicode/utf8"
// StateHandler defines the type of function that must be implemented to
// handle a parsing state.
// StateHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A StateHandler function gets a P struct as its input. This struct holds
// A StateHandler function gets a ParseAPI struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the StateHandler must use to interact with the parser.
type StateHandler func(*P)
// interface that the StateHandler uses to interact with the parser.
type StateHandler func(*ParseAPI)
// P holds the internal state of a parse run and provides an API to
// ParseAPI holds the internal state of a parse run and provides an API to
// StateHandler methods to communicate with the parser.
type P struct {
type ParseAPI struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
@ -37,7 +37,7 @@ type P struct {
// The boolean will be false in case no upcoming rune can be peeked
// (end of data or invalid UTF8 character). In this case, the returned rune
// will be one of eofRune or invalidRune.
func (p *P) peek(byteOffset int) (rune, int, bool) {
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
return handleRuneError(r, w)
}

View File

@ -4,6 +4,12 @@ import (
"fmt"
)
// Item represents an item that can be emitted from the parser.
type Item struct {
Type ItemType
Value string
}
// ItemType represents the type of a parser Item.
//
// When creating your own ItemType values, then make use of positive integer
@ -19,26 +25,14 @@ const ItemEOF ItemType = -1
// an error has occurred during parsing.
const ItemError ItemType = -2
// Item represents an item that can be emitted from the parser.
type Item struct {
Type ItemType
Value string
}
// Emit passes a Parser item to the client, including the provided string.
func (p *P) Emit(t ItemType, v string) {
func (p *ParseAPI) Emit(t ItemType, v string) {
p.items <- Item{t, v}
p.buffer.reset()
}
// EmitLiteral passes a Parser item to the client, including accumulated
// string buffer data as a literal string.
func (p *P) EmitLiteral(t ItemType) {
p.Emit(t, p.buffer.asLiteralString())
}
// BufLiteral retrieves the contents of the parser buffer (all the runes that
// were added to it using P.Accept()) as a literal string.
// BufLiteral retrieves the contents of the parser's string buffer (all the
// runes that were added to it using ParseAPI.Accept()) as a literal string.
//
// Literal means that if the input had for example the subsequent runes '\' and 'n'
// in it, then the literal string would have a backslash and an 'n' in it, not a
@ -46,12 +40,19 @@ func (p *P) EmitLiteral(t ItemType) {
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
func (p *P) BufLiteral() string {
func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString()
}
// BufInterpreted retrieves the contents of the parser buffer (all the runes that
// were added to it using P.Accept()) as an interpreted string.
// EmitLiteral passes a parser Item to the client, including the accumulated
// string buffer data as a literal string.
func (p *ParseAPI) EmitLiteral(t ItemType) {
p.Emit(t, p.BufLiteral())
}
// BufInterpreted retrieves the contents of the parser's string buffer (all
// the runes that were added to it using ParseAPI.Accept()) as an
// interpreted string.
//
// Interpreted means that the contents are treated as a Go double quoted
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
@ -64,7 +65,7 @@ func (p *P) BufLiteral() string {
//
// Retrieving the buffer contents will not affect the buffer itself. New runes can
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
func (p *P) BufInterpreted() (string, bool) {
func (p *ParseAPI) BufInterpreted() (string, bool) {
s, err := p.buffer.asInterpretedString()
if err != nil {
p.EmitError(
@ -81,16 +82,12 @@ func (p *P) BufInterpreted() (string, bool) {
// This method returns a boolean value, indicating whether or not the string
// interpretation was successful. On invalid string data, an error will
// automatically be emitted and false will be returned.
func (p *P) EmitInterpreted(t ItemType) bool {
s, err := p.buffer.asInterpretedString()
if err != nil {
p.EmitError(
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
p.buffer.asLiteralString(), err)
return false
func (p *ParseAPI) EmitInterpreted(t ItemType) bool {
if s, ok := p.BufInterpreted(); ok {
p.Emit(t, s)
return true
}
p.Emit(t, s)
return true
return false
}
// Error is used as the error type when parsing errors occur.
@ -115,15 +112,15 @@ func (err *Error) ErrorFull() string {
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column)
}
// EmitError emits a Parser error item to the client.
func (p *P) EmitError(format string, args ...interface{}) {
// EmitError emits a parser error item to the client.
func (p *ParseAPI) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...)
p.Emit(ItemError, message)
}
// UnexpectedInput is used by a StateHandler function to emit an error item
// that tells the client that an unexpected rune was encountered in the input.
func (p *P) UnexpectedInput() {
func (p *ParseAPI) UnexpectedInput() {
r, _, ok := p.peek(0)
switch {
case ok:
@ -137,7 +134,7 @@ func (p *P) UnexpectedInput() {
}
}
func fmtExpects(p *P) string {
func fmtExpects(p *ParseAPI) string {
if p.expecting == "" {
return ""
}

View File

@ -1,6 +1,6 @@
package parsekit
// Expects is used to let a state function describe what input it is expecting.
// Expects is used to let a StateHandler function describe what input it is expecting.
// This expectation is used in error messages to make them more descriptive.
//
// When defining an expectation inside a StateHandler, you do not need to
@ -13,6 +13,6 @@ package parsekit
// 2) there is an invalid UTF8 character on input
//
// 3) the end of the file was reached.
func (p *P) Expects(description string) {
func (p *ParseAPI) Expects(description string) {
p.expecting = description
}

View File

@ -1,12 +1,12 @@
package parsekit
// On checks if the input at the current cursor position matches the provided Matcher.
// On must be chained with another method, which tells the parser what action to
// perform when a match was found:
// On checks if the input at the current cursor position matches the provided
// TokenHandler. On must be chained with another method, which tells the parser
// what action to perform when a match was found:
//
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
//
// 2) On(...).Accept() - Move cursor forward, add matched runes to the string buffer.
// 2) On(...).Accept() - Move cursor forward, add runes to parser's string buffer.
//
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
//
@ -32,16 +32,16 @@ package parsekit
// p.RouteTo(stateHandlerC)
// }
//
// // When there's a "hi" on input, emit it.
// // When there's a "hi" on input, emit a parser item for it.
// if p.On(parsekit.C.Str("hi")).Accept() {
// p.Emit(SomeItemType, p.BufLiteral())
// }
func (p *P) On(matcher Matcher) *matchAction {
m := &MatchDialog{p: p}
if matcher == nil {
panic("internal parser error: matcher argument for On() is nil")
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
m := &TokenAPI{p: p}
if tokenHandler == nil {
panic("internal parser error: tokenHandler argument for On() is nil")
}
ok := matcher(m)
ok := tokenHandler(m)
// Keep track of the last match, to allow parser implementations
// to access it in an easy way. Typical use would be something like:
@ -51,7 +51,7 @@ func (p *P) On(matcher Matcher) *matchAction {
// }
p.LastMatch = string(m.input)
return &matchAction{
return &MatchAction{
p: p,
ok: ok,
input: m.input,
@ -60,9 +60,10 @@ func (p *P) On(matcher Matcher) *matchAction {
}
}
// matchAction is a struct that is used for building the On()-method chain.
type matchAction struct {
p *P
// MatchAction is a struct that is used for building the On()-method chain.
// The On() method will return an initialized struct of this type.
type MatchAction struct {
p *ParseAPI
ok bool
input []rune
output []rune
@ -70,11 +71,12 @@ type matchAction struct {
}
// Accept tells the parser to move the cursor past a match that was found,
// and to store the input that matched in the string buffer.
// and to store the input that matched in the parser's string buffer.
// When no match was found, then no action is taken.
// It returns a routeAction struct, which provides methods that can be used
// to tell the parser what state to go to next.
func (a *matchAction) Accept() bool {
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *MatchAction) Accept() bool {
if a.ok {
a.p.buffer.writeString(string(a.output))
a.advanceCursor()
@ -83,10 +85,11 @@ func (a *matchAction) Accept() bool {
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// without storing the actual match in the parser's string buffer.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *matchAction) Skip() bool {
func (a *MatchAction) Skip() bool {
if a.ok {
a.advanceCursor()
}
@ -95,14 +98,14 @@ func (a *matchAction) Skip() bool {
// Stay tells the parser to not move the cursor after finding a match.
// Returns true in case a match was found, false otherwise.
func (a *matchAction) Stay() bool {
func (a *MatchAction) Stay() bool {
return a.ok
}
// advanceCursor advances the rune cursor one position in the input data.
// While doing so, it keeps tracks of newlines, so we can report on
// row + column positions on error.
func (a *matchAction) advanceCursor() {
// advanceCursor advances the input position in the input data.
// While doing so, it keeps track of newlines that are encountered, so we
// can report on line + column positions on error.
func (a *MatchAction) advanceCursor() {
a.p.inputPos = a.inputPos
for _, r := range a.input {
if a.p.newline {

View File

@ -1,34 +1,34 @@
package parsekit
// RouteTo tells the parser what StateHandler function to invoke
// in the next parsing cycle.
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
// RouteTo tells the parser what StateHandler function to invoke on
// the next parse cycle.
func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction {
p.nextState = state
return &routeFollowupAction{p}
return &RouteFollowupAction{p}
}
// RouteRepeat indicates that on the next parsing cycle, the current
// RouteRepeat tells the parser that on the next parsing cycle, the current
// StateHandler must be reinvoked.
func (p *P) RouteRepeat() {
func (p *ParseAPI) RouteRepeat() {
p.RouteTo(p.state)
}
// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
// RouteReturn tells the parser that on the next cycle the last StateHandler
// that was pushed on the route stack must be invoked.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
func (p *P) RouteReturn() {
func (p *ParseAPI) RouteReturn() {
p.nextState = p.popRoute()
}
// routeFollowupAction chains parsing routes.
// RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
type routeFollowupAction struct {
p *P
type RouteFollowupAction struct {
p *ParseAPI
}
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
@ -36,7 +36,7 @@ type routeFollowupAction struct {
// For example:
//
// p.RouteTo(handlerA).ThenTo(handlerB)
func (a *routeFollowupAction) ThenTo(state StateHandler) {
func (a *RouteFollowupAction) ThenTo(state StateHandler) {
a.p.pushRoute(state)
}
@ -45,18 +45,18 @@ func (a *routeFollowupAction) ThenTo(state StateHandler) {
// For example:
//
// p.RouteTo(handlerA).ThenReturnHere()
func (a *routeFollowupAction) ThenReturnHere() {
func (a *RouteFollowupAction) ThenReturnHere() {
a.p.pushRoute(a.p.state)
}
// pushRoute adds the StateHandler to the route stack.
// This is used for implementing nested parsing.
func (p *P) pushRoute(state StateHandler) {
func (p *ParseAPI) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute pops the last pushed StateHandler from the route stack.
func (p *P) popRoute() StateHandler {
func (p *ParseAPI) popRoute() StateHandler {
last := len(p.routeStack) - 1
head, tail := p.routeStack[:last], p.routeStack[last]
p.routeStack = head
@ -66,8 +66,8 @@ func (p *P) popRoute() StateHandler {
// ExpectEndOfFile can be used from a StateHandler function to indicate that
// your parser expects to be at the end of the file. This will schedule
// a parsekit-provided StateHandler which will do the actual check for this.
func (p *P) ExpectEndOfFile() {
p.RouteTo(func(p *P) {
func (p *ParseAPI) ExpectEndOfFile() {
p.RouteTo(func(p *ParseAPI) {
p.Expects("end of file")
if p.On(A.EndOfFile).Stay() {
p.Emit(ItemEOF, "EOF")

192
tokenhandler.go Normal file
View File

@ -0,0 +1,192 @@
package parsekit
import (
"fmt"
)
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the match.
//
// A TokenHandler is to be used in conjunction with ParseAPI.On() or
// parsekit.NewMatcher().
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool
// TokenAPI is used by TokenHandler functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the TokenHandler function can call
// the TokenAPI.NextRune() method.
//
// The TokenHandler function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
// the rune will not be added to the output. It is mandatory for a TokenHandler
// to call either Accept() or Skip() after retrieving a rune, before calling
// NextRune() again.
//
// Eventually, the TokenHandler function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the TokenAPI's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the TokenAPI must be brought back to the original state.
//
// The way in which this is supported, is by forking a TokenAPI by calling
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
// output buffer, but using the same input cursor position as the forked parent.
//
// The TokenHandler function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the resulting
// output. When the TokenHandler function decides that the lookahead was successful,
// then the method TokenAPI.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input cursor position to that of the child.
//
// When the TokenHandler function decides that the lookahead was unsuccessful,
// then it can simply discard the forked child. The parent TokenAPI was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type TokenAPI struct {
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
inputOffset int // the byte offset into the input
input []rune // a slice of runes that represents all retrieved input runes for the TokenHandler
output []rune // a slice of runes that represents the accepted output runes for the TokenHandler
currRune *runeInfo // holds information for the last rune that was read from the input, nil when no rune is pending
parent *TokenAPI // the parent TokenAPI, in case this one was forked
}
// runeInfo describes a single rune and its metadata, as recorded by
// TokenAPI.NextRune() for the rune that was read last.
type runeInfo struct {
Rune rune // a UTF8 rune
ByteSize int // the number of bytes in the rune
OK bool // false when the rune represents an invalid UTF8 rune or EOF
}
// NextRune reads the rune at the current input offset.
//
// It returns the rune together with a boolean, which is false when an
// invalid UTF8 rune or the end of the file was encountered. Only a rune that
// was read successfully is recorded in the TokenAPI's input.
//
// After a successful NextRune(), the TokenHandler must call either Accept()
// (add the rune to the TokenAPI's resulting output) or Skip() (ignore the
// rune) before NextRune() may be called again. This gives a TokenHandler
// full control over what runes are significant for its resulting output.
// Calling NextRune() while a previously read rune is still pending will
// panic.
func (t *TokenAPI) NextRune() (rune, bool) {
	if t.currRune != nil {
		panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
	}
	r, size, valid := t.p.peek(t.inputOffset)
	t.currRune = &runeInfo{Rune: r, ByteSize: size, OK: valid}
	if !valid {
		return r, false
	}
	t.input = append(t.input, r)
	return r, true
}
// Fork splits off a child TokenAPI that starts at the same input offset as
// the parent TokenAPI, but has all its other data in a fresh state.
//
// By forking, a TokenHandler function can freely work with a TokenAPI, without
// affecting the parent TokenAPI. This is for example useful when the
// TokenHandler function must perform some form of lookahead.
//
// When a successful match was found, the TokenHandler function can call
// TokenAPI.Merge() on the forked child to have the resulting output added
// to the parent TokenAPI.
//
// When no match was found, the forked child can simply be discarded.
//
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
// This is done in 4 steps and only after finishing all steps, the TokenHandler
// function can confirm a successful match. The TokenHandler function for this
// case could look like this (yes, it's naive, but it shows the point):
// TODO make proper tested example
//
//     func MatchAbcd(t *TokenAPI) bool {
//         child := t.Fork() // fork to keep t untouched
//         for _, letter := range []rune{'a', 'b', 'c', 'd'} {
//             if r, ok := child.NextRune(); !ok || r != letter {
//                 return false // report mismatch, t is left untouched
//             }
//             child.Accept() // add rune to child output
//         }
//         child.Merge() // we have a match, add resulting output to parent
//         return true   // and report the successful match
//     }
func (t *TokenAPI) Fork() *TokenAPI {
	child := &TokenAPI{parent: t}
	child.p = t.p
	child.inputOffset = t.inputOffset
	return child
}
// Accept appends the rune that was last read by TokenAPI.NextRune() to the
// resulting output of the TokenAPI and moves the input offset past that rune.
// It panics when there is no successfully read rune pending.
func (t *TokenAPI) Accept() {
	t.checkAllowedCall("Accept()")
	accepted := t.currRune
	t.currRune = nil
	t.inputOffset += accepted.ByteSize
	t.output = append(t.output, accepted.Rune)
}
// Skip moves the input offset past the rune that was last read by NextRune(),
// without adding that rune to the resulting output of the TokenAPI.
// It panics when there is no successfully read rune pending.
func (t *TokenAPI) Skip() {
	t.checkAllowedCall("Skip()")
	skipped := t.currRune
	t.currRune = nil
	t.inputOffset += skipped.ByteSize
}
// checkAllowedCall guards Accept() and Skip(): those methods may only be
// called when a rune was successfully read by a preceding call to NextRune().
// The name argument is the method name to mention in the panic message.
//
// It panics when no rune is pending, or when the pending rune is not OK
// (end of file or invalid UTF8 rune).
func (t *TokenAPI) checkAllowedCall(name string) {
	switch {
	case t.currRune == nil:
		panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
	case !t.currRune.OK:
		panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRune() did not return OK (EOF or invalid rune)", name))
	}
}
// Merge merges the results from a forked child TokenAPI back into its
// parent: the input and output runes that were collected in the child are
// appended to those of the parent, and the parent's input offset is advanced
// to the child's input offset.
//
// After the merge, the child's input and output are cleared, so the child can
// immediately be reused for performing another match. The child's input
// offset is kept at its current position.
//
// Merge always returns true. It panics when called on a TokenAPI that was
// not created using Fork().
func (t *TokenAPI) Merge() bool {
	if t.parent == nil {
		panic("internal parser error: Cannot call Merge on a non-forked TokenAPI")
	}
	t.parent.input = append(t.parent.input, t.input...)
	t.parent.output = append(t.parent.output, t.output...)
	t.parent.inputOffset = t.inputOffset
	t.ClearOutput()
	t.ClearInput()
	return true
}
// ClearOutput resets the resulting output of the TokenAPI to an empty rune
// slice. The input and the input offset are left as they are.
func (t *TokenAPI) ClearOutput() {
	t.output = make([]rune, 0)
}
// ClearInput resets the recorded input of the TokenAPI to an empty rune
// slice. The output and the input offset are left as they are.
func (t *TokenAPI) ClearInput() {
	t.input = make([]rune, 0)
}

558
tokenhandlers_builtin.go Normal file
View File

@ -0,0 +1,558 @@
package parsekit
import (
"fmt"
"strings"
"unicode"
)
// C bundles the parser/combinator functions that can be used to construct
// TokenHandler functions, giving short names to the Match* constructors.
//
// When using C in your own parser, then it is advised to create a variable
// to reference it:
//
//     var c = parsekit.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
	Rune       func(rune) TokenHandler
	Runes      func(...rune) TokenHandler
	RuneRange  func(rune, rune) TokenHandler
	Str        func(string) TokenHandler
	StrNoCase  func(string) TokenHandler
	Any        func(...TokenHandler) TokenHandler
	Not        func(TokenHandler) TokenHandler
	Opt        func(TokenHandler) TokenHandler
	Seq        func(...TokenHandler) TokenHandler
	Rep        func(times int, handler TokenHandler) TokenHandler
	Min        func(min int, handler TokenHandler) TokenHandler
	Max        func(max int, handler TokenHandler) TokenHandler
	ZeroOrMore func(TokenHandler) TokenHandler
	OneOrMore  func(TokenHandler) TokenHandler
	MinMax     func(min int, max int, handler TokenHandler) TokenHandler
	Separated  func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
	Except     func(except TokenHandler, handler TokenHandler) TokenHandler
}{
	// Keys are listed in the same order as the fields above.
	Rune:       MatchRune,
	Runes:      MatchRunes,
	RuneRange:  MatchRuneRange,
	Str:        MatchStr,
	StrNoCase:  MatchStrNoCase,
	Any:        MatchAny,
	Not:        MatchNot,
	Opt:        MatchOpt,
	Seq:        MatchSeq,
	Rep:        MatchRep,
	Min:        MatchMin,
	Max:        MatchMax,
	ZeroOrMore: MatchZeroOrMore,
	OneOrMore:  MatchOneOrMore,
	MinMax:     MatchMinMax,
	Separated:  MatchSeparated,
	Except:     MatchExcept,
}
// MatchRune creates a TokenHandler function that reads the next rune from the
// input and accepts it when it equals the expected rune. A mismatch is
// reported for any other rune, and when no valid rune could be read.
func MatchRune(expected rune) TokenHandler {
	return func(t *TokenAPI) bool {
		r, ok := t.NextRune()
		if !ok || r != expected {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRunes creates a TokenHandler function that reads the next rune from
// the input and accepts it when it is one of the provided runes. A mismatch
// is reported for any other rune, and when no valid rune could be read.
func MatchRunes(expected ...rune) TokenHandler {
	candidates := string(expected)
	return func(t *TokenAPI) bool {
		r, ok := t.NextRune()
		if !ok || !strings.ContainsRune(candidates, r) {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRuneRange creates a TokenHandler function that checks if the next rune
// from the input is contained by the provided rune range.
//
// The rune range is defined by a start and an end rune, inclusive, so:
//
//     MatchRuneRange('g', 'k')
//
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
//
// The start rune must not be greater than the end rune. This is checked when
// the created TokenHandler is invoked; an invalid range makes it panic.
func MatchRuneRange(start rune, end rune) TokenHandler {
	return func(t *TokenAPI) bool {
		if end < start {
			panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be > end %q", start, end))
		}
		input, ok := t.NextRune()
		if ok && input >= start && input <= end {
			t.Accept()
			return true
		}
		return false
	}
}
// MatchStr builds a TokenHandler that matches the expected string literally.
// It is composed of one MatchRune handler per rune of the string, chained
// together with MatchSeq.
// TODO make this a more efficient string-level match?
func MatchStr(expected string) TokenHandler {
	// len(expected) counts bytes, which is an upper bound for the rune count.
	perRune := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRune(r))
	}
	return MatchSeq(perRune...)
}
// MatchStrNoCase builds a TokenHandler that matches the expected string
// without regard to case. For every rune of the string, a MatchRunes handler
// is created that accepts both the upper case and the lower case form of
// that rune; these handlers are chained together with MatchSeq.
// TODO make this a more efficient string-level match?
func MatchStrNoCase(expected string) TokenHandler {
	// len(expected) counts bytes, which is an upper bound for the rune count.
	perRune := make([]TokenHandler, 0, len(expected))
	for _, r := range expected {
		perRune = append(perRune, MatchRunes(unicode.ToUpper(r), unicode.ToLower(r)))
	}
	return MatchSeq(perRune...)
}
// MatchOpt wraps the provided TokenHandler to make it optional. The handler
// is tried on a fork of the TokenAPI; when it applies, the fork is merged
// back. Whether or not it applies, a successful match is reported.
func MatchOpt(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if attempt := t.Fork(); handler(attempt) {
			attempt.Merge()
		}
		return true
	}
}
// MatchSeq builds a TokenHandler that applies the provided TokenHandlers one
// after the other, in the order given, all on the same fork of the TokenAPI.
// As soon as one of them fails, a mismatch is reported and the fork is not
// merged. Only when every handler applies is the fork merged back and a
// successful match reported.
func MatchSeq(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		child := t.Fork()
		for i := range handlers {
			if applied := handlers[i](child); !applied {
				return false
			}
		}
		child.Merge()
		return true
	}
}
// MatchAny builds a TokenHandler that tries the provided TokenHandlers in the
// order given, each on its own fresh fork of the TokenAPI. The fork of the
// first handler that applies is merged back and the result of that merge is
// returned. When none of the handlers applies, a mismatch is reported.
func MatchAny(handlers ...TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		for i := range handlers {
			attempt := t.Fork()
			if !handlers[i](attempt) {
				continue
			}
			return attempt.Merge()
		}
		return false
	}
}
// MatchNot builds a TokenHandler that inverts the provided TokenHandler. The
// provided handler is probed on a fork that is never merged back. When the
// probe applies, a mismatch is reported. When it does not, the next rune is
// read from the input and accepted as the match; if no rune can be read,
// a mismatch is reported as well.
func MatchNot(handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		if probe := t.Fork(); handler(probe) {
			return false
		}
		if _, ok := t.NextRune(); !ok {
			return false
		}
		t.Accept()
		return true
	}
}
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
// applied exactly the provided amount of times.
//
// Note that the input can contain more than the provided number of matches, e.g.:
//
//     MatchRep(4, MatchRune('X'))
//
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
// In that last case, there will be a remainder "XX" on the input.
//
// MatchRep panics when times is negative. The shared implementation treats
// a negative maximum as "no upper bound", so without this check a negative
// repeat count would silently behave like MatchZeroOrMore.
func MatchRep(times int, handler TokenHandler) TokenHandler {
	if times < 0 {
		panic("internal parser error: MatchRep definition error: times must be >= 0")
	}
	return matchMinMax(times, times, handler)
}
// MatchMin builds a TokenHandler that requires the provided TokenHandler to
// apply at least min times. There is no upper bound: matches beyond the
// minimum are included in the output as well.
func MatchMin(min int, handler TokenHandler) TokenHandler {
	// A negative maximum tells matchMinMax not to apply an upper bound.
	const unbounded = -1
	return matchMinMax(min, unbounded, handler)
}
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
// applied at most the provided maximum number of times.
// Matches up to the maximum are included in the output; when more matches
// are possible, those are left on the input.
// Zero matches are considered a successful match.
//
// MatchMax panics when max is negative. The shared implementation treats
// a negative maximum as "no upper bound", so without this check a negative
// max would silently remove the limit this function is meant to impose.
func MatchMax(max int, handler TokenHandler) TokenHandler {
	if max < 0 {
		panic("internal parser error: MatchMax definition error: max must be >= 0")
	}
	return matchMinMax(0, max, handler)
}
// MatchZeroOrMore builds a TokenHandler that applies the provided
// TokenHandler as often as it keeps matching. Every match is included in the
// output. Not matching at all still counts as a successful match.
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
	const (
		atLeast   = 0
		unbounded = -1 // a negative maximum means: no upper bound
	)
	return matchMinMax(atLeast, unbounded, handler)
}
// MatchOneOrMore builds a TokenHandler that requires the provided
// TokenHandler to apply at least once, and then keeps applying it for as
// long as it matches. Every match is included in the output.
func MatchOneOrMore(handler TokenHandler) TokenHandler {
	const (
		atLeast   = 1
		unbounded = -1 // a negative maximum means: no upper bound
	)
	return matchMinMax(atLeast, unbounded, handler)
}
// MatchMinMax builds a TokenHandler that requires the provided TokenHandler
// to apply at least min times and includes at most max matches in the output
// (both bounds inclusive).
//
// A negative max or a negative min is a definition error, which is reported
// by means of a panic.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
	switch {
	case max < 0:
		panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
	case min < 0:
		panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
	}
	return matchMinMax(min, max, handler)
}
// matchMinMax is the shared implementation behind MatchRep, MatchMin,
// MatchMax, MatchZeroOrMore, MatchOneOrMore and MatchMinMax.
//
// The returned TokenHandler applies handler repeatedly on a single fork of
// the TokenAPI. It reports a mismatch when handler fails before min matches
// were made. Otherwise it reports a match, after collecting additional
// matches for as long as handler keeps applying and max is not yet reached.
// A negative max means that there is no upper bound.
//
// NOTE(review): with a negative max, the second loop only ends when handler
// reports a mismatch. A handler that always reports a match (MatchOpt does)
// therefore keeps this loop running forever.
func matchMinMax(min int, max int, handler TokenHandler) TokenHandler {
return func(t *TokenAPI) bool {
child := t.Fork()
// NOTE(review): this definition check runs on every invocation of the
// handler, and its message names MatchRep, although MatchMinMax can end up
// here as well (it does not check min against max itself).
if max >= 0 && min > max {
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
}
total := 0
// Check for the minimum required amount of matches.
for total < min {
total++
if !handler(child) {
return false
}
}
// No specified max: include the rest of the available matches.
// Specified max: include the rest of the available matches, up to the max.
// The minimum has been satisfied at this point, so what was matched so far
// is merged before trying for more.
child.Merge()
for max < 0 || total < max {
total++
if !handler(child) {
break
}
// Merge after every additional match, so a later failing attempt does not
// affect what has already been merged.
child.Merge()
}
return true
}
}
// MatchSeparated builds a TokenHandler for a list pattern: one occurrence of
// the separated TokenHandler, followed by zero or more repetitions of the
// separator TokenHandler and another separated TokenHandler. All matches
// (separated and separator alike) are included in the output.
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
	tail := MatchZeroOrMore(MatchSeq(separator, separated))
	return MatchSeq(separated, tail)
}
// MatchExcept builds a TokenHandler that applies the provided handler, unless
// the except TokenHandler applies to the upcoming input. The except handler
// is probed first, on a fork that is never merged back. When it applies, a
// mismatch is reported. Otherwise, the outcome of handler is reported.
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
	return func(t *TokenAPI) bool {
		return !except(t.Fork()) && handler(t)
	}
}
// A provides convenient access to a range of atoms that can be used to
// build TokenHandlers or parser rules.
//
// In parsekit, an atom is defined as a ready for use TokenHandler function.
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
//     var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
//
// Every field of the struct is initialized below. The Newline atom matches
// either "\r\n" or "\n"; these are the same line endings that EndOfLine
// accepts, minus the end of file.
var A = struct {
	EndOfFile             TokenHandler
	AnyRune               TokenHandler
	Space                 TokenHandler
	Tab                   TokenHandler
	CR                    TokenHandler
	LF                    TokenHandler
	CRLF                  TokenHandler
	Excl                  TokenHandler
	DoubleQuote           TokenHandler
	Hash                  TokenHandler
	Dollar                TokenHandler
	Percent               TokenHandler
	Amp                   TokenHandler
	SingleQuote           TokenHandler
	RoundOpen             TokenHandler
	RoundClose            TokenHandler
	Asterisk              TokenHandler
	Plus                  TokenHandler
	Comma                 TokenHandler
	Minus                 TokenHandler
	Dot                   TokenHandler
	Slash                 TokenHandler
	Colon                 TokenHandler
	Semicolon             TokenHandler
	AngleOpen             TokenHandler
	Equal                 TokenHandler
	AngleClose            TokenHandler
	Question              TokenHandler
	At                    TokenHandler
	SquareOpen            TokenHandler
	Backslash             TokenHandler
	SquareClose           TokenHandler
	Caret                 TokenHandler
	Underscore            TokenHandler
	Backquote             TokenHandler
	CurlyOpen             TokenHandler
	Pipe                  TokenHandler
	CurlyClose            TokenHandler
	Tilde                 TokenHandler
	Newline               TokenHandler
	Whitespace            TokenHandler
	WhitespaceAndNewlines TokenHandler
	EndOfLine             TokenHandler
	Digit                 TokenHandler
	ASCII                 TokenHandler
	ASCIILower            TokenHandler
	ASCIIUpper            TokenHandler
	HexDigit              TokenHandler
}{
	EndOfFile:             MatchEndOfFile(),
	AnyRune:               MatchAnyRune(),
	Space:                 C.Rune(' '),
	Tab:                   C.Rune('\t'),
	CR:                    C.Rune('\r'),
	LF:                    C.Rune('\n'),
	CRLF:                  C.Str("\r\n"),
	Excl:                  C.Rune('!'),
	DoubleQuote:           C.Rune('"'),
	Hash:                  C.Rune('#'),
	Dollar:                C.Rune('$'),
	Percent:               C.Rune('%'),
	Amp:                   C.Rune('&'),
	SingleQuote:           C.Rune('\''),
	RoundOpen:             C.Rune('('),
	RoundClose:            C.Rune(')'),
	Asterisk:              C.Rune('*'),
	Plus:                  C.Rune('+'),
	Comma:                 C.Rune(','),
	Minus:                 C.Rune('-'),
	Dot:                   C.Rune('.'),
	Slash:                 C.Rune('/'),
	Colon:                 C.Rune(':'),
	Semicolon:             C.Rune(';'),
	AngleOpen:             C.Rune('<'),
	Equal:                 C.Rune('='),
	AngleClose:            C.Rune('>'),
	Question:              C.Rune('?'),
	At:                    C.Rune('@'),
	SquareOpen:            C.Rune('['),
	Backslash:             C.Rune('\\'),
	SquareClose:           C.Rune(']'),
	Caret:                 C.Rune('^'),
	Underscore:            C.Rune('_'),
	Backquote:             C.Rune('`'),
	CurlyOpen:             C.Rune('{'),
	Pipe:                  C.Rune('|'),
	CurlyClose:            C.Rune('}'),
	Tilde:                 C.Rune('~'),
	// Newline used to be declared but never assigned, which left it a nil
	// TokenHandler that panics when invoked.
	Newline:               C.Any(C.Str("\r\n"), C.Rune('\n')),
	Whitespace:            C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
	WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
	EndOfLine:             C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
	Digit:                 C.RuneRange('0', '9'),
	ASCII:                 C.RuneRange('\x00', '\x7F'),
	ASCIILower:            C.RuneRange('a', 'z'),
	ASCIIUpper:            C.RuneRange('A', 'Z'),
	HexDigit:              C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
}
// MatchEndOfFile builds a TokenHandler that reports a match when the end of
// the input data has been reached. The check is performed on a fork of the
// TokenAPI that is never merged back, so this handler never produces output.
// The only result is the boolean return value.
func MatchEndOfFile() TokenHandler {
	return func(t *TokenAPI) bool {
		r, ok := t.Fork().NextRune()
		if ok {
			return false
		}
		return r == eofRune
	}
}
// MatchAnyRune builds a TokenHandler that reports a match when a rune can be
// read from the input, whatever that rune is. The rune is accepted on
// success. When no rune can be read, a mismatch is reported.
func MatchAnyRune() TokenHandler {
	return func(t *TokenAPI) bool {
		if _, ok := t.NextRune(); !ok {
			return false
		}
		t.Accept()
		return true
	}
}
// M bundles the modifiers of parsekit, for convenient access when creating
// TokenHandler functions.
//
// A modifier is a parser/combinator that wraps another TokenHandler and
// changes the output that this TokenHandler produces. A modifier does not
// match against the input by itself.
//
// It is advised to reference M through a variable in your own parser:
//
//     var m = parsekit.M
//
// This saves typing and keeps the parser code clean.
var M = struct {
	Drop             func(handler TokenHandler) TokenHandler
	Trim             func(handler TokenHandler, cutset string) TokenHandler      // TODO reverse arguments?
	TrimLeft         func(handler TokenHandler, cutset string) TokenHandler      // TODO reverse arguments?
	TrimRight        func(handler TokenHandler, cutset string) TokenHandler      // TODO reverse arguments?
	ToLower          func(handler TokenHandler) TokenHandler
	ToUpper          func(handler TokenHandler) TokenHandler
	Replace          func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
	ModifyByCallback func(handler TokenHandler, modfunc func(string) string) TokenHandler
}{
	Drop:             ModifyDrop,
	Trim:             ModifyTrim,
	TrimLeft:         ModifyTrimLeft,
	TrimRight:        ModifyTrimRight,
	ToLower:          ModifyToLower,
	ToUpper:          ModifyToUpper,
	Replace:          ModifyReplace,
	ModifyByCallback: ModifyByCallback,
}
// ModifyDrop builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, throws away all of its output.
//
// A mismatch of the provided TokenHandler is still reported as a mismatch,
// even though the output would have been dropped anyway. To drop optional
// whitespace, for example, use:
//
//     M.Drop(C.Opt(A.Whitespace))
//
// and not:
//
//     M.Drop(A.Whitespace)
//
// Whitespace is defined as "1 or more spaces and/or tabs", so the input
// "bork" does not match the second form, while " bork" does. Both inputs
// match the first form.
func ModifyDrop(handler TokenHandler) TokenHandler {
	discard := func(string) string { return "" }
	return ModifyByCallback(handler, discard)
}
// ModifyTrim builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, strips the characters of the cutset from both the
// left and the right side of its output.
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, true
	return modifyTrim(handler, cutset, left, right)
}
// ModifyTrimLeft builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, strips the characters of the cutset from the left
// side of its output.
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
	const left, right = true, false
	return modifyTrim(handler, cutset, left, right)
}
// ModifyTrimRight builds a TokenHandler that applies the provided
// TokenHandler and, when it matches, strips the characters of the cutset
// from the right side of its output.
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
	const left, right = false, true
	return modifyTrim(handler, cutset, left, right)
}
// modifyTrim is the shared implementation behind ModifyTrim, ModifyTrimLeft
// and ModifyTrimRight. The trimLeft and trimRight flags select the sides of
// the output from which the characters of the cutset are stripped.
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
	return ModifyByCallback(handler, func(output string) string {
		trimmed := output
		if trimLeft {
			trimmed = strings.TrimLeft(trimmed, cutset)
		}
		if trimRight {
			trimmed = strings.TrimRight(trimmed, cutset)
		}
		return trimmed
	})
}
// ModifyToUpper builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, converts its output to upper case.
func ModifyToUpper(handler TokenHandler) TokenHandler {
	upper := func(output string) string { return strings.ToUpper(output) }
	return ModifyByCallback(handler, upper)
}
// ModifyToLower builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, converts its output to lower case.
func ModifyToLower(handler TokenHandler) TokenHandler {
	lower := func(output string) string { return strings.ToLower(output) }
	return ModifyByCallback(handler, lower)
}
// ModifyReplace builds a TokenHandler that applies the provided TokenHandler
// and, when it matches, substitutes the replaceWith string for its output.
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
	substitute := func(string) string { return replaceWith }
	return ModifyByCallback(handler, substitute)
}
// ModifyByCallback builds a TokenHandler that applies the provided
// TokenHandler on a fork of the TokenAPI. When the handler matches, the
// output of the fork is passed to modfunc as a string, and the string that
// modfunc returns replaces that output before the fork is merged back.
// When the handler does not match, a mismatch is reported and modfunc is
// not called.
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
	return func(t *TokenAPI) bool {
		child := t.Fork()
		if !handler(child) {
			return false
		}
		child.output = []rune(modfunc(string(child.output)))
		child.Merge()
		return true
	}
}

View File

@ -8,7 +8,7 @@ import (
)
func TestCombinators(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
RunTokenHandlerTests(t, []TokenHandlerTest{
{"xxx", c.Rune('x'), true, "x"},
{"x ", c.Rune(' '), false, ""},
{"aa", c.RuneRange('b', 'e'), false, ""},
@ -79,7 +79,7 @@ func TestCombinators(t *testing.T) {
}
func TestAtoms(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
RunTokenHandlerTests(t, []TokenHandlerTest{
{"", a.EndOfFile, true, ""},
{"⌘", a.AnyRune, true, "⌘"},
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
@ -158,7 +158,7 @@ func TestAtoms(t *testing.T) {
}
func TestModifiers(t *testing.T) {
RunMatcherTests(t, []MatcherTest{
RunTokenHandlerTests(t, []TokenHandlerTest{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@ -172,6 +172,30 @@ func TestModifiers(t *testing.T) {
})
}
// TestSequenceOfRunes checks that a sequence of punctuation atoms, used from
// within a parser, matches an input string made up of the corresponding
// characters, and that the emitted item carries that input as its value.
func TestSequenceOfRunes(t *testing.T) {
sequence := c.Seq(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
// The input lists the characters in the same order as the atoms above.
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept() {
p.EmitLiteral(TestItem)
}
})
item, err, ok := parser.Parse(input).Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
// The value of the emitted item must be identical to the input.
if item.Value != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
}
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
demonic := c.Seq(
@ -194,34 +218,10 @@ func TestCombination(t *testing.T) {
c.Opt(a.SquareClose),
)
RunMatcherTests(t, []MatcherTest{
RunTokenHandlerTests(t, []TokenHandlerTest{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
})
}
// TestSequenceOfRunes checks that a sequence of punctuation atoms, used from
// within a parser, matches an input string made up of the corresponding
// characters, and that the emitted item carries that input as its value.
// NOTE(review): this copy refers to parsekit.P, whereas the other copy of
// this test in the same view uses parsekit.ParseAPI; presumably this is the
// pre-rename side of the diff. Confirm which one is current.
func TestSequenceOfRunes(t *testing.T) {
sequence := c.Seq(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
// The input lists the characters in the same order as the atoms above.
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
parser := parsekit.NewParser(func(p *parsekit.P) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept() {
p.EmitLiteral(TestItem)
}
})
item, err, ok := parser.Parse(input).Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
}
// The value of the emitted item must be identical to the input.
if item.Value != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
}
}