A big round of getting-ya-terminology-straight.
This commit is contained in:
parent
2751c78003
commit
c6fde2cf4e
|
@ -15,15 +15,15 @@ import (
|
|||
)
|
||||
|
||||
// When writing a parser, it's a good start to use the parser/combinator
|
||||
// functionality of parsekit to create some Matcher functions. These functions
|
||||
// functionality of parsekit to create some TokenHandler functions. These functions
|
||||
// can later be used in the parser state machine to check for matching strings
|
||||
// on the input data.
|
||||
//
|
||||
// For the calculator, we only need a definition of "number, surrounded by
|
||||
// optional whitespace". Skipping whitespace could be a part of the StateHandler
|
||||
// functions below too, but including it in a Matcher makes things really
|
||||
// functions below too, but including it in a TokenHandler makes things really
|
||||
// practical.
|
||||
func createNumberMatcher() parsekit.Matcher {
|
||||
func createNumberMatcher() parsekit.TokenHandler {
|
||||
// Easy access to parsekit definition.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
|
@ -43,17 +43,17 @@ const (
|
|||
|
||||
// We also need to define the state machine for parsing the input.
|
||||
// The state machine is built up from functions that match the StateHandler
|
||||
// signature: func(*parsekit.P)
|
||||
// The P struct holds the internal state for the parser and it provides
|
||||
// signature: func(*parsekit.ParseAPI)
|
||||
// The ParseAPI struct holds the internal state for the parser and it provides
|
||||
// some methods that form the API for your StateHandler implementation.
|
||||
|
||||
// State: expect a number. When a number is found on the input,
|
||||
// it is accepted in the output buffer, after which the output buffer is
|
||||
// it is accepted in the parser's string buffer, after which that buffer is
|
||||
// emitted as a numberType item. Then we tell the state machine to continue
|
||||
// with the calcWaitForOperatorOrEndOfInput state.
|
||||
// When no number is found, the parser will emit an error, explaining that
|
||||
// "a number" was expected.
|
||||
func calcWaitForNumber(p *parsekit.P) {
|
||||
func calcWaitForNumber(p *parsekit.ParseAPI) {
|
||||
p.Expects("a number")
|
||||
if p.On(calcNumber).Accept() {
|
||||
p.EmitLiteral(numberType)
|
||||
|
@ -61,13 +61,13 @@ func calcWaitForNumber(p *parsekit.P) {
|
|||
}
|
||||
}
|
||||
|
||||
// State: expect a plus or minus operator. When one of those
|
||||
// is found, the appropriate Item is emitted and the parser is sent back
|
||||
// to the numberHandler to find the next number on the input.
|
||||
// When no operator is found, then the parser is told to expect the end of
|
||||
// the input. When more input data is available (which is obviously wrong
|
||||
// data since it does not match our syntax), the parser will emit an error.
|
||||
func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
|
||||
// State: expect a plus or minus operator. When one of those is found, the
|
||||
// appropriate Item is emitted and the parser is sent back to the
|
||||
// numberHandler to find the next number on the input. When no operator is
|
||||
// found, then the parser is told to expect the end of the input. When more
|
||||
// input data are available (which are obviously wrong data since they do
|
||||
// not match our syntax), the parser will emit an error.
|
||||
func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) {
|
||||
switch {
|
||||
case p.On(a.Plus).Accept():
|
||||
p.EmitLiteral(addType)
|
||||
|
@ -81,18 +81,20 @@ func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
|
|||
}
|
||||
|
||||
// All is ready for our parser. We now can create a new Parser struct.
|
||||
// We need to tell it what the start state is. In our case, it is the
|
||||
// We need to tell it what StateHandler to start with. In our case, it is the
|
||||
// calcWaitForNumber state, since the calculation must start with a number.
|
||||
var calcParser = parsekit.NewParser(calcWaitForNumber)
|
||||
|
||||
func Example_basicCalculator() {
|
||||
// Let's feed the parser some input to work with.
|
||||
// Let's feed the parser some input to work with. This provides us with
|
||||
// a parse run for that input.
|
||||
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
||||
|
||||
// We can now step through the results of the parsing process by repeated
|
||||
// calls to run.Next(). Next() returns either the next parse item, a parse
|
||||
// error or an end of file. Let's dump the parse results and handle the
|
||||
// computation while we're at it.
|
||||
// TODO this is convoluted for people using the parser code I think. Maybe use three output data types instead?
|
||||
sum := 0
|
||||
op := +1
|
||||
for {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
// In this example, a parser is created which can parse and normalize Dutch postcodes
|
||||
// The implementation uses only a Matcher function and does not implement a
|
||||
// In this example, a Parser is created which can parse and normalize Dutch postcodes
|
||||
// The implementation uses only TokenHandler functions and does not implement a
|
||||
// full-fledged state-based Parser for it.
|
||||
package parsekit_test
|
||||
|
||||
|
@ -9,11 +9,11 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
||||
func createPostcodeMatcher() *parsekit.Matcher {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// Matcher functions are created and combined to satisfy these rules:
|
||||
// TokenHandler functions are created and combined to satisfy these rules:
|
||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||
// - The first digit is never a zero.
|
||||
// - A space between letters and digits is optional.
|
||||
|
@ -26,6 +26,8 @@ func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
|||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||
|
||||
// Create a Matcher, which wraps the 'postcode' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||
}
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
// In this example, a parser is created that is able to parse input that looks
|
||||
// like "Hello, <name>!", and that extracts the name from it.
|
||||
//
|
||||
// The implementation uses only a Matcher function and does not implement a
|
||||
// full-fledged state-based Parser for it. If you want to see the same kind of
|
||||
// functionality, implemented using a Parser, take a look at the
|
||||
// The implementation uses only parser/combinator TokenHandler functions and does
|
||||
// not implement a full-fledged state-based Parser for it. If you want to see the
|
||||
// same kind of functionality, implemented using a Parser, take a look at the
|
||||
// HelloWorldUsingParser example.
|
||||
package parsekit_test
|
||||
|
||||
|
@ -13,12 +13,12 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func createHelloMatcher() *parsekit.MatcherWrapper {
|
||||
func createHelloMatcher() *parsekit.Matcher {
|
||||
// Easy access to parsekit definition.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
// Using the parser/combinator support of parsekit, we create a Matcher function
|
||||
// that does all the work. The 'greeting' Matcher matches the whole input and
|
||||
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
||||
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
||||
// drops all but the name from it.
|
||||
hello := c.StrNoCase("hello")
|
||||
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||
|
@ -26,7 +26,8 @@ func createHelloMatcher() *parsekit.MatcherWrapper {
|
|||
name := c.OneOrMore(c.Not(a.Excl))
|
||||
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
||||
|
||||
// Using 'greeting' we can now create the Matcher-based parser.
|
||||
// Create a Matcher, which wraps the 'greeting' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||
}
|
||||
|
||||
|
|
|
@ -2,14 +2,14 @@
|
|||
// like "Hello, <name>!", and that extracts the name from it.
|
||||
//
|
||||
// This implementation uses a state-based Parser for it, and it does not implement
|
||||
// any custom combinator/parser Matcher functions. Note that things are much easier to
|
||||
// implement using custom Matchers (see the other HelloWorldUsingMatcher example
|
||||
// for this). Doing this fully parser-based implementation is mainly for your
|
||||
// any custom parser/combinator TokenHandler functions. Note that things are much
|
||||
// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher
|
||||
// example for this). Doing this fully parser-based implementation is mainly for your
|
||||
// learning pleasure.
|
||||
//
|
||||
// One big difference between the Matcher-based example and this one, is that the
|
||||
// state-based parser reports errors much more fine-grained. This might or might
|
||||
// not be useful for your specific application.
|
||||
// not be useful for your specific use case.
|
||||
package parsekit_test
|
||||
|
||||
import (
|
||||
|
@ -21,7 +21,7 @@ import (
|
|||
|
||||
const greeteeItem parsekit.ItemType = 1
|
||||
|
||||
func stateStartOfGreeting(p *parsekit.P) {
|
||||
func stateStartOfGreeting(p *parsekit.ParseAPI) {
|
||||
c := parsekit.C
|
||||
p.Expects("hello")
|
||||
if p.On(c.StrNoCase("hello")).Skip() {
|
||||
|
@ -29,7 +29,7 @@ func stateStartOfGreeting(p *parsekit.P) {
|
|||
}
|
||||
}
|
||||
|
||||
func stateComma(p *parsekit.P) {
|
||||
func stateComma(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
p.Expects("comma")
|
||||
switch {
|
||||
|
@ -40,7 +40,7 @@ func stateComma(p *parsekit.P) {
|
|||
}
|
||||
}
|
||||
|
||||
func stateName(p *parsekit.P) {
|
||||
func stateName(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
p.Expects("name")
|
||||
switch {
|
||||
|
@ -51,7 +51,7 @@ func stateName(p *parsekit.P) {
|
|||
}
|
||||
}
|
||||
|
||||
func stateEndOfGreeting(p *parsekit.P) {
|
||||
func stateEndOfGreeting(p *parsekit.ParseAPI) {
|
||||
p.Expects("end of greeting")
|
||||
if p.On(a.EndOfFile).Stay() {
|
||||
name := strings.TrimSpace(p.BufLiteral())
|
||||
|
|
|
@ -28,7 +28,7 @@ func ExampleItem() {
|
|||
// the p.Emit* methods on parsekit.ParseAPI.
|
||||
// When errors occur, or the end of the file is reached, then the built-in
|
||||
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
||||
stateHandler := func(p *parsekit.P) {
|
||||
stateHandler := func(p *parsekit.ParseAPI) {
|
||||
if p.On(c.Str("question")).Accept() {
|
||||
p.EmitLiteral(QuestionItem)
|
||||
}
|
||||
|
@ -99,14 +99,14 @@ func ExampleMatchAnyRune() {
|
|||
// Easy access to the parsekit definitions.
|
||||
a := parsekit.A
|
||||
|
||||
handler := func(p *parsekit.P) {
|
||||
stateHandler := func(p *parsekit.ParseAPI) {
|
||||
p.Expects("Any valid rune")
|
||||
if p.On(a.AnyRune).Accept() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.RouteRepeat()
|
||||
}
|
||||
}
|
||||
parser := parsekit.NewParser(handler)
|
||||
parser := parsekit.NewParser(stateHandler)
|
||||
run := parser.Parse("¡Any / valid / character will dö!")
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
|
|
187
matcher.go
187
matcher.go
|
@ -1,187 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Matcher is the function type that must be implemented to create a function
|
||||
// that can be used in conjunction with parsekit.P.On() or parsekit.New().
|
||||
// Its purpose is to check if input data matches some kind of pattern and to
|
||||
// report back the match.
|
||||
//
|
||||
// A Matcher function gets a MatchDialog as its input and returns a boolean to
|
||||
// indicate whether or not the Matcher found a match on the input.
|
||||
// The MatchDialog is used for retrieving input data to match against
|
||||
// and for reporting back results.
|
||||
type Matcher func(m *MatchDialog) bool
|
||||
|
||||
// MatchDialog is used by Matcher functions to retrieve runes from the
|
||||
// input to match against and to report back results.
|
||||
//
|
||||
// Basic operation:
|
||||
//
|
||||
// To retrieve the next rune from the input, the Matcher function can call
|
||||
// the MatchDialog.NextRune() method.
|
||||
//
|
||||
// The Matcher function can then evaluate the retrieved rune and either
|
||||
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
|
||||
// the rune is added to the output of the MatchDialog. When using
|
||||
// MatchDialog.Skip(), the rune will not be added to the output. It is
|
||||
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
|
||||
// a rune, before calling NextRune() again.
|
||||
//
|
||||
// Eventually, the Matcher function must return a boolean value, indicating
|
||||
// whether or not a match was found. When true, then the calling code will
|
||||
// use the runes that were accepted into the MatchDialog's resulting output.
|
||||
//
|
||||
// Forking operation for easy lookahead support:
|
||||
//
|
||||
// Sometimes, a Matcher function must be able to perform a lookahead, which
|
||||
// might either succeed or fail. In case of a failing lookahead, the state
|
||||
// of the MatchDialog must be brought back to the original state.
|
||||
//
|
||||
// The way in which this is supported, is by forking a MatchDialog by calling
|
||||
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
|
||||
// output buffer, but using the same input offset as the forked parent.
|
||||
//
|
||||
// The Matcher function can then use the same interface as described for
|
||||
// normal operation to retrieve runes from the input and to fill the output
|
||||
// buffer. When the Matcher function decides that the lookahead was successful,
|
||||
// then the method MatchDialog.Merge() can be called on the forked child to
|
||||
// append the resulting output from the child to the parent's resulting output,
|
||||
// and to update the parent input offset to that of the child.
|
||||
//
|
||||
// When the Matcher function decides that the lookahead was unsuccessful, then
|
||||
// it can simply discard the forked child. The parent MatchDialog was never
|
||||
// modified, so a new match can be safely started using that parent, as if the
|
||||
// lookahead never happened.
|
||||
type MatchDialog struct {
|
||||
p *P // parser state, used to retrieve input data to match against (TODO should be interface)
|
||||
inputOffset int // the byte offset into the input
|
||||
input []rune // a slice of runes that represents the retrieved input runes for the Matcher
|
||||
output []rune // a slice of runes that represents the accepted output runes for the Matcher
|
||||
currRune *runeToken // hold the last rune that was read from the input
|
||||
parent *MatchDialog // the parent MatchDialog, in case this one was forked
|
||||
}
|
||||
|
||||
// runeToken records the outcome of a single MatchDialog.NextRune() call, so
// that a following Accept() or Skip() knows what rune was read and how far
// the input offset must be advanced.
type runeToken struct {
|
||||
Rune rune // the rune as returned by the parser's peek()
|
||||
ByteSize int // second value returned by peek(); added to the input offset on Accept() or Skip()
|
||||
OK bool // the ok flag returned by peek(); Accept() and Skip() panic when this is false
|
||||
}
|
||||
|
||||
// NextRune retrieves the next rune from the input.
|
||||
//
|
||||
// It returns the rune and a boolean. The boolean will be false in case an
|
||||
// invalid UTF8 rune or the end of the file was encountered.
|
||||
//
|
||||
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
||||
// to respectively add the rune to the MatchDialog's resulting output or to
|
||||
// fully ignore it. This way, a Matcher has full control over what runes are
|
||||
// significant for the resulting output of that matcher.
|
||||
//
|
||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||
// rune is explicitly accepted or skipped as described above.
|
||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||
if m.currRune != nil {
|
||||
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
||||
}
|
||||
r, w, ok := m.p.peek(m.inputOffset)
|
||||
m.currRune = &runeToken{r, w, ok}
|
||||
if ok {
|
||||
m.input = append(m.input, r)
|
||||
}
|
||||
return r, ok
|
||||
}
|
||||
|
||||
// Fork splits off a child MatchDialog, containing the same offset as the
|
||||
// parent MatchDialog, but with all other data in a fresh state.
|
||||
//
|
||||
// By forking, a Matcher function can freely work with a MatchDialog, without
|
||||
// affecting the parent MatchDialog. This is for example useful when the
|
||||
// Matcher function must perform some form of lookahead.
|
||||
//
|
||||
// When a successful match was found, the Matcher function can call
|
||||
// child.Merge() to have the resulting output added to the parent MatchDialog.
|
||||
// When no match was found, the forked child can simply be discarded.
|
||||
//
|
||||
// Example case: A Matcher checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
||||
// This is done in 4 steps and only after finishing all steps, the Matcher
|
||||
// function can confirm a successful match. The Matcher function for this
|
||||
// case could look like this (yes, it's naive, but it shows the point):
|
||||
//
|
||||
// func MatchAbcd(m *MatchDialog) bool {
|
||||
// child := m.Fork() // fork to keep m from input untouched
|
||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||
// if r, ok := m.NextRune(); !ok || r != letter {
|
||||
// return false // report mismatch, m is left untouched
|
||||
// }
|
||||
// child.Accept() // add rune to child output
|
||||
// }
|
||||
// child.Merge() // we have a match, add resulting output to parent
|
||||
// return true // and report the successful match
|
||||
// }
|
||||
func (m *MatchDialog) Fork() *MatchDialog {
|
||||
child := &MatchDialog{
|
||||
p: m.p,
|
||||
inputOffset: m.inputOffset,
|
||||
parent: m,
|
||||
}
|
||||
return child
|
||||
}
|
||||
|
||||
// Accept will add the last rune as read by NextRune() to the resulting
|
||||
// output of the MatchDialog.
|
||||
func (m *MatchDialog) Accept() {
|
||||
m.checkAllowedCall("Accept()")
|
||||
m.output = append(m.output, m.currRune.Rune)
|
||||
m.inputOffset += m.currRune.ByteSize
|
||||
m.currRune = nil
|
||||
}
|
||||
|
||||
// Skip will ignore the last rune as read by NextRune().
|
||||
func (m *MatchDialog) Skip() {
|
||||
m.checkAllowedCall("Skip()")
|
||||
m.inputOffset += m.currRune.ByteSize
|
||||
m.currRune = nil
|
||||
}
|
||||
|
||||
func (m *MatchDialog) checkAllowedCall(name string) {
|
||||
if m.currRune == nil {
|
||||
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
||||
}
|
||||
if !m.currRune.OK {
|
||||
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
|
||||
}
|
||||
}
|
||||
|
||||
// Merge merges the resulting output from a forked child MatchDialog back into
|
||||
// its parent: The runes that are accepted in the child are added to the parent
|
||||
// runes and the parent's offset is advanced to the child's offset.
|
||||
//
|
||||
// After the merge, the child MatchDialog is reset so it can immediately be
|
||||
// reused for performing another match (all data are cleared, except for the
|
||||
// input offset which is kept at its current position).
|
||||
func (m *MatchDialog) Merge() bool {
|
||||
if m.parent == nil {
|
||||
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
||||
}
|
||||
m.parent.input = append(m.parent.input, m.input...)
|
||||
m.parent.output = append(m.parent.output, m.output...)
|
||||
m.parent.inputOffset = m.inputOffset
|
||||
m.ClearOutput()
|
||||
m.ClearInput()
|
||||
return true
|
||||
}
|
||||
|
||||
// ClearOutput clears the resulting output for the MatchDialog, but it keeps
|
||||
// the input and input offset as-is.
|
||||
func (m *MatchDialog) ClearOutput() {
|
||||
m.output = []rune{}
|
||||
}
|
||||
|
||||
// ClearInput clears the input for the MatchDialog, but it keeps the output
|
||||
// and input offset as-is.
|
||||
func (m *MatchDialog) ClearInput() {
|
||||
m.input = []rune{}
|
||||
}
|
|
@ -1,559 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// C provides convenient access to a range of parser/combinators
|
||||
// that can be used to construct Matcher functions.
|
||||
//
|
||||
// When using C in your own parser, then it is advised to create
|
||||
// a variable in your own package to reference it:
|
||||
//
|
||||
// var c = parsekit.C
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
Rune func(rune) Matcher
|
||||
Runes func(...rune) Matcher
|
||||
RuneRange func(rune, rune) Matcher
|
||||
Str func(string) Matcher
|
||||
StrNoCase func(string) Matcher
|
||||
Any func(...Matcher) Matcher
|
||||
Not func(Matcher) Matcher
|
||||
Opt func(Matcher) Matcher
|
||||
Seq func(...Matcher) Matcher
|
||||
Rep func(int, Matcher) Matcher
|
||||
Min func(int, Matcher) Matcher
|
||||
Max func(int, Matcher) Matcher
|
||||
ZeroOrMore func(Matcher) Matcher
|
||||
OneOrMore func(Matcher) Matcher
|
||||
MinMax func(int, int, Matcher) Matcher
|
||||
Separated func(separated Matcher, separator Matcher) Matcher
|
||||
Except func(except Matcher, matcher Matcher) Matcher
|
||||
}{
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
Opt: MatchOpt,
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
Seq: MatchSeq,
|
||||
Rep: MatchRep,
|
||||
Min: MatchMin,
|
||||
Max: MatchMax,
|
||||
ZeroOrMore: MatchZeroOrMore,
|
||||
OneOrMore: MatchOneOrMore,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
}
|
||||
|
||||
// MatchRune creates a Matcher function that checks if the next rune from
|
||||
// the input matches the provided rune.
|
||||
func MatchRune(expected rune) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
input, ok := m.NextRune()
|
||||
if ok && input == expected {
|
||||
m.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRunes creates a Matcher function that that checks if the next rune
|
||||
// from the input is one of the provided runes.
|
||||
func MatchRunes(expected ...rune) Matcher {
|
||||
s := string(expected)
|
||||
return func(m *MatchDialog) bool {
|
||||
input, ok := m.NextRune()
|
||||
if ok {
|
||||
if strings.ContainsRune(s, input) {
|
||||
m.Accept()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRuneRange creates a Matcher function that that checks if the next rune
|
||||
// from the input is contained by the provided rune range.
|
||||
//
|
||||
// The rune range is defined by a start and an end rune, inclusive, so:
|
||||
//
|
||||
// MatchRuneRange('g', 'k')
|
||||
//
|
||||
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||
func MatchRuneRange(start rune, end rune) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
if end < start {
|
||||
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||
}
|
||||
input, ok := m.NextRune()
|
||||
if ok && input >= start && input <= end {
|
||||
m.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchStr creates a Matcher that will check if the upcoming runes on the
|
||||
// input match the provided string.
|
||||
// TODO make this a more efficient string-level match?
|
||||
func MatchStr(expected string) Matcher {
|
||||
var matchers = []Matcher{}
|
||||
for _, r := range expected {
|
||||
matchers = append(matchers, MatchRune(r))
|
||||
}
|
||||
return MatchSeq(matchers...)
|
||||
}
|
||||
|
||||
// MatchStrNoCase creates a Matcher that will check if the upcoming runes
|
||||
// on the input match the provided string in a case-insensitive manner.
|
||||
// TODO make this a more efficient string-level match?
|
||||
func MatchStrNoCase(expected string) Matcher {
|
||||
var matchers = []Matcher{}
|
||||
for _, r := range expected {
|
||||
u := unicode.ToUpper(r)
|
||||
l := unicode.ToLower(r)
|
||||
matchers = append(matchers, MatchRunes(u, l))
|
||||
}
|
||||
return MatchSeq(matchers...)
|
||||
}
|
||||
|
||||
// MatchOpt creates a Matcher that makes the provided Matcher optional.
|
||||
// When the provided Matcher applies, then its output is used, otherwise
|
||||
// no output is generated but still a successful match is reported.
|
||||
func MatchOpt(matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
if matcher(child) {
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchSeq creates a Matcher that checks if the provided Matchers can be
|
||||
// applied in their exact order. Only if all matcher apply, the sequence
|
||||
// reports successful match.
|
||||
func MatchSeq(matchers ...Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
for _, matcher := range matchers {
|
||||
if !matcher(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAny creates a Matcher that checks if any of the provided Matchers
|
||||
// can be applied. They are applied in their provided order. The first Matcher
|
||||
// that applies is used for reporting back a match.
|
||||
func MatchAny(matchers ...Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
for _, matcher := range matchers {
|
||||
child := m.Fork()
|
||||
if matcher(child) {
|
||||
return child.Merge()
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchNot creates a Matcher that checks if the provided Matcher applies to
|
||||
// the current input. If it does, then a failed match will be reported. If it
|
||||
// does not, then the next rune from the input will be reported as a match.
|
||||
func MatchNot(matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
probe := m.Fork()
|
||||
if matcher(probe) {
|
||||
return false
|
||||
}
|
||||
_, ok := m.NextRune()
|
||||
if ok {
|
||||
m.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRep creates a Matcher that checks if the provided Matcher can be
|
||||
// applied exactly the provided amount of times.
|
||||
//
|
||||
// Note that the input can contain more Matches for the provided matcher, e.g.:
|
||||
//
|
||||
// MatchRep(4, MatchRune('X'))
|
||||
//
|
||||
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
||||
// In that last case, there will be a remainder "XX" of the input.
|
||||
func MatchRep(times int, matcher Matcher) Matcher {
|
||||
return matchMinMax(times, times, matcher)
|
||||
}
|
||||
|
||||
// MatchMin creates a Matcher that checks if the provided Matcher can be
|
||||
// applied at least the provided minimum number of times.
|
||||
// When more matches are possible, these will be included in the output.
|
||||
func MatchMin(min int, matcher Matcher) Matcher {
|
||||
return matchMinMax(min, -1, matcher)
|
||||
}
|
||||
|
||||
// MatchMax creates a Matcher that checks if the provided Matcher can be
|
||||
// applied at maximum the provided minimum number of times.
|
||||
// When more matches are possible, these will be included in the output.
|
||||
// Zero matches are considered a successful match.
|
||||
func MatchMax(max int, matcher Matcher) Matcher {
|
||||
return matchMinMax(0, max, matcher)
|
||||
}
|
||||
|
||||
// MatchZeroOrMore creates a Matcher that checks if the provided Matcher can
|
||||
// be applied zero or more times. All matches will be included in the output.
|
||||
// Zero matches are considered a successful match.
|
||||
func MatchZeroOrMore(matcher Matcher) Matcher {
|
||||
return matchMinMax(0, -1, matcher)
|
||||
}
|
||||
|
||||
// MatchOneOrMore creates a Matcher that checks if the provided Matcher can
|
||||
// be applied one or more times. All matches will be included in the output.
|
||||
func MatchOneOrMore(matcher Matcher) Matcher {
|
||||
return matchMinMax(1, -1, matcher)
|
||||
}
|
||||
|
||||
// MatchMinMax creates a Matcher that checks if the provided Matcher can
|
||||
// be applied between the provided minimum and maximum number of times,
|
||||
// inclusive. All matches will be included in the output.
|
||||
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
||||
if max < 0 {
|
||||
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
||||
}
|
||||
if min < 0 {
|
||||
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
||||
}
|
||||
return matchMinMax(min, max, matcher)
|
||||
}
|
||||
|
||||
func matchMinMax(min int, max int, matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
if max >= 0 && min > max {
|
||||
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
|
||||
}
|
||||
total := 0
|
||||
// Check for the minimum required amount of matches.
|
||||
for total < min {
|
||||
total++
|
||||
if !matcher(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// No specified max: include the rest of the available matches.
|
||||
// Specified max: include the rest of the availble matches, up to the max.
|
||||
child.Merge()
|
||||
for max < 0 || total < max {
|
||||
total++
|
||||
if !matcher(child) {
|
||||
break
|
||||
}
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchSeparated creates a Matcher that checks for a pattern of one or more
|
||||
// Matchers of one type (the separated), separated by Matches of another type
|
||||
// (the separator). All matches (separated + separator) are included in the
|
||||
// output.
|
||||
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
|
||||
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
||||
}
|
||||
|
||||
// MatchExcept creates a Matcher that checks if the provided matcher can be
|
||||
// applied to the upcoming input. It also checks if the except Matcher can be
|
||||
// applied. If the matcher applies, but the except Matcher too, then the match
|
||||
// as a whole will be treated as a mismatch.
|
||||
func MatchExcept(except Matcher, matcher Matcher) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
if except(m.Fork()) {
|
||||
return false
|
||||
}
|
||||
return matcher(m)
|
||||
}
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms that can be used to
|
||||
// build combinators or parsing rules.
|
||||
//
|
||||
// In parsekit, an atom is defined as a ready to go Matcher function.
|
||||
//
|
||||
// When using A in your own parser, then it is advised to create
|
||||
// a variable in your own package to reference it:
|
||||
//
|
||||
// var a = parsekit.A
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
EndOfFile Matcher
|
||||
AnyRune Matcher
|
||||
Space Matcher
|
||||
Tab Matcher
|
||||
CR Matcher
|
||||
LF Matcher
|
||||
CRLF Matcher
|
||||
Excl Matcher
|
||||
DoubleQuote Matcher
|
||||
Hash Matcher
|
||||
Dollar Matcher
|
||||
Percent Matcher
|
||||
Amp Matcher
|
||||
SingleQuote Matcher
|
||||
RoundOpen Matcher
|
||||
RoundClose Matcher
|
||||
Asterisk Matcher
|
||||
Plus Matcher
|
||||
Comma Matcher
|
||||
Minus Matcher
|
||||
Dot Matcher
|
||||
Slash Matcher
|
||||
Colon Matcher
|
||||
Semicolon Matcher
|
||||
AngleOpen Matcher
|
||||
Equal Matcher
|
||||
AngleClose Matcher
|
||||
Question Matcher
|
||||
At Matcher
|
||||
SquareOpen Matcher
|
||||
Backslash Matcher
|
||||
SquareClose Matcher
|
||||
Caret Matcher
|
||||
Underscore Matcher
|
||||
Backquote Matcher
|
||||
CurlyOpen Matcher
|
||||
Pipe Matcher
|
||||
CurlyClose Matcher
|
||||
Tilde Matcher
|
||||
Newline Matcher
|
||||
Whitespace Matcher
|
||||
WhitespaceAndNewlines Matcher
|
||||
EndOfLine Matcher
|
||||
Digit Matcher
|
||||
ASCII Matcher
|
||||
ASCIILower Matcher
|
||||
ASCIIUpper Matcher
|
||||
HexDigit Matcher
|
||||
}{
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
Space: C.Rune(' '),
|
||||
Tab: C.Rune('\t'),
|
||||
CR: C.Rune('\r'),
|
||||
LF: C.Rune('\n'),
|
||||
CRLF: C.Str("\r\n"),
|
||||
Excl: C.Rune('!'),
|
||||
DoubleQuote: C.Rune('"'),
|
||||
Hash: C.Rune('#'),
|
||||
Dollar: C.Rune('$'),
|
||||
Percent: C.Rune('%'),
|
||||
Amp: C.Rune('&'),
|
||||
SingleQuote: C.Rune('\''),
|
||||
RoundOpen: C.Rune('('),
|
||||
RoundClose: C.Rune(')'),
|
||||
Asterisk: C.Rune('*'),
|
||||
Plus: C.Rune('+'),
|
||||
Comma: C.Rune(','),
|
||||
Minus: C.Rune('-'),
|
||||
Dot: C.Rune('.'),
|
||||
Slash: C.Rune('/'),
|
||||
Colon: C.Rune(':'),
|
||||
Semicolon: C.Rune(';'),
|
||||
AngleOpen: C.Rune('<'),
|
||||
Equal: C.Rune('='),
|
||||
AngleClose: C.Rune('>'),
|
||||
Question: C.Rune('?'),
|
||||
At: C.Rune('@'),
|
||||
SquareOpen: C.Rune('['),
|
||||
Backslash: C.Rune('\\'),
|
||||
SquareClose: C.Rune(']'),
|
||||
Caret: C.Rune('^'),
|
||||
Underscore: C.Rune('_'),
|
||||
Backquote: C.Rune('`'),
|
||||
CurlyOpen: C.Rune('{'),
|
||||
Pipe: C.Rune('|'),
|
||||
CurlyClose: C.Rune('}'),
|
||||
Tilde: C.Rune('~'),
|
||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||
Digit: C.RuneRange('0', '9'),
|
||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||
ASCIILower: C.RuneRange('a', 'z'),
|
||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||
}
|
||||
|
||||
// MatchEndOfFile creates a Matcher that checks if the end of the input data
|
||||
// has been reached. This Matcher will never produce output. It only reports
|
||||
// a successful or a failing match through its boolean return value.
|
||||
func MatchEndOfFile() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
fork := m.Fork()
|
||||
input, ok := fork.NextRune()
|
||||
return !ok && input == eofRune
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAnyRune creates a Matcher function that checks if a valid rune can be
|
||||
// read from the input. It reports back a successful match if the end of the
|
||||
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||
func MatchAnyRune() Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
_, ok := m.NextRune()
|
||||
if ok {
|
||||
m.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// M provides convenient access to a range of modifiers that can be
|
||||
// used when creating Matcher functions.
|
||||
//
|
||||
// In parsekit, a modifier is defined as a Matcher function that modifies the
|
||||
// resulting output of another Matcher in some way. It does not do any matching
|
||||
// against input of its own.
|
||||
//
|
||||
// When using M in your own parser, then it is advised to create
|
||||
// a variable in your own package to reference it:
|
||||
//
|
||||
// var m = parsekit.M
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var M = struct {
|
||||
Drop func(Matcher) Matcher
|
||||
Trim func(Matcher, string) Matcher
|
||||
TrimLeft func(Matcher, string) Matcher
|
||||
TrimRight func(Matcher, string) Matcher
|
||||
ToLower func(Matcher) Matcher
|
||||
ToUpper func(Matcher) Matcher
|
||||
Replace func(Matcher, string) Matcher
|
||||
ModifyByCallback func(Matcher, func(string) string) Matcher
|
||||
}{
|
||||
Drop: ModifyDrop,
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ModifyByCallback: ModifyByCallback,
|
||||
}
|
||||
|
||||
// ModifyDrop creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is discarded completely.
|
||||
//
|
||||
// Note that if the Matcher does not apply, a mismatch will be reported back,
|
||||
// even though we would have dropped the output anyway. So if you would like
|
||||
// to drop optional whitespace, then use something like:
|
||||
//
|
||||
// M.Drop(C.Opt(A.Whitespace))
|
||||
//
|
||||
// instead of:
|
||||
//
|
||||
// M.Drop(A.Whitespace)
|
||||
//
|
||||
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
||||
// string "bork" would not match against the second form, but " bork" would.
|
||||
// In both cases, it would match the first form.
|
||||
func ModifyDrop(matcher Matcher) Matcher {
|
||||
return ModifyByCallback(matcher, func(s string) string {
|
||||
return ""
|
||||
})
|
||||
}
|
||||
|
||||
// ModifyTrim creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from both the left and the right of the output.
|
||||
// TODO move cutset to the left arg
|
||||
func ModifyTrim(matcher Matcher, cutset string) Matcher {
|
||||
return modifyTrim(matcher, cutset, true, true)
|
||||
}
|
||||
|
||||
// ModifyTrimLeft creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from the left of the output.
|
||||
func ModifyTrimLeft(matcher Matcher, cutset string) Matcher {
|
||||
return modifyTrim(matcher, cutset, true, false)
|
||||
}
|
||||
|
||||
// ModifyTrimRight creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from the right of the output.
|
||||
func ModifyTrimRight(matcher Matcher, cutset string) Matcher {
|
||||
return modifyTrim(matcher, cutset, false, true)
|
||||
}
|
||||
|
||||
func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) Matcher {
|
||||
modfunc := func(s string) string {
|
||||
if trimLeft {
|
||||
s = strings.TrimLeft(s, cutset)
|
||||
}
|
||||
if trimRight {
|
||||
s = strings.TrimRight(s, cutset)
|
||||
}
|
||||
return s
|
||||
}
|
||||
return ModifyByCallback(matcher, modfunc)
|
||||
}
|
||||
|
||||
// ModifyToUpper creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is taken and all of its characters are
|
||||
// converted into upper case.
|
||||
func ModifyToUpper(matcher Matcher) Matcher {
|
||||
return ModifyByCallback(matcher, strings.ToUpper)
|
||||
}
|
||||
|
||||
// ModifyToLower creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is taken and all of its characters are
|
||||
// converted into lower case.
|
||||
func ModifyToLower(matcher Matcher) Matcher {
|
||||
return ModifyByCallback(matcher, strings.ToLower)
|
||||
}
|
||||
|
||||
// ModifyReplace creates a Matcher that checks if the provided Matcher applies.
|
||||
// If it does, then its output is replaced by the provided string.
|
||||
func ModifyReplace(matcher Matcher, s string) Matcher {
|
||||
return ModifyByCallback(matcher, func(string) string {
|
||||
return s
|
||||
})
|
||||
}
|
||||
|
||||
// ModifyByCallback creates a Matcher that checks if the provided matcher applies.
|
||||
// If it does, then its output is taken and it is fed to the provided modfunc.
|
||||
// This is a simple function that takes a string on input and returns a possibly
|
||||
// modified string on output. The return value of the modfunc will replace the
|
||||
// resulting output.
|
||||
func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
|
||||
return func(m *MatchDialog) bool {
|
||||
child := m.Fork()
|
||||
if matcher(child) {
|
||||
s := modfunc(string(child.output))
|
||||
child.output = []rune(s)
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
58
parsekit.go
58
parsekit.go
|
@ -24,17 +24,16 @@ func NewParser(startState StateHandler) *Parser {
|
|||
return &Parser{startState: startState}
|
||||
}
|
||||
|
||||
// Run represents a single parse run for a Parser.
|
||||
// TODO rename to ParseRun
|
||||
type Run struct {
|
||||
p *P // a struct holding the internal state of a parse run
|
||||
// ParseRun represents a single parse run for a Parser.
|
||||
type ParseRun struct {
|
||||
p *ParseAPI // holds the internal state of a parse run
|
||||
}
|
||||
|
||||
// Parse starts a parse run on the provided input data.
|
||||
// To retrieve parse items from the run, make use of the Run.Next() method.
|
||||
func (p *Parser) Parse(input string) *Run {
|
||||
return &Run{
|
||||
p: &P{
|
||||
// To retrieve parser Items from the run, make use of the ParseRun.Next() method.
|
||||
func (p *Parser) Parse(input string) *ParseRun {
|
||||
return &ParseRun{
|
||||
p: &ParseAPI{
|
||||
input: input,
|
||||
len: len(input),
|
||||
cursorLine: 1,
|
||||
|
@ -51,7 +50,7 @@ func (p *Parser) Parse(input string) *Run {
|
|||
// On error or when successfully reaching the end of the input, false is returned.
|
||||
// When an error occurred, false will be returned and the error return value will
|
||||
// be set (default is nil).
|
||||
func (run *Run) Next() (Item, *Error, bool) {
|
||||
func (run *ParseRun) Next() (Item, *Error, bool) {
|
||||
// State handling loop: we handle states, until an Item is ready to be returned.
|
||||
for {
|
||||
select {
|
||||
|
@ -66,7 +65,7 @@ func (run *Run) Next() (Item, *Error, bool) {
|
|||
}
|
||||
}
|
||||
|
||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||
func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||
switch {
|
||||
case i.Type == ItemEOF:
|
||||
return i, nil, false
|
||||
|
@ -84,7 +83,7 @@ func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
|||
// type StateHandler. This function represents the current status and
|
||||
// is responsible for moving the parser to its next status, depending
|
||||
// on the parsed input data.
|
||||
func (run *Run) runNextStateHandler() {
|
||||
func (run *ParseRun) runNextStateHandler() {
|
||||
if state, ok := run.getNextStateHandler(); ok {
|
||||
run.invokeNextStateHandler(state)
|
||||
}
|
||||
|
@ -115,7 +114,7 @@ func (run *Run) runNextStateHandler() {
|
|||
//
|
||||
// When no routing decision is provided by a StateHandler, then this is
|
||||
// considered a bug in the state handler, and the parser will panic.
|
||||
func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
||||
func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
|
||||
switch {
|
||||
case run.p.nextState != nil:
|
||||
return run.p.nextState, true
|
||||
|
@ -132,42 +131,45 @@ func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
|||
|
||||
// invokeNextStateHandler moves the parser state to the provided state
|
||||
// and invokes the StateHandler function.
|
||||
func (run *Run) invokeNextStateHandler(state StateHandler) {
|
||||
func (run *ParseRun) invokeNextStateHandler(state StateHandler) {
|
||||
run.p.state = state
|
||||
run.p.nextState = nil
|
||||
run.p.expecting = ""
|
||||
run.p.state(run.p)
|
||||
}
|
||||
|
||||
// MatcherWrapper is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a Wrapper function.
|
||||
// The MatcherWrapper can be instantiated using the parsekit.NewMatcher()
|
||||
// Matcher is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
||||
// method.
|
||||
//
|
||||
// To match input data against the wrapped Matcher function, use the method
|
||||
// MatcherWrapper.Parse().
|
||||
type MatcherWrapper struct {
|
||||
// Matcher.Parse().
|
||||
type Matcher struct {
|
||||
parser *Parser
|
||||
}
|
||||
|
||||
// NewMatcher instantiates a new MatcherWrapper.
|
||||
// NewMatcher instantiates a new Matcher.
|
||||
//
|
||||
// This is a simple wrapper around a Matcher function. It can be used to
|
||||
// match an input string against that Matcher function and retrieve the
|
||||
// This is a simple wrapper around a TokenHandler function. It can be used to
|
||||
// match an input string against that TokenHandler function and retrieve the
|
||||
// results in a straightforward way.
|
||||
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
|
||||
handler := func(p *P) {
|
||||
//
|
||||
// The 'expects' parameter is used for creating an error message in case parsed
|
||||
// input does not match the TokenHandler.
|
||||
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||
stateHandler := func(p *ParseAPI) {
|
||||
p.Expects(expects)
|
||||
if p.On(matcher).Accept() {
|
||||
if p.On(tokenHandler).Accept() {
|
||||
p.EmitLiteral(0) // ItemType is irrelevant
|
||||
}
|
||||
}
|
||||
return &MatcherWrapper{parser: NewParser(handler)}
|
||||
return &Matcher{parser: NewParser(stateHandler)}
|
||||
}
|
||||
|
||||
// Parse runs the wrapped Matcher function against the provided input data.
|
||||
func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
|
||||
item, err, ok := w.parser.Parse(input).Next()
|
||||
// Parse checks for a match on the provided input data.
|
||||
func (m *Matcher) Parse(input string) (string, *Error, bool) {
|
||||
item, err, ok := m.parser.Parse(input).Next()
|
||||
if !ok {
|
||||
return "", err, false
|
||||
}
|
||||
|
|
|
@ -14,21 +14,21 @@ const TestItem parsekit.ItemType = 1
|
|||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
type MatcherTest struct {
|
||||
input string
|
||||
matcher parsekit.Matcher
|
||||
mustMatch bool
|
||||
expected string
|
||||
type TokenHandlerTest struct {
|
||||
input string
|
||||
tokenHandler parsekit.TokenHandler
|
||||
mustMatch bool
|
||||
expected string
|
||||
}
|
||||
|
||||
func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
|
||||
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
|
||||
for _, test := range testSet {
|
||||
RunMatcherTest(t, test)
|
||||
RunTokenHandlerTest(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func RunMatcherTest(t *testing.T, test MatcherTest) {
|
||||
output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)
|
||||
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||
output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input)
|
||||
|
||||
if test.mustMatch {
|
||||
if !ok {
|
||||
|
|
|
@ -2,17 +2,17 @@ package parsekit
|
|||
|
||||
import "unicode/utf8"
|
||||
|
||||
// StateHandler defines the type of function that must be implemented to
|
||||
// handle a parsing state.
|
||||
// StateHandler defines the type of function that must be implemented to handle
|
||||
// a parsing state in a Parser state machine.
|
||||
//
|
||||
// A StateHandler function gets a P struct as its input. This struct holds
|
||||
// A StateHandler function gets a ParseAPI struct as its input. This struct holds
|
||||
// all the internal state for the parsing state machine and provides the
|
||||
// interface that the StateHandler must use to interact with the parser.
|
||||
type StateHandler func(*P)
|
||||
// interface that the StateHandler uses to interact with the parser.
|
||||
type StateHandler func(*ParseAPI)
|
||||
|
||||
// P holds the internal state of a parse run and provides an API to
|
||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||
// StateHandler methods to communicate with the parser.
|
||||
type P struct {
|
||||
type ParseAPI struct {
|
||||
state StateHandler // the function that handles the current state
|
||||
nextState StateHandler // the function that will handle the next state
|
||||
routeStack []StateHandler // route stack, for handling nested parsing
|
||||
|
@ -37,7 +37,7 @@ type P struct {
|
|||
// The boolean will be false in case no upcoming rune can be peeked
|
||||
// (end of data or invalid UTF8 character). In this case, the returned rune
|
||||
// will be one of eofRune or invalidRune.
|
||||
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
||||
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
||||
return handleRuneError(r, w)
|
||||
}
|
||||
|
|
|
@ -4,6 +4,12 @@ import (
|
|||
"fmt"
|
||||
)
|
||||
|
||||
// Item represents an item that can be emitted from the parser.
|
||||
type Item struct {
|
||||
Type ItemType
|
||||
Value string
|
||||
}
|
||||
|
||||
// ItemType represents the type of a parser Item.
|
||||
//
|
||||
// When creating your own ItemType values, then make use of positive integer
|
||||
|
@ -19,26 +25,14 @@ const ItemEOF ItemType = -1
|
|||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
||||
// Item represents an item that can be emitted from the parser.
|
||||
type Item struct {
|
||||
Type ItemType
|
||||
Value string
|
||||
}
|
||||
|
||||
// Emit passes a Parser item to the client, including the provided string.
|
||||
func (p *P) Emit(t ItemType, v string) {
|
||||
func (p *ParseAPI) Emit(t ItemType, v string) {
|
||||
p.items <- Item{t, v}
|
||||
p.buffer.reset()
|
||||
}
|
||||
|
||||
// EmitLiteral passes a Parser item to the client, including accumulated
|
||||
// string buffer data as a literal string.
|
||||
func (p *P) EmitLiteral(t ItemType) {
|
||||
p.Emit(t, p.buffer.asLiteralString())
|
||||
}
|
||||
|
||||
// BufLiteral retrieves the contents of the parser buffer (all the runes that
|
||||
// were added to it using P.Accept()) as a literal string.
|
||||
// BufLiteral retrieves the contents of the parser's string buffer (all the
|
||||
// runes that were added to it using ParseAPI.Accept()) as a literal string.
|
||||
//
|
||||
// Literal means that if the input had for example the subsequent runes '\' and 'n'
|
||||
// in it, then the literal string would have a backslash and an 'n' in it, not a
|
||||
|
@ -46,12 +40,19 @@ func (p *P) EmitLiteral(t ItemType) {
|
|||
//
|
||||
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
|
||||
func (p *P) BufLiteral() string {
|
||||
func (p *ParseAPI) BufLiteral() string {
|
||||
return p.buffer.asLiteralString()
|
||||
}
|
||||
|
||||
// BufInterpreted retrieves the contents of the parser buffer (all the runes that
|
||||
// were added to it using P.Accept()) as an interpreted string.
|
||||
// EmitLiteral passes a parser Item to the client, including the accumulated
|
||||
// string buffer data as a literal string.
|
||||
func (p *ParseAPI) EmitLiteral(t ItemType) {
|
||||
p.Emit(t, p.BufLiteral())
|
||||
}
|
||||
|
||||
// BufInterpreted retrieves the contents of the parser's string buffer (all
|
||||
// the runes that were added to it using ParseAPI.Accept()) as an
|
||||
// interpreted string.
|
||||
//
|
||||
// Interpreted means that the contents are treated as a Go double quoted
|
||||
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
|
||||
|
@ -64,7 +65,7 @@ func (p *P) BufLiteral() string {
|
|||
//
|
||||
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
|
||||
func (p *P) BufInterpreted() (string, bool) {
|
||||
func (p *ParseAPI) BufInterpreted() (string, bool) {
|
||||
s, err := p.buffer.asInterpretedString()
|
||||
if err != nil {
|
||||
p.EmitError(
|
||||
|
@ -81,16 +82,12 @@ func (p *P) BufInterpreted() (string, bool) {
|
|||
// This method returns a boolean value, indicating whether or not the string
|
||||
// interpretation was successful. On invalid string data, an error will
|
||||
// automatically be emitted and false will be returned.
|
||||
func (p *P) EmitInterpreted(t ItemType) bool {
|
||||
s, err := p.buffer.asInterpretedString()
|
||||
if err != nil {
|
||||
p.EmitError(
|
||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
||||
p.buffer.asLiteralString(), err)
|
||||
return false
|
||||
func (p *ParseAPI) EmitInterpreted(t ItemType) bool {
|
||||
if s, ok := p.BufInterpreted(); ok {
|
||||
p.Emit(t, s)
|
||||
return true
|
||||
}
|
||||
p.Emit(t, s)
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
// Error is used as the error type when parsing errors occur.
|
||||
|
@ -115,15 +112,15 @@ func (err *Error) ErrorFull() string {
|
|||
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column)
|
||||
}
|
||||
|
||||
// EmitError emits a Parser error item to the client.
|
||||
func (p *P) EmitError(format string, args ...interface{}) {
|
||||
// EmitError emits a parser error item to the client.
|
||||
func (p *ParseAPI) EmitError(format string, args ...interface{}) {
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.Emit(ItemError, message)
|
||||
}
|
||||
|
||||
// UnexpectedInput is used by a StateHandler function to emit an error item
|
||||
// that tells the client that an unexpected rune was encountered in the input.
|
||||
func (p *P) UnexpectedInput() {
|
||||
func (p *ParseAPI) UnexpectedInput() {
|
||||
r, _, ok := p.peek(0)
|
||||
switch {
|
||||
case ok:
|
||||
|
@ -137,7 +134,7 @@ func (p *P) UnexpectedInput() {
|
|||
}
|
||||
}
|
||||
|
||||
func fmtExpects(p *P) string {
|
||||
func fmtExpects(p *ParseAPI) string {
|
||||
if p.expecting == "" {
|
||||
return ""
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package parsekit
|
||||
|
||||
// Expects is used to let a state function describe what input it is expecting.
|
||||
// Expects is used to let a StateHandler function describe what input it is expecting.
|
||||
// This expectation is used in error messages to make them more descriptive.
|
||||
//
|
||||
// When defining an expectation inside a StateHandler, you do not need to
|
||||
|
@ -13,6 +13,6 @@ package parsekit
|
|||
// 2) there is an invalid UTF8 character on input
|
||||
//
|
||||
// 3) the end of the file was reached.
|
||||
func (p *P) Expects(description string) {
|
||||
func (p *ParseAPI) Expects(description string) {
|
||||
p.expecting = description
|
||||
}
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
package parsekit
|
||||
|
||||
// On checks if the input at the current cursor position matches the provided Matcher.
|
||||
// On must be chained with another method, which tells the parser what action to
|
||||
// perform when a match was found:
|
||||
// On checks if the input at the current cursor position matches the provided
|
||||
// TokenHandler. On must be chained with another method, which tells the parser
|
||||
// what action to perform when a match was found:
|
||||
//
|
||||
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||
//
|
||||
// 2) On(...).Accept() - Move cursor forward, add matched runes to the string buffer.
|
||||
// 2) On(...).Accept() - Move cursor forward, add runes to the parser's string buffer.
|
||||
//
|
||||
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
||||
//
|
||||
|
@ -32,16 +32,16 @@ package parsekit
|
|||
// p.RouteTo(stateHandlerC)
|
||||
// }
|
||||
//
|
||||
// // When there's a "hi" on input, emit it.
|
||||
// // When there's a "hi" on input, emit a parser item for it.
|
||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||
// p.Emit(SomeItemType, p.BufLiteral())
|
||||
// }
|
||||
func (p *P) On(matcher Matcher) *matchAction {
|
||||
m := &MatchDialog{p: p}
|
||||
if matcher == nil {
|
||||
panic("internal parser error: matcher argument for On() is nil")
|
||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
|
||||
m := &TokenAPI{p: p}
|
||||
if tokenHandler == nil {
|
||||
panic("internal parser error: tokenHandler argument for On() is nil")
|
||||
}
|
||||
ok := matcher(m)
|
||||
ok := tokenHandler(m)
|
||||
|
||||
// Keep track of the last match, to allow parser implementations
|
||||
// to access it in an easy way. Typical use would be something like:
|
||||
|
@ -51,7 +51,7 @@ func (p *P) On(matcher Matcher) *matchAction {
|
|||
// }
|
||||
p.LastMatch = string(m.input)
|
||||
|
||||
return &matchAction{
|
||||
return &MatchAction{
|
||||
p: p,
|
||||
ok: ok,
|
||||
input: m.input,
|
||||
|
@ -60,9 +60,10 @@ func (p *P) On(matcher Matcher) *matchAction {
|
|||
}
|
||||
}
|
||||
|
||||
// matchAction is a struct that is used for building the On()-method chain.
|
||||
type matchAction struct {
|
||||
p *P
|
||||
// MatchAction is a struct that is used for building the On()-method chain.
|
||||
// The On() method will return an initialized struct of this type.
|
||||
type MatchAction struct {
|
||||
p *ParseAPI
|
||||
ok bool
|
||||
input []rune
|
||||
output []rune
|
||||
|
@ -70,11 +71,12 @@ type matchAction struct {
|
|||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to store the input that matched in the string buffer.
|
||||
// and to store the input that matched in the parser's string buffer.
|
||||
// When no match was found, then no action is taken.
|
||||
// It returns a routeAction struct, which provides methods that can be used
|
||||
// to tell the parser what state to go to next.
|
||||
func (a *matchAction) Accept() bool {
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *MatchAction) Accept() bool {
|
||||
if a.ok {
|
||||
a.p.buffer.writeString(string(a.output))
|
||||
a.advanceCursor()
|
||||
|
@ -83,10 +85,11 @@ func (a *matchAction) Accept() bool {
|
|||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without storing the actual match in the string buffer.
|
||||
// without storing the actual match in the parser's string buffer.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *matchAction) Skip() bool {
|
||||
func (a *MatchAction) Skip() bool {
|
||||
if a.ok {
|
||||
a.advanceCursor()
|
||||
}
|
||||
|
@ -95,14 +98,14 @@ func (a *matchAction) Skip() bool {
|
|||
|
||||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *matchAction) Stay() bool {
|
||||
func (a *MatchAction) Stay() bool {
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// advanceCursor advances the rune cursor one position in the input data.
|
||||
// While doing so, it keeps tracks of newlines, so we can report on
|
||||
// row + column positions on error.
|
||||
func (a *matchAction) advanceCursor() {
|
||||
// advanceCursor advances the input position in the input data.
|
||||
// While doing so, it keeps track of newlines that are encountered, so we
|
||||
// can report on line + column positions on error.
|
||||
func (a *MatchAction) advanceCursor() {
|
||||
a.p.inputPos = a.inputPos
|
||||
for _, r := range a.input {
|
||||
if a.p.newline {
|
||||
|
|
|
@ -1,34 +1,34 @@
|
|||
package parsekit
|
||||
|
||||
// RouteTo tells the parser what StateHandler function to invoke
|
||||
// in the next parsing cycle.
|
||||
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
||||
// RouteTo tells the parser what StateHandler function to invoke on
|
||||
// the next parse cycle.
|
||||
func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction {
|
||||
p.nextState = state
|
||||
return &routeFollowupAction{p}
|
||||
return &RouteFollowupAction{p}
|
||||
}
|
||||
|
||||
// RouteRepeat indicates that on the next parsing cycle, the current
|
||||
// RouteRepeat tells the parser that on the next parsing cycle, the current
|
||||
// StateHandler must be reinvoked.
|
||||
func (p *P) RouteRepeat() {
|
||||
func (p *ParseAPI) RouteRepeat() {
|
||||
p.RouteTo(p.state)
|
||||
}
|
||||
|
||||
// RouteReturn tells the parser that on the next cycle the last
|
||||
// StateHandler that was pushed on the route stack must be invoked.
|
||||
// RouteReturn tells the parser that on the next cycle the last StateHandler
|
||||
// that was pushed on the route stack must be invoked.
|
||||
//
|
||||
// Using this method is optional. When implementing a StateHandler that
|
||||
// is used as a sort of subroutine (using constructions like
|
||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||
// providing an explicit routing decision from that handler. The parser will
|
||||
// automatically assume a RouteReturn() in that case.
|
||||
func (p *P) RouteReturn() {
|
||||
func (p *ParseAPI) RouteReturn() {
|
||||
p.nextState = p.popRoute()
|
||||
}
|
||||
|
||||
// routeFollowupAction chains parsing routes.
|
||||
// RouteFollowupAction chains parsing routes.
|
||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||
type routeFollowupAction struct {
|
||||
p *P
|
||||
type RouteFollowupAction struct {
|
||||
p *ParseAPI
|
||||
}
|
||||
|
||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
||||
|
@ -36,7 +36,7 @@ type routeFollowupAction struct {
|
|||
// For example:
|
||||
//
|
||||
// p.RouteTo(handlerA).ThenTo(handlerB)
|
||||
func (a *routeFollowupAction) ThenTo(state StateHandler) {
|
||||
func (a *RouteFollowupAction) ThenTo(state StateHandler) {
|
||||
a.p.pushRoute(state)
|
||||
}
|
||||
|
||||
|
@ -45,18 +45,18 @@ func (a *routeFollowupAction) ThenTo(state StateHandler) {
|
|||
// For example:
|
||||
//
|
||||
// p.RouteTo(handlerA).ThenReturnHere()
|
||||
func (a *routeFollowupAction) ThenReturnHere() {
|
||||
func (a *RouteFollowupAction) ThenReturnHere() {
|
||||
a.p.pushRoute(a.p.state)
|
||||
}
|
||||
|
||||
// pushRoute adds the StateHandler to the route stack.
|
||||
// This is used for implementing nested parsing.
|
||||
func (p *P) pushRoute(state StateHandler) {
|
||||
func (p *ParseAPI) pushRoute(state StateHandler) {
|
||||
p.routeStack = append(p.routeStack, state)
|
||||
}
|
||||
|
||||
// popRoute pops the last pushed StateHandler from the route stack.
|
||||
func (p *P) popRoute() StateHandler {
|
||||
func (p *ParseAPI) popRoute() StateHandler {
|
||||
last := len(p.routeStack) - 1
|
||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||
p.routeStack = head
|
||||
|
@ -66,8 +66,8 @@ func (p *P) popRoute() StateHandler {
|
|||
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
||||
// your parser expects to be at the end of the file. This will schedule
|
||||
// a parsekit-provided StateHandler which will do the actual check for this.
|
||||
func (p *P) ExpectEndOfFile() {
|
||||
p.RouteTo(func(p *P) {
|
||||
func (p *ParseAPI) ExpectEndOfFile() {
|
||||
p.RouteTo(func(p *ParseAPI) {
|
||||
p.Expects("end of file")
|
||||
if p.On(A.EndOfFile).Stay() {
|
||||
p.Emit(ItemEOF, "EOF")
|
||||
|
|
|
@ -0,0 +1,192 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// TokenHandler is the function type that is involved in turning a low level
|
||||
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
|
||||
// data matches some kind of pattern and to report back the match.
|
||||
//
|
||||
// A TokenHandler is to be used in conjunction with parsekit.ParseAPI.On() or
|
||||
// parsekit.Matcher().
|
||||
//
|
||||
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||
// for retrieving input data to match against and for reporting back results.
|
||||
type TokenHandler func(t *TokenAPI) bool
|
||||
|
||||
// TokenAPI is used by TokenHandler functions to retrieve runes from the
|
||||
// input to match against and to report back results.
|
||||
//
|
||||
// Basic operation:
|
||||
//
|
||||
// To retrieve the next rune from the input, the TokenHandler function can call
|
||||
// the TokenAPI.NextRune() method.
|
||||
//
|
||||
// The TokenHandler function can then evaluate the retrieved rune and either
|
||||
// accept of skip the rune. When accepting it using TokenAPI.Accept(), the rune
|
||||
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
|
||||
// the rune will not be added to the output. It is mandatory for a TokenHandler
|
||||
// to call either Accept() or Skip() after retrieving a rune, before calling
|
||||
// NextRune() again.
|
||||
//
|
||||
// Eventually, the TokenHandler function must return a boolean value, indicating
|
||||
// whether or not a match was found. When true, then the calling code will
|
||||
// use the runes that were accepted into the TokenAPI's resulting output.
|
||||
//
|
||||
// Forking operation for easy lookahead support:
|
||||
//
|
||||
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
|
||||
// might either succeed or fail. In case of a failing lookahead, the state
|
||||
// of the TokenAPI must be brought back to the original state.
|
||||
//
|
||||
// The way in which this is supported, is by forking a TokenAPI by calling
|
||||
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
|
||||
// output buffer, but using the same input cursor position as the forked parent.
|
||||
//
|
||||
// The TokenHandler function can then use the same interface as described for
|
||||
// normal operation to retrieve runes from the input and to fill the resulting
|
||||
// output. When the TokenHandler function decides that the lookahead was successful,
|
||||
// then the method TokenAPI.Merge() can be called on the forked child to
|
||||
// append the resulting output from the child to the parent's resulting output,
|
||||
// and to update the parent input cursor position to that of the child.
|
||||
//
|
||||
// When the TokenHandler function decides that the lookahead was unsuccessful,
|
||||
// then it can simply discard the forked child. The parent TokenAPI was never
|
||||
// modified, so a new match can be safely started using that parent, as if the
|
||||
// lookahead never happened.
|
||||
type TokenAPI struct {
|
||||
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
|
||||
inputOffset int // the byte offset into the input
|
||||
input []rune // a slice of runes that represents all retrieved input runes for the Matcher
|
||||
output []rune // a slice of runes that represents the accepted output runes for the Matcher
|
||||
currRune *runeInfo // hold information for the last rune that was read from the input
|
||||
parent *TokenAPI // the parent MatchDialog, in case this one was forked
|
||||
}
|
||||
|
||||
// runeInfo bundles a rune that was read from the input with the metadata
// that is needed to process it.
type runeInfo struct {
	// Rune is the UTF8 rune itself.
	Rune rune
	// ByteSize is the number of bytes in the rune.
	ByteSize int
	// OK is false when the rune represents an invalid UTF8 rune or EOF.
	OK bool
}
|
||||
|
||||
// NextRune retrieves the next rune from the input.
|
||||
//
|
||||
// It returns the rune and a boolean. The boolean will be false in case an
|
||||
// invalid UTF8 rune or the end of the file was encountered.
|
||||
//
|
||||
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
||||
// to respectively add the rune to the TokenAPI's resulting output or to
|
||||
// fully ignore it. This way, a TokenHandler has full control over what runes are
|
||||
// significant for the resulting output of that TokenHandler.
|
||||
//
|
||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||
// rune is explicitly accepted or skipped as described above.
|
||||
func (t *TokenAPI) NextRune() (rune, bool) {
|
||||
if t.currRune != nil {
|
||||
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
||||
}
|
||||
r, w, ok := t.p.peek(t.inputOffset)
|
||||
t.currRune = &runeInfo{r, w, ok}
|
||||
if ok {
|
||||
t.input = append(t.input, r)
|
||||
}
|
||||
return r, ok
|
||||
}
|
||||
|
||||
// Fork splits off a child TokenAPI, containing the same input cursor position
|
||||
// as the parent TokenAPI, but with all other data in a fresh state.
|
||||
//
|
||||
// By forking, a TokenHandler function can freely work with a TokenAPI, without
|
||||
// affecting the parent TokenAPI. This is for example useful when the
|
||||
// TokenHandler function must perform some form of lookahead.
|
||||
//
|
||||
// When a successful match was found, the TokenHandler function can call
|
||||
// TokenAPI.Merge() on the forked child to have the resulting output added
|
||||
// to the parent TokenAPI.
|
||||
//
|
||||
// When no match was found, the forked child can simply be discarded.
|
||||
//
|
||||
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
||||
// This is done in 4 steps and only after finishing all steps, the TokenHandler
|
||||
// function can confirm a successful match. The TokenHandler function for this
|
||||
// case could look like this (yes, it's naive, but it shows the point):
|
||||
// TODO make proper tested example
|
||||
//
|
||||
// func MatchAbcd(t *TokenAPI) bool {
|
||||
// child := t.Fork() // fork to keep m from input untouched
|
||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||
// if r, ok := t.NextRune(); !ok || r != letter {
|
||||
// return false // report mismatch, t is left untouched
|
||||
// }
|
||||
// child.Accept() // add rune to child output
|
||||
// }
|
||||
// child.Merge() // we have a match, add resulting output to parent
|
||||
// return true // and report the successful match
|
||||
// }
|
||||
func (t *TokenAPI) Fork() *TokenAPI {
|
||||
return &TokenAPI{
|
||||
p: t.p,
|
||||
inputOffset: t.inputOffset,
|
||||
parent: t,
|
||||
}
|
||||
}
|
||||
|
||||
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
|
||||
// output of the TokenAPI.
|
||||
func (t *TokenAPI) Accept() {
|
||||
t.checkAllowedCall("Accept()")
|
||||
t.output = append(t.output, t.currRune.Rune)
|
||||
t.inputOffset += t.currRune.ByteSize
|
||||
t.currRune = nil
|
||||
}
|
||||
|
||||
// Skip will ignore the last rune as read by NextRune().
|
||||
func (t *TokenAPI) Skip() {
|
||||
t.checkAllowedCall("Skip()")
|
||||
t.inputOffset += t.currRune.ByteSize
|
||||
t.currRune = nil
|
||||
}
|
||||
|
||||
func (t *TokenAPI) checkAllowedCall(name string) {
|
||||
if t.currRune == nil {
|
||||
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
||||
}
|
||||
if !t.currRune.OK {
|
||||
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
|
||||
}
|
||||
}
|
||||
|
||||
// Merge merges the resulting output from a forked child TokenAPI back into
|
||||
// its parent: The runes that are accepted in the child are added to the parent
|
||||
// runes and the parent's input cursor position is advanced to the child's
|
||||
// cursor position.
|
||||
//
|
||||
// After the merge, the child TokenAPI is reset so it can immediately be
|
||||
// reused for performing another match (all data are cleared, except for the
|
||||
// input offset which is kept at its current position).
|
||||
func (t *TokenAPI) Merge() bool {
|
||||
if t.parent == nil {
|
||||
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
||||
}
|
||||
t.parent.input = append(t.parent.input, t.input...)
|
||||
t.parent.output = append(t.parent.output, t.output...)
|
||||
t.parent.inputOffset = t.inputOffset
|
||||
t.ClearOutput()
|
||||
t.ClearInput()
|
||||
return true
|
||||
}
|
||||
|
||||
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
|
||||
// the input and input offset as-is.
|
||||
func (t *TokenAPI) ClearOutput() {
|
||||
t.output = []rune{}
|
||||
}
|
||||
|
||||
// ClearInput clears the input for the TokenAPI, but it keeps the output
|
||||
// and input offset as-is.
|
||||
func (t *TokenAPI) ClearInput() {
|
||||
t.input = []rune{}
|
||||
}
|
|
@ -0,0 +1,558 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// C provides convenient access to a range of parser/combinators that can be
|
||||
// used to construct TokenHandler functions.
|
||||
//
|
||||
// When using C in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var c = parsekit.C
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
Rune func(rune) TokenHandler
|
||||
Runes func(...rune) TokenHandler
|
||||
RuneRange func(rune, rune) TokenHandler
|
||||
Str func(string) TokenHandler
|
||||
StrNoCase func(string) TokenHandler
|
||||
Any func(...TokenHandler) TokenHandler
|
||||
Not func(TokenHandler) TokenHandler
|
||||
Opt func(TokenHandler) TokenHandler
|
||||
Seq func(...TokenHandler) TokenHandler
|
||||
Rep func(times int, handler TokenHandler) TokenHandler
|
||||
Min func(min int, handler TokenHandler) TokenHandler
|
||||
Max func(max int, handler TokenHandler) TokenHandler
|
||||
ZeroOrMore func(TokenHandler) TokenHandler
|
||||
OneOrMore func(TokenHandler) TokenHandler
|
||||
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
||||
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
||||
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
||||
}{
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
Opt: MatchOpt,
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
Seq: MatchSeq,
|
||||
Rep: MatchRep,
|
||||
Min: MatchMin,
|
||||
Max: MatchMax,
|
||||
ZeroOrMore: MatchZeroOrMore,
|
||||
OneOrMore: MatchOneOrMore,
|
||||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
}
|
||||
|
||||
// MatchRune creates a TokenHandler function that checks if the next rune from
|
||||
// the input matches the provided rune.
|
||||
func MatchRune(expected rune) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
input, ok := t.NextRune()
|
||||
if ok && input == expected {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRunes creates a TokenHandler function that that checks if the next rune
|
||||
// from the input is one of the provided runes.
|
||||
func MatchRunes(expected ...rune) TokenHandler {
|
||||
s := string(expected)
|
||||
return func(t *TokenAPI) bool {
|
||||
input, ok := t.NextRune()
|
||||
if ok {
|
||||
if strings.ContainsRune(s, input) {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRuneRange creates a TokenHandler function that that checks if the next rune
|
||||
// from the input is contained by the provided rune range.
|
||||
//
|
||||
// The rune range is defined by a start and an end rune, inclusive, so:
|
||||
//
|
||||
// MatchRuneRange('g', 'k')
|
||||
//
|
||||
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||
func MatchRuneRange(start rune, end rune) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
if end < start {
|
||||
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||
}
|
||||
input, ok := t.NextRune()
|
||||
if ok && input >= start && input <= end {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchStr creates a TokenHandler that will check if the upcoming runes on the
|
||||
// input match the provided string.
|
||||
// TODO make this a more efficient string-level match?
|
||||
func MatchStr(expected string) TokenHandler {
|
||||
var handlers = []TokenHandler{}
|
||||
for _, r := range expected {
|
||||
handlers = append(handlers, MatchRune(r))
|
||||
}
|
||||
return MatchSeq(handlers...)
|
||||
}
|
||||
|
||||
// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes
|
||||
// on the input match the provided string in a case-insensitive manner.
|
||||
// TODO make this a more efficient string-level match?
|
||||
func MatchStrNoCase(expected string) TokenHandler {
|
||||
var handlers = []TokenHandler{}
|
||||
for _, r := range expected {
|
||||
u := unicode.ToUpper(r)
|
||||
l := unicode.ToLower(r)
|
||||
handlers = append(handlers, MatchRunes(u, l))
|
||||
}
|
||||
return MatchSeq(handlers...)
|
||||
}
|
||||
|
||||
// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
|
||||
// When the provided TokenHandler applies, then its output is used, otherwise
|
||||
// no output is generated but still a successful match is reported.
|
||||
func MatchOpt(handler TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
if handler(child) {
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
|
||||
// applied in their exact order. Only if all matcher apply, the sequence
|
||||
// reports successful match.
|
||||
func MatchSeq(handlers ...TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
for _, matcher := range handlers {
|
||||
if !matcher(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers
|
||||
// can be applied. They are applied in their provided order. The first TokenHandler
|
||||
// that applies is used for reporting back a match.
|
||||
func MatchAny(handlers ...TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
for _, handler := range handlers {
|
||||
child := t.Fork()
|
||||
if handler(child) {
|
||||
return child.Merge()
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to
|
||||
// the current input. If it does, then a failed match will be reported. If it
|
||||
// does not, then the next rune from the input will be reported as a match.
|
||||
func MatchNot(handler TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
probe := t.Fork()
|
||||
if handler(probe) {
|
||||
return false
|
||||
}
|
||||
_, ok := t.NextRune()
|
||||
if ok {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
|
||||
// applied exactly the provided amount of times.
|
||||
//
|
||||
// Note that the input can contain more than the provided number of matches, e.g.:
|
||||
//
|
||||
// MatchRep(4, MatchRune('X'))
|
||||
//
|
||||
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
||||
// In that last case, there will be a remainder "XX" on the input.
|
||||
func MatchRep(times int, handler TokenHandler) TokenHandler {
|
||||
return matchMinMax(times, times, handler)
|
||||
}
|
||||
|
||||
// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
|
||||
// applied at least the provided minimum number of times.
|
||||
// When more matches are possible, these will be included in the output.
|
||||
func MatchMin(min int, handler TokenHandler) TokenHandler {
|
||||
return matchMinMax(min, -1, handler)
|
||||
}
|
||||
|
||||
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
|
||||
// applied at maximum the provided minimum number of times.
|
||||
// When more matches are possible, these will be included in the output.
|
||||
// Zero matches are considered a successful match.
|
||||
func MatchMax(max int, handler TokenHandler) TokenHandler {
|
||||
return matchMinMax(0, max, handler)
|
||||
}
|
||||
|
||||
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
||||
// be applied zero or more times. All matches will be included in the output.
|
||||
// Zero matches are considered a successful match.
|
||||
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
|
||||
return matchMinMax(0, -1, handler)
|
||||
}
|
||||
|
||||
// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
||||
// be applied one or more times. All matches will be included in the output.
|
||||
func MatchOneOrMore(handler TokenHandler) TokenHandler {
|
||||
return matchMinMax(1, -1, handler)
|
||||
}
|
||||
|
||||
// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
|
||||
// be applied between the provided minimum and maximum number of times,
|
||||
// inclusive. All matches will be included in the output.
|
||||
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
||||
if max < 0 {
|
||||
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
||||
}
|
||||
if min < 0 {
|
||||
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
||||
}
|
||||
return matchMinMax(min, max, handler)
|
||||
}
|
||||
|
||||
func matchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
if max >= 0 && min > max {
|
||||
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
|
||||
}
|
||||
total := 0
|
||||
// Check for the minimum required amount of matches.
|
||||
for total < min {
|
||||
total++
|
||||
if !handler(child) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// No specified max: include the rest of the available matches.
|
||||
// Specified max: include the rest of the availble matches, up to the max.
|
||||
child.Merge()
|
||||
for max < 0 || total < max {
|
||||
total++
|
||||
if !handler(child) {
|
||||
break
|
||||
}
|
||||
child.Merge()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchSeparated creates a TokenHandler that checks for a pattern of one or more
|
||||
// TokenHandlers of one type (the separated), separated by TokenHandler of another type
|
||||
// (the separator). All matches (separated + separator) are included in the
|
||||
// output.
|
||||
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
|
||||
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
||||
}
|
||||
|
||||
// MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be
|
||||
// applied to the upcoming input. It also checks if the except TokenHandler can be
|
||||
// applied. If the handler applies, but the except TokenHandler as well, then the match
|
||||
// as a whole will be treated as a mismatch.
|
||||
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
if except(t.Fork()) {
|
||||
return false
|
||||
}
|
||||
return handler(t)
|
||||
}
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms that can be used to
|
||||
// build TokenHandlers or parser rules.
|
||||
//
|
||||
// In parsekit, an atom is defined as a ready for use TokenHandler function.
|
||||
//
|
||||
// When using A in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var a = parsekit.A
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
EndOfFile TokenHandler
|
||||
AnyRune TokenHandler
|
||||
Space TokenHandler
|
||||
Tab TokenHandler
|
||||
CR TokenHandler
|
||||
LF TokenHandler
|
||||
CRLF TokenHandler
|
||||
Excl TokenHandler
|
||||
DoubleQuote TokenHandler
|
||||
Hash TokenHandler
|
||||
Dollar TokenHandler
|
||||
Percent TokenHandler
|
||||
Amp TokenHandler
|
||||
SingleQuote TokenHandler
|
||||
RoundOpen TokenHandler
|
||||
RoundClose TokenHandler
|
||||
Asterisk TokenHandler
|
||||
Plus TokenHandler
|
||||
Comma TokenHandler
|
||||
Minus TokenHandler
|
||||
Dot TokenHandler
|
||||
Slash TokenHandler
|
||||
Colon TokenHandler
|
||||
Semicolon TokenHandler
|
||||
AngleOpen TokenHandler
|
||||
Equal TokenHandler
|
||||
AngleClose TokenHandler
|
||||
Question TokenHandler
|
||||
At TokenHandler
|
||||
SquareOpen TokenHandler
|
||||
Backslash TokenHandler
|
||||
SquareClose TokenHandler
|
||||
Caret TokenHandler
|
||||
Underscore TokenHandler
|
||||
Backquote TokenHandler
|
||||
CurlyOpen TokenHandler
|
||||
Pipe TokenHandler
|
||||
CurlyClose TokenHandler
|
||||
Tilde TokenHandler
|
||||
Newline TokenHandler
|
||||
Whitespace TokenHandler
|
||||
WhitespaceAndNewlines TokenHandler
|
||||
EndOfLine TokenHandler
|
||||
Digit TokenHandler
|
||||
ASCII TokenHandler
|
||||
ASCIILower TokenHandler
|
||||
ASCIIUpper TokenHandler
|
||||
HexDigit TokenHandler
|
||||
}{
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
Space: C.Rune(' '),
|
||||
Tab: C.Rune('\t'),
|
||||
CR: C.Rune('\r'),
|
||||
LF: C.Rune('\n'),
|
||||
CRLF: C.Str("\r\n"),
|
||||
Excl: C.Rune('!'),
|
||||
DoubleQuote: C.Rune('"'),
|
||||
Hash: C.Rune('#'),
|
||||
Dollar: C.Rune('$'),
|
||||
Percent: C.Rune('%'),
|
||||
Amp: C.Rune('&'),
|
||||
SingleQuote: C.Rune('\''),
|
||||
RoundOpen: C.Rune('('),
|
||||
RoundClose: C.Rune(')'),
|
||||
Asterisk: C.Rune('*'),
|
||||
Plus: C.Rune('+'),
|
||||
Comma: C.Rune(','),
|
||||
Minus: C.Rune('-'),
|
||||
Dot: C.Rune('.'),
|
||||
Slash: C.Rune('/'),
|
||||
Colon: C.Rune(':'),
|
||||
Semicolon: C.Rune(';'),
|
||||
AngleOpen: C.Rune('<'),
|
||||
Equal: C.Rune('='),
|
||||
AngleClose: C.Rune('>'),
|
||||
Question: C.Rune('?'),
|
||||
At: C.Rune('@'),
|
||||
SquareOpen: C.Rune('['),
|
||||
Backslash: C.Rune('\\'),
|
||||
SquareClose: C.Rune(']'),
|
||||
Caret: C.Rune('^'),
|
||||
Underscore: C.Rune('_'),
|
||||
Backquote: C.Rune('`'),
|
||||
CurlyOpen: C.Rune('{'),
|
||||
Pipe: C.Rune('|'),
|
||||
CurlyClose: C.Rune('}'),
|
||||
Tilde: C.Rune('~'),
|
||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||
Digit: C.RuneRange('0', '9'),
|
||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||
ASCIILower: C.RuneRange('a', 'z'),
|
||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||
}
|
||||
|
||||
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
||||
// has been reached. This TokenHandler will never produce output. It only reports
|
||||
// a successful or a failing match through its boolean return value.
|
||||
func MatchEndOfFile() TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
fork := t.Fork()
|
||||
input, ok := fork.NextRune()
|
||||
return !ok && input == eofRune
|
||||
}
|
||||
}
|
||||
|
||||
// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be
|
||||
// read from the input. It reports back a successful match if the end of the
|
||||
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||
func MatchAnyRune() TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
_, ok := t.NextRune()
|
||||
if ok {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// M provides convenient access to a range of modifiers (which in their nature are
|
||||
// parser/combinators) that can be used when creating TokenHandler functions.
|
||||
//
|
||||
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
|
||||
// resulting output of another TokenHandler in some way. It does not do any matching
|
||||
// against input of its own.
|
||||
//
|
||||
// When using M in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var m = parsekit.M
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var M = struct {
|
||||
Drop func(TokenHandler) TokenHandler
|
||||
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
ToLower func(TokenHandler) TokenHandler
|
||||
ToUpper func(TokenHandler) TokenHandler
|
||||
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
||||
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
|
||||
}{
|
||||
Drop: ModifyDrop,
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ModifyByCallback: ModifyByCallback,
|
||||
}
|
||||
|
||||
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is discarded completely.
|
||||
//
|
||||
// Note that if the TokenHandler does not apply, a mismatch will be reported back,
|
||||
// even though we would have dropped the output anyway. So if you would like
|
||||
// to drop optional whitespace, then use something like:
|
||||
//
|
||||
// M.Drop(C.Opt(A.Whitespace))
|
||||
//
|
||||
// instead of:
|
||||
//
|
||||
// M.Drop(A.Whitespace)
|
||||
//
|
||||
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
||||
// string "bork" would not match against the second form, but " bork" would.
|
||||
// In both cases, it would match the first form.
|
||||
func ModifyDrop(handler TokenHandler) TokenHandler {
|
||||
return ModifyByCallback(handler, func(s string) string {
|
||||
return ""
|
||||
})
|
||||
}
|
||||
|
||||
// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from both the left and the right of the output.
|
||||
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
|
||||
return modifyTrim(handler, cutset, true, true)
|
||||
}
|
||||
|
||||
// ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from the left of the output.
|
||||
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
|
||||
return modifyTrim(handler, cutset, true, false)
|
||||
}
|
||||
|
||||
// ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are trimmed from the right of the output.
|
||||
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
|
||||
return modifyTrim(handler, cutset, false, true)
|
||||
}
|
||||
|
||||
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
|
||||
modfunc := func(s string) string {
|
||||
if trimLeft {
|
||||
s = strings.TrimLeft(s, cutset)
|
||||
}
|
||||
if trimRight {
|
||||
s = strings.TrimRight(s, cutset)
|
||||
}
|
||||
return s
|
||||
}
|
||||
return ModifyByCallback(handler, modfunc)
|
||||
}
|
||||
|
||||
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are converted into upper case.
|
||||
func ModifyToUpper(handler TokenHandler) TokenHandler {
|
||||
return ModifyByCallback(handler, strings.ToUpper)
|
||||
}
|
||||
|
||||
// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and characters from the provided
|
||||
// cutset are converted into lower case.
|
||||
func ModifyToLower(handler TokenHandler) TokenHandler {
|
||||
return ModifyByCallback(handler, strings.ToLower)
|
||||
}
|
||||
|
||||
// ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is replaced by the provided string.
|
||||
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
|
||||
return ModifyByCallback(handler, func(string) string {
|
||||
return replaceWith
|
||||
})
|
||||
}
|
||||
|
||||
// ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
// If it does, then its output is taken and it is fed to the provided modfunc.
|
||||
// This is a simple function that takes a string on input and returns a possibly
|
||||
// modified string on output. The return value of the modfunc will replace the
|
||||
// resulting output.
|
||||
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
if handler(child) {
|
||||
s := modfunc(string(child.output))
|
||||
child.output = []rune(s)
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
|
@ -8,7 +8,7 @@ import (
|
|||
)
|
||||
|
||||
func TestCombinators(t *testing.T) {
|
||||
RunMatcherTests(t, []MatcherTest{
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"xxx", c.Rune('x'), true, "x"},
|
||||
{"x ", c.Rune(' '), false, ""},
|
||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
||||
|
@ -79,7 +79,7 @@ func TestCombinators(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
RunMatcherTests(t, []MatcherTest{
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"", a.EndOfFile, true, ""},
|
||||
{"⌘", a.AnyRune, true, "⌘"},
|
||||
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
|
||||
|
@ -158,7 +158,7 @@ func TestAtoms(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
RunMatcherTests(t, []MatcherTest{
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
|
@ -172,6 +172,30 @@ func TestModifiers(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestSequenceOfRunes(t *testing.T) {
|
||||
sequence := c.Seq(
|
||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
})
|
||||
item, err, ok := parser.Parse(input).Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if item.Value != input {
|
||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// I know, this is hell, but that's the whole point for this test :->
|
||||
func TestCombination(t *testing.T) {
|
||||
demonic := c.Seq(
|
||||
|
@ -194,34 +218,10 @@ func TestCombination(t *testing.T) {
|
|||
c.Opt(a.SquareClose),
|
||||
)
|
||||
|
||||
RunMatcherTests(t, []MatcherTest{
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
||||
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
||||
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
||||
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestSequenceOfRunes(t *testing.T) {
|
||||
sequence := c.Seq(
|
||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
parser := parsekit.NewParser(func(p *parsekit.P) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept() {
|
||||
p.EmitLiteral(TestItem)
|
||||
}
|
||||
})
|
||||
item, err, ok := parser.Parse(input).Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if item.Value != input {
|
||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue