A big round of getting-ya-terminology-straight.
This commit is contained in:
parent
2751c78003
commit
c6fde2cf4e
|
@ -15,15 +15,15 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// When writing a parser, it's a good start to use the parser/combinator
|
// When writing a parser, it's a good start to use the parser/combinator
|
||||||
// functionality of parsekit to create some Matcher functions. These functions
|
// functionality of parsekit to create some TokenHandler functions. These functions
|
||||||
// can later be used in the parser state machine to check for matching strings
|
// can later be used in the parser state machine to check for matching strings
|
||||||
// on the input data.
|
// on the input data.
|
||||||
//
|
//
|
||||||
// For the calculator, we only need a definition of "number, surrounded by
|
// For the calculator, we only need a definition of "number, surrounded by
|
||||||
// optional whitespace". Skipping whitespace could be a part of the StateHandler
|
// optional whitespace". Skipping whitespace could be a part of the StateHandler
|
||||||
// functions below too, but including it in a Matcher makes things really
|
// functions below too, but including it in a TokenHandler makes things really
|
||||||
// practical.
|
// practical.
|
||||||
func createNumberMatcher() parsekit.Matcher {
|
func createNumberMatcher() parsekit.TokenHandler {
|
||||||
// Easy access to parsekit definition.
|
// Easy access to parsekit definition.
|
||||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
|
@ -43,17 +43,17 @@ const (
|
||||||
|
|
||||||
// We also need to define the state machine for parsing the input.
|
// We also need to define the state machine for parsing the input.
|
||||||
// The state machine is built up from functions that match the StateHandler
|
// The state machine is built up from functions that match the StateHandler
|
||||||
// signature: func(*parsekit.P)
|
// signature: func(*parsekit.ParseAPI)
|
||||||
// The P struct holds the internal state for the parser and it provides
|
// The ParseAPI struct holds the internal state for the parser and it provides
|
||||||
// some methods that form the API for your StateHandler implementation.
|
// some methods that form the API for your StateHandler implementation.
|
||||||
|
|
||||||
// State: expect a number. When a number is found on the input,
|
// State: expect a number. When a number is found on the input,
|
||||||
// it is accepted in the output buffer, after which the output buffer is
|
// it is accepted in the parser's string buffer, after which that buffer is
|
||||||
// emitted as a numberType item. Then we tell the state machine to continue
|
// emitted as a numberType item. Then we tell the state machine to continue
|
||||||
// with the calcWaitForOperatorOrEndOfInput state.
|
// with the calcWaitForOperatorOrEndOfInput state.
|
||||||
// When no number is found, the parser will emit an error, explaining that
|
// When no number is found, the parser will emit an error, explaining that
|
||||||
// "a number" was expected.
|
// "a number" was expected.
|
||||||
func calcWaitForNumber(p *parsekit.P) {
|
func calcWaitForNumber(p *parsekit.ParseAPI) {
|
||||||
p.Expects("a number")
|
p.Expects("a number")
|
||||||
if p.On(calcNumber).Accept() {
|
if p.On(calcNumber).Accept() {
|
||||||
p.EmitLiteral(numberType)
|
p.EmitLiteral(numberType)
|
||||||
|
@ -61,13 +61,13 @@ func calcWaitForNumber(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// State: expect a plus or minus operator. When one of those
|
// State: expect a plus or minus operator. When one of those is found, the
|
||||||
// is found, the appropriate Item is emitted and the parser is sent back
|
// appropriate Item is emitted and the parser is sent back to the
|
||||||
// to the numberHandler to find the next number on the input.
|
// numberHandler to find the next number on the input. When no operator is
|
||||||
// When no operator is found, then the parser is told to expect the end of
|
// found, then the parser is told to expect the end of the input. When more
|
||||||
// the input. When more input data is available (which is obviously wrong
|
// input data are available (which are obviously wrong data since they do
|
||||||
// data since it does not match our syntax), the parser will emit an error.
|
// not match our syntax), the parser will emit an error.
|
||||||
func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
|
func calcWaitForOperatorOrEndOfInput(p *parsekit.ParseAPI) {
|
||||||
switch {
|
switch {
|
||||||
case p.On(a.Plus).Accept():
|
case p.On(a.Plus).Accept():
|
||||||
p.EmitLiteral(addType)
|
p.EmitLiteral(addType)
|
||||||
|
@ -81,18 +81,20 @@ func calcWaitForOperatorOrEndOfInput(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// All is ready for our parser. We now can create a new Parser struct.
|
// All is ready for our parser. We now can create a new Parser struct.
|
||||||
// We need to tell it what the start state is. In our case, it is the
|
// We need to tell it what StateHandler to start with. In our case, it is the
|
||||||
// calcWaitForNumber state, since the calculation must start with a number.
|
// calcWaitForNumber state, since the calculation must start with a number.
|
||||||
var calcParser = parsekit.NewParser(calcWaitForNumber)
|
var calcParser = parsekit.NewParser(calcWaitForNumber)
|
||||||
|
|
||||||
func Example_basicCalculator() {
|
func Example_basicCalculator() {
|
||||||
// Let's feed the parser some input to work with.
|
// Let's feed the parser some input to work with. This provides us with
|
||||||
|
// a parse run for that input.
|
||||||
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
run := calcParser.Parse(" 153+22 + 31-4 -\t 6+42 ")
|
||||||
|
|
||||||
// We can now step through the results of the parsing process by repeated
|
// We can now step through the results of the parsing process by repeated
|
||||||
// calls to run.Next(). Next() returns either the next parse item, a parse
|
// calls to run.Next(). Next() returns either the next parse item, a parse
|
||||||
// error or an end of file. Let's dump the parse results and handle the
|
// error or an end of file. Let's dump the parse results and handle the
|
||||||
// computation while we're at it.
|
// computation while we're at it.
|
||||||
|
// TODO this is convoluted for people using the parser code I think. Maybe use three output data types instead?
|
||||||
sum := 0
|
sum := 0
|
||||||
op := +1
|
op := +1
|
||||||
for {
|
for {
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
// In this example, a parser is created which can parse and normalize Dutch postcodes
|
// In this example, a Parser is created which can parse and normalize Dutch postcodes
|
||||||
// The implementation uses only a Matcher function and does not implement a
|
// The implementation uses only TokenHandler functions and does not implement a
|
||||||
// full-fledged state-based Parser for it.
|
// full-fledged state-based Parser for it.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
|
@ -9,11 +9,11 @@ import (
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
|
||||||
func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
func createPostcodeMatcher() *parsekit.Matcher {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
// Matcher functions are created and combined to satisfy these rules:
|
// TokenHandler functions are created and combined to satisfy these rules:
|
||||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||||
// - The first digit is never a zero.
|
// - The first digit is never a zero.
|
||||||
// - A space between letters and digits is optional.
|
// - A space between letters and digits is optional.
|
||||||
|
@ -26,6 +26,8 @@ func createPostcodeMatcher() *parsekit.MatcherWrapper {
|
||||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||||
postcode := c.Seq(pcDigits, space, pcLetters)
|
postcode := c.Seq(pcDigits, space, pcLetters)
|
||||||
|
|
||||||
|
// Create a Matcher, which wraps the 'postcode' TokenHandler and allows
|
||||||
|
// us to match some input against that handler.
|
||||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
// In this example, a parser is created that is able to parse input that looks
|
// In this example, a parser is created that is able to parse input that looks
|
||||||
// like "Hello, <name>!", and that extracts the name from it.
|
// like "Hello, <name>!", and that extracts the name from it.
|
||||||
//
|
//
|
||||||
// The implementation uses only a Matcher function and does not implement a
|
// The implementation uses only parser/combinator TokenHandler functions and does
|
||||||
// full-fledged state-based Parser for it. If you want to see the same kind of
|
// not implement a full-fledged state-based Parser for it. If you want to see the
|
||||||
// functionality, implemented using a Parser, take a look at the
|
// same kind of functionality, implemented using a Parser, take a look at the
|
||||||
// HelloWorldUsingParser example.
|
// HelloWorldUsingParser example.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
|
@ -13,12 +13,12 @@ import (
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
|
||||||
func createHelloMatcher() *parsekit.MatcherWrapper {
|
func createHelloMatcher() *parsekit.Matcher {
|
||||||
// Easy access to parsekit definition.
|
// Easy access to parsekit definition.
|
||||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
// Using the parser/combinator support of parsekit, we create a Matcher function
|
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
||||||
// that does all the work. The 'greeting' Matcher matches the whole input and
|
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
||||||
// drops all but the name from it.
|
// drops all but the name from it.
|
||||||
hello := c.StrNoCase("hello")
|
hello := c.StrNoCase("hello")
|
||||||
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||||
|
@ -26,7 +26,8 @@ func createHelloMatcher() *parsekit.MatcherWrapper {
|
||||||
name := c.OneOrMore(c.Not(a.Excl))
|
name := c.OneOrMore(c.Not(a.Excl))
|
||||||
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl))
|
||||||
|
|
||||||
// Using 'greeting' we can now create the Matcher-based parser.
|
// Create a Matcher, which wraps the 'greeting' TokenHandler and allows
|
||||||
|
// us to match some input against that handler.
|
||||||
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,14 +2,14 @@
|
||||||
// like "Hello, <name>!", and that extracts the name from it.
|
// like "Hello, <name>!", and that extracts the name from it.
|
||||||
//
|
//
|
||||||
// This implementation uses a state-based Parser for it, and it does not implement
|
// This implementation uses a state-based Parser for it, and it does not implement
|
||||||
// any custom combinator/parser Matcher functions. Note that things are much easier to
|
// any custom parser/combinator TokenHandler functions. Note that things are much
|
||||||
// implement using custom Matchers (see the other HelloWorldUsingMatcher example
|
// easier to implement using custom TokenHandlers (see the other HelloWorldUsingMatcher
|
||||||
// for this). Doing this fully parser-based implementation is mainly for your
|
// example for this). Doing this fully parser-based implementation is mainly for your
|
||||||
// learning pleasure.
|
// learning pleasure.
|
||||||
//
|
//
|
||||||
// One big difference between the Matcher-based example and this one, is that the
|
// One big difference between the Matcher-based example and this one, is that the
|
||||||
// state-based parser reports errors much more fine-grained. This might or might
|
// state-based parser reports errors much more fine-grained. This might or might
|
||||||
// not be useful for your specific application.
|
// not be useful for your specific use case.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -21,7 +21,7 @@ import (
|
||||||
|
|
||||||
const greeteeItem parsekit.ItemType = 1
|
const greeteeItem parsekit.ItemType = 1
|
||||||
|
|
||||||
func stateStartOfGreeting(p *parsekit.P) {
|
func stateStartOfGreeting(p *parsekit.ParseAPI) {
|
||||||
c := parsekit.C
|
c := parsekit.C
|
||||||
p.Expects("hello")
|
p.Expects("hello")
|
||||||
if p.On(c.StrNoCase("hello")).Skip() {
|
if p.On(c.StrNoCase("hello")).Skip() {
|
||||||
|
@ -29,7 +29,7 @@ func stateStartOfGreeting(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateComma(p *parsekit.P) {
|
func stateComma(p *parsekit.ParseAPI) {
|
||||||
a := parsekit.A
|
a := parsekit.A
|
||||||
p.Expects("comma")
|
p.Expects("comma")
|
||||||
switch {
|
switch {
|
||||||
|
@ -40,7 +40,7 @@ func stateComma(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateName(p *parsekit.P) {
|
func stateName(p *parsekit.ParseAPI) {
|
||||||
a := parsekit.A
|
a := parsekit.A
|
||||||
p.Expects("name")
|
p.Expects("name")
|
||||||
switch {
|
switch {
|
||||||
|
@ -51,7 +51,7 @@ func stateName(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func stateEndOfGreeting(p *parsekit.P) {
|
func stateEndOfGreeting(p *parsekit.ParseAPI) {
|
||||||
p.Expects("end of greeting")
|
p.Expects("end of greeting")
|
||||||
if p.On(a.EndOfFile).Stay() {
|
if p.On(a.EndOfFile).Stay() {
|
||||||
name := strings.TrimSpace(p.BufLiteral())
|
name := strings.TrimSpace(p.BufLiteral())
|
||||||
|
|
|
@ -28,7 +28,7 @@ func ExampleItem() {
|
||||||
// the p.Emit* methods on parsekit.P.
|
// the p.Emit* methods on parsekit.ParseAPI.
|
||||||
// When errors occur, or the end of the file is reached, then the built-in
|
// When errors occur, or the end of the file is reached, then the built-in
|
||||||
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
// types parsekit.ItemEOF and parsekit.ItemError will be emitted by parsekit.
|
||||||
stateHandler := func(p *parsekit.P) {
|
stateHandler := func(p *parsekit.ParseAPI) {
|
||||||
if p.On(c.Str("question")).Accept() {
|
if p.On(c.Str("question")).Accept() {
|
||||||
p.EmitLiteral(QuestionItem)
|
p.EmitLiteral(QuestionItem)
|
||||||
}
|
}
|
||||||
|
@ -99,14 +99,14 @@ func ExampleMatchAnyRune() {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
a := parsekit.A
|
a := parsekit.A
|
||||||
|
|
||||||
handler := func(p *parsekit.P) {
|
stateHandler := func(p *parsekit.ParseAPI) {
|
||||||
p.Expects("Any valid rune")
|
p.Expects("Any valid rune")
|
||||||
if p.On(a.AnyRune).Accept() {
|
if p.On(a.AnyRune).Accept() {
|
||||||
p.EmitLiteral(TestItem)
|
p.EmitLiteral(TestItem)
|
||||||
p.RouteRepeat()
|
p.RouteRepeat()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
parser := parsekit.NewParser(handler)
|
parser := parsekit.NewParser(stateHandler)
|
||||||
run := parser.Parse("¡Any / valid / character will dö!")
|
run := parser.Parse("¡Any / valid / character will dö!")
|
||||||
|
|
||||||
for i := 0; i < 5; i++ {
|
for i := 0; i < 5; i++ {
|
||||||
|
|
187
matcher.go
187
matcher.go
|
@ -1,187 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Matcher is the function type that must be implemented to create a function
|
|
||||||
// that can be used in conjunction with parsekit.P.On() or parsekit.New().
|
|
||||||
// Its purpose is to check if input data matches some kind of pattern and to
|
|
||||||
// report back the match.
|
|
||||||
//
|
|
||||||
// A Matcher function gets a MatchDialog as its input and returns a boolean to
|
|
||||||
// indicate whether or not the Matcher found a match on the input.
|
|
||||||
// The MatchDialog is used for retrieving input data to match against
|
|
||||||
// and for reporting back results.
//
// A rune for which MatchDialog.NextRune() reported a failed read must not be
// passed to MatchDialog.Accept() or MatchDialog.Skip(): both panic in that
// situation.
|
|
||||||
type Matcher func(m *MatchDialog) bool
|
|
||||||
|
|
||||||
// MatchDialog is used by Matcher functions to retrieve runes from the
|
|
||||||
// input to match against and to report back results.
|
|
||||||
//
|
|
||||||
// Basic operation:
|
|
||||||
//
|
|
||||||
// To retrieve the next rune from the input, the Matcher function can call
|
|
||||||
// the MatchDialog.NextRune() method.
|
|
||||||
//
|
|
||||||
// The Matcher function can then evaluate the retrieved rune and either
|
|
||||||
// accept or skip the rune. When accepting it using MatchDialog.Accept(),
|
|
||||||
// the rune is added to the output of the MatchDialog. When using
|
|
||||||
// MatchDialog.Skip(), the rune will not be added to the output. It is
|
|
||||||
// mandatory for a Matcher to call either Accept() or Skip() after retrieving
|
|
||||||
// a rune, before calling NextRune() again.
|
|
||||||
//
|
|
||||||
// Eventually, the Matcher function must return a boolean value, indicating
|
|
||||||
// whether or not a match was found. When true, then the calling code will
|
|
||||||
// use the runes that were accepted into the MatchDialog's resulting output.
|
|
||||||
//
|
|
||||||
// Forking operation for easy lookahead support:
|
|
||||||
//
|
|
||||||
// Sometimes, a Matcher function must be able to perform a lookahead, which
|
|
||||||
// might either succeed or fail. In case of a failing lookahead, the state
|
|
||||||
// of the MatchDialog must be brought back to the original state.
|
|
||||||
//
|
|
||||||
// The way in which this is supported, is by forking a MatchDialog by calling
|
|
||||||
// MatchDialog.Fork(). This will return a child MatchDialog, with an empty
|
|
||||||
// output buffer, but using the same input offset as the forked parent.
|
|
||||||
//
|
|
||||||
// The Matcher function can then use the same interface as described for
|
|
||||||
// normal operation to retrieve runes from the input and to fill the output
|
|
||||||
// buffer. When the Matcher function decides that the lookahead was successful,
|
|
||||||
// then the method MatchDialog.Merge() can be called on the forked child to
|
|
||||||
// append the resulting output from the child to the parent's resulting output,
|
|
||||||
// and to update the parent input offset to that of the child.
|
|
||||||
//
|
|
||||||
// When the Matcher function decides that the lookahead was unsuccessful, then
|
|
||||||
// it can simply discard the forked child. The parent MatchDialog was never
|
|
||||||
// modified, so a new match can be safely started using that parent, as if the
|
|
||||||
// lookahead never happened.
|
|
||||||
type MatchDialog struct {
|
|
||||||
p *P // parser state; NextRune() reads input runes from it via peek() (TODO should be interface)
|
|
||||||
inputOffset int // the byte offset into the input at which NextRune() reads; advanced by Accept() and Skip()
|
|
||||||
input []rune // the runes that were successfully read by NextRune(), plus those merged in from forked children
|
|
||||||
output []rune // the runes that were accepted using Accept(), plus those merged in from forked children
|
|
||||||
currRune *runeToken // the last rune that was read from the input; nil when no read is pending
|
|
||||||
parent *MatchDialog // the parent MatchDialog, in case this one was forked; nil otherwise
|
|
||||||
}
|
|
||||||
|
|
||||||
// runeToken holds the outcome of a single read from the input, as stored
// by MatchDialog.NextRune() until the rune is accepted or skipped.
type runeToken struct {
|
|
||||||
Rune rune // the rune that was read
|
|
||||||
ByteSize int // the width reported for the rune by the read; added to the input offset by Accept() and Skip()
|
|
||||||
OK bool // false when the read failed (EOF or invalid rune)
|
|
||||||
}
|
|
||||||
|
|
||||||
// NextRune retrieves the next rune from the input.
|
|
||||||
//
|
|
||||||
// It returns the rune and a boolean. The boolean will be false in case an
|
|
||||||
// invalid UTF8 rune or the end of the file was encountered.
|
|
||||||
//
|
|
||||||
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
|
||||||
// to respectively add the rune to the MatchDialog's resulting output or to
|
|
||||||
// fully ignore it. This way, a Matcher has full control over what runes are
|
|
||||||
// significant for the resulting output of that matcher.
|
|
||||||
//
|
|
||||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
|
||||||
// rune is explicitly accepted or skipped as described above.
|
|
||||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
|
||||||
if m.currRune != nil {
|
|
||||||
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
|
||||||
}
|
|
||||||
r, w, ok := m.p.peek(m.inputOffset)
|
|
||||||
m.currRune = &runeToken{r, w, ok}
|
|
||||||
if ok {
|
|
||||||
m.input = append(m.input, r)
|
|
||||||
}
|
|
||||||
return r, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fork splits off a child MatchDialog, containing the same offset as the
|
|
||||||
// parent MatchDialog, but with all other data in a fresh state.
|
|
||||||
//
|
|
||||||
// By forking, a Matcher function can freely work with a MatchDialog, without
|
|
||||||
// affecting the parent MatchDialog. This is for example useful when the
|
|
||||||
// Matcher function must perform some form of lookahead.
|
|
||||||
//
|
|
||||||
// When a successful match was found, the Matcher function can call
|
|
||||||
// child.Merge() to have the resulting output added to the parent MatchDialog.
|
|
||||||
// When no match was found, the forked child can simply be discarded.
|
|
||||||
//
|
|
||||||
// Example case: A Matcher checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
|
||||||
// This is done in 4 steps and only after finishing all steps, the Matcher
|
|
||||||
// function can confirm a successful match. The Matcher function for this
|
|
||||||
// case could look like this (yes, it's naive, but it shows the point):
|
|
||||||
//
|
|
||||||
// func MatchAbcd(m *MatchDialog) bool {
|
|
||||||
// child := m.Fork() // fork to keep m from input untouched
|
|
||||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
|
||||||
// if r, ok := m.NextRune(); !ok || r != letter {
|
|
||||||
// return false // report mismatch, m is left untouched
|
|
||||||
// }
|
|
||||||
// child.Accept() // add rune to child output
|
|
||||||
// }
|
|
||||||
// child.Merge() // we have a match, add resulting output to parent
|
|
||||||
// return true // and report the successful match
|
|
||||||
// }
|
|
||||||
func (m *MatchDialog) Fork() *MatchDialog {
|
|
||||||
child := &MatchDialog{
|
|
||||||
p: m.p,
|
|
||||||
inputOffset: m.inputOffset,
|
|
||||||
parent: m,
|
|
||||||
}
|
|
||||||
return child
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accept will add the last rune as read by NextRune() to the resulting
|
|
||||||
// output of the MatchDialog.
|
|
||||||
func (m *MatchDialog) Accept() {
|
|
||||||
m.checkAllowedCall("Accept()")
|
|
||||||
m.output = append(m.output, m.currRune.Rune)
|
|
||||||
m.inputOffset += m.currRune.ByteSize
|
|
||||||
m.currRune = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip will ignore the last rune as read by NextRune().
|
|
||||||
func (m *MatchDialog) Skip() {
|
|
||||||
m.checkAllowedCall("Skip()")
|
|
||||||
m.inputOffset += m.currRune.ByteSize
|
|
||||||
m.currRune = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *MatchDialog) checkAllowedCall(name string) {
|
|
||||||
if m.currRune == nil {
|
|
||||||
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
|
||||||
}
|
|
||||||
if !m.currRune.OK {
|
|
||||||
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge merges the resulting output from a forked child MatchDialog back into
|
|
||||||
// its parent: The runes that are accepted in the child are added to the parent
|
|
||||||
// runes and the parent's offset is advanced to the child's offset.
|
|
||||||
//
|
|
||||||
// After the merge, the child MatchDialog is reset so it can immediately be
|
|
||||||
// reused for performing another match (all data are cleared, except for the
|
|
||||||
// input offset which is kept at its current position).
|
|
||||||
func (m *MatchDialog) Merge() bool {
|
|
||||||
if m.parent == nil {
|
|
||||||
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
|
||||||
}
|
|
||||||
m.parent.input = append(m.parent.input, m.input...)
|
|
||||||
m.parent.output = append(m.parent.output, m.output...)
|
|
||||||
m.parent.inputOffset = m.inputOffset
|
|
||||||
m.ClearOutput()
|
|
||||||
m.ClearInput()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// ClearOutput clears the resulting output for the MatchDialog, but it keeps
|
|
||||||
// the input and input offset as-is.
|
|
||||||
func (m *MatchDialog) ClearOutput() {
|
|
||||||
m.output = []rune{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ClearInput clears the input for the MatchDialog, but it keeps the output
|
|
||||||
// and input offset as-is.
|
|
||||||
func (m *MatchDialog) ClearInput() {
|
|
||||||
m.input = []rune{}
|
|
||||||
}
|
|
|
@ -1,559 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"unicode"
|
|
||||||
)
|
|
||||||
|
|
||||||
// C provides convenient access to a range of parser/combinators
|
|
||||||
// that can be used to construct Matcher functions.
|
|
||||||
//
|
|
||||||
// When using C in your own parser, then it is advised to create
|
|
||||||
// a variable in your own package to reference it:
|
|
||||||
//
|
|
||||||
// var c = parsekit.C
|
|
||||||
//
|
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
||||||
var C = struct {
|
|
||||||
Rune func(rune) Matcher
|
|
||||||
Runes func(...rune) Matcher
|
|
||||||
RuneRange func(rune, rune) Matcher
|
|
||||||
Str func(string) Matcher
|
|
||||||
StrNoCase func(string) Matcher
|
|
||||||
Any func(...Matcher) Matcher
|
|
||||||
Not func(Matcher) Matcher
|
|
||||||
Opt func(Matcher) Matcher
|
|
||||||
Seq func(...Matcher) Matcher
|
|
||||||
Rep func(int, Matcher) Matcher
|
|
||||||
Min func(int, Matcher) Matcher
|
|
||||||
Max func(int, Matcher) Matcher
|
|
||||||
ZeroOrMore func(Matcher) Matcher
|
|
||||||
OneOrMore func(Matcher) Matcher
|
|
||||||
MinMax func(int, int, Matcher) Matcher
|
|
||||||
Separated func(separated Matcher, separator Matcher) Matcher
|
|
||||||
Except func(except Matcher, matcher Matcher) Matcher
|
|
||||||
}{
|
|
||||||
Rune: MatchRune,
|
|
||||||
Runes: MatchRunes,
|
|
||||||
RuneRange: MatchRuneRange,
|
|
||||||
Str: MatchStr,
|
|
||||||
StrNoCase: MatchStrNoCase,
|
|
||||||
Opt: MatchOpt,
|
|
||||||
Any: MatchAny,
|
|
||||||
Not: MatchNot,
|
|
||||||
Seq: MatchSeq,
|
|
||||||
Rep: MatchRep,
|
|
||||||
Min: MatchMin,
|
|
||||||
Max: MatchMax,
|
|
||||||
ZeroOrMore: MatchZeroOrMore,
|
|
||||||
OneOrMore: MatchOneOrMore,
|
|
||||||
MinMax: MatchMinMax,
|
|
||||||
Separated: MatchSeparated,
|
|
||||||
Except: MatchExcept,
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchRune creates a Matcher function that checks if the next rune from
|
|
||||||
// the input matches the provided rune.
|
|
||||||
func MatchRune(expected rune) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
if ok && input == expected {
|
|
||||||
m.Accept()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchRunes creates a Matcher function that that checks if the next rune
|
|
||||||
// from the input is one of the provided runes.
|
|
||||||
func MatchRunes(expected ...rune) Matcher {
|
|
||||||
s := string(expected)
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
if ok {
|
|
||||||
if strings.ContainsRune(s, input) {
|
|
||||||
m.Accept()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchRuneRange creates a Matcher function that that checks if the next rune
|
|
||||||
// from the input is contained by the provided rune range.
|
|
||||||
//
|
|
||||||
// The rune range is defined by a start and an end rune, inclusive, so:
|
|
||||||
//
|
|
||||||
// MatchRuneRange('g', 'k')
|
|
||||||
//
|
|
||||||
// creates a Matcher that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
|
||||||
func MatchRuneRange(start rune, end rune) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
if end < start {
|
|
||||||
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
|
||||||
}
|
|
||||||
input, ok := m.NextRune()
|
|
||||||
if ok && input >= start && input <= end {
|
|
||||||
m.Accept()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchStr creates a Matcher that will check if the upcoming runes on the
|
|
||||||
// input match the provided string.
|
|
||||||
// TODO make this a more efficient string-level match?
|
|
||||||
func MatchStr(expected string) Matcher {
|
|
||||||
var matchers = []Matcher{}
|
|
||||||
for _, r := range expected {
|
|
||||||
matchers = append(matchers, MatchRune(r))
|
|
||||||
}
|
|
||||||
return MatchSeq(matchers...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchStrNoCase creates a Matcher that will check if the upcoming runes
|
|
||||||
// on the input match the provided string in a case-insensitive manner.
|
|
||||||
// TODO make this a more efficient string-level match?
|
|
||||||
func MatchStrNoCase(expected string) Matcher {
|
|
||||||
var matchers = []Matcher{}
|
|
||||||
for _, r := range expected {
|
|
||||||
u := unicode.ToUpper(r)
|
|
||||||
l := unicode.ToLower(r)
|
|
||||||
matchers = append(matchers, MatchRunes(u, l))
|
|
||||||
}
|
|
||||||
return MatchSeq(matchers...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchOpt creates a Matcher that makes the provided Matcher optional.
|
|
||||||
// When the provided Matcher applies, then its output is used, otherwise
|
|
||||||
// no output is generated but still a successful match is reported.
|
|
||||||
func MatchOpt(matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchSeq creates a Matcher that checks if the provided Matchers can be
|
|
||||||
// applied in their exact order. Only if all matcher apply, the sequence
|
|
||||||
// reports successful match.
|
|
||||||
func MatchSeq(matchers ...Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
for _, matcher := range matchers {
|
|
||||||
if !matcher(child) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
child.Merge()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchAny creates a Matcher that checks if any of the provided Matchers
|
|
||||||
// can be applied. They are applied in their provided order. The first Matcher
|
|
||||||
// that applies is used for reporting back a match.
|
|
||||||
func MatchAny(matchers ...Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
for _, matcher := range matchers {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
return child.Merge()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchNot creates a Matcher that checks if the provided Matcher applies to
|
|
||||||
// the current input. If it does, then a failed match will be reported. If it
|
|
||||||
// does not, then the next rune from the input will be reported as a match.
|
|
||||||
func MatchNot(matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
probe := m.Fork()
|
|
||||||
if matcher(probe) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
_, ok := m.NextRune()
|
|
||||||
if ok {
|
|
||||||
m.Accept()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchRep creates a Matcher that checks if the provided Matcher can be
|
|
||||||
// applied exactly the provided amount of times.
|
|
||||||
//
|
|
||||||
// Note that the input can contain more Matches for the provided matcher, e.g.:
|
|
||||||
//
|
|
||||||
// MatchRep(4, MatchRune('X'))
|
|
||||||
//
|
|
||||||
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
|
||||||
// In that last case, there will be a remainder "XX" of the input.
|
|
||||||
func MatchRep(times int, matcher Matcher) Matcher {
|
|
||||||
return matchMinMax(times, times, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchMin creates a Matcher that checks if the provided Matcher can be
|
|
||||||
// applied at least the provided minimum number of times.
|
|
||||||
// When more matches are possible, these will be included in the output.
|
|
||||||
func MatchMin(min int, matcher Matcher) Matcher {
|
|
||||||
return matchMinMax(min, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchMax creates a Matcher that checks if the provided Matcher can be
|
|
||||||
// applied at maximum the provided minimum number of times.
|
|
||||||
// When more matches are possible, these will be included in the output.
|
|
||||||
// Zero matches are considered a successful match.
|
|
||||||
func MatchMax(max int, matcher Matcher) Matcher {
|
|
||||||
return matchMinMax(0, max, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchZeroOrMore creates a Matcher that checks if the provided Matcher can
|
|
||||||
// be applied zero or more times. All matches will be included in the output.
|
|
||||||
// Zero matches are considered a successful match.
|
|
||||||
func MatchZeroOrMore(matcher Matcher) Matcher {
|
|
||||||
return matchMinMax(0, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchOneOrMore creates a Matcher that checks if the provided Matcher can
|
|
||||||
// be applied one or more times. All matches will be included in the output.
|
|
||||||
func MatchOneOrMore(matcher Matcher) Matcher {
|
|
||||||
return matchMinMax(1, -1, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchMinMax creates a Matcher that checks if the provided Matcher can
|
|
||||||
// be applied between the provided minimum and maximum number of times,
|
|
||||||
// inclusive. All matches will be included in the output.
|
|
||||||
func MatchMinMax(min int, max int, matcher Matcher) Matcher {
|
|
||||||
if max < 0 {
|
|
||||||
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
|
||||||
}
|
|
||||||
if min < 0 {
|
|
||||||
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
|
||||||
}
|
|
||||||
return matchMinMax(min, max, matcher)
|
|
||||||
}
|
|
||||||
|
|
||||||
func matchMinMax(min int, max int, matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if max >= 0 && min > max {
|
|
||||||
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
|
|
||||||
}
|
|
||||||
total := 0
|
|
||||||
// Check for the minimum required amount of matches.
|
|
||||||
for total < min {
|
|
||||||
total++
|
|
||||||
if !matcher(child) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// No specified max: include the rest of the available matches.
|
|
||||||
// Specified max: include the rest of the availble matches, up to the max.
|
|
||||||
child.Merge()
|
|
||||||
for max < 0 || total < max {
|
|
||||||
total++
|
|
||||||
if !matcher(child) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchSeparated creates a Matcher that checks for a pattern of one or more
|
|
||||||
// Matchers of one type (the separated), separated by Matches of another type
|
|
||||||
// (the separator). All matches (separated + separator) are included in the
|
|
||||||
// output.
|
|
||||||
func MatchSeparated(separator Matcher, separated Matcher) Matcher {
|
|
||||||
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchExcept creates a Matcher that checks if the provided matcher can be
|
|
||||||
// applied to the upcoming input. It also checks if the except Matcher can be
|
|
||||||
// applied. If the matcher applies, but the except Matcher too, then the match
|
|
||||||
// as a whole will be treated as a mismatch.
|
|
||||||
func MatchExcept(except Matcher, matcher Matcher) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
if except(m.Fork()) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return matcher(m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms that can be used to
|
|
||||||
// build combinators or parsing rules.
|
|
||||||
//
|
|
||||||
// In parsekit, an atom is defined as a ready to go Matcher function.
|
|
||||||
//
|
|
||||||
// When using A in your own parser, then it is advised to create
|
|
||||||
// a variable in your own package to reference it:
|
|
||||||
//
|
|
||||||
// var a = parsekit.A
|
|
||||||
//
|
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
||||||
var A = struct {
|
|
||||||
EndOfFile Matcher
|
|
||||||
AnyRune Matcher
|
|
||||||
Space Matcher
|
|
||||||
Tab Matcher
|
|
||||||
CR Matcher
|
|
||||||
LF Matcher
|
|
||||||
CRLF Matcher
|
|
||||||
Excl Matcher
|
|
||||||
DoubleQuote Matcher
|
|
||||||
Hash Matcher
|
|
||||||
Dollar Matcher
|
|
||||||
Percent Matcher
|
|
||||||
Amp Matcher
|
|
||||||
SingleQuote Matcher
|
|
||||||
RoundOpen Matcher
|
|
||||||
RoundClose Matcher
|
|
||||||
Asterisk Matcher
|
|
||||||
Plus Matcher
|
|
||||||
Comma Matcher
|
|
||||||
Minus Matcher
|
|
||||||
Dot Matcher
|
|
||||||
Slash Matcher
|
|
||||||
Colon Matcher
|
|
||||||
Semicolon Matcher
|
|
||||||
AngleOpen Matcher
|
|
||||||
Equal Matcher
|
|
||||||
AngleClose Matcher
|
|
||||||
Question Matcher
|
|
||||||
At Matcher
|
|
||||||
SquareOpen Matcher
|
|
||||||
Backslash Matcher
|
|
||||||
SquareClose Matcher
|
|
||||||
Caret Matcher
|
|
||||||
Underscore Matcher
|
|
||||||
Backquote Matcher
|
|
||||||
CurlyOpen Matcher
|
|
||||||
Pipe Matcher
|
|
||||||
CurlyClose Matcher
|
|
||||||
Tilde Matcher
|
|
||||||
Newline Matcher
|
|
||||||
Whitespace Matcher
|
|
||||||
WhitespaceAndNewlines Matcher
|
|
||||||
EndOfLine Matcher
|
|
||||||
Digit Matcher
|
|
||||||
ASCII Matcher
|
|
||||||
ASCIILower Matcher
|
|
||||||
ASCIIUpper Matcher
|
|
||||||
HexDigit Matcher
|
|
||||||
}{
|
|
||||||
EndOfFile: MatchEndOfFile(),
|
|
||||||
AnyRune: MatchAnyRune(),
|
|
||||||
Space: C.Rune(' '),
|
|
||||||
Tab: C.Rune('\t'),
|
|
||||||
CR: C.Rune('\r'),
|
|
||||||
LF: C.Rune('\n'),
|
|
||||||
CRLF: C.Str("\r\n"),
|
|
||||||
Excl: C.Rune('!'),
|
|
||||||
DoubleQuote: C.Rune('"'),
|
|
||||||
Hash: C.Rune('#'),
|
|
||||||
Dollar: C.Rune('$'),
|
|
||||||
Percent: C.Rune('%'),
|
|
||||||
Amp: C.Rune('&'),
|
|
||||||
SingleQuote: C.Rune('\''),
|
|
||||||
RoundOpen: C.Rune('('),
|
|
||||||
RoundClose: C.Rune(')'),
|
|
||||||
Asterisk: C.Rune('*'),
|
|
||||||
Plus: C.Rune('+'),
|
|
||||||
Comma: C.Rune(','),
|
|
||||||
Minus: C.Rune('-'),
|
|
||||||
Dot: C.Rune('.'),
|
|
||||||
Slash: C.Rune('/'),
|
|
||||||
Colon: C.Rune(':'),
|
|
||||||
Semicolon: C.Rune(';'),
|
|
||||||
AngleOpen: C.Rune('<'),
|
|
||||||
Equal: C.Rune('='),
|
|
||||||
AngleClose: C.Rune('>'),
|
|
||||||
Question: C.Rune('?'),
|
|
||||||
At: C.Rune('@'),
|
|
||||||
SquareOpen: C.Rune('['),
|
|
||||||
Backslash: C.Rune('\\'),
|
|
||||||
SquareClose: C.Rune(']'),
|
|
||||||
Caret: C.Rune('^'),
|
|
||||||
Underscore: C.Rune('_'),
|
|
||||||
Backquote: C.Rune('`'),
|
|
||||||
CurlyOpen: C.Rune('{'),
|
|
||||||
Pipe: C.Rune('|'),
|
|
||||||
CurlyClose: C.Rune('}'),
|
|
||||||
Tilde: C.Rune('~'),
|
|
||||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
|
||||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
|
||||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
|
||||||
Digit: C.RuneRange('0', '9'),
|
|
||||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
|
||||||
ASCIILower: C.RuneRange('a', 'z'),
|
|
||||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
|
||||||
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchEndOfFile creates a Matcher that checks if the end of the input data
|
|
||||||
// has been reached. This Matcher will never produce output. It only reports
|
|
||||||
// a successful or a failing match through its boolean return value.
|
|
||||||
func MatchEndOfFile() Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
fork := m.Fork()
|
|
||||||
input, ok := fork.NextRune()
|
|
||||||
return !ok && input == eofRune
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// MatchAnyRune creates a Matcher function that checks if a valid rune can be
|
|
||||||
// read from the input. It reports back a successful match if the end of the
|
|
||||||
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
|
||||||
func MatchAnyRune() Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
_, ok := m.NextRune()
|
|
||||||
if ok {
|
|
||||||
m.Accept()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// M provides convenient access to a range of modifiers that can be
|
|
||||||
// used when creating Matcher functions.
|
|
||||||
//
|
|
||||||
// In parsekit, a modifier is defined as a Matcher function that modifies the
|
|
||||||
// resulting output of another Matcher in some way. It does not do any matching
|
|
||||||
// against input of its own.
|
|
||||||
//
|
|
||||||
// When using M in your own parser, then it is advised to create
|
|
||||||
// a variable in your own package to reference it:
|
|
||||||
//
|
|
||||||
// var m = parsekit.M
|
|
||||||
//
|
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
|
||||||
var M = struct {
|
|
||||||
Drop func(Matcher) Matcher
|
|
||||||
Trim func(Matcher, string) Matcher
|
|
||||||
TrimLeft func(Matcher, string) Matcher
|
|
||||||
TrimRight func(Matcher, string) Matcher
|
|
||||||
ToLower func(Matcher) Matcher
|
|
||||||
ToUpper func(Matcher) Matcher
|
|
||||||
Replace func(Matcher, string) Matcher
|
|
||||||
ModifyByCallback func(Matcher, func(string) string) Matcher
|
|
||||||
}{
|
|
||||||
Drop: ModifyDrop,
|
|
||||||
Trim: ModifyTrim,
|
|
||||||
TrimLeft: ModifyTrimLeft,
|
|
||||||
TrimRight: ModifyTrimRight,
|
|
||||||
ToLower: ModifyToLower,
|
|
||||||
ToUpper: ModifyToUpper,
|
|
||||||
Replace: ModifyReplace,
|
|
||||||
ModifyByCallback: ModifyByCallback,
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyDrop creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is discarded completely.
|
|
||||||
//
|
|
||||||
// Note that if the Matcher does not apply, a mismatch will be reported back,
|
|
||||||
// even though we would have dropped the output anyway. So if you would like
|
|
||||||
// to drop optional whitespace, then use something like:
|
|
||||||
//
|
|
||||||
// M.Drop(C.Opt(A.Whitespace))
|
|
||||||
//
|
|
||||||
// instead of:
|
|
||||||
//
|
|
||||||
// M.Drop(A.Whitespace)
|
|
||||||
//
|
|
||||||
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
|
||||||
// string "bork" would not match against the second form, but " bork" would.
|
|
||||||
// In both cases, it would match the first form.
|
|
||||||
func ModifyDrop(matcher Matcher) Matcher {
|
|
||||||
return ModifyByCallback(matcher, func(s string) string {
|
|
||||||
return ""
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyTrim creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is taken and characters from the provided
|
|
||||||
// cutset are trimmed from both the left and the right of the output.
|
|
||||||
// TODO move cutset to the left arg
|
|
||||||
func ModifyTrim(matcher Matcher, cutset string) Matcher {
|
|
||||||
return modifyTrim(matcher, cutset, true, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyTrimLeft creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is taken and characters from the provided
|
|
||||||
// cutset are trimmed from the left of the output.
|
|
||||||
func ModifyTrimLeft(matcher Matcher, cutset string) Matcher {
|
|
||||||
return modifyTrim(matcher, cutset, true, false)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyTrimRight creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is taken and characters from the provided
|
|
||||||
// cutset are trimmed from the right of the output.
|
|
||||||
func ModifyTrimRight(matcher Matcher, cutset string) Matcher {
|
|
||||||
return modifyTrim(matcher, cutset, false, true)
|
|
||||||
}
|
|
||||||
|
|
||||||
func modifyTrim(matcher Matcher, cutset string, trimLeft bool, trimRight bool) Matcher {
|
|
||||||
modfunc := func(s string) string {
|
|
||||||
if trimLeft {
|
|
||||||
s = strings.TrimLeft(s, cutset)
|
|
||||||
}
|
|
||||||
if trimRight {
|
|
||||||
s = strings.TrimRight(s, cutset)
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
return ModifyByCallback(matcher, modfunc)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyToUpper creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is taken and characters from the provided
|
|
||||||
// cutset are converted into upper case.
|
|
||||||
func ModifyToUpper(matcher Matcher) Matcher {
|
|
||||||
return ModifyByCallback(matcher, strings.ToUpper)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyToLower creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is taken and characters from the provided
|
|
||||||
// cutset are converted into lower case.
|
|
||||||
func ModifyToLower(matcher Matcher) Matcher {
|
|
||||||
return ModifyByCallback(matcher, strings.ToLower)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyReplace creates a Matcher that checks if the provided Matcher applies.
|
|
||||||
// If it does, then its output is replaced by the provided string.
|
|
||||||
func ModifyReplace(matcher Matcher, s string) Matcher {
|
|
||||||
return ModifyByCallback(matcher, func(string) string {
|
|
||||||
return s
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// ModifyByCallback creates a Matcher that checks if the provided matcher applies.
|
|
||||||
// If it does, then its output is taken and it is fed to the provided modfunc.
|
|
||||||
// This is a simple function that takes a string on input and returns a possibly
|
|
||||||
// modified string on output. The return value of the modfunc will replace the
|
|
||||||
// resulting output.
|
|
||||||
func ModifyByCallback(matcher Matcher, modfunc func(string) string) Matcher {
|
|
||||||
return func(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if matcher(child) {
|
|
||||||
s := modfunc(string(child.output))
|
|
||||||
child.output = []rune(s)
|
|
||||||
child.Merge()
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
58
parsekit.go
58
parsekit.go
|
@ -24,17 +24,16 @@ func NewParser(startState StateHandler) *Parser {
|
||||||
return &Parser{startState: startState}
|
return &Parser{startState: startState}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run represents a single parse run for a Parser.
|
// ParseRun represents a single parse run for a Parser.
|
||||||
// TODO rename to ParseRun
|
type ParseRun struct {
|
||||||
type Run struct {
|
p *ParseAPI // holds the internal state of a parse run
|
||||||
p *P // a struct holding the internal state of a parse run
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse starts a parse run on the provided input data.
|
// Parse starts a parse run on the provided input data.
|
||||||
// To retrieve parse items from the run, make use of the Run.Next() method.
|
// To retrieve parser Items from the run, make use of the ParseRun.Next() method.
|
||||||
func (p *Parser) Parse(input string) *Run {
|
func (p *Parser) Parse(input string) *ParseRun {
|
||||||
return &Run{
|
return &ParseRun{
|
||||||
p: &P{
|
p: &ParseAPI{
|
||||||
input: input,
|
input: input,
|
||||||
len: len(input),
|
len: len(input),
|
||||||
cursorLine: 1,
|
cursorLine: 1,
|
||||||
|
@ -51,7 +50,7 @@ func (p *Parser) Parse(input string) *Run {
|
||||||
// On error or when successfully reaching the end of the input, false is returned.
|
// On error or when successfully reaching the end of the input, false is returned.
|
||||||
// When an error occurred, false will be returned and the error return value will
|
// When an error occurred, false will be returned and the error return value will
|
||||||
// be set (default is nil).
|
// be set (default is nil).
|
||||||
func (run *Run) Next() (Item, *Error, bool) {
|
func (run *ParseRun) Next() (Item, *Error, bool) {
|
||||||
// State handling loop: we handle states, until an Item is ready to be returned.
|
// State handling loop: we handle states, until an Item is ready to be returned.
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
@ -66,7 +65,7 @@ func (run *Run) Next() (Item, *Error, bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
func (run *ParseRun) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
switch {
|
switch {
|
||||||
case i.Type == ItemEOF:
|
case i.Type == ItemEOF:
|
||||||
return i, nil, false
|
return i, nil, false
|
||||||
|
@ -84,7 +83,7 @@ func (run *Run) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
// type StateHandler. This function represents the current status and
|
// type StateHandler. This function represents the current status and
|
||||||
// is responsible for moving the parser to its next status, depending
|
// is responsible for moving the parser to its next status, depending
|
||||||
// on the parsed input data.
|
// on the parsed input data.
|
||||||
func (run *Run) runNextStateHandler() {
|
func (run *ParseRun) runNextStateHandler() {
|
||||||
if state, ok := run.getNextStateHandler(); ok {
|
if state, ok := run.getNextStateHandler(); ok {
|
||||||
run.invokeNextStateHandler(state)
|
run.invokeNextStateHandler(state)
|
||||||
}
|
}
|
||||||
|
@ -115,7 +114,7 @@ func (run *Run) runNextStateHandler() {
|
||||||
//
|
//
|
||||||
// When no routing decision is provided by a StateHandler, then this is
|
// When no routing decision is provided by a StateHandler, then this is
|
||||||
// considered a bug in the state handler, and the parser will panic.
|
// considered a bug in the state handler, and the parser will panic.
|
||||||
func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
func (run *ParseRun) getNextStateHandler() (StateHandler, bool) {
|
||||||
switch {
|
switch {
|
||||||
case run.p.nextState != nil:
|
case run.p.nextState != nil:
|
||||||
return run.p.nextState, true
|
return run.p.nextState, true
|
||||||
|
@ -132,42 +131,45 @@ func (run *Run) getNextStateHandler() (StateHandler, bool) {
|
||||||
|
|
||||||
// invokeNextStateHandler moves the parser state to the provided state
|
// invokeNextStateHandler moves the parser state to the provided state
|
||||||
// and invokes the StateHandler function.
|
// and invokes the StateHandler function.
|
||||||
func (run *Run) invokeNextStateHandler(state StateHandler) {
|
func (run *ParseRun) invokeNextStateHandler(state StateHandler) {
|
||||||
run.p.state = state
|
run.p.state = state
|
||||||
run.p.nextState = nil
|
run.p.nextState = nil
|
||||||
run.p.expecting = ""
|
run.p.expecting = ""
|
||||||
run.p.state(run.p)
|
run.p.state(run.p)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatcherWrapper is the top-level struct that holds the configuration for
|
// Matcher is the top-level struct that holds the configuration for
|
||||||
// a parser that is based solely on a Wrapper function.
|
// a parser that is based solely on a TokenHandler function.
|
||||||
// The MatcherWrapper can be instantiated using the parsekit.NewMatcher()
|
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
||||||
// method.
|
// method.
|
||||||
//
|
//
|
||||||
// To match input data against the wrapped Matcher function, use the method
|
// To match input data against the wrapped TokenHandler function, use the method
|
||||||
// MatcherWrapper.Parse().
|
// Matcher.Parse().
|
||||||
type MatcherWrapper struct {
|
type Matcher struct {
|
||||||
parser *Parser
|
parser *Parser
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMatcher instantiates a new MatcherWrapper.
|
// NewMatcher instantiates a new Matcher.
|
||||||
//
|
//
|
||||||
// This is a simple wrapper around a Matcher function. It can be used to
|
// This is a simple wrapper around a TokenHandler function. It can be used to
|
||||||
// match an input string against that Matcher function and retrieve the
|
// match an input string against that TokenHandler function and retrieve the
|
||||||
// results in a straight forward way.
|
// results in a straight forward way.
|
||||||
func NewMatcher(matcher Matcher, expects string) *MatcherWrapper {
|
//
|
||||||
handler := func(p *P) {
|
// The 'expects' parameter is used for creating an error message in case parsed
|
||||||
|
// input does not match the TokenHandler.
|
||||||
|
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||||
|
stateHandler := func(p *ParseAPI) {
|
||||||
p.Expects(expects)
|
p.Expects(expects)
|
||||||
if p.On(matcher).Accept() {
|
if p.On(tokenHandler).Accept() {
|
||||||
p.EmitLiteral(0) // ItemType is irrelevant
|
p.EmitLiteral(0) // ItemType is irrelevant
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &MatcherWrapper{parser: NewParser(handler)}
|
return &Matcher{parser: NewParser(stateHandler)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse runs the wrapped Matcher function against the provided input data.
|
// Parse checks for a match on the provided input data.
|
||||||
func (w *MatcherWrapper) Parse(input string) (string, *Error, bool) {
|
func (m *Matcher) Parse(input string) (string, *Error, bool) {
|
||||||
item, err, ok := w.parser.Parse(input).Next()
|
item, err, ok := m.parser.Parse(input).Next()
|
||||||
if !ok {
|
if !ok {
|
||||||
return "", err, false
|
return "", err, false
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,21 +14,21 @@ const TestItem parsekit.ItemType = 1
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||||
|
|
||||||
type MatcherTest struct {
|
type TokenHandlerTest struct {
|
||||||
input string
|
input string
|
||||||
matcher parsekit.Matcher
|
tokenHandler parsekit.TokenHandler
|
||||||
mustMatch bool
|
mustMatch bool
|
||||||
expected string
|
expected string
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunMatcherTests(t *testing.T, testSet []MatcherTest) {
|
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
|
||||||
for _, test := range testSet {
|
for _, test := range testSet {
|
||||||
RunMatcherTest(t, test)
|
RunTokenHandlerTest(t, test)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunMatcherTest(t *testing.T, test MatcherTest) {
|
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||||
output, err, ok := parsekit.NewMatcher(test.matcher, "a match").Parse(test.input)
|
output, err, ok := parsekit.NewMatcher(test.tokenHandler, "a match").Parse(test.input)
|
||||||
|
|
||||||
if test.mustMatch {
|
if test.mustMatch {
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|
|
@ -2,17 +2,17 @@ package parsekit
|
||||||
|
|
||||||
import "unicode/utf8"
|
import "unicode/utf8"
|
||||||
|
|
||||||
// StateHandler defines the type of function that must be implemented to
|
// StateHandler defines the type of function that must be implemented to handle
|
||||||
// handle a parsing state.
|
// a parsing state in a Parser state machine.
|
||||||
//
|
//
|
||||||
// A StateHandler function gets a P struct as its input. This struct holds
|
// A StateHandler function gets a ParseAPI struct as its input. This struct holds
|
||||||
// all the internal state for the parsing state machine and provides the
|
// all the internal state for the parsing state machine and provides the
|
||||||
// interface that the StateHandler must use to interact with the parser.
|
// interface that the StateHandler uses to interact with the parser.
|
||||||
type StateHandler func(*P)
|
type StateHandler func(*ParseAPI)
|
||||||
|
|
||||||
// P holds the internal state of a parse run and provides an API to
|
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||||
// StateHandler methods to communicate with the parser.
|
// StateHandler methods to communicate with the parser.
|
||||||
type P struct {
|
type ParseAPI struct {
|
||||||
state StateHandler // the function that handles the current state
|
state StateHandler // the function that handles the current state
|
||||||
nextState StateHandler // the function that will handle the next state
|
nextState StateHandler // the function that will handle the next state
|
||||||
routeStack []StateHandler // route stack, for handling nested parsing
|
routeStack []StateHandler // route stack, for handling nested parsing
|
||||||
|
@ -37,7 +37,7 @@ type P struct {
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
// (end of data or invalid UTF8 character). In this case, the returned rune
|
// (end of data or invalid UTF8 character). In this case, the returned rune
|
||||||
// will be one of eofRune or invalidRune.
|
// will be one of eofRune or invalidRune.
|
||||||
func (p *P) peek(byteOffset int) (rune, int, bool) {
|
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
||||||
return handleRuneError(r, w)
|
return handleRuneError(r, w)
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,12 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Item represents an item that can be emitted from the parser.
|
||||||
|
type Item struct {
|
||||||
|
Type ItemType
|
||||||
|
Value string
|
||||||
|
}
|
||||||
|
|
||||||
// ItemType represents the type of a parser Item.
|
// ItemType represents the type of a parser Item.
|
||||||
//
|
//
|
||||||
// When creating your own ItemType values, then make use of positive integer
|
// When creating your own ItemType values, then make use of positive integer
|
||||||
|
@ -19,26 +25,14 @@ const ItemEOF ItemType = -1
|
||||||
// an error has occurred during parsing.
|
// an error has occurred during parsing.
|
||||||
const ItemError ItemType = -2
|
const ItemError ItemType = -2
|
||||||
|
|
||||||
// Item represents an item that can be emitted from the parser.
|
|
||||||
type Item struct {
|
|
||||||
Type ItemType
|
|
||||||
Value string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit passes a Parser item to the client, including the provided string.
|
// Emit passes a Parser item to the client, including the provided string.
|
||||||
func (p *P) Emit(t ItemType, v string) {
|
func (p *ParseAPI) Emit(t ItemType, v string) {
|
||||||
p.items <- Item{t, v}
|
p.items <- Item{t, v}
|
||||||
p.buffer.reset()
|
p.buffer.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitLiteral passes a Parser item to the client, including accumulated
|
// BufLiteral retrieves the contents of the parser's string buffer (all the
|
||||||
// string buffer data as a literal string.
|
// runes that were added to it using ParseAPI.Accept()) as a literal string.
|
||||||
func (p *P) EmitLiteral(t ItemType) {
|
|
||||||
p.Emit(t, p.buffer.asLiteralString())
|
|
||||||
}
|
|
||||||
|
|
||||||
// BufLiteral retrieves the contents of the parser buffer (all the runes that
|
|
||||||
// were added to it using P.Accept()) as a literal string.
|
|
||||||
//
|
//
|
||||||
// Literal means that if the input had for example the subsequent runes '\' and 'n'
|
// Literal means that if the input had for example the subsequent runes '\' and 'n'
|
||||||
// in it, then the literal string would have a backslash and an 'n' it in, not a
|
// in it, then the literal string would have a backslash and an 'n' in it, not a
|
||||||
|
@ -46,12 +40,19 @@ func (p *P) EmitLiteral(t ItemType) {
|
||||||
//
|
//
|
||||||
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||||
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
|
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
|
||||||
func (p *P) BufLiteral() string {
|
func (p *ParseAPI) BufLiteral() string {
|
||||||
return p.buffer.asLiteralString()
|
return p.buffer.asLiteralString()
|
||||||
}
|
}
|
||||||
|
|
||||||
// BufInterpreted retrieves the contents of the parser buffer (all the runes that
|
// EmitLiteral passes a parser Item to the client, including the accumulated
|
||||||
// were added to it using P.Accept()) as an interpreted string.
|
// string buffer data as a literal string.
|
||||||
|
func (p *ParseAPI) EmitLiteral(t ItemType) {
|
||||||
|
p.Emit(t, p.BufLiteral())
|
||||||
|
}
|
||||||
|
|
||||||
|
// BufInterpreted retrieves the contents of the parser's string buffer (all
|
||||||
|
// the runes that were added to it using ParseAPI.Accept()) as an
|
||||||
|
// interpreted string.
|
||||||
//
|
//
|
||||||
// Interpreted means that the contents are treated as a Go double quoted
|
// Interpreted means that the contents are treated as a Go double quoted
|
||||||
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
|
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
|
||||||
|
@ -64,7 +65,7 @@ func (p *P) BufLiteral() string {
|
||||||
//
|
//
|
||||||
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
// Retrieving the buffer contents will not affect the buffer itself. New runes can
|
||||||
// still be added to it. Only when calling P.Emit(), the buffer will be cleared.
|
// still be added to it. Only when calling ParseAPI.Emit(), the buffer will be cleared.
|
||||||
func (p *P) BufInterpreted() (string, bool) {
|
func (p *ParseAPI) BufInterpreted() (string, bool) {
|
||||||
s, err := p.buffer.asInterpretedString()
|
s, err := p.buffer.asInterpretedString()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.EmitError(
|
p.EmitError(
|
||||||
|
@ -81,16 +82,12 @@ func (p *P) BufInterpreted() (string, bool) {
|
||||||
// This method returns a boolean value, indicating whether or not the string
|
// This method returns a boolean value, indicating whether or not the string
|
||||||
// interpretation was successful. On invalid string data, an error will
|
// interpretation was successful. On invalid string data, an error will
|
||||||
// automatically be emitted and false will be returned.
|
// automatically be emitted and false will be returned.
|
||||||
func (p *P) EmitInterpreted(t ItemType) bool {
|
func (p *ParseAPI) EmitInterpreted(t ItemType) bool {
|
||||||
s, err := p.buffer.asInterpretedString()
|
if s, ok := p.BufInterpreted(); ok {
|
||||||
if err != nil {
|
p.Emit(t, s)
|
||||||
p.EmitError(
|
return true
|
||||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
|
||||||
p.buffer.asLiteralString(), err)
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
p.Emit(t, s)
|
return false
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error is used as the error type when parsing errors occur.
|
// Error is used as the error type when parsing errors occur.
|
||||||
|
@ -115,15 +112,15 @@ func (err *Error) ErrorFull() string {
|
||||||
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column)
|
return fmt.Sprintf("%s after line %d, column %d", err, err.Line, err.Column)
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitError emits a Parser error item to the client.
|
// EmitError emits a parser error item to the client.
|
||||||
func (p *P) EmitError(format string, args ...interface{}) {
|
func (p *ParseAPI) EmitError(format string, args ...interface{}) {
|
||||||
message := fmt.Sprintf(format, args...)
|
message := fmt.Sprintf(format, args...)
|
||||||
p.Emit(ItemError, message)
|
p.Emit(ItemError, message)
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnexpectedInput is used by a StateHandler function to emit an error item
|
// UnexpectedInput is used by a StateHandler function to emit an error item
|
||||||
// that tells the client that an unexpected rune was encountered in the input.
|
// that tells the client that an unexpected rune was encountered in the input.
|
||||||
func (p *P) UnexpectedInput() {
|
func (p *ParseAPI) UnexpectedInput() {
|
||||||
r, _, ok := p.peek(0)
|
r, _, ok := p.peek(0)
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case ok:
|
||||||
|
@ -137,7 +134,7 @@ func (p *P) UnexpectedInput() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func fmtExpects(p *P) string {
|
func fmtExpects(p *ParseAPI) string {
|
||||||
if p.expecting == "" {
|
if p.expecting == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// Expects is used to let a state function describe what input it is expecting.
|
// Expects is used to let a StateHandler function describe what input it is expecting.
|
||||||
// This expectation is used in error messages to make them more descriptive.
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
//
|
//
|
||||||
// When defining an expectation inside a StateHandler, you do not need to
|
// When defining an expectation inside a StateHandler, you do not need to
|
||||||
|
@ -13,6 +13,6 @@ package parsekit
|
||||||
// 2) there is an invalid UTF8 character on input
|
// 2) there is an invalid UTF8 character on input
|
||||||
//
|
//
|
||||||
// 3) the end of the file was reached.
|
// 3) the end of the file was reached.
|
||||||
func (p *P) Expects(description string) {
|
func (p *ParseAPI) Expects(description string) {
|
||||||
p.expecting = description
|
p.expecting = description
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// On checks if the input at the current cursor position matches the provided Matcher.
|
// On checks if the input at the current cursor position matches the provided
|
||||||
// On must be chained with another method, which tells the parser what action to
|
// TokenHandler. On must be chained with another method, which tells the parser
|
||||||
// perform when a match was found:
|
// what action to perform when a match was found:
|
||||||
//
|
//
|
||||||
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||||
//
|
//
|
||||||
// 2) On(...).Accept() - Move cursor forward, add matched runes to the string buffer.
|
// 2) On(...).Accept() - Move cursor forward, add runes to parser's string buffer.
|
||||||
//
|
//
|
||||||
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
||||||
//
|
//
|
||||||
|
@ -32,16 +32,16 @@ package parsekit
|
||||||
// p.RouteTo(stateHandlerC)
|
// p.RouteTo(stateHandlerC)
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// // When there's a "hi" on input, emit it.
|
// // When there's a "hi" on input, emit a parser item for it.
|
||||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||||
// p.Emit(SomeItemType, p.BufLiteral())
|
// p.Emit(SomeItemType, p.BufLiteral())
|
||||||
// }
|
// }
|
||||||
func (p *P) On(matcher Matcher) *matchAction {
|
func (p *ParseAPI) On(tokenHandler TokenHandler) *MatchAction {
|
||||||
m := &MatchDialog{p: p}
|
m := &TokenAPI{p: p}
|
||||||
if matcher == nil {
|
if tokenHandler == nil {
|
||||||
panic("internal parser error: matcher argument for On() is nil")
|
panic("internal parser error: tokenHandler argument for On() is nil")
|
||||||
}
|
}
|
||||||
ok := matcher(m)
|
ok := tokenHandler(m)
|
||||||
|
|
||||||
// Keep track of the last match, to allow parser implementations
|
// Keep track of the last match, to allow parser implementations
|
||||||
// to access it in an easy way. Typical use would be something like:
|
// to access it in an easy way. Typical use would be something like:
|
||||||
|
@ -51,7 +51,7 @@ func (p *P) On(matcher Matcher) *matchAction {
|
||||||
// }
|
// }
|
||||||
p.LastMatch = string(m.input)
|
p.LastMatch = string(m.input)
|
||||||
|
|
||||||
return &matchAction{
|
return &MatchAction{
|
||||||
p: p,
|
p: p,
|
||||||
ok: ok,
|
ok: ok,
|
||||||
input: m.input,
|
input: m.input,
|
||||||
|
@ -60,9 +60,10 @@ func (p *P) On(matcher Matcher) *matchAction {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// matchAction is a struct that is used for building the On()-method chain.
|
// MatchAction is a struct that is used for building the On()-method chain.
|
||||||
type matchAction struct {
|
// The On() method will return an initialized struct of this type.
|
||||||
p *P
|
type MatchAction struct {
|
||||||
|
p *ParseAPI
|
||||||
ok bool
|
ok bool
|
||||||
input []rune
|
input []rune
|
||||||
output []rune
|
output []rune
|
||||||
|
@ -70,11 +71,12 @@ type matchAction struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accept tells the parser to move the cursor past a match that was found,
|
// Accept tells the parser to move the cursor past a match that was found,
|
||||||
// and to store the input that matched in the string buffer.
|
// and to store the input that matched in the parser's string buffer.
|
||||||
// When no match was found, then no action is taken.
|
// When no match was found, then no action is taken.
|
||||||
// It returns a routeAction struct, which provides methods that can be used
|
//
|
||||||
// to tell the parser what state to go to next.
|
// Returns true in case a match was found.
|
||||||
func (a *matchAction) Accept() bool {
|
// When no match was found, then no action is taken and false is returned.
|
||||||
|
func (a *MatchAction) Accept() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.p.buffer.writeString(string(a.output))
|
a.p.buffer.writeString(string(a.output))
|
||||||
a.advanceCursor()
|
a.advanceCursor()
|
||||||
|
@ -83,10 +85,11 @@ func (a *matchAction) Accept() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip tells the parser to move the cursor past a match that was found,
|
// Skip tells the parser to move the cursor past a match that was found,
|
||||||
// without storing the actual match in the string buffer.
|
// without storing the actual match in the parser's string buffer.
|
||||||
|
//
|
||||||
// Returns true in case a match was found.
|
// Returns true in case a match was found.
|
||||||
// When no match was found, then no action is taken and false is returned.
|
// When no match was found, then no action is taken and false is returned.
|
||||||
func (a *matchAction) Skip() bool {
|
func (a *MatchAction) Skip() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.advanceCursor()
|
a.advanceCursor()
|
||||||
}
|
}
|
||||||
|
@ -95,14 +98,14 @@ func (a *matchAction) Skip() bool {
|
||||||
|
|
||||||
// Stay tells the parser to not move the cursor after finding a match.
|
// Stay tells the parser to not move the cursor after finding a match.
|
||||||
// Returns true in case a match was found, false otherwise.
|
// Returns true in case a match was found, false otherwise.
|
||||||
func (a *matchAction) Stay() bool {
|
func (a *MatchAction) Stay() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the input data.
|
// advanceCursor advances the input position in the input data.
|
||||||
// While doing so, it keeps tracks of newlines, so we can report on
|
// While doing so, it keeps track of newlines that are encountered, so we
|
||||||
// row + column positions on error.
|
// can report on line + column positions on error.
|
||||||
func (a *matchAction) advanceCursor() {
|
func (a *MatchAction) advanceCursor() {
|
||||||
a.p.inputPos = a.inputPos
|
a.p.inputPos = a.inputPos
|
||||||
for _, r := range a.input {
|
for _, r := range a.input {
|
||||||
if a.p.newline {
|
if a.p.newline {
|
||||||
|
|
|
@ -1,34 +1,34 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
// RouteTo tells the parser what StateHandler function to invoke
|
// RouteTo tells the parser what StateHandler function to invoke on
|
||||||
// in the next parsing cycle.
|
// the next parse cycle.
|
||||||
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
|
func (p *ParseAPI) RouteTo(state StateHandler) *RouteFollowupAction {
|
||||||
p.nextState = state
|
p.nextState = state
|
||||||
return &routeFollowupAction{p}
|
return &RouteFollowupAction{p}
|
||||||
}
|
}
|
||||||
|
|
||||||
// RouteRepeat indicates that on the next parsing cycle, the current
|
// RouteRepeat tells the parser that on the next parsing cycle, the current
|
||||||
// StateHandler must be reinvoked.
|
// StateHandler must be reinvoked.
|
||||||
func (p *P) RouteRepeat() {
|
func (p *ParseAPI) RouteRepeat() {
|
||||||
p.RouteTo(p.state)
|
p.RouteTo(p.state)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RouteReturn tells the parser that on the next cycle the last
|
// RouteReturn tells the parser that on the next cycle the last StateHandler
|
||||||
// StateHandler that was pushed on the route stack must be invoked.
|
// that was pushed on the route stack must be invoked.
|
||||||
//
|
//
|
||||||
// Using this method is optional. When implementing a StateHandler that
|
// Using this method is optional. When implementing a StateHandler that
|
||||||
// is used as a sort of subroutine (using constructions like
|
// is used as a sort of subroutine (using constructions like
|
||||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||||
// providing an explicit routing decision from that handler. The parser will
|
// providing an explicit routing decision from that handler. The parser will
|
||||||
// automatically assume a RouteReturn() in that case.
|
// automatically assume a RouteReturn() in that case.
|
||||||
func (p *P) RouteReturn() {
|
func (p *ParseAPI) RouteReturn() {
|
||||||
p.nextState = p.popRoute()
|
p.nextState = p.popRoute()
|
||||||
}
|
}
|
||||||
|
|
||||||
// routeFollowupAction chains parsing routes.
|
// RouteFollowupAction chains parsing routes.
|
||||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||||
type routeFollowupAction struct {
|
type RouteFollowupAction struct {
|
||||||
p *P
|
p *ParseAPI
|
||||||
}
|
}
|
||||||
|
|
||||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
||||||
|
@ -36,7 +36,7 @@ type routeFollowupAction struct {
|
||||||
// For example:
|
// For example:
|
||||||
//
|
//
|
||||||
// p.RouteTo(handlerA).ThenTo(handlerB)
|
// p.RouteTo(handlerA).ThenTo(handlerB)
|
||||||
func (a *routeFollowupAction) ThenTo(state StateHandler) {
|
func (a *RouteFollowupAction) ThenTo(state StateHandler) {
|
||||||
a.p.pushRoute(state)
|
a.p.pushRoute(state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,18 +45,18 @@ func (a *routeFollowupAction) ThenTo(state StateHandler) {
|
||||||
// For example:
|
// For example:
|
||||||
//
|
//
|
||||||
// p.RouteTo(handlerA).ThenReturnHere()
|
// p.RouteTo(handlerA).ThenReturnHere()
|
||||||
func (a *routeFollowupAction) ThenReturnHere() {
|
func (a *RouteFollowupAction) ThenReturnHere() {
|
||||||
a.p.pushRoute(a.p.state)
|
a.p.pushRoute(a.p.state)
|
||||||
}
|
}
|
||||||
|
|
||||||
// pushRoute adds the StateHandler to the route stack.
|
// pushRoute adds the StateHandler to the route stack.
|
||||||
// This is used for implementing nested parsing.
|
// This is used for implementing nested parsing.
|
||||||
func (p *P) pushRoute(state StateHandler) {
|
func (p *ParseAPI) pushRoute(state StateHandler) {
|
||||||
p.routeStack = append(p.routeStack, state)
|
p.routeStack = append(p.routeStack, state)
|
||||||
}
|
}
|
||||||
|
|
||||||
// popRoute pops the last pushed StateHandler from the route stack.
|
// popRoute pops the last pushed StateHandler from the route stack.
|
||||||
func (p *P) popRoute() StateHandler {
|
func (p *ParseAPI) popRoute() StateHandler {
|
||||||
last := len(p.routeStack) - 1
|
last := len(p.routeStack) - 1
|
||||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||||
p.routeStack = head
|
p.routeStack = head
|
||||||
|
@ -66,8 +66,8 @@ func (p *P) popRoute() StateHandler {
|
||||||
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
// ExpectEndOfFile can be used from a StateHandler function to indicate that
|
||||||
// your parser expects to be at the end of the file. This will schedule
|
// your parser expects to be at the end of the file. This will schedule
|
||||||
// a parsekit-provided StateHandler which will do the actual check for this.
|
// a parsekit-provided StateHandler which will do the actual check for this.
|
||||||
func (p *P) ExpectEndOfFile() {
|
func (p *ParseAPI) ExpectEndOfFile() {
|
||||||
p.RouteTo(func(p *P) {
|
p.RouteTo(func(p *ParseAPI) {
|
||||||
p.Expects("end of file")
|
p.Expects("end of file")
|
||||||
if p.On(A.EndOfFile).Stay() {
|
if p.On(A.EndOfFile).Stay() {
|
||||||
p.Emit(ItemEOF, "EOF")
|
p.Emit(ItemEOF, "EOF")
|
||||||
|
|
|
@ -0,0 +1,192 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenHandler is the function type that is involved in turning a low level
|
||||||
|
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
|
||||||
|
// data matches some kind of pattern and to report back the match.
|
||||||
|
//
|
||||||
|
// A TokenHandler is to be used in conjunction with parsekit.ParseAPI.On() or
|
||||||
|
// parsekit.Matcher().
|
||||||
|
//
|
||||||
|
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||||
|
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||||
|
// for retrieving input data to match against and for reporting back results.
|
||||||
|
type TokenHandler func(t *TokenAPI) bool
|
||||||
|
|
||||||
|
// TokenAPI is used by TokenHandler functions to retrieve runes from the
|
||||||
|
// input to match against and to report back results.
|
||||||
|
//
|
||||||
|
// Basic operation:
|
||||||
|
//
|
||||||
|
// To retrieve the next rune from the input, the TokenHandler function can call
|
||||||
|
// the TokenAPI.NextRune() method.
|
||||||
|
//
|
||||||
|
// The TokenHandler function can then evaluate the retrieved rune and either
|
||||||
|
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
|
||||||
|
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
|
||||||
|
// the rune will not be added to the output. It is mandatory for a TokenHandler
|
||||||
|
// to call either Accept() or Skip() after retrieving a rune, before calling
|
||||||
|
// NextRune() again.
|
||||||
|
//
|
||||||
|
// Eventually, the TokenHandler function must return a boolean value, indicating
|
||||||
|
// whether or not a match was found. When true, then the calling code will
|
||||||
|
// use the runes that were accepted into the TokenAPI's resulting output.
|
||||||
|
//
|
||||||
|
// Forking operation for easy lookahead support:
|
||||||
|
//
|
||||||
|
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
|
||||||
|
// might either succeed or fail. In case of a failing lookahead, the state
|
||||||
|
// of the TokenAPI must be brought back to the original state.
|
||||||
|
//
|
||||||
|
// The way in which this is supported, is by forking a TokenAPI by calling
|
||||||
|
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
|
||||||
|
// output buffer, but using the same input cursor position as the forked parent.
|
||||||
|
//
|
||||||
|
// The TokenHandler function can then use the same interface as described for
|
||||||
|
// normal operation to retrieve runes from the input and to fill the resulting
|
||||||
|
// output. When the TokenHandler function decides that the lookahead was successful,
|
||||||
|
// then the method TokenAPI.Merge() can be called on the forked child to
|
||||||
|
// append the resulting output from the child to the parent's resulting output,
|
||||||
|
// and to update the parent input cursor position to that of the child.
|
||||||
|
//
|
||||||
|
// When the TokenHandler function decides that the lookahead was unsuccessful,
|
||||||
|
// then it can simply discard the forked child. The parent TokenAPI was never
|
||||||
|
// modified, so a new match can be safely started using that parent, as if the
|
||||||
|
// lookahead never happened.
|
||||||
|
type TokenAPI struct {
|
||||||
|
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
|
||||||
|
inputOffset int // the byte offset into the input
|
||||||
|
input []rune // a slice of runes that represents all retrieved input runes for the TokenHandler
|
||||||
|
output []rune // a slice of runes that represents the accepted output runes for the TokenHandler
|
||||||
|
currRune *runeInfo // hold information for the last rune that was read from the input
|
||||||
|
parent *TokenAPI // the parent TokenAPI, in case this one was forked
|
||||||
|
}
|
||||||
|
|
||||||
|
// runeInfo describes a single rune and its metadata.
|
||||||
|
type runeInfo struct {
|
||||||
|
Rune rune // an UTF8 rune
|
||||||
|
ByteSize int // the number of bytes in the rune
|
||||||
|
OK bool // false when the rune represents an invalid UTF8 rune or EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
// NextRune retrieves the next rune from the input.
|
||||||
|
//
|
||||||
|
// It returns the rune and a boolean. The boolean will be false in case an
|
||||||
|
// invalid UTF8 rune or the end of the file was encountered.
|
||||||
|
//
|
||||||
|
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
||||||
|
// to respectively add the rune to the TokenAPI's resulting output or to
|
||||||
|
// fully ignore it. This way, a TokenHandler has full control over what runes are
|
||||||
|
// significant for the resulting output of that TokenHandler.
|
||||||
|
//
|
||||||
|
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||||
|
// rune is explicitly accepted or skipped as described above.
|
||||||
|
func (t *TokenAPI) NextRune() (rune, bool) {
|
||||||
|
if t.currRune != nil {
|
||||||
|
panic("internal Matcher error: NextRune() was called without accepting or skipping the previously read rune")
|
||||||
|
}
|
||||||
|
r, w, ok := t.p.peek(t.inputOffset)
|
||||||
|
t.currRune = &runeInfo{r, w, ok}
|
||||||
|
if ok {
|
||||||
|
t.input = append(t.input, r)
|
||||||
|
}
|
||||||
|
return r, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fork splits off a child TokenAPI, containing the same input cursor position
|
||||||
|
// as the parent TokenAPI, but with all other data in a fresh state.
|
||||||
|
//
|
||||||
|
// By forking, a TokenHandler function can freely work with a TokenAPI, without
|
||||||
|
// affecting the parent TokenAPI. This is for example useful when the
|
||||||
|
// TokenHandler function must perform some form of lookahead.
|
||||||
|
//
|
||||||
|
// When a successful match was found, the TokenHandler function can call
|
||||||
|
// TokenAPI.Merge() on the forked child to have the resulting output added
|
||||||
|
// to the parent TokenAPI.
|
||||||
|
//
|
||||||
|
// When no match was found, the forked child can simply be discarded.
|
||||||
|
//
|
||||||
|
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
||||||
|
// This is done in 4 steps and only after finishing all steps, the TokenHandler
|
||||||
|
// function can confirm a successful match. The TokenHandler function for this
|
||||||
|
// case could look like this (yes, it's naive, but it shows the point):
|
||||||
|
// TODO make proper tested example
|
||||||
|
//
|
||||||
|
// func MatchAbcd(t *TokenAPI) bool {
|
||||||
|
// child := t.Fork() // fork to keep t untouched
|
||||||
|
// for _, letter := range []rune{'a', 'b', 'c', 'd'} {
|
||||||
|
// if r, ok := child.NextRune(); !ok || r != letter {
|
||||||
|
// return false // report mismatch, t is left untouched
|
||||||
|
// }
|
||||||
|
// child.Accept() // add rune to child output
|
||||||
|
// }
|
||||||
|
// child.Merge() // we have a match, add resulting output to parent
|
||||||
|
// return true // and report the successful match
|
||||||
|
// }
|
||||||
|
func (t *TokenAPI) Fork() *TokenAPI {
|
||||||
|
return &TokenAPI{
|
||||||
|
p: t.p,
|
||||||
|
inputOffset: t.inputOffset,
|
||||||
|
parent: t,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
|
||||||
|
// output of the TokenAPI.
|
||||||
|
func (t *TokenAPI) Accept() {
|
||||||
|
t.checkAllowedCall("Accept()")
|
||||||
|
t.output = append(t.output, t.currRune.Rune)
|
||||||
|
t.inputOffset += t.currRune.ByteSize
|
||||||
|
t.currRune = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip will ignore the last rune as read by NextRune().
|
||||||
|
func (t *TokenAPI) Skip() {
|
||||||
|
t.checkAllowedCall("Skip()")
|
||||||
|
t.inputOffset += t.currRune.ByteSize
|
||||||
|
t.currRune = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *TokenAPI) checkAllowedCall(name string) {
|
||||||
|
if t.currRune == nil {
|
||||||
|
panic(fmt.Sprintf("internal Matcher error: %s was called without a prior call to NextRune()", name))
|
||||||
|
}
|
||||||
|
if !t.currRune.OK {
|
||||||
|
panic(fmt.Sprintf("internal Matcher error: %s was called, but prior call to NextRun() did not return OK (EOF or invalid rune)", name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge merges the resulting output from a forked child TokenAPI back into
|
||||||
|
// its parent: The runes that are accepted in the child are added to the parent
|
||||||
|
// runes and the parent's input cursor position is advanced to the child's
|
||||||
|
// cursor position.
|
||||||
|
//
|
||||||
|
// After the merge, the child TokenAPI is reset so it can immediately be
|
||||||
|
// reused for performing another match (all data are cleared, except for the
|
||||||
|
// input offset which is kept at its current position).
|
||||||
|
func (t *TokenAPI) Merge() bool {
|
||||||
|
if t.parent == nil {
|
||||||
|
panic("internal parser error: Cannot call Merge a a non-forked MatchDialog")
|
||||||
|
}
|
||||||
|
t.parent.input = append(t.parent.input, t.input...)
|
||||||
|
t.parent.output = append(t.parent.output, t.output...)
|
||||||
|
t.parent.inputOffset = t.inputOffset
|
||||||
|
t.ClearOutput()
|
||||||
|
t.ClearInput()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
|
||||||
|
// the input and input offset as-is.
|
||||||
|
func (t *TokenAPI) ClearOutput() {
|
||||||
|
t.output = []rune{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClearInput clears the input for the TokenAPI, but it keeps the output
|
||||||
|
// and input offset as-is.
|
||||||
|
func (t *TokenAPI) ClearInput() {
|
||||||
|
t.input = []rune{}
|
||||||
|
}
|
|
@ -0,0 +1,558 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// C provides convenient access to a range of parser/combinators that can be
|
||||||
|
// used to construct TokenHandler functions.
|
||||||
|
//
|
||||||
|
// When using C in your own parser, then it is advised to create a variable
|
||||||
|
// to reference it:
|
||||||
|
//
|
||||||
|
// var c = parsekit.C
|
||||||
|
//
|
||||||
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
|
var C = struct {
|
||||||
|
Rune func(rune) TokenHandler
|
||||||
|
Runes func(...rune) TokenHandler
|
||||||
|
RuneRange func(rune, rune) TokenHandler
|
||||||
|
Str func(string) TokenHandler
|
||||||
|
StrNoCase func(string) TokenHandler
|
||||||
|
Any func(...TokenHandler) TokenHandler
|
||||||
|
Not func(TokenHandler) TokenHandler
|
||||||
|
Opt func(TokenHandler) TokenHandler
|
||||||
|
Seq func(...TokenHandler) TokenHandler
|
||||||
|
Rep func(times int, handler TokenHandler) TokenHandler
|
||||||
|
Min func(min int, handler TokenHandler) TokenHandler
|
||||||
|
Max func(max int, handler TokenHandler) TokenHandler
|
||||||
|
ZeroOrMore func(TokenHandler) TokenHandler
|
||||||
|
OneOrMore func(TokenHandler) TokenHandler
|
||||||
|
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
||||||
|
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
||||||
|
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
||||||
|
}{
|
||||||
|
Rune: MatchRune,
|
||||||
|
Runes: MatchRunes,
|
||||||
|
RuneRange: MatchRuneRange,
|
||||||
|
Str: MatchStr,
|
||||||
|
StrNoCase: MatchStrNoCase,
|
||||||
|
Opt: MatchOpt,
|
||||||
|
Any: MatchAny,
|
||||||
|
Not: MatchNot,
|
||||||
|
Seq: MatchSeq,
|
||||||
|
Rep: MatchRep,
|
||||||
|
Min: MatchMin,
|
||||||
|
Max: MatchMax,
|
||||||
|
ZeroOrMore: MatchZeroOrMore,
|
||||||
|
OneOrMore: MatchOneOrMore,
|
||||||
|
MinMax: MatchMinMax,
|
||||||
|
Separated: MatchSeparated,
|
||||||
|
Except: MatchExcept,
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRune creates a TokenHandler function that checks if the next rune from
|
||||||
|
// the input matches the provided rune.
|
||||||
|
func MatchRune(expected rune) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
input, ok := t.NextRune()
|
||||||
|
if ok && input == expected {
|
||||||
|
t.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRunes creates a TokenHandler function that checks if the next rune
|
||||||
|
// from the input is one of the provided runes.
|
||||||
|
func MatchRunes(expected ...rune) TokenHandler {
|
||||||
|
s := string(expected)
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
input, ok := t.NextRune()
|
||||||
|
if ok {
|
||||||
|
if strings.ContainsRune(s, input) {
|
||||||
|
t.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRuneRange creates a TokenHandler function that checks if the next rune
|
||||||
|
// from the input is contained by the provided rune range.
|
||||||
|
//
|
||||||
|
// The rune range is defined by a start and an end rune, inclusive, so:
|
||||||
|
//
|
||||||
|
// MatchRuneRange('g', 'k')
|
||||||
|
//
|
||||||
|
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||||
|
func MatchRuneRange(start rune, end rune) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
if end < start {
|
||||||
|
panic(fmt.Sprintf("internal parser error: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||||
|
}
|
||||||
|
input, ok := t.NextRune()
|
||||||
|
if ok && input >= start && input <= end {
|
||||||
|
t.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchStr creates a TokenHandler that will check if the upcoming runes on the
|
||||||
|
// input match the provided string.
|
||||||
|
// TODO make this a more efficient string-level match?
|
||||||
|
func MatchStr(expected string) TokenHandler {
|
||||||
|
var handlers = []TokenHandler{}
|
||||||
|
for _, r := range expected {
|
||||||
|
handlers = append(handlers, MatchRune(r))
|
||||||
|
}
|
||||||
|
return MatchSeq(handlers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchStrNoCase creates a TokenHandler that will check if the upcoming runes
|
||||||
|
// on the input match the provided string in a case-insensitive manner.
|
||||||
|
// TODO make this a more efficient string-level match?
|
||||||
|
func MatchStrNoCase(expected string) TokenHandler {
|
||||||
|
var handlers = []TokenHandler{}
|
||||||
|
for _, r := range expected {
|
||||||
|
u := unicode.ToUpper(r)
|
||||||
|
l := unicode.ToLower(r)
|
||||||
|
handlers = append(handlers, MatchRunes(u, l))
|
||||||
|
}
|
||||||
|
return MatchSeq(handlers...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
|
||||||
|
// When the provided TokenHandler applies, then its output is used, otherwise
|
||||||
|
// no output is generated but still a successful match is reported.
|
||||||
|
func MatchOpt(handler TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
child := t.Fork()
|
||||||
|
if handler(child) {
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
|
||||||
|
// applied in their exact order. Only if all TokenHandlers apply, the sequence
|
||||||
|
// reports a successful match.
|
||||||
|
func MatchSeq(handlers ...TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
child := t.Fork()
|
||||||
|
for _, matcher := range handlers {
|
||||||
|
if !matcher(child) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers
|
||||||
|
// can be applied. They are applied in their provided order. The first TokenHandler
|
||||||
|
// that applies is used for reporting back a match.
|
||||||
|
func MatchAny(handlers ...TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
for _, handler := range handlers {
|
||||||
|
child := t.Fork()
|
||||||
|
if handler(child) {
|
||||||
|
return child.Merge()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to
|
||||||
|
// the current input. If it does, then a failed match will be reported. If it
|
||||||
|
// does not, then the next rune from the input will be reported as a match.
|
||||||
|
func MatchNot(handler TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
probe := t.Fork()
|
||||||
|
if handler(probe) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
_, ok := t.NextRune()
|
||||||
|
if ok {
|
||||||
|
t.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
|
||||||
|
// applied exactly the provided amount of times.
|
||||||
|
//
|
||||||
|
// Note that the input can contain more than the provided number of matches, e.g.:
|
||||||
|
//
|
||||||
|
// MatchRep(4, MatchRune('X'))
|
||||||
|
//
|
||||||
|
// will not match input "XXX", it will match input "XXXX", but also "XXXXXX".
|
||||||
|
// In that last case, there will be a remainder "XX" on the input.
|
||||||
|
func MatchRep(times int, handler TokenHandler) TokenHandler {
|
||||||
|
return matchMinMax(times, times, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
|
||||||
|
// applied at least the provided minimum number of times.
|
||||||
|
// When more matches are possible, these will be included in the output.
|
||||||
|
func MatchMin(min int, handler TokenHandler) TokenHandler {
|
||||||
|
return matchMinMax(min, -1, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
|
||||||
|
// applied at maximum the provided minimum number of times.
|
||||||
|
// When more matches are possible, these will be included in the output.
|
||||||
|
// Zero matches are considered a successful match.
|
||||||
|
func MatchMax(max int, handler TokenHandler) TokenHandler {
|
||||||
|
return matchMinMax(0, max, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
||||||
|
// be applied zero or more times. All matches will be included in the output.
|
||||||
|
// Zero matches are considered a successful match.
|
||||||
|
func MatchZeroOrMore(handler TokenHandler) TokenHandler {
|
||||||
|
return matchMinMax(0, -1, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
|
||||||
|
// be applied one or more times. All matches will be included in the output.
|
||||||
|
func MatchOneOrMore(handler TokenHandler) TokenHandler {
|
||||||
|
return matchMinMax(1, -1, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
|
||||||
|
// be applied between the provided minimum and maximum number of times,
|
||||||
|
// inclusive. All matches will be included in the output.
|
||||||
|
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
||||||
|
if max < 0 {
|
||||||
|
panic("internal parser error: MatchMinMax definition error: max must be >= 0 ")
|
||||||
|
}
|
||||||
|
if min < 0 {
|
||||||
|
panic("internal parser error: MatchMinMax definition error: min must be >= 0 ")
|
||||||
|
}
|
||||||
|
return matchMinMax(min, max, handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
func matchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
child := t.Fork()
|
||||||
|
if max >= 0 && min > max {
|
||||||
|
panic(fmt.Sprintf("internal parser error: MatchRep definition error: max %d must not be < min %d", max, min))
|
||||||
|
}
|
||||||
|
total := 0
|
||||||
|
// Check for the minimum required amount of matches.
|
||||||
|
for total < min {
|
||||||
|
total++
|
||||||
|
if !handler(child) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// No specified max: include the rest of the available matches.
|
||||||
|
// Specified max: include the rest of the availble matches, up to the max.
|
||||||
|
child.Merge()
|
||||||
|
for max < 0 || total < max {
|
||||||
|
total++
|
||||||
|
if !handler(child) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchSeparated creates a TokenHandler that checks for a pattern of one or more
|
||||||
|
// TokenHandlers of one type (the separated), separated by TokenHandler of another type
|
||||||
|
// (the separator). All matches (separated + separator) are included in the
|
||||||
|
// output.
|
||||||
|
func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
|
||||||
|
return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be
|
||||||
|
// applied to the upcoming input. It also checks if the except TokenHandler can be
|
||||||
|
// applied. If the handler applies, but the except TokenHandler as well, then the match
|
||||||
|
// as a whole will be treated as a mismatch.
|
||||||
|
func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
if except(t.Fork()) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return handler(t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A provides convenient access to a range of atoms that can be used to
|
||||||
|
// build TokenHandlers or parser rules.
|
||||||
|
//
|
||||||
|
// In parsekit, an atom is defined as a ready for use TokenHandler function.
|
||||||
|
//
|
||||||
|
// When using A in your own parser, then it is advised to create a variable
|
||||||
|
// to reference it:
|
||||||
|
//
|
||||||
|
// var a = parsekit.A
|
||||||
|
//
|
||||||
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
|
var A = struct {
|
||||||
|
EndOfFile TokenHandler
|
||||||
|
AnyRune TokenHandler
|
||||||
|
Space TokenHandler
|
||||||
|
Tab TokenHandler
|
||||||
|
CR TokenHandler
|
||||||
|
LF TokenHandler
|
||||||
|
CRLF TokenHandler
|
||||||
|
Excl TokenHandler
|
||||||
|
DoubleQuote TokenHandler
|
||||||
|
Hash TokenHandler
|
||||||
|
Dollar TokenHandler
|
||||||
|
Percent TokenHandler
|
||||||
|
Amp TokenHandler
|
||||||
|
SingleQuote TokenHandler
|
||||||
|
RoundOpen TokenHandler
|
||||||
|
RoundClose TokenHandler
|
||||||
|
Asterisk TokenHandler
|
||||||
|
Plus TokenHandler
|
||||||
|
Comma TokenHandler
|
||||||
|
Minus TokenHandler
|
||||||
|
Dot TokenHandler
|
||||||
|
Slash TokenHandler
|
||||||
|
Colon TokenHandler
|
||||||
|
Semicolon TokenHandler
|
||||||
|
AngleOpen TokenHandler
|
||||||
|
Equal TokenHandler
|
||||||
|
AngleClose TokenHandler
|
||||||
|
Question TokenHandler
|
||||||
|
At TokenHandler
|
||||||
|
SquareOpen TokenHandler
|
||||||
|
Backslash TokenHandler
|
||||||
|
SquareClose TokenHandler
|
||||||
|
Caret TokenHandler
|
||||||
|
Underscore TokenHandler
|
||||||
|
Backquote TokenHandler
|
||||||
|
CurlyOpen TokenHandler
|
||||||
|
Pipe TokenHandler
|
||||||
|
CurlyClose TokenHandler
|
||||||
|
Tilde TokenHandler
|
||||||
|
Newline TokenHandler
|
||||||
|
Whitespace TokenHandler
|
||||||
|
WhitespaceAndNewlines TokenHandler
|
||||||
|
EndOfLine TokenHandler
|
||||||
|
Digit TokenHandler
|
||||||
|
ASCII TokenHandler
|
||||||
|
ASCIILower TokenHandler
|
||||||
|
ASCIIUpper TokenHandler
|
||||||
|
HexDigit TokenHandler
|
||||||
|
}{
|
||||||
|
EndOfFile: MatchEndOfFile(),
|
||||||
|
AnyRune: MatchAnyRune(),
|
||||||
|
Space: C.Rune(' '),
|
||||||
|
Tab: C.Rune('\t'),
|
||||||
|
CR: C.Rune('\r'),
|
||||||
|
LF: C.Rune('\n'),
|
||||||
|
CRLF: C.Str("\r\n"),
|
||||||
|
Excl: C.Rune('!'),
|
||||||
|
DoubleQuote: C.Rune('"'),
|
||||||
|
Hash: C.Rune('#'),
|
||||||
|
Dollar: C.Rune('$'),
|
||||||
|
Percent: C.Rune('%'),
|
||||||
|
Amp: C.Rune('&'),
|
||||||
|
SingleQuote: C.Rune('\''),
|
||||||
|
RoundOpen: C.Rune('('),
|
||||||
|
RoundClose: C.Rune(')'),
|
||||||
|
Asterisk: C.Rune('*'),
|
||||||
|
Plus: C.Rune('+'),
|
||||||
|
Comma: C.Rune(','),
|
||||||
|
Minus: C.Rune('-'),
|
||||||
|
Dot: C.Rune('.'),
|
||||||
|
Slash: C.Rune('/'),
|
||||||
|
Colon: C.Rune(':'),
|
||||||
|
Semicolon: C.Rune(';'),
|
||||||
|
AngleOpen: C.Rune('<'),
|
||||||
|
Equal: C.Rune('='),
|
||||||
|
AngleClose: C.Rune('>'),
|
||||||
|
Question: C.Rune('?'),
|
||||||
|
At: C.Rune('@'),
|
||||||
|
SquareOpen: C.Rune('['),
|
||||||
|
Backslash: C.Rune('\\'),
|
||||||
|
SquareClose: C.Rune(']'),
|
||||||
|
Caret: C.Rune('^'),
|
||||||
|
Underscore: C.Rune('_'),
|
||||||
|
Backquote: C.Rune('`'),
|
||||||
|
CurlyOpen: C.Rune('{'),
|
||||||
|
Pipe: C.Rune('|'),
|
||||||
|
CurlyClose: C.Rune('}'),
|
||||||
|
Tilde: C.Rune('~'),
|
||||||
|
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
||||||
|
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
||||||
|
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||||
|
Digit: C.RuneRange('0', '9'),
|
||||||
|
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||||
|
ASCIILower: C.RuneRange('a', 'z'),
|
||||||
|
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||||
|
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
||||||
|
// has been reached. This TokenHandler will never produce output. It only reports
|
||||||
|
// a successful or a failing match through its boolean return value.
|
||||||
|
func MatchEndOfFile() TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
fork := t.Fork()
|
||||||
|
input, ok := fork.NextRune()
|
||||||
|
return !ok && input == eofRune
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchAnyRune creates a TokenHandler function that checks if a valid rune can be
|
||||||
|
// read from the input. It reports back a successful match if the end of the
|
||||||
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||||
|
func MatchAnyRune() TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
_, ok := t.NextRune()
|
||||||
|
if ok {
|
||||||
|
t.Accept()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// M provides convenient access to a range of modifiers (which in their nature are
|
||||||
|
// parser/combinators) that can be used when creating TokenHandler functions.
|
||||||
|
//
|
||||||
|
// In parsekit, a modifier is defined as a TokenHandler function that modifies the
|
||||||
|
// resulting output of another TokenHandler in some way. It does not do any matching
|
||||||
|
// against input of its own.
|
||||||
|
//
|
||||||
|
// When using M in your own parser, then it is advised to create a variable
|
||||||
|
// to reference it:
|
||||||
|
//
|
||||||
|
// var m = parsekit.M
|
||||||
|
//
|
||||||
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
|
var M = struct {
|
||||||
|
Drop func(TokenHandler) TokenHandler
|
||||||
|
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||||
|
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||||
|
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||||
|
ToLower func(TokenHandler) TokenHandler
|
||||||
|
ToUpper func(TokenHandler) TokenHandler
|
||||||
|
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
||||||
|
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
|
||||||
|
}{
|
||||||
|
Drop: ModifyDrop,
|
||||||
|
Trim: ModifyTrim,
|
||||||
|
TrimLeft: ModifyTrimLeft,
|
||||||
|
TrimRight: ModifyTrimRight,
|
||||||
|
ToLower: ModifyToLower,
|
||||||
|
ToUpper: ModifyToUpper,
|
||||||
|
Replace: ModifyReplace,
|
||||||
|
ModifyByCallback: ModifyByCallback,
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is discarded completely.
|
||||||
|
//
|
||||||
|
// Note that if the TokenHandler does not apply, a mismatch will be reported back,
|
||||||
|
// even though we would have dropped the output anyway. So if you would like
|
||||||
|
// to drop optional whitespace, then use something like:
|
||||||
|
//
|
||||||
|
// M.Drop(C.Opt(A.Whitespace))
|
||||||
|
//
|
||||||
|
// instead of:
|
||||||
|
//
|
||||||
|
// M.Drop(A.Whitespace)
|
||||||
|
//
|
||||||
|
// Since whitespace is defined as "1 or more spaces and/or tabs", the input
|
||||||
|
// string "bork" would not match against the second form, but " bork" would.
|
||||||
|
// In both cases, it would match the first form.
|
||||||
|
func ModifyDrop(handler TokenHandler) TokenHandler {
|
||||||
|
return ModifyByCallback(handler, func(s string) string {
|
||||||
|
return ""
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and characters from the provided
|
||||||
|
// cutset are trimmed from both the left and the right of the output.
|
||||||
|
func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
|
||||||
|
return modifyTrim(handler, cutset, true, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and characters from the provided
|
||||||
|
// cutset are trimmed from the left of the output.
|
||||||
|
func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
|
||||||
|
return modifyTrim(handler, cutset, true, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and characters from the provided
|
||||||
|
// cutset are trimmed from the right of the output.
|
||||||
|
func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
|
||||||
|
return modifyTrim(handler, cutset, false, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
|
||||||
|
modfunc := func(s string) string {
|
||||||
|
if trimLeft {
|
||||||
|
s = strings.TrimLeft(s, cutset)
|
||||||
|
}
|
||||||
|
if trimRight {
|
||||||
|
s = strings.TrimRight(s, cutset)
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return ModifyByCallback(handler, modfunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and characters from the provided
|
||||||
|
// cutset are converted into upper case.
|
||||||
|
func ModifyToUpper(handler TokenHandler) TokenHandler {
|
||||||
|
return ModifyByCallback(handler, strings.ToUpper)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and characters from the provided
|
||||||
|
// cutset are converted into lower case.
|
||||||
|
func ModifyToLower(handler TokenHandler) TokenHandler {
|
||||||
|
return ModifyByCallback(handler, strings.ToLower)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is replaced by the provided string.
|
||||||
|
func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler {
|
||||||
|
return ModifyByCallback(handler, func(string) string {
|
||||||
|
return replaceWith
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
// If it does, then its output is taken and it is fed to the provided modfunc.
|
||||||
|
// This is a simple function that takes a string on input and returns a possibly
|
||||||
|
// modified string on output. The return value of the modfunc will replace the
|
||||||
|
// resulting output.
|
||||||
|
func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
child := t.Fork()
|
||||||
|
if handler(child) {
|
||||||
|
s := modfunc(string(child.output))
|
||||||
|
child.output = []rune(s)
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
|
@ -8,7 +8,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCombinators(t *testing.T) {
|
func TestCombinators(t *testing.T) {
|
||||||
RunMatcherTests(t, []MatcherTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"xxx", c.Rune('x'), true, "x"},
|
{"xxx", c.Rune('x'), true, "x"},
|
||||||
{"x ", c.Rune(' '), false, ""},
|
{"x ", c.Rune(' '), false, ""},
|
||||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
{"aa", c.RuneRange('b', 'e'), false, ""},
|
||||||
|
@ -79,7 +79,7 @@ func TestCombinators(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAtoms(t *testing.T) {
|
func TestAtoms(t *testing.T) {
|
||||||
RunMatcherTests(t, []MatcherTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"", a.EndOfFile, true, ""},
|
{"", a.EndOfFile, true, ""},
|
||||||
{"⌘", a.AnyRune, true, "⌘"},
|
{"⌘", a.AnyRune, true, "⌘"},
|
||||||
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
|
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
|
||||||
|
@ -158,7 +158,7 @@ func TestAtoms(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModifiers(t *testing.T) {
|
func TestModifiers(t *testing.T) {
|
||||||
RunMatcherTests(t, []MatcherTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
|
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
|
||||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||||
|
@ -172,6 +172,30 @@ func TestModifiers(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSequenceOfRunes(t *testing.T) {
|
||||||
|
sequence := c.Seq(
|
||||||
|
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
||||||
|
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||||
|
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||||
|
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||||
|
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||||
|
)
|
||||||
|
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||||
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
|
p.Expects("Sequence of runes")
|
||||||
|
if p.On(sequence).Accept() {
|
||||||
|
p.EmitLiteral(TestItem)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
item, err, ok := parser.Parse(input).Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if item.Value != input {
|
||||||
|
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// I know, this is hell, but that's the whole point for this test :->
|
// I know, this is hell, but that's the whole point for this test :->
|
||||||
func TestCombination(t *testing.T) {
|
func TestCombination(t *testing.T) {
|
||||||
demonic := c.Seq(
|
demonic := c.Seq(
|
||||||
|
@ -194,34 +218,10 @@ func TestCombination(t *testing.T) {
|
||||||
c.Opt(a.SquareClose),
|
c.Opt(a.SquareClose),
|
||||||
)
|
)
|
||||||
|
|
||||||
RunMatcherTests(t, []MatcherTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
||||||
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
||||||
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
||||||
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
|
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSequenceOfRunes(t *testing.T) {
|
|
||||||
sequence := c.Seq(
|
|
||||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.RoundOpen,
|
|
||||||
a.RoundClose, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
|
||||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
|
||||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
|
||||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
|
||||||
)
|
|
||||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
|
||||||
parser := parsekit.NewParser(func(p *parsekit.P) {
|
|
||||||
p.Expects("Sequence of runes")
|
|
||||||
if p.On(sequence).Accept() {
|
|
||||||
p.EmitLiteral(TestItem)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
item, err, ok := parser.Parse(input).Next()
|
|
||||||
if !ok {
|
|
||||||
t.Fatalf("Parsing failed: %s", err)
|
|
||||||
}
|
|
||||||
if item.Value != input {
|
|
||||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, item.Value)
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue