Backup work.
This commit is contained in:
parent
d9d837fe6e
commit
478efe3e25
|
@ -1,119 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
// Expects is used to let a state function describe what input it is expecting.
|
||||
// This expectation is used in error messages to make them more descriptive.
|
||||
//
|
||||
// Also, when defining an expectation inside a StateHandler, you do not need
|
||||
// to handle unexpected input yourself. When the end of the function is
|
||||
// reached without setting the next state, an automatic error will be
|
||||
// emitted. This error differentiates between issues:
|
||||
// * there is valid data on input, but it was not accepted by the function
|
||||
// * there is an invalid UTF8 character on input
|
||||
// * the end of the file was reached.
|
||||
func (p *P) Expects(description string) {
|
||||
p.expecting = description
|
||||
}
|
||||
|
||||
// On checks if the current input matches the provided Matcher.
|
||||
// It returns a MatchAction struct, which provides methods that
|
||||
// can be used to tell the parser what to do with a match.
|
||||
//
|
||||
// The intended way to use this, is by chaining some methods,
|
||||
// for example: p.On(...).Accept()
|
||||
// The chained methods will as a whole return a boolean value,
|
||||
// indicating whether or not a match was found and processed.
|
||||
func (p *P) On(m Matcher) *MatchAction {
|
||||
runes, widths, ok := p.match(m)
|
||||
p.LastMatch = string(runes)
|
||||
return &MatchAction{
|
||||
p: p,
|
||||
runes: runes,
|
||||
widths: widths,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
// Match checks if the provided Matcher matches the current input.
|
||||
// Returns a slice of matching runes, a slice of their respective
|
||||
// byte widths and a boolean.
|
||||
// The boolean will be false and the slices will be empty in case
|
||||
// the input did not match.
|
||||
func (p *P) match(matcher Matcher) ([]rune, []int, bool) {
|
||||
m := &MatchDialog{p: p}
|
||||
ok := matcher.Match(m)
|
||||
return m.runes, m.widths, ok
|
||||
}
|
||||
|
||||
type MatchAction struct {
|
||||
p *P
|
||||
runes []rune
|
||||
widths []int
|
||||
ok bool
|
||||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to store the input that matched in the string buffer.
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *MatchAction) Accept() bool {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
a.p.buffer.writeRune(r)
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without storing the actual match in the string buffer.
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *MatchAction) Skip() bool {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
type C struct {
|
||||
Rune MatchRune
|
||||
}
|
||||
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *MatchAction) Stay() bool {
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...).
|
||||
func (a *MatchAction) RouteTo(state StateHandler) bool {
|
||||
if a.ok {
|
||||
a.p.RouteTo(state)
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn().
|
||||
func (a *MatchAction) RouteReturn() bool {
|
||||
if a.ok {
|
||||
a.p.RouteReturn()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// advanceCursor advances the rune cursor one position in the input data.
|
||||
// While doing so, it keeps tracks of newlines, so we can report on
|
||||
// row + column positions on error.
|
||||
func (p *P) advanceCursor(r rune, w int) {
|
||||
p.pos += w
|
||||
if p.newline {
|
||||
p.cursorColumn = 0
|
||||
p.cursorRow++
|
||||
} else {
|
||||
p.cursorColumn++
|
||||
}
|
||||
p.newline = r == '\n'
|
||||
}
|
|
@ -10,12 +10,12 @@ import (
|
|||
type P struct {
|
||||
state StateHandler // the function that handles the current state
|
||||
nextState StateHandler // the function that will handle the next state
|
||||
stack []StateHandler // state function stack, for nested parsing
|
||||
routeStack []StateHandler // route stack, for handling nested parsing
|
||||
input string // the scanned input
|
||||
len int // the total length of the input in bytes
|
||||
pos int // current byte scanning position in the input
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
cursorRow int // current row number in the input
|
||||
cursorLine int // current row number in the input
|
||||
cursorColumn int // current column position in the input
|
||||
expecting string // a description of what the current state expects to find
|
||||
buffer stringBuffer // an efficient buffer, used to build string values
|
||||
|
@ -33,10 +33,12 @@ type StateHandler func(*P)
|
|||
// and initializes the parser for it.
|
||||
func New(input string, start StateHandler) *P {
|
||||
return &P{
|
||||
input: input,
|
||||
len: len(input),
|
||||
nextState: start,
|
||||
items: make(chan Item, 2),
|
||||
input: input,
|
||||
len: len(input),
|
||||
cursorLine: 1,
|
||||
cursorColumn: 1,
|
||||
nextState: start,
|
||||
items: make(chan Item, 2),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,8 +95,8 @@ func (p *P) getNextStateHandler() (StateHandler, bool) {
|
|||
switch {
|
||||
case p.nextState != nil:
|
||||
return p.nextState, true
|
||||
case len(p.stack) > 0:
|
||||
return p.popState(), true
|
||||
case len(p.routeStack) > 0:
|
||||
return p.popRoute(), true
|
||||
case p.expecting != "":
|
||||
p.UnexpectedInput()
|
||||
return nil, false
|
||||
|
@ -118,7 +120,7 @@ func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
|
|||
case i.Type == ItemEOF:
|
||||
return i, nil, false
|
||||
case i.Type == ItemError:
|
||||
p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
|
||||
p.err = &Error{i.Value, p.cursorLine, p.cursorColumn}
|
||||
return i, p.err, false
|
||||
default:
|
||||
p.item = i
|
||||
|
|
|
@ -92,7 +92,7 @@ type Matcher interface {
|
|||
|
||||
type matcherConstructors struct {
|
||||
EndOfFile func() MatchEndOfFile
|
||||
Any func() MatchAny
|
||||
AnyRune func() MatchAny
|
||||
Rune func(rune) MatchRune
|
||||
RuneRange func(rune, rune) MatchRuneRange
|
||||
Runes func(...rune) MatchAnyOf
|
||||
|
@ -121,7 +121,7 @@ var C = matcherConstructors{
|
|||
EndOfFile: func() MatchEndOfFile {
|
||||
return MatchEndOfFile{}
|
||||
},
|
||||
Any: func() MatchAny {
|
||||
AnyRune: func() MatchAny {
|
||||
return MatchAny{}
|
||||
},
|
||||
Rune: func(rune rune) MatchRune {
|
||||
|
@ -198,13 +198,6 @@ func (c MatchEndOfFile) Match(m *MatchDialog) bool {
|
|||
return !ok && r == EOF
|
||||
}
|
||||
|
||||
type MatchInvalidRune struct{}
|
||||
|
||||
func (c MatchInvalidRune) Match(m *MatchDialog) bool {
|
||||
r, ok := m.NextRune()
|
||||
return !ok && r == INVALID
|
||||
}
|
||||
|
||||
type MatchAny struct{}
|
||||
|
||||
func (c MatchAny) Match(m *MatchDialog) bool {
|
|
@ -13,7 +13,7 @@ const TestItem p.ItemType = 1
|
|||
func newParser(input string, matcher p.Matcher) *p.P {
|
||||
stateFn := func(p *p.P) {
|
||||
p.Expects("MATCH")
|
||||
if p.On(matcher).Accept() {
|
||||
if p.On(matcher).Accept().End() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.RouteRepeat()
|
||||
}
|
||||
|
@ -21,8 +21,8 @@ func newParser(input string, matcher p.Matcher) *p.P {
|
|||
return p.New(input, stateFn)
|
||||
}
|
||||
|
||||
func TestMatchAny(t *testing.T) {
|
||||
p := newParser("o", c.Any())
|
||||
func TestMatchAnyRune(t *testing.T) {
|
||||
p := newParser("o", c.AnyRune())
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
|
@ -35,8 +35,8 @@ func TestMatchAny(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestMatchAny_AtEndOfFile(t *testing.T) {
|
||||
p := newParser("", c.Any())
|
||||
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
|
||||
p := newParser("", c.AnyRune())
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
|
@ -47,8 +47,8 @@ func TestMatchAny_AtEndOfFile(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
|
||||
p := newParser("\xcd", c.Any())
|
||||
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
|
||||
p := newParser("\xcd", c.AnyRune())
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
|
@ -207,7 +207,7 @@ func TestMatchRepeat(t *testing.T) {
|
|||
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "xxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
|
||||
|
@ -254,7 +254,7 @@ func TestMatchOneOrMore(t *testing.T) {
|
|||
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "xxxxxxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
|
||||
|
@ -265,7 +265,7 @@ func TestMatchSequence(t *testing.T) {
|
|||
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "10" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
|
||||
|
@ -276,7 +276,7 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
|
|||
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "101010" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
|
||||
|
@ -290,7 +290,7 @@ func TestSequence_WithRepeatedRunes(t *testing.T) {
|
|||
p := newParser(" == 10", assignment)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != " =" {
|
||||
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value)
|
||||
|
@ -301,7 +301,7 @@ func TestMatchOptional(t *testing.T) {
|
|||
p := newParser("xyz", c.Optional(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "x" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
|
@ -310,7 +310,7 @@ func TestMatchOptional(t *testing.T) {
|
|||
p = newParser("xyz", c.Optional(c.Rune('y')))
|
||||
r, err, ok = p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
|
||||
|
@ -319,10 +319,10 @@ func TestMatchOptional(t *testing.T) {
|
|||
|
||||
func TestMatchDrop(t *testing.T) {
|
||||
dashes := c.OneOrMore(c.Rune('-'))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes)))
|
||||
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "X" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
|
@ -336,7 +336,7 @@ func TestMatchSeparated(t *testing.T) {
|
|||
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != "1,2;3|44,55|66;777" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
|
||||
|
@ -352,7 +352,7 @@ func TestMixAndMatch(t *testing.T) {
|
|||
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
|
||||
}
|
||||
if r.Value != `\x9a\x01\xF0\xfC` {
|
||||
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
|
|
@ -8,12 +8,10 @@ import (
|
|||
// ItemType represents the type of a parser Item.
|
||||
type ItemType int
|
||||
|
||||
// TODO private?
|
||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||
// end of the input was reached.
|
||||
const ItemEOF ItemType = -1
|
||||
|
||||
// TODO private?
|
||||
// ItemError is a built-in parser item type that is used for flagging that
|
||||
// an error has occurred during parsing.
|
||||
const ItemError ItemType = -2
|
||||
|
@ -62,14 +60,22 @@ func (p *P) EmitInterpreted(t ItemType) error {
|
|||
// error messages to the user.
|
||||
type Error struct {
|
||||
Message string
|
||||
Row int
|
||||
Line int
|
||||
Column int
|
||||
}
|
||||
|
||||
func (err *Error) Error() string {
|
||||
if err == nil {
|
||||
panic("Error method called on the parser, but no error was set")
|
||||
}
|
||||
return err.Message
|
||||
}
|
||||
|
||||
func (err *Error) ErrorFull() string {
|
||||
message := err.Error()
|
||||
return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column)
|
||||
}
|
||||
|
||||
// EmitError emits a Parser error item to the client.
|
||||
func (p *P) EmitError(format string, args ...interface{}) {
|
||||
message := fmt.Sprintf(format, args...)
|
|
@ -0,0 +1,15 @@
|
|||
package parsekit
|
||||
|
||||
// Expects is used to let a state function describe what input it is expecting.
|
||||
// This expectation is used in error messages to make them more descriptive.
|
||||
//
|
||||
// Also, when defining an expectation inside a StateHandler, you do not need
|
||||
// to handle unexpected input yourself. When the end of the function is
|
||||
// reached without setting the next state, an automatic error will be
|
||||
// emitted. This error differentiates between issues:
|
||||
// * there is valid data on input, but it was not accepted by the function
|
||||
// * there is an invalid UTF8 character on input
|
||||
// * the end of the file was reached.
|
||||
func (p *P) Expects(description string) {
|
||||
p.expecting = description
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package parsekit
|
||||
|
||||
// On checks if the current input matches the provided Matcher.
|
||||
//
|
||||
// This method is the start of a chain method in which multiple things can
|
||||
// be arranged in one go:
|
||||
//
|
||||
// * Checking whether or not there is a match (this is what On does)
|
||||
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move
|
||||
// the cursor forward, Accept(): move cursor forward and add the match in
|
||||
// the parser string buffer)
|
||||
// * Dedicing where to route to (e.g. using RouteTo() to route to a
|
||||
// StateHandler by name)
|
||||
// * Followup routing after that, when applicable (.e.g using something like
|
||||
// RouteTo(...).ThenTo(...))
|
||||
//
|
||||
// For every step of this chain, you can end the chain using the
|
||||
// End() method. This will return a boolean value, indicating whether or
|
||||
// not the initial On() method found a match in the input.
|
||||
// End() is not mandatory. It is merely provided as a means to use
|
||||
// a chain as an expression for a switch/case or if statement (since those
|
||||
// require a boolean expression).
|
||||
//
|
||||
// You can omit "what to do with the match" and go straight into a routing
|
||||
// method, e.g. On(...).RouteTo(...). This is functionally the same as
|
||||
// using On(...).Stay().RouteTo(...).
|
||||
//
|
||||
// Here's a complete example chain:
|
||||
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
|
||||
func (p *P) On(matcher Matcher) *MatchAction {
|
||||
m := &MatchDialog{p: p}
|
||||
ok := matcher.Match(m)
|
||||
|
||||
// Keep track of the last match, to allow parser implementations
|
||||
// to access it in an easy way. Typical use would be something like:
|
||||
// if p.On(somethingBad).End() {
|
||||
// p.Errorf("This was bad: %s", p.LastMatch)
|
||||
// }
|
||||
p.LastMatch = string(m.runes)
|
||||
|
||||
return &MatchAction{
|
||||
ChainAction: ChainAction{p, ok},
|
||||
runes: m.runes,
|
||||
widths: m.widths,
|
||||
}
|
||||
}
|
||||
|
||||
// ChainAction is used for building method chains for the On() method.
|
||||
type ChainAction struct {
|
||||
p *P
|
||||
ok bool
|
||||
}
|
||||
|
||||
// End ends the method chain and returns a boolean indicating whether
|
||||
// or not a match was found in the input.
|
||||
func (a *ChainAction) End() bool {
|
||||
return a.ok
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package parsekit
|
||||
|
||||
// MatchAction is a struct that is used for building On()-method chains.
|
||||
//
|
||||
// It embeds the RouteAction struct, to make it possible to go right into
|
||||
// a route action, which is basically a simple way of aliasing a chain
|
||||
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
|
||||
type MatchAction struct {
|
||||
RouteAction
|
||||
ChainAction
|
||||
runes []rune
|
||||
widths []int
|
||||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to store the input that matched in the string buffer.
|
||||
// When no match was found, then no action is taken.
|
||||
// It returns a RouteAction struct, which provides methods that can be used
|
||||
// to tell the parser what state to go to next.
|
||||
func (a *MatchAction) Accept() *RouteAction {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
a.p.buffer.writeRune(r)
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
|
||||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without storing the actual match in the string buffer.
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *MatchAction) Skip() *RouteAction {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
type C struct {
|
||||
Rune MatchRune
|
||||
}
|
||||
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
|
||||
}
|
||||
|
||||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *MatchAction) Stay() *RouteAction {
|
||||
return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
|
||||
}
|
||||
|
||||
// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...).
|
||||
func (a *MatchAction) RouteTo(state StateHandler) *RouteFollowupAction {
|
||||
return a.Stay().RouteTo(state)
|
||||
}
|
||||
|
||||
// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn(...).
|
||||
func (a *MatchAction) RouteReturn() *ChainAction {
|
||||
return a.Stay().RouteReturn()
|
||||
}
|
||||
|
||||
// advanceCursor advances the rune cursor one position in the input data.
|
||||
// While doing so, it keeps tracks of newlines, so we can report on
|
||||
// row + column positions on error.
|
||||
func (p *P) advanceCursor(r rune, w int) {
|
||||
p.pos += w
|
||||
if p.newline {
|
||||
p.cursorLine++
|
||||
p.cursorColumn = 1
|
||||
} else {
|
||||
p.cursorColumn++
|
||||
}
|
||||
p.newline = r == '\n'
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
package parsekit
|
||||
|
||||
// RouteAction is a struct that is used for building On() method chains.
|
||||
type RouteAction struct {
|
||||
ChainAction
|
||||
}
|
||||
|
||||
// RouteRepeat indicates that on the next parsing cycle,
|
||||
// the current StateHandler must be reinvoked.
|
||||
func (a *RouteAction) RouteRepeat() *ChainAction {
|
||||
if a.ok {
|
||||
return a.p.RouteRepeat()
|
||||
}
|
||||
return &ChainAction{nil, false}
|
||||
}
|
||||
|
||||
// RouteTo tells the parser what StateHandler function to invoke
|
||||
// in the next parsing cycle.
|
||||
func (a *RouteAction) RouteTo(state StateHandler) *RouteFollowupAction {
|
||||
if a.ok {
|
||||
return a.p.RouteTo(state)
|
||||
}
|
||||
return &RouteFollowupAction{ChainAction: ChainAction{nil, false}}
|
||||
}
|
||||
|
||||
// RouteReturn tells the parser that on the next cycle the next scheduled
|
||||
// route must be invoked.
|
||||
func (a *RouteAction) RouteReturn() *ChainAction {
|
||||
if a.ok {
|
||||
return a.p.RouteReturn()
|
||||
}
|
||||
return &ChainAction{nil, false}
|
||||
}
|
||||
|
||||
// RouteFollowupAction chains parsing routes.
|
||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||
type RouteFollowupAction struct {
|
||||
ChainAction
|
||||
}
|
||||
|
||||
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
|
||||
// StateHandler has been completed.
|
||||
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
||||
func (a *RouteFollowupAction) ThenTo(state StateHandler) *ChainAction {
|
||||
if a.ok {
|
||||
a.p.pushRoute(state)
|
||||
}
|
||||
return &ChainAction{nil, a.ok}
|
||||
}
|
||||
|
||||
// ThenReturnHere schedules the current StateHandler to be invoked after
|
||||
// the RouteTo StateHandler has been completed.
|
||||
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
||||
func (a *RouteFollowupAction) ThenReturnHere() *ChainAction {
|
||||
if a.ok {
|
||||
a.p.pushRoute(a.p.state)
|
||||
}
|
||||
return &ChainAction{nil, a.ok}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package parsekit
|
||||
|
||||
// RouteTo tells the parser what StateHandler function to invoke
|
||||
// in the next parsing cycle.
|
||||
func (p *P) RouteTo(state StateHandler) *RouteFollowupAction {
|
||||
p.nextState = state
|
||||
return &RouteFollowupAction{ChainAction: ChainAction{p, true}}
|
||||
}
|
||||
|
||||
// RouteRepeat indicates that on the next parsing cycle, the current
|
||||
// StateHandler must be reinvoked.
|
||||
func (p *P) RouteRepeat() *ChainAction {
|
||||
p.RouteTo(p.state)
|
||||
return &ChainAction{nil, true}
|
||||
}
|
||||
|
||||
// RouteReturn tells the parser that on the next cycle the last
|
||||
// StateHandler that was pushed on the route stack must be invoked.
|
||||
//
|
||||
// Using this method is optional. When implementating a StateHandler that
|
||||
// is used as a sort of subroutine (using constructions like
|
||||
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
|
||||
// providing an explicit routing decision from that handler. The parser will
|
||||
// automatically assume a RouteReturn() in that case.
|
||||
func (p *P) RouteReturn() *ChainAction {
|
||||
p.nextState = p.popRoute()
|
||||
return &ChainAction{nil, true}
|
||||
}
|
||||
|
||||
// pushRoute adds the StateHandler to the route stack.
|
||||
// This is used for implementing nested parsing.
|
||||
func (p *P) pushRoute(state StateHandler) {
|
||||
p.routeStack = append(p.routeStack, state)
|
||||
}
|
||||
|
||||
// popRoute pops the last pushed StateHandler from the route stack.
|
||||
func (p *P) popRoute() StateHandler {
|
||||
last := len(p.routeStack) - 1
|
||||
head, tail := p.routeStack[:last], p.routeStack[last]
|
||||
p.routeStack = head
|
||||
return tail
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
// RouteRepeat indicates that on the next parsing cycle,
|
||||
// the current StateHandler must be invoked again.
|
||||
func (p *P) RouteRepeat() {
|
||||
p.RouteTo(p.state)
|
||||
}
|
||||
|
||||
// RouteTo tells the parser what StateHandler function to invoke
|
||||
// in the next parsing cycle.
|
||||
func (p *P) RouteTo(state StateHandler) *RouteFollowup {
|
||||
p.nextState = state
|
||||
return &RouteFollowup{p}
|
||||
}
|
||||
|
||||
// RouteFollowup chains parsing routes.
|
||||
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||
type RouteFollowup struct {
|
||||
p *P
|
||||
}
|
||||
|
||||
// ThenTo schedules a StateHandler that must be invoked
|
||||
// after the RouteTo StateHandler has been completed.
|
||||
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
||||
func (r *RouteFollowup) ThenTo(state StateHandler) {
|
||||
r.p.pushState(state)
|
||||
}
|
||||
|
||||
// ThenReturnHere schedules the current StateHandler to be
|
||||
// invoked after the RouteTo StateHandler has been completed.
|
||||
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
||||
func (r *RouteFollowup) ThenReturnHere() {
|
||||
r.p.pushState(r.p.state)
|
||||
}
|
||||
|
||||
// RouteReturn tells the parser that on the next cycle the
|
||||
// next scheduled route must be invoked.
|
||||
// Using this method is optional. When implementating a
|
||||
// StateHandler that is used as a sort of subroutine (using
|
||||
// constructions like p.RouteTo(subroutine).ThenReturnHere()),
|
||||
// then you can refrain from providing a routing decision
|
||||
// from that handler. The parser will automatically assume
|
||||
// a RouteReturn in that case.
|
||||
func (p *P) RouteReturn() {
|
||||
p.nextState = p.popState()
|
||||
}
|
||||
|
||||
// PushState adds the state function to the state stack.
|
||||
// This is used for implementing nested parsing.
|
||||
func (p *P) pushState(state StateHandler) {
|
||||
p.stack = append(p.stack, state)
|
||||
}
|
||||
|
||||
// PopState pops the last pushed state from the state stack.
|
||||
func (p *P) popState() StateHandler {
|
||||
last := len(p.stack) - 1
|
||||
head, tail := p.stack[:last], p.stack[last]
|
||||
p.stack = head
|
||||
return tail
|
||||
}
|
|
@ -7,19 +7,15 @@ import (
|
|||
// A '#' hash symbol marks the rest of the line as a comment.
|
||||
func startComment(p *parsekit.P) {
|
||||
p.Expects("start of comment")
|
||||
if p.On(c.OneOrMore(hash)).Skip() {
|
||||
p.RouteTo(commentContents)
|
||||
}
|
||||
p.On(c.OneOrMore(hash)).Skip().RouteTo(commentContents)
|
||||
}
|
||||
|
||||
// All characters up to the end of the line are included in the comment.
|
||||
func commentContents(p *parsekit.P) {
|
||||
p.Expects("comment contents")
|
||||
switch {
|
||||
case p.On(endOfLine).Skip():
|
||||
case p.On(endOfLine).Skip().RouteReturn().End():
|
||||
p.EmitLiteralTrim(ItemComment)
|
||||
p.RouteReturn()
|
||||
case p.On(any).Accept():
|
||||
p.RouteRepeat()
|
||||
case p.On(anyRune).Accept().RouteRepeat().End():
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,15 +6,17 @@ import (
|
|||
|
||||
func TestComments(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"empty comment", "#", "#()", ""},
|
||||
// {"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||
// {"basic comment", "#chicken", "#(chicken)", ""},
|
||||
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
||||
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
||||
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||
{"empty comment at end of file", "#", "#()", ""},
|
||||
{"empty comment at end of windows line", "#\r\n", "#()", ""},
|
||||
{"empty comment at end of unix line", "#\n", "#()", ""},
|
||||
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
|
||||
{"basic comment", "#chicken", "#(chicken)", ""},
|
||||
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
|
||||
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
|
||||
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
|
||||
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
|
||||
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
|
||||
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
|
||||
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
|
||||
})
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ import "github.com/mmakaay/toml/parsekit"
|
|||
// TODO move into parsekit
|
||||
func endOfFile(p *parsekit.P) {
|
||||
p.Expects("end of file")
|
||||
if p.On(c.EndOfFile()).Stay() {
|
||||
if p.On(c.EndOfFile()).Stay().End() {
|
||||
p.Emit(parsekit.ItemEOF, "EOF")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,15 +29,14 @@ var (
|
|||
// This allows for grouping similar properties together. Whitespace
|
||||
// around dot-separated parts is ignored, however, best practice is to
|
||||
// not use any extraneous whitespace.
|
||||
keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
|
||||
keySeparatorDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
|
||||
)
|
||||
|
||||
func startKeyValuePair(p *parsekit.P) {
|
||||
p.On(whitespaceOrNewlines).Skip()
|
||||
switch {
|
||||
case p.On(hash).Stay():
|
||||
p.RouteTo(startComment).ThenReturnHere()
|
||||
case p.On(startOfKey).RouteTo(startKey):
|
||||
case p.On(whitespaceOrNewlines).Skip().RouteRepeat().End():
|
||||
case p.On(hash).RouteTo(startComment).ThenReturnHere().End():
|
||||
case p.On(startOfKey).RouteTo(startKey).End():
|
||||
default:
|
||||
p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
|
||||
}
|
||||
|
@ -50,14 +49,14 @@ func startKey(p *parsekit.P) {
|
|||
|
||||
func startBareKey(p *parsekit.P) {
|
||||
p.Expects("a bare key name")
|
||||
if p.On(bareKey).Accept() {
|
||||
if p.On(bareKey).Accept().End() {
|
||||
p.EmitLiteral(ItemKey)
|
||||
p.RouteTo(endOfKeyOrDot)
|
||||
}
|
||||
}
|
||||
|
||||
func endOfKeyOrDot(p *parsekit.P) {
|
||||
if p.On(keySeparatordDot).Skip() {
|
||||
if p.On(keySeparatorDot).Skip().End() {
|
||||
p.Emit(ItemKeyDot, ".")
|
||||
p.RouteTo(startKey)
|
||||
} else {
|
||||
|
@ -67,7 +66,7 @@ func endOfKeyOrDot(p *parsekit.P) {
|
|||
|
||||
func startAssignment(p *parsekit.P) {
|
||||
p.Expects("a value assignment")
|
||||
if p.On(keyAssignment).Skip() {
|
||||
if p.On(keyAssignment).Skip().End() {
|
||||
p.Emit(ItemAssignment, "=")
|
||||
p.RouteTo(startValue)
|
||||
}
|
||||
|
|
|
@ -33,3 +33,9 @@ func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
|||
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
|
||||
})
|
||||
}
|
||||
|
||||
func TestKeyWithValue(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"with string value", " -key- = \"value\" # nice\r\n", "[-key-]=STR(value)#(nice)", ""},
|
||||
})
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ var (
|
|||
dot = c.Rune('.')
|
||||
singleQuote = c.Rune('\'')
|
||||
doubleQuote = c.Rune('"')
|
||||
any = c.Any()
|
||||
anyRune = c.AnyRune()
|
||||
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
||||
backslash = c.Rune('\\')
|
||||
asciiLower = c.RuneRange('a', 'z')
|
||||
|
|
|
@ -10,15 +10,13 @@ func TestEmptyInput(t *testing.T) {
|
|||
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||
}
|
||||
|
||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc")
|
||||
func TestErrorFullIncludesLineAndRowPosition(t *testing.T) {
|
||||
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
|
||||
_, err := parseItemsToArray(p)
|
||||
t.Logf("Got error: %s", err.Error())
|
||||
if err.Row != 4 {
|
||||
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
|
||||
}
|
||||
if err.Column != 6 {
|
||||
t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6)
|
||||
actual := err.ErrorFull()
|
||||
expected := "unexpected character '+' (expected end of file) after line 6, column 3"
|
||||
if actual != expected {
|
||||
t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,29 +37,25 @@ var (
|
|||
func startString(p *parsekit.P) {
|
||||
p.Expects("a string value")
|
||||
switch {
|
||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
||||
case p.On(doubleQuote).RouteTo(startBasicString):
|
||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString).End():
|
||||
case p.On(doubleQuote).RouteTo(startBasicString).End():
|
||||
}
|
||||
}
|
||||
|
||||
func startBasicString(p *parsekit.P) {
|
||||
p.Expects("a basic string")
|
||||
if p.On(doubleQuote).Skip() {
|
||||
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
||||
}
|
||||
p.On(doubleQuote).Skip().RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
|
||||
}
|
||||
|
||||
func parseBasicString(p *parsekit.P) {
|
||||
p.Expects("string contents")
|
||||
switch {
|
||||
case p.On(charThatMustBeEscaped).Stay():
|
||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch)
|
||||
case p.On(validEscape).Accept():
|
||||
p.RouteRepeat()
|
||||
case p.On(backslash).RouteReturn():
|
||||
case p.On(doubleQuote).RouteReturn():
|
||||
case p.On(any).Accept():
|
||||
p.RouteRepeat()
|
||||
case p.On(charThatMustBeEscaped).End():
|
||||
p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch)
|
||||
case p.On(validEscape).Accept().RouteRepeat().End():
|
||||
case p.On(backslash).RouteReturn().End():
|
||||
case p.On(doubleQuote).RouteReturn().End():
|
||||
case p.On(anyRune).Accept().RouteRepeat().End():
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -71,20 +67,20 @@ func parseBasicString(p *parsekit.P) {
|
|||
func basicStringSpecifics(p *parsekit.P) {
|
||||
p.Expects("string contents")
|
||||
switch {
|
||||
case p.On(doubleQuote).Skip():
|
||||
case p.On(doubleQuote).Skip().End():
|
||||
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
|
||||
p.EmitError("Invalid data in string: %s", err)
|
||||
p.EmitError("invalid data in string: %s", err)
|
||||
} else {
|
||||
p.RouteTo(startKeyValuePair)
|
||||
}
|
||||
case p.On(backslash).Stay():
|
||||
p.EmitError("Invalid escape sequence")
|
||||
case p.On(backslash).End():
|
||||
p.EmitError("invalid escape sequence")
|
||||
}
|
||||
}
|
||||
|
||||
func startMultiLineBasicString(p *parsekit.P) {
|
||||
p.Expects("a multi-line basic string")
|
||||
if p.On(doubleQuote3).Skip() {
|
||||
p.EmitError("Not yet implemented")
|
||||
if p.On(doubleQuote3).Skip().End() {
|
||||
p.EmitError("not yet implemented")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) {
|
|||
|
||||
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`},
|
||||
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`},
|
||||
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`},
|
||||
{"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`},
|
||||
{"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`},
|
||||
{"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`},
|
||||
})
|
||||
|
||||
// No need to write all test cases for disallowed characters by hand.
|
||||
|
@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
|||
name := fmt.Sprintf("control character %x", rune(i))
|
||||
runStatesT(
|
||||
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
|
||||
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
|
||||
fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,7 +46,7 @@ func TestBasicString(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
|
||||
err := "Invalid escape sequence"
|
||||
err := "invalid escape sequence"
|
||||
runStatesTs(t, []statesT{
|
||||
{"invalid escape sequence", `a="\x"`, "[a]=", err},
|
||||
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
|
Loading…
Reference in New Issue