Backup work.

This commit is contained in:
Maurice Makaay 2019-05-21 12:49:42 +00:00
parent d9d837fe6e
commit 478efe3e25
20 changed files with 345 additions and 277 deletions

View File

@ -1,119 +0,0 @@
package parsekit
// Expects lets a state function describe what input it is expecting.
// The description is stored on the parser and is used in error messages to
// make them more descriptive.
//
// Also, when defining an expectation inside a StateHandler, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between issues:
// * there is valid data on input, but it was not accepted by the function
// * there is an invalid UTF-8 character on input
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}
// On tests the current input against the provided Matcher and wraps the
// outcome in a MatchAction, whose methods tell the parser what to do with
// the match.
//
// The intended use is a method chain, for example: p.On(...).Accept()
// The chain as a whole yields a boolean value that tells whether or not
// a match was found and processed.
//
// As a side effect, the runes collected by the match attempt are stored
// (as a string) in p.LastMatch.
func (p *P) On(m Matcher) *MatchAction {
	matched, sizes, found := p.match(m)
	action := &MatchAction{p: p, runes: matched, widths: sizes, ok: found}
	p.LastMatch = string(matched)
	return action
}
// match runs the provided Matcher against the current input, using a fresh
// MatchDialog that is bound to this parser.
// It returns the runes that the dialog collected, their respective byte
// widths, and the boolean result of the Matcher.
// NOTE(review): whether the slices are empty on a failed match depends on
// the MatchDialog implementation, which is not visible here.
func (p *P) match(matcher Matcher) ([]rune, []int, bool) {
	dialog := &MatchDialog{p: p}
	matched := matcher.Match(dialog)
	return dialog.runes, dialog.widths, matched
}
// MatchAction holds the outcome of a P.On() call. Its methods (Accept, Skip,
// Stay, RouteTo, RouteReturn) act on that outcome and report whether or not
// there was a match.
type MatchAction struct {
p *P // the parser on which On() was called
runes []rune // the runes that were collected while matching
widths []int // the byte width of each rune in runes (same indexing)
ok bool // whether or not the Matcher matched the input
}
// Accept moves the cursor past the match that was found and appends the
// matched runes to the parser's string buffer.
// It returns true when there was a match. Without a match, nothing is
// done and false is returned.
func (a *MatchAction) Accept() bool {
	if !a.ok {
		return false
	}
	for idx := range a.runes {
		matched := a.runes[idx]
		a.p.buffer.writeRune(matched)
		a.p.advanceCursor(matched, a.widths[idx])
	}
	return true
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *MatchAction) Skip() bool {
	if !a.ok {
		return false
	}
	// Only the cursor is advanced; unlike Accept(), nothing is written to
	// the string buffer.
	for i, r := range a.runes {
		a.p.advanceCursor(r, a.widths[i])
	}
	return true
}
// Stay tells the parser to not move the cursor after finding a match.
// It performs no action at all and only reports the match result, which
// makes it usable as the condition of an if or switch/case statement.
// Returns true in case a match was found, false otherwise.
func (a *MatchAction) Stay() bool {
return a.ok
}
// RouteTo is shorthand for p.On(...).Stay() followed by p.RouteTo(...):
// when there was a match, the parser is routed to the provided StateHandler
// without moving the cursor. It returns whether or not there was a match.
func (a *MatchAction) RouteTo(state StateHandler) bool {
	if !a.ok {
		return false
	}
	a.p.RouteTo(state)
	return true
}
// RouteReturn is shorthand for p.On(...).Stay() followed by p.RouteReturn():
// when there was a match, p.RouteReturn() is invoked without moving the
// cursor. It returns whether or not there was a match.
func (a *MatchAction) RouteReturn() bool {
	if !a.ok {
		return false
	}
	a.p.RouteReturn()
	return true
}
// advanceCursor moves the cursor past one rune r that is w bytes wide.
// While doing so, it keeps track of newlines, so row + column positions can
// be reported on error. The row is not bumped when the newline itself is
// consumed, but when the first rune after it is consumed; at that point
// the column is reset to 0.
func (p *P) advanceCursor(r rune, w int) {
	afterNewline := p.newline
	p.newline = r == '\n'
	p.pos += w
	if !afterNewline {
		p.cursorColumn++
		return
	}
	p.cursorRow++
	p.cursorColumn = 0
}

View File

@ -10,12 +10,12 @@ import (
type P struct { type P struct {
state StateHandler // the function that handles the current state state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state nextState StateHandler // the function that will handle the next state
stack []StateHandler // state function stack, for nested parsing routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input input string // the scanned input
len int // the total length of the input in bytes len int // the total length of the input in bytes
pos int // current byte scanning position in the input pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline newline bool // keep track of when we have scanned a newline
cursorRow int // current row number in the input cursorLine int // current row number in the input
cursorColumn int // current column position in the input cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values buffer stringBuffer // an efficient buffer, used to build string values
@ -33,10 +33,12 @@ type StateHandler func(*P)
// and initializes the parser for it. // and initializes the parser for it.
func New(input string, start StateHandler) *P { func New(input string, start StateHandler) *P {
return &P{ return &P{
input: input, input: input,
len: len(input), len: len(input),
nextState: start, cursorLine: 1,
items: make(chan Item, 2), cursorColumn: 1,
nextState: start,
items: make(chan Item, 2),
} }
} }
@ -93,8 +95,8 @@ func (p *P) getNextStateHandler() (StateHandler, bool) {
switch { switch {
case p.nextState != nil: case p.nextState != nil:
return p.nextState, true return p.nextState, true
case len(p.stack) > 0: case len(p.routeStack) > 0:
return p.popState(), true return p.popRoute(), true
case p.expecting != "": case p.expecting != "":
p.UnexpectedInput() p.UnexpectedInput()
return nil, false return nil, false
@ -118,7 +120,7 @@ func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
case i.Type == ItemEOF: case i.Type == ItemEOF:
return i, nil, false return i, nil, false
case i.Type == ItemError: case i.Type == ItemError:
p.err = &Error{i.Value, p.cursorRow, p.cursorColumn} p.err = &Error{i.Value, p.cursorLine, p.cursorColumn}
return i, p.err, false return i, p.err, false
default: default:
p.item = i p.item = i

View File

@ -92,7 +92,7 @@ type Matcher interface {
type matcherConstructors struct { type matcherConstructors struct {
EndOfFile func() MatchEndOfFile EndOfFile func() MatchEndOfFile
Any func() MatchAny AnyRune func() MatchAny
Rune func(rune) MatchRune Rune func(rune) MatchRune
RuneRange func(rune, rune) MatchRuneRange RuneRange func(rune, rune) MatchRuneRange
Runes func(...rune) MatchAnyOf Runes func(...rune) MatchAnyOf
@ -121,7 +121,7 @@ var C = matcherConstructors{
EndOfFile: func() MatchEndOfFile { EndOfFile: func() MatchEndOfFile {
return MatchEndOfFile{} return MatchEndOfFile{}
}, },
Any: func() MatchAny { AnyRune: func() MatchAny {
return MatchAny{} return MatchAny{}
}, },
Rune: func(rune rune) MatchRune { Rune: func(rune rune) MatchRune {
@ -198,13 +198,6 @@ func (c MatchEndOfFile) Match(m *MatchDialog) bool {
return !ok && r == EOF return !ok && r == EOF
} }
type MatchInvalidRune struct{}
func (c MatchInvalidRune) Match(m *MatchDialog) bool {
r, ok := m.NextRune()
return !ok && r == INVALID
}
type MatchAny struct{} type MatchAny struct{}
func (c MatchAny) Match(m *MatchDialog) bool { func (c MatchAny) Match(m *MatchDialog) bool {

View File

@ -13,7 +13,7 @@ const TestItem p.ItemType = 1
func newParser(input string, matcher p.Matcher) *p.P { func newParser(input string, matcher p.Matcher) *p.P {
stateFn := func(p *p.P) { stateFn := func(p *p.P) {
p.Expects("MATCH") p.Expects("MATCH")
if p.On(matcher).Accept() { if p.On(matcher).Accept().End() {
p.EmitLiteral(TestItem) p.EmitLiteral(TestItem)
p.RouteRepeat() p.RouteRepeat()
} }
@ -21,8 +21,8 @@ func newParser(input string, matcher p.Matcher) *p.P {
return p.New(input, stateFn) return p.New(input, stateFn)
} }
func TestMatchAny(t *testing.T) { func TestMatchAnyRune(t *testing.T) {
p := newParser("o", c.Any()) p := newParser("o", c.AnyRune())
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s", err) t.Fatalf("Parsing failed: %s", err)
@ -35,8 +35,8 @@ func TestMatchAny(t *testing.T) {
} }
} }
func TestMatchAny_AtEndOfFile(t *testing.T) { func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
p := newParser("", c.Any()) p := newParser("", c.AnyRune())
_, err, ok := p.Next() _, err, ok := p.Next()
if ok { if ok {
t.Fatalf("Parsing unexpectedly succeeded") t.Fatalf("Parsing unexpectedly succeeded")
@ -47,8 +47,8 @@ func TestMatchAny_AtEndOfFile(t *testing.T) {
} }
} }
func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) { func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", c.Any()) p := newParser("\xcd", c.AnyRune())
_, err, ok := p.Next() _, err, ok := p.Next()
if ok { if ok {
t.Fatalf("Parsing unexpectedly succeeded") t.Fatalf("Parsing unexpectedly succeeded")
@ -207,7 +207,7 @@ func TestMatchRepeat(t *testing.T) {
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x'))) p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "xxxx" { if r.Value != "xxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
@ -254,7 +254,7 @@ func TestMatchOneOrMore(t *testing.T) {
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x'))) p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "xxxxxxxx" { if r.Value != "xxxxxxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
@ -265,7 +265,7 @@ func TestMatchSequence(t *testing.T) {
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0'))) p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "10" { if r.Value != "10" {
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
@ -276,7 +276,7 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0')))) p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "101010" { if r.Value != "101010" {
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
@ -290,7 +290,7 @@ func TestSequence_WithRepeatedRunes(t *testing.T) {
p := newParser(" == 10", assignment) p := newParser(" == 10", assignment)
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != " =" { if r.Value != " =" {
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value) t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value)
@ -301,7 +301,7 @@ func TestMatchOptional(t *testing.T) {
p := newParser("xyz", c.Optional(c.Rune('x'))) p := newParser("xyz", c.Optional(c.Rune('x')))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "x" { if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
@ -310,7 +310,7 @@ func TestMatchOptional(t *testing.T) {
p = newParser("xyz", c.Optional(c.Rune('y'))) p = newParser("xyz", c.Optional(c.Rune('y')))
r, err, ok = p.Next() r, err, ok = p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "" { if r.Value != "" {
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
@ -319,10 +319,10 @@ func TestMatchOptional(t *testing.T) {
func TestMatchDrop(t *testing.T) { func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-')) dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes))) p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "X" { if r.Value != "X" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
@ -336,7 +336,7 @@ func TestMatchSeparated(t *testing.T) {
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers) p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != "1,2;3|44,55|66;777" { if r.Value != "1,2;3|44,55|66;777" {
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value) t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
@ -352,7 +352,7 @@ func TestMixAndMatch(t *testing.T) {
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte)) p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
r, err, ok := p.Next() r, err, ok := p.Next()
if !ok { if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
} }
if r.Value != `\x9a\x01\xF0\xfC` { if r.Value != `\x9a\x01\xF0\xfC` {
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`) t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)

View File

@ -8,12 +8,10 @@ import (
// ItemType represents the type of a parser Item. // ItemType represents the type of a parser Item.
type ItemType int type ItemType int
// TODO private?
// ItemEOF is a built-in parser item type that is used for flagging that the // ItemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached. // end of the input was reached.
const ItemEOF ItemType = -1 const ItemEOF ItemType = -1
// TODO private?
// ItemError is a built-in parser item type that is used for flagging that // ItemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing. // an error has occurred during parsing.
const ItemError ItemType = -2 const ItemError ItemType = -2
@ -62,14 +60,22 @@ func (p *P) EmitInterpreted(t ItemType) error {
// error messages to the user. // error messages to the user.
type Error struct { type Error struct {
Message string Message string
Row int Line int
Column int Column int
} }
func (err *Error) Error() string { func (err *Error) Error() string {
if err == nil {
panic("Error method called on the parser, but no error was set")
}
return err.Message return err.Message
} }
func (err *Error) ErrorFull() string {
message := err.Error()
return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column)
}
// EmitError emits a Parser error item to the client. // EmitError emits a Parser error item to the client.
func (p *P) EmitError(format string, args ...interface{}) { func (p *P) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...) message := fmt.Sprintf(format, args...)

View File

@ -0,0 +1,15 @@
package parsekit
// Expects lets a state function describe what input it is expecting.
// The description is stored on the parser and is used in error messages to
// make them more descriptive.
//
// Also, when defining an expectation inside a StateHandler, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between issues:
// * there is valid data on input, but it was not accepted by the function
// * there is an invalid UTF-8 character on input
// * the end of the file was reached.
func (p *P) Expects(description string) {
p.expecting = description
}

View File

@ -0,0 +1,58 @@
package parsekit
// On checks if the current input matches the provided Matcher.
//
// This method is the start of a method chain in which multiple things can
// be arranged in one go:
//
// * Checking whether or not there is a match (this is what On does)
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move
//   the cursor forward, Accept(): move cursor forward and add the match in
//   the parser string buffer)
// * Deciding where to route to (e.g. using RouteTo() to route to a
//   StateHandler by name)
// * Followup routing after that, when applicable (e.g. using something like
//   RouteTo(...).ThenTo(...))
//
// For every step of this chain, you can end the chain using the
// End() method. This will return a boolean value, indicating whether or
// not the initial On() method found a match in the input.
// End() is not mandatory. It is merely provided as a means to use
// a chain as an expression for a switch/case or if statement (since those
// require a boolean expression).
//
// You can omit "what to do with the match" and go straight into a routing
// method, e.g. On(...).RouteTo(...). This is functionally the same as
// using On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(matcher Matcher) *MatchAction {
	m := &MatchDialog{p: p}
	ok := matcher.Match(m)

	// Keep track of the last match, to allow parser implementations
	// to access it in an easy way. Typical use would be something like:
	// if p.On(somethingBad).End() {
	//     p.Errorf("This was bad: %s", p.LastMatch)
	// }
	p.LastMatch = string(m.runes)

	action := &MatchAction{
		ChainAction: ChainAction{p, ok},
		runes:       m.runes,
		widths:      m.widths,
	}

	// MatchAction embeds RouteAction, which carries a ChainAction of its
	// own. Methods that are promoted from RouteAction (e.g. RouteRepeat)
	// read that inner ChainAction, so it has to mirror the match result too.
	// Without this, On(...).RouteRepeat() would always act as "no match".
	action.RouteAction.ChainAction = action.ChainAction
	return action
}
// ChainAction is used for building method chains for the On() method.
// It carries the state that every link in such a chain needs: the parser
// to act on and the match result of the initial On() call.
type ChainAction struct {
p *P // the parser that the chain operates on (nil in some terminal links)
ok bool // whether or not the On() call found a match
}
// End ends the method chain and returns a boolean indicating whether
// or not a match was found in the input.
// It only reads the stored result and never touches the parser, so it is
// safe to call on a ChainAction that holds a nil parser.
func (a *ChainAction) End() bool {
return a.ok
}

View File

@ -0,0 +1,75 @@
package parsekit
// MatchAction is a struct that is used for building On()-method chains.
//
// It embeds the RouteAction struct, to make it possible to go right into
// a route action, which is basically a simple way of aliasing a chain
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
//
// NOTE(review): RouteAction embeds a ChainAction of its own. Methods that
// are promoted from RouteAction read that inner ChainAction, not the
// ChainAction field below, so both must be populated for promoted calls
// to see the match result.
type MatchAction struct {
RouteAction
ChainAction
runes []rune // the runes that were collected while matching
widths []int // the byte width of each rune in runes (same indexing)
}
// Accept moves the cursor past the match that was found and appends the
// matched runes to the parser's string buffer.
// Without a match, no action is taken.
// It returns a RouteAction, which carries the match result and provides the
// methods for telling the parser what state to go to next.
func (a *MatchAction) Accept() *RouteAction {
	next := &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
	if !a.ok {
		return next
	}
	for idx := range a.runes {
		matched := a.runes[idx]
		a.p.buffer.writeRune(matched)
		a.p.advanceCursor(matched, a.widths[idx])
	}
	return next
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// When no match was found, then no action is taken.
// It returns a RouteAction struct, which carries the match result and
// provides methods that can be used to tell the parser what state to go
// to next.
func (a *MatchAction) Skip() *RouteAction {
	if a.ok {
		// Only the cursor is advanced; unlike Accept(), nothing is written
		// to the string buffer.
		for i, r := range a.runes {
			a.p.advanceCursor(r, a.widths[i])
		}
	}
	return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
}
// Stay tells the parser to not move the cursor after finding a match.
// It takes no action on the parser. It returns a RouteAction struct that
// carries the match result, so the chain can continue with a routing
// method or be closed with End().
func (a *MatchAction) Stay() *RouteAction {
return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
}
// RouteTo is a shortcut for p.On(...).Stay().RouteTo(...): the cursor is
// left where it is, and the routing is delegated to the RouteAction that
// Stay() returns.
func (a *MatchAction) RouteTo(state StateHandler) *RouteFollowupAction {
return a.Stay().RouteTo(state)
}
// RouteReturn is a shortcut for p.On(...).Stay().RouteReturn(): the cursor
// is left where it is, and the routing is delegated to the RouteAction that
// Stay() returns.
func (a *MatchAction) RouteReturn() *ChainAction {
return a.Stay().RouteReturn()
}
// advanceCursor moves the cursor past one rune r that is w bytes wide.
// While doing so, it keeps track of newlines, so line + column positions
// can be reported on error. The line number is not bumped when the newline
// itself is consumed, but when the first rune after it is consumed; at that
// point the column is reset to 1.
func (p *P) advanceCursor(r rune, w int) {
	afterNewline := p.newline
	p.newline = r == '\n'
	p.pos += w
	if !afterNewline {
		p.cursorColumn++
		return
	}
	p.cursorLine++
	p.cursorColumn = 1
}

View File

@ -0,0 +1,59 @@
package parsekit
// RouteAction is a struct that is used for building On() method chains.
// It represents the point in the chain where a routing decision can be
// made (RouteTo, RouteRepeat, RouteReturn). The embedded ChainAction holds
// the parser and the match result, and provides End().
type RouteAction struct {
ChainAction
}
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked. This is only arranged when the chain
// holds a match; otherwise nothing is done and a ChainAction that reports
// "no match" is returned.
func (a *RouteAction) RouteRepeat() *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	return a.p.RouteRepeat()
}
// RouteTo tells the parser what StateHandler function to invoke in the
// next parsing cycle. This is only arranged when the chain holds a match;
// otherwise nothing is done and a RouteFollowupAction that reports
// "no match" is returned.
func (a *RouteAction) RouteTo(state StateHandler) *RouteFollowupAction {
	if !a.ok {
		return &RouteFollowupAction{ChainAction: ChainAction{nil, false}}
	}
	return a.p.RouteTo(state)
}
// RouteReturn tells the parser that on the next cycle the next scheduled
// route must be invoked. This is only arranged when the chain holds a
// match; otherwise nothing is done and a ChainAction that reports
// "no match" is returned.
func (a *RouteAction) RouteReturn() *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	return a.p.RouteReturn()
}
// RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// The embedded ChainAction holds the parser and the match result, and
// provides End().
type RouteFollowupAction struct {
ChainAction
}
// ThenTo schedules a StateHandler that must be invoked after the RouteTo
// StateHandler has been completed, by pushing it onto the route stack.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
// Nothing is scheduled when the chain holds no match. The returned
// ChainAction reports the match result.
func (a *RouteFollowupAction) ThenTo(state StateHandler) *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	a.p.pushRoute(state)
	return &ChainAction{nil, true}
}
// ThenReturnHere schedules the current StateHandler to be invoked after
// the RouteTo StateHandler has been completed, by pushing the parser's
// current state onto the route stack.
// For example: p.RouteTo(handlerA).ThenReturnHere()
// Nothing is scheduled when the chain holds no match. The returned
// ChainAction reports the match result.
func (a *RouteFollowupAction) ThenReturnHere() *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	a.p.pushRoute(a.p.state)
	return &ChainAction{nil, true}
}

View File

@ -0,0 +1,42 @@
package parsekit
// RouteTo tells the parser what StateHandler function to invoke in the
// next parsing cycle.
// It returns a RouteFollowupAction (always flagged as successful), which
// can be used to schedule a followup route using ThenTo() or
// ThenReturnHere().
func (p *P) RouteTo(state StateHandler) *RouteFollowupAction {
	followup := &RouteFollowupAction{ChainAction: ChainAction{p, true}}
	p.nextState = state
	return followup
}
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked.
// It does so by routing to the parser's current state. The returned
// ChainAction always reports success and is only useful for calling End().
func (p *P) RouteRepeat() *ChainAction {
p.RouteTo(p.state)
return &ChainAction{nil, true}
}
// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
//
// The returned ChainAction always reports success and is only useful for
// calling End().
func (p *P) RouteReturn() *ChainAction {
// popRoute indexes the route stack directly, so a route must have been
// scheduled (e.g. via ThenTo or ThenReturnHere) before calling this.
p.nextState = p.popRoute()
return &ChainAction{nil, true}
}
// pushRoute adds the StateHandler to the top of the route stack.
// This is used for implementing nested parsing: the pushed handler is the
// first one that a subsequent popRoute will return.
func (p *P) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute removes the most recently pushed StateHandler from the route
// stack and returns it.
//
// Popping from an empty route stack is a programming error (for example a
// RouteReturn() call without a scheduled route). This panics with a
// descriptive message, instead of the opaque slice-bounds runtime panic
// that direct indexing would produce.
func (p *P) popRoute() StateHandler {
	last := len(p.routeStack) - 1
	if last < 0 {
		panic("parsekit: popRoute called on an empty route stack")
	}
	top := p.routeStack[last]
	p.routeStack = p.routeStack[:last]
	return top
}

View File

@ -1,60 +0,0 @@
package parsekit
// RouteRepeat indicates that on the next parsing cycle,
// the current StateHandler must be invoked again.
// It does so by routing to the parser's current state.
func (p *P) RouteRepeat() {
p.RouteTo(p.state)
}
// RouteTo tells the parser what StateHandler function to invoke in the
// next parsing cycle.
// It returns a RouteFollowup, which can be used to schedule a followup
// route using ThenTo() or ThenReturnHere().
func (p *P) RouteTo(state StateHandler) *RouteFollowup {
	followup := &RouteFollowup{p}
	p.nextState = state
	return followup
}
// RouteFollowup chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
type RouteFollowup struct {
p *P // the parser on which RouteTo() was called
}
// ThenTo schedules a StateHandler that must be invoked
// after the RouteTo StateHandler has been completed.
// The handler is pushed onto the parser's state stack.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
func (r *RouteFollowup) ThenTo(state StateHandler) {
r.p.pushState(state)
}
// ThenReturnHere schedules the current StateHandler to be
// invoked after the RouteTo StateHandler has been completed.
// The parser's current state is pushed onto the state stack.
// For example: p.RouteTo(handlerA).ThenReturnHere()
func (r *RouteFollowup) ThenReturnHere() {
r.p.pushState(r.p.state)
}
// RouteReturn tells the parser that on the next cycle the
// next scheduled route must be invoked.
// Using this method is optional. When implementing a
// StateHandler that is used as a sort of subroutine (using
// constructions like p.RouteTo(subroutine).ThenReturnHere()),
// then you can refrain from providing a routing decision
// from that handler. The parser will automatically assume
// a RouteReturn in that case.
func (p *P) RouteReturn() {
// popState indexes the state stack directly, so a route must have been
// scheduled (e.g. via ThenTo or ThenReturnHere) before calling this.
p.nextState = p.popState()
}
// pushState adds the state function to the top of the state stack.
// This is used for implementing nested parsing: the pushed handler is the
// first one that a subsequent popState will return.
func (p *P) pushState(state StateHandler) {
p.stack = append(p.stack, state)
}
// popState removes the most recently pushed StateHandler from the state
// stack and returns it.
//
// Popping from an empty state stack is a programming error (for example a
// RouteReturn() call without a scheduled route). This panics with a
// descriptive message, instead of the opaque slice-bounds runtime panic
// that direct indexing would produce.
func (p *P) popState() StateHandler {
	last := len(p.stack) - 1
	if last < 0 {
		panic("parsekit: popState called on an empty state stack")
	}
	top := p.stack[last]
	p.stack = p.stack[:last]
	return top
}

View File

@ -7,19 +7,15 @@ import (
// A '#' hash symbol marks the rest of the line as a comment. // A '#' hash symbol marks the rest of the line as a comment.
func startComment(p *parsekit.P) { func startComment(p *parsekit.P) {
p.Expects("start of comment") p.Expects("start of comment")
if p.On(c.OneOrMore(hash)).Skip() { p.On(c.OneOrMore(hash)).Skip().RouteTo(commentContents)
p.RouteTo(commentContents)
}
} }
// All characters up to the end of the line are included in the comment. // All characters up to the end of the line are included in the comment.
func commentContents(p *parsekit.P) { func commentContents(p *parsekit.P) {
p.Expects("comment contents") p.Expects("comment contents")
switch { switch {
case p.On(endOfLine).Skip(): case p.On(endOfLine).Skip().RouteReturn().End():
p.EmitLiteralTrim(ItemComment) p.EmitLiteralTrim(ItemComment)
p.RouteReturn() case p.On(anyRune).Accept().RouteRepeat().End():
case p.On(any).Accept():
p.RouteRepeat()
} }
} }

View File

@ -6,15 +6,17 @@ import (
func TestComments(t *testing.T) { func TestComments(t *testing.T) {
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"empty comment", "#", "#()", ""}, {"empty comment at end of file", "#", "#()", ""},
// {"empty comment with spaces", "# \t \r\n", `#()`, ""}, {"empty comment at end of windows line", "#\r\n", "#()", ""},
// {"basic comment", "#chicken", "#(chicken)", ""}, {"empty comment at end of unix line", "#\n", "#()", ""},
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, {"empty comment with spaces", "# \t \r\n", `#()`, ""},
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, {"basic comment", "#chicken", "#(chicken)", ""},
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
}) })
} }

View File

@ -5,7 +5,7 @@ import "github.com/mmakaay/toml/parsekit"
// TODO move into parsekit // TODO move into parsekit
func endOfFile(p *parsekit.P) { func endOfFile(p *parsekit.P) {
p.Expects("end of file") p.Expects("end of file")
if p.On(c.EndOfFile()).Stay() { if p.On(c.EndOfFile()).Stay().End() {
p.Emit(parsekit.ItemEOF, "EOF") p.Emit(parsekit.ItemEOF, "EOF")
} }
} }

View File

@ -29,15 +29,14 @@ var (
// This allows for grouping similar properties together. Whitespace // This allows for grouping similar properties together. Whitespace
// around dot-separated parts is ignored, however, best practice is to // around dot-separated parts is ignored, however, best practice is to
// not use any extraneous whitespace. // not use any extraneous whitespace.
keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace) keySeparatorDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
) )
func startKeyValuePair(p *parsekit.P) { func startKeyValuePair(p *parsekit.P) {
p.On(whitespaceOrNewlines).Skip()
switch { switch {
case p.On(hash).Stay(): case p.On(whitespaceOrNewlines).Skip().RouteRepeat().End():
p.RouteTo(startComment).ThenReturnHere() case p.On(hash).RouteTo(startComment).ThenReturnHere().End():
case p.On(startOfKey).RouteTo(startKey): case p.On(startOfKey).RouteTo(startKey).End():
default: default:
p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler. p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
} }
@ -50,14 +49,14 @@ func startKey(p *parsekit.P) {
func startBareKey(p *parsekit.P) { func startBareKey(p *parsekit.P) {
p.Expects("a bare key name") p.Expects("a bare key name")
if p.On(bareKey).Accept() { if p.On(bareKey).Accept().End() {
p.EmitLiteral(ItemKey) p.EmitLiteral(ItemKey)
p.RouteTo(endOfKeyOrDot) p.RouteTo(endOfKeyOrDot)
} }
} }
func endOfKeyOrDot(p *parsekit.P) { func endOfKeyOrDot(p *parsekit.P) {
if p.On(keySeparatordDot).Skip() { if p.On(keySeparatorDot).Skip().End() {
p.Emit(ItemKeyDot, ".") p.Emit(ItemKeyDot, ".")
p.RouteTo(startKey) p.RouteTo(startKey)
} else { } else {
@ -67,7 +66,7 @@ func endOfKeyOrDot(p *parsekit.P) {
func startAssignment(p *parsekit.P) { func startAssignment(p *parsekit.P) {
p.Expects("a value assignment") p.Expects("a value assignment")
if p.On(keyAssignment).Skip() { if p.On(keyAssignment).Skip().End() {
p.Emit(ItemAssignment, "=") p.Emit(ItemAssignment, "=")
p.RouteTo(startValue) p.RouteTo(startValue)
} }

View File

@ -33,3 +33,9 @@ func TestKeyWithAssignmentButNoValue(t *testing.T) {
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, {"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
}) })
} }
func TestKeyWithValue(t *testing.T) {
runStatesTs(t, []statesT{
{"with string value", " -key- = \"value\" # nice\r\n", "[-key-]=STR(value)#(nice)", ""},
})
}

View File

@ -24,7 +24,7 @@ var (
dot = c.Rune('.') dot = c.Rune('.')
singleQuote = c.Rune('\'') singleQuote = c.Rune('\'')
doubleQuote = c.Rune('"') doubleQuote = c.Rune('"')
any = c.Any() anyRune = c.AnyRune()
anyQuote = c.AnyOf(singleQuote, doubleQuote) anyQuote = c.AnyOf(singleQuote, doubleQuote)
backslash = c.Rune('\\') backslash = c.Rune('\\')
asciiLower = c.RuneRange('a', 'z') asciiLower = c.RuneRange('a', 'z')

View File

@ -10,15 +10,13 @@ func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""}) runStatesT(t, statesT{"empty string", "", "", ""})
} }
func TestErrorsIncludeLineAndRowPosition(t *testing.T) { func TestErrorFullIncludesLineAndRowPosition(t *testing.T) {
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc") p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
_, err := parseItemsToArray(p) _, err := parseItemsToArray(p)
t.Logf("Got error: %s", err.Error()) actual := err.ErrorFull()
if err.Row != 4 { expected := "unexpected character '+' (expected end of file) after line 6, column 3"
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4) if actual != expected {
} t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
if err.Column != 6 {
t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6)
} }
} }

View File

@ -37,29 +37,25 @@ var (
func startString(p *parsekit.P) { func startString(p *parsekit.P) {
p.Expects("a string value") p.Expects("a string value")
switch { switch {
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString): case p.On(doubleQuote3).RouteTo(startMultiLineBasicString).End():
case p.On(doubleQuote).RouteTo(startBasicString): case p.On(doubleQuote).RouteTo(startBasicString).End():
} }
} }
func startBasicString(p *parsekit.P) { func startBasicString(p *parsekit.P) {
p.Expects("a basic string") p.Expects("a basic string")
if p.On(doubleQuote).Skip() { p.On(doubleQuote).Skip().RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
} }
func parseBasicString(p *parsekit.P) { func parseBasicString(p *parsekit.P) {
p.Expects("string contents") p.Expects("string contents")
switch { switch {
case p.On(charThatMustBeEscaped).Stay(): case p.On(charThatMustBeEscaped).End():
p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch) p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch)
case p.On(validEscape).Accept(): case p.On(validEscape).Accept().RouteRepeat().End():
p.RouteRepeat() case p.On(backslash).RouteReturn().End():
case p.On(backslash).RouteReturn(): case p.On(doubleQuote).RouteReturn().End():
case p.On(doubleQuote).RouteReturn(): case p.On(anyRune).Accept().RouteRepeat().End():
case p.On(any).Accept():
p.RouteRepeat()
} }
} }
@ -71,20 +67,20 @@ func parseBasicString(p *parsekit.P) {
func basicStringSpecifics(p *parsekit.P) { func basicStringSpecifics(p *parsekit.P) {
p.Expects("string contents") p.Expects("string contents")
switch { switch {
case p.On(doubleQuote).Skip(): case p.On(doubleQuote).Skip().End():
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase? if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
p.EmitError("Invalid data in string: %s", err) p.EmitError("invalid data in string: %s", err)
} else { } else {
p.RouteTo(startKeyValuePair) p.RouteTo(startKeyValuePair)
} }
case p.On(backslash).Stay(): case p.On(backslash).End():
p.EmitError("Invalid escape sequence") p.EmitError("invalid escape sequence")
} }
} }
func startMultiLineBasicString(p *parsekit.P) { func startMultiLineBasicString(p *parsekit.P) {
p.Expects("a multi-line basic string") p.Expects("a multi-line basic string")
if p.On(doubleQuote3).Skip() { if p.On(doubleQuote3).Skip().End() {
p.EmitError("Not yet implemented") p.EmitError("not yet implemented")
} }
} }

View File

@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) {
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`}, {"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`}, {"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`}, {"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`},
}) })
// No need to write all test cases for disallowed characters by hand. // No need to write all test cases for disallowed characters by hand.
@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
name := fmt.Sprintf("control character %x", rune(i)) name := fmt.Sprintf("control character %x", rune(i))
runStatesT( runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))}) fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
} }
} }
@ -46,7 +46,7 @@ func TestBasicString(t *testing.T) {
} }
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
err := "Invalid escape sequence" err := "invalid escape sequence"
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"invalid escape sequence", `a="\x"`, "[a]=", err}, {"invalid escape sequence", `a="\x"`, "[a]=", err},
{"too short \\u UTF8", `a="\u123"`, "[a]=", err}, {"too short \\u UTF8", `a="\u123"`, "[a]=", err},