From 478efe3e25917406211cc2cde449785b6b8a9c98 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Tue, 21 May 2019 12:49:42 +0000 Subject: [PATCH] Backup work. --- parsekit/matching.go | 119 ------------------ parsekit/parsekit.go | 20 +-- .../{matchers.go => parser_combinators.go} | 11 +- ...ers_test.go => parser_combinators_test.go} | 36 +++--- .../{emitting.go => statehandler_emit.go} | 12 +- parsekit/statehandler_expects.go | 15 +++ parsekit/statehandler_on.go | 58 +++++++++ parsekit/statehandler_on_match.go | 75 +++++++++++ parsekit/statehandler_on_route.go | 59 +++++++++ parsekit/statehandler_routing.go | 42 +++++++ parsekit/staterouting.go | 60 --------- parser/comment.go | 10 +- parser/comment_test.go | 22 ++-- parser/eof.go | 2 +- parser/keyvaluepair.go | 15 ++- parser/keyvaluepair_test.go | 6 + parser/toml.go | 2 +- parser/toml_test.go | 14 +-- parser/value_string.go | 34 +++-- ...lue_tring_test.go => value_string_test.go} | 10 +- 20 files changed, 345 insertions(+), 277 deletions(-) delete mode 100644 parsekit/matching.go rename parsekit/{matchers.go => parser_combinators.go} (97%) rename parsekit/{matchers_test.go => parser_combinators_test.go} (94%) rename parsekit/{emitting.go => statehandler_emit.go} (91%) create mode 100644 parsekit/statehandler_expects.go create mode 100644 parsekit/statehandler_on.go create mode 100644 parsekit/statehandler_on_match.go create mode 100644 parsekit/statehandler_on_route.go create mode 100644 parsekit/statehandler_routing.go delete mode 100644 parsekit/staterouting.go rename parser/{value_tring_test.go => value_string_test.go} (89%) diff --git a/parsekit/matching.go b/parsekit/matching.go deleted file mode 100644 index 8185e79..0000000 --- a/parsekit/matching.go +++ /dev/null @@ -1,119 +0,0 @@ -package parsekit - -// Expects is used to let a state function describe what input it is expecting. -// This expectation is used in error messages to make them more descriptive. 
-// -// Also, when defining an expectation inside a StateHandler, you do not need -// to handle unexpected input yourself. When the end of the function is -// reached without setting the next state, an automatic error will be -// emitted. This error differentiates between issues: -// * there is valid data on input, but it was not accepted by the function -// * there is an invalid UTF8 character on input -// * the end of the file was reached. -func (p *P) Expects(description string) { - p.expecting = description -} - -// On checks if the current input matches the provided Matcher. -// It returns a MatchAction struct, which provides methods that -// can be used to tell the parser what to do with a match. -// -// The intended way to use this, is by chaining some methods, -// for example: p.On(...).Accept() -// The chained methods will as a whole return a boolean value, -// indicating whether or not a match was found and processed. -func (p *P) On(m Matcher) *MatchAction { - runes, widths, ok := p.match(m) - p.LastMatch = string(runes) - return &MatchAction{ - p: p, - runes: runes, - widths: widths, - ok: ok, - } -} - -// Match checks if the provided Matcher matches the current input. -// Returns a slice of matching runes, a slice of their respective -// byte widths and a boolean. -// The boolean will be false and the slices will be empty in case -// the input did not match. -func (p *P) match(matcher Matcher) ([]rune, []int, bool) { - m := &MatchDialog{p: p} - ok := matcher.Match(m) - return m.runes, m.widths, ok -} - -type MatchAction struct { - p *P - runes []rune - widths []int - ok bool -} - -// Accept tells the parser to move the cursor past a match that was found, -// and to store the input that matched in the string buffer. -// Returns true in case a match was found. -// When no match was found, then no action is taken and false is returned. 
-func (a *MatchAction) Accept() bool { - if a.ok { - for i, r := range a.runes { - a.p.buffer.writeRune(r) - a.p.advanceCursor(r, a.widths[i]) - } - } - return a.ok -} - -// Skip tells the parser to move the cursor past a match that was found, -// without storing the actual match in the string buffer. -// Returns true in case a match was found. -// When no match was found, then no action is taken and false is returned. -func (a *MatchAction) Skip() bool { - if a.ok { - for i, r := range a.runes { - type C struct { - Rune MatchRune - } - - a.p.advanceCursor(r, a.widths[i]) - } - } - return a.ok -} - -// Stay tells the parser to not move the cursor after finding a match. -// Returns true in case a match was found, false otherwise. -func (a *MatchAction) Stay() bool { - return a.ok -} - -// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...). -func (a *MatchAction) RouteTo(state StateHandler) bool { - if a.ok { - a.p.RouteTo(state) - } - return a.ok -} - -// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn(). -func (a *MatchAction) RouteReturn() bool { - if a.ok { - a.p.RouteReturn() - } - return a.ok -} - -// advanceCursor advances the rune cursor one position in the input data. -// While doing so, it keeps tracks of newlines, so we can report on -// row + column positions on error. 
-func (p *P) advanceCursor(r rune, w int) { - p.pos += w - if p.newline { - p.cursorColumn = 0 - p.cursorRow++ - } else { - p.cursorColumn++ - } - p.newline = r == '\n' -} diff --git a/parsekit/parsekit.go b/parsekit/parsekit.go index b4f1805..b266cf3 100644 --- a/parsekit/parsekit.go +++ b/parsekit/parsekit.go @@ -10,12 +10,12 @@ import ( type P struct { state StateHandler // the function that handles the current state nextState StateHandler // the function that will handle the next state - stack []StateHandler // state function stack, for nested parsing + routeStack []StateHandler // route stack, for handling nested parsing input string // the scanned input len int // the total length of the input in bytes pos int // current byte scanning position in the input newline bool // keep track of when we have scanned a newline - cursorRow int // current row number in the input + cursorLine int // current row number in the input cursorColumn int // current column position in the input expecting string // a description of what the current state expects to find buffer stringBuffer // an efficient buffer, used to build string values @@ -33,10 +33,12 @@ type StateHandler func(*P) // and initializes the parser for it. 
func New(input string, start StateHandler) *P { return &P{ - input: input, - len: len(input), - nextState: start, - items: make(chan Item, 2), + input: input, + len: len(input), + cursorLine: 1, + cursorColumn: 1, + nextState: start, + items: make(chan Item, 2), } } @@ -93,8 +95,8 @@ func (p *P) getNextStateHandler() (StateHandler, bool) { switch { case p.nextState != nil: return p.nextState, true - case len(p.stack) > 0: - return p.popState(), true + case len(p.routeStack) > 0: + return p.popRoute(), true case p.expecting != "": p.UnexpectedInput() return nil, false @@ -118,7 +120,7 @@ func (p *P) makeReturnValues(i Item) (Item, *Error, bool) { case i.Type == ItemEOF: return i, nil, false case i.Type == ItemError: - p.err = &Error{i.Value, p.cursorRow, p.cursorColumn} + p.err = &Error{i.Value, p.cursorLine, p.cursorColumn} return i, p.err, false default: p.item = i diff --git a/parsekit/matchers.go b/parsekit/parser_combinators.go similarity index 97% rename from parsekit/matchers.go rename to parsekit/parser_combinators.go index 0ffa575..7e56d0b 100644 --- a/parsekit/matchers.go +++ b/parsekit/parser_combinators.go @@ -92,7 +92,7 @@ type Matcher interface { type matcherConstructors struct { EndOfFile func() MatchEndOfFile - Any func() MatchAny + AnyRune func() MatchAny Rune func(rune) MatchRune RuneRange func(rune, rune) MatchRuneRange Runes func(...rune) MatchAnyOf @@ -121,7 +121,7 @@ var C = matcherConstructors{ EndOfFile: func() MatchEndOfFile { return MatchEndOfFile{} }, - Any: func() MatchAny { + AnyRune: func() MatchAny { return MatchAny{} }, Rune: func(rune rune) MatchRune { @@ -198,13 +198,6 @@ func (c MatchEndOfFile) Match(m *MatchDialog) bool { return !ok && r == EOF } -type MatchInvalidRune struct{} - -func (c MatchInvalidRune) Match(m *MatchDialog) bool { - r, ok := m.NextRune() - return !ok && r == INVALID -} - type MatchAny struct{} func (c MatchAny) Match(m *MatchDialog) bool { diff --git a/parsekit/matchers_test.go 
b/parsekit/parser_combinators_test.go similarity index 94% rename from parsekit/matchers_test.go rename to parsekit/parser_combinators_test.go index ff4efaa..9f1b083 100644 --- a/parsekit/matchers_test.go +++ b/parsekit/parser_combinators_test.go @@ -13,7 +13,7 @@ const TestItem p.ItemType = 1 func newParser(input string, matcher p.Matcher) *p.P { stateFn := func(p *p.P) { p.Expects("MATCH") - if p.On(matcher).Accept() { + if p.On(matcher).Accept().End() { p.EmitLiteral(TestItem) p.RouteRepeat() } @@ -21,8 +21,8 @@ func newParser(input string, matcher p.Matcher) *p.P { return p.New(input, stateFn) } -func TestMatchAny(t *testing.T) { - p := newParser("o", c.Any()) +func TestMatchAnyRune(t *testing.T) { + p := newParser("o", c.AnyRune()) r, err, ok := p.Next() if !ok { t.Fatalf("Parsing failed: %s", err) @@ -35,8 +35,8 @@ func TestMatchAny(t *testing.T) { } } -func TestMatchAny_AtEndOfFile(t *testing.T) { - p := newParser("", c.Any()) +func TestMatchAnyRune_AtEndOfFile(t *testing.T) { + p := newParser("", c.AnyRune()) _, err, ok := p.Next() if ok { t.Fatalf("Parsing unexpectedly succeeded") @@ -47,8 +47,8 @@ func TestMatchAny_AtEndOfFile(t *testing.T) { } } -func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) { - p := newParser("\xcd", c.Any()) +func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) { + p := newParser("\xcd", c.AnyRune()) _, err, ok := p.Next() if ok { t.Fatalf("Parsing unexpectedly succeeded") @@ -207,7 +207,7 @@ func TestMatchRepeat(t *testing.T) { p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x'))) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "xxxx" { t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value) @@ -254,7 +254,7 @@ func TestMatchOneOrMore(t *testing.T) { p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x'))) r, err, ok := p.Next() if !ok { - 
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "xxxxxxxx" { t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value) @@ -265,7 +265,7 @@ func TestMatchSequence(t *testing.T) { p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0'))) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "10" { t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value) @@ -276,7 +276,7 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) { p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0')))) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "101010" { t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value) @@ -290,7 +290,7 @@ func TestSequence_WithRepeatedRunes(t *testing.T) { p := newParser(" == 10", assignment) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != " =" { t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value) @@ -301,7 +301,7 @@ func TestMatchOptional(t *testing.T) { p := newParser("xyz", c.Optional(c.Rune('x'))) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "x" { t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) @@ -310,7 +310,7 @@ func TestMatchOptional(t 
*testing.T) { p = newParser("xyz", c.Optional(c.Rune('y'))) r, err, ok = p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "" { t.Errorf("Parser item value is %q instead of expected \"\"", r.Value) @@ -319,10 +319,10 @@ func TestMatchOptional(t *testing.T) { func TestMatchDrop(t *testing.T) { dashes := c.OneOrMore(c.Rune('-')) - p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes))) + p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes))) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "X" { t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value) @@ -336,7 +336,7 @@ func TestMatchSeparated(t *testing.T) { p := newParser("1,2;3|44,55|66;777,abc", separated_numbers) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != "1,2;3|44,55|66;777" { t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value) @@ -352,7 +352,7 @@ func TestMixAndMatch(t *testing.T) { p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte)) r, err, ok := p.Next() if !ok { - t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column) + t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column) } if r.Value != `\x9a\x01\xF0\xfC` { t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`) diff --git a/parsekit/emitting.go b/parsekit/statehandler_emit.go similarity index 91% rename from parsekit/emitting.go rename to 
parsekit/statehandler_emit.go index 5f53ba3..646f342 100644 --- a/parsekit/emitting.go +++ b/parsekit/statehandler_emit.go @@ -8,12 +8,10 @@ import ( // ItemType represents the type of a parser Item. type ItemType int -// TODO private? // ItemEOF is a built-in parser item type that is used for flagging that the // end of the input was reached. const ItemEOF ItemType = -1 -// TODO private? // ItemError is a built-in parser item type that is used for flagging that // an error has occurred during parsing. const ItemError ItemType = -2 @@ -62,14 +60,22 @@ func (p *P) EmitInterpreted(t ItemType) error { // error messages to the user. type Error struct { Message string - Row int + Line int Column int } func (err *Error) Error() string { + if err == nil { + panic("Error method called on the parser, but no error was set") + } return err.Message } +func (err *Error) ErrorFull() string { + message := err.Error() + return fmt.Sprintf("%s after line %d, column %d", message, err.Line, err.Column) +} + // EmitError emits a Parser error item to the client. func (p *P) EmitError(format string, args ...interface{}) { message := fmt.Sprintf(format, args...) diff --git a/parsekit/statehandler_expects.go b/parsekit/statehandler_expects.go new file mode 100644 index 0000000..adc66ae --- /dev/null +++ b/parsekit/statehandler_expects.go @@ -0,0 +1,15 @@ +package parsekit + +// Expects is used to let a state function describe what input it is expecting. +// This expectation is used in error messages to make them more descriptive. +// +// Also, when defining an expectation inside a StateHandler, you do not need +// to handle unexpected input yourself. When the end of the function is +// reached without setting the next state, an automatic error will be +// emitted. This error differentiates between issues: +// * there is valid data on input, but it was not accepted by the function +// * there is an invalid UTF8 character on input +// * the end of the file was reached. 
+func (p *P) Expects(description string) { + p.expecting = description +} diff --git a/parsekit/statehandler_on.go b/parsekit/statehandler_on.go new file mode 100644 index 0000000..590a09d --- /dev/null +++ b/parsekit/statehandler_on.go @@ -0,0 +1,58 @@ +package parsekit + +// On checks if the current input matches the provided Matcher. +// +// This method is the start of a method chain in which multiple things can +// be arranged in one go: +// +// * Checking whether or not there is a match (this is what On does) +// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move +// the cursor forward, Accept(): move cursor forward and add the match in +// the parser string buffer) +// * Deciding where to route to (e.g. using RouteTo() to route to a +// StateHandler by name) +// * Followup routing after that, when applicable (e.g. using something like +// RouteTo(...).ThenTo(...)) +// +// For every step of this chain, you can end the chain using the +// End() method. This will return a boolean value, indicating whether or +// not the initial On() method found a match in the input. +// End() is not mandatory. It is merely provided as a means to use +// a chain as an expression for a switch/case or if statement (since those +// require a boolean expression). +// +// You can omit "what to do with the match" and go straight into a routing +// method, e.g. On(...).RouteTo(...). This is functionally the same as +// using On(...).Stay().RouteTo(...). +// +// Here's a complete example chain: +// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End() +func (p *P) On(matcher Matcher) *MatchAction { + m := &MatchDialog{p: p} + ok := matcher.Match(m) + + // Keep track of the last match, to allow parser implementations + // to access it in an easy way. 
Typical use would be something like: + // if p.On(somethingBad).End() { + // p.Errorf("This was bad: %s", p.LastMatch) + // } + p.LastMatch = string(m.runes) + + return &MatchAction{ + ChainAction: ChainAction{p, ok}, + runes: m.runes, + widths: m.widths, + } +} + +// ChainAction is used for building method chains for the On() method. +type ChainAction struct { + p *P + ok bool +} + +// End ends the method chain and returns a boolean indicating whether +// or not a match was found in the input. +func (a *ChainAction) End() bool { + return a.ok +} diff --git a/parsekit/statehandler_on_match.go b/parsekit/statehandler_on_match.go new file mode 100644 index 0000000..f636a57 --- /dev/null +++ b/parsekit/statehandler_on_match.go @@ -0,0 +1,75 @@ +package parsekit + +// MatchAction is a struct that is used for building On()-method chains. +// +// It embeds the RouteAction struct, to make it possible to go right into +// a route action, which is basically a simple way of aliasing a chain +// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...). +type MatchAction struct { + RouteAction + ChainAction + runes []rune + widths []int +} + +// Accept tells the parser to move the cursor past a match that was found, +// and to store the input that matched in the string buffer. +// When no match was found, then no action is taken. +// It returns a RouteAction struct, which provides methods that can be used +// to tell the parser what state to go to next. +func (a *MatchAction) Accept() *RouteAction { + if a.ok { + for i, r := range a.runes { + a.p.buffer.writeRune(r) + a.p.advanceCursor(r, a.widths[i]) + } + } + return &RouteAction{ChainAction: ChainAction{a.p, a.ok}} +} + +// Skip tells the parser to move the cursor past a match that was found, +// without storing the actual match in the string buffer. +// When no match was found, then no action is taken. +// It returns a RouteAction, used to tell the parser what state to go to next. 
+func (a *MatchAction) Skip() *RouteAction { + if a.ok { + for i, r := range a.runes { + type C struct { + Rune MatchRune + } + + a.p.advanceCursor(r, a.widths[i]) + } + } + return &RouteAction{ChainAction: ChainAction{a.p, a.ok}} +} + +// Stay tells the parser to not move the cursor after finding a match. +// Returns a RouteAction; its End() method tells whether a match was found. +func (a *MatchAction) Stay() *RouteAction { + return &RouteAction{ChainAction: ChainAction{a.p, a.ok}} +} + +// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...). +func (a *MatchAction) RouteTo(state StateHandler) *RouteFollowupAction { + return a.Stay().RouteTo(state) +} + +// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn(). +func (a *MatchAction) RouteReturn() *ChainAction { + return a.Stay().RouteReturn() +} + +// advanceCursor advances the rune cursor one position in the input data. +// While doing so, it keeps track of newlines, so we can report on +// line + column positions on error. +func (p *P) advanceCursor(r rune, w int) { + p.pos += w + if p.newline { + p.cursorLine++ + p.cursorColumn = 1 + } else { + p.cursorColumn++ + } + p.newline = r == '\n' +} diff --git a/parsekit/statehandler_on_route.go b/parsekit/statehandler_on_route.go new file mode 100644 index 0000000..b540e1e --- /dev/null +++ b/parsekit/statehandler_on_route.go @@ -0,0 +1,59 @@ +package parsekit + +// RouteAction is a struct that is used for building On() method chains. +type RouteAction struct { + ChainAction +} + +// RouteRepeat indicates that on the next parsing cycle, +// the current StateHandler must be reinvoked. +func (a *RouteAction) RouteRepeat() *ChainAction { + if a.ok { + return a.p.RouteRepeat() + } + return &ChainAction{nil, false} +} + +// RouteTo tells the parser what StateHandler function to invoke +// in the next parsing cycle. 
+func (a *RouteAction) RouteTo(state StateHandler) *RouteFollowupAction { + if a.ok { + return a.p.RouteTo(state) + } + return &RouteFollowupAction{ChainAction: ChainAction{nil, false}} +} + +// RouteReturn tells the parser that on the next cycle the next scheduled +// route must be invoked. +func (a *RouteAction) RouteReturn() *ChainAction { + if a.ok { + return a.p.RouteReturn() + } + return &ChainAction{nil, false} +} + +// RouteFollowupAction chains parsing routes. +// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB). +type RouteFollowupAction struct { + ChainAction +} + +// ThenTo schedules a StateHandler that must be invoked after the RouteTo +// StateHandler has been completed. +// For example: p.RouteTo(handlerA).ThenTo(handlerB) +func (a *RouteFollowupAction) ThenTo(state StateHandler) *ChainAction { + if a.ok { + a.p.pushRoute(state) + } + return &ChainAction{nil, a.ok} +} + +// ThenReturnHere schedules the current StateHandler to be invoked after +// the RouteTo StateHandler has been completed. +// For example: p.RouteTo(handlerA).ThenReturnHere() +func (a *RouteFollowupAction) ThenReturnHere() *ChainAction { + if a.ok { + a.p.pushRoute(a.p.state) + } + return &ChainAction{nil, a.ok} +} diff --git a/parsekit/statehandler_routing.go b/parsekit/statehandler_routing.go new file mode 100644 index 0000000..7eac7e2 --- /dev/null +++ b/parsekit/statehandler_routing.go @@ -0,0 +1,42 @@ +package parsekit + +// RouteTo tells the parser what StateHandler function to invoke +// in the next parsing cycle. +func (p *P) RouteTo(state StateHandler) *RouteFollowupAction { + p.nextState = state + return &RouteFollowupAction{ChainAction: ChainAction{p, true}} +} + +// RouteRepeat indicates that on the next parsing cycle, the current +// StateHandler must be reinvoked. 
+func (p *P) RouteRepeat() *ChainAction { + p.RouteTo(p.state) + return &ChainAction{nil, true} +} + +// RouteReturn tells the parser that on the next cycle the last +// StateHandler that was pushed on the route stack must be invoked. +// +// Using this method is optional. When implementing a StateHandler that +// is used as a sort of subroutine (using constructions like +// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from +// providing an explicit routing decision from that handler. The parser will +// automatically assume a RouteReturn() in that case. +func (p *P) RouteReturn() *ChainAction { + p.nextState = p.popRoute() + return &ChainAction{nil, true} +} + +// pushRoute adds the StateHandler to the route stack. +// This is used for implementing nested parsing. +func (p *P) pushRoute(state StateHandler) { + p.routeStack = append(p.routeStack, state) +} + +// popRoute pops the last pushed StateHandler from the route stack. +func (p *P) popRoute() StateHandler { + last := len(p.routeStack) - 1 + head, tail := p.routeStack[:last], p.routeStack[last] + p.routeStack = head + return tail +} diff --git a/parsekit/staterouting.go b/parsekit/staterouting.go deleted file mode 100644 index c629aa3..0000000 --- a/parsekit/staterouting.go +++ /dev/null @@ -1,60 +0,0 @@ -package parsekit - -// RouteRepeat indicates that on the next parsing cycle, -// the current StateHandler must be invoked again. -func (p *P) RouteRepeat() { - p.RouteTo(p.state) -} - -// RouteTo tells the parser what StateHandler function to invoke -// in the next parsing cycle. -func (p *P) RouteTo(state StateHandler) *RouteFollowup { - p.nextState = state - return &RouteFollowup{p} -} - -// RouteFollowup chains parsing routes. -// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB). -type RouteFollowup struct { - p *P -} - -// ThenTo schedules a StateHandler that must be invoked -// after the RouteTo StateHandler has been completed. 
-// For example: p.RouteTo(handlerA).ThenTo(handlerB) -func (r *RouteFollowup) ThenTo(state StateHandler) { - r.p.pushState(state) -} - -// ThenReturnHere schedules the current StateHandler to be -// invoked after the RouteTo StateHandler has been completed. -// For example: p.RouteTo(handlerA).ThenReturnHere() -func (r *RouteFollowup) ThenReturnHere() { - r.p.pushState(r.p.state) -} - -// RouteReturn tells the parser that on the next cycle the -// next scheduled route must be invoked. -// Using this method is optional. When implementating a -// StateHandler that is used as a sort of subroutine (using -// constructions like p.RouteTo(subroutine).ThenReturnHere()), -// then you can refrain from providing a routing decision -// from that handler. The parser will automatically assume -// a RouteReturn in that case. -func (p *P) RouteReturn() { - p.nextState = p.popState() -} - -// PushState adds the state function to the state stack. -// This is used for implementing nested parsing. -func (p *P) pushState(state StateHandler) { - p.stack = append(p.stack, state) -} - -// PopState pops the last pushed state from the state stack. -func (p *P) popState() StateHandler { - last := len(p.stack) - 1 - head, tail := p.stack[:last], p.stack[last] - p.stack = head - return tail -} diff --git a/parser/comment.go b/parser/comment.go index 07c4912..3feb999 100644 --- a/parser/comment.go +++ b/parser/comment.go @@ -7,19 +7,15 @@ import ( // A '#' hash symbol marks the rest of the line as a comment. func startComment(p *parsekit.P) { p.Expects("start of comment") - if p.On(c.OneOrMore(hash)).Skip() { - p.RouteTo(commentContents) - } + p.On(c.OneOrMore(hash)).Skip().RouteTo(commentContents) } // All characters up to the end of the line are included in the comment. 
func commentContents(p *parsekit.P) { p.Expects("comment contents") switch { - case p.On(endOfLine).Skip(): + case p.On(endOfLine).Skip().RouteReturn().End(): p.EmitLiteralTrim(ItemComment) - p.RouteReturn() - case p.On(any).Accept(): - p.RouteRepeat() + case p.On(anyRune).Accept().RouteRepeat().End(): } } diff --git a/parser/comment_test.go b/parser/comment_test.go index cc44e3b..234a879 100644 --- a/parser/comment_test.go +++ b/parser/comment_test.go @@ -6,15 +6,17 @@ import ( func TestComments(t *testing.T) { runStatesTs(t, []statesT{ - {"empty comment", "#", "#()", ""}, - // {"empty comment with spaces", "# \t \r\n", `#()`, ""}, - // {"basic comment", "#chicken", "#(chicken)", ""}, - // {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, - // {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, - // {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, - // {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, - // {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""}, - // {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, - // {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, + {"empty comment at end of file", "#", "#()", ""}, + {"empty comment at end of windows line", "#\r\n", "#()", ""}, + {"empty comment at end of unix line", "#\n", "#()", ""}, + {"empty comment with spaces", "# \t \r\n", `#()`, ""}, + {"basic comment", "#chicken", "#(chicken)", ""}, + {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""}, + {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""}, + {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""}, + {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""}, + {"comment with multiple hashes", `#### Just Jack!`, `#(Just 
Jack!)`, ""}, + {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""}, + {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""}, }) } diff --git a/parser/eof.go b/parser/eof.go index 97c6bb5..49e0c73 100644 --- a/parser/eof.go +++ b/parser/eof.go @@ -5,7 +5,7 @@ import "github.com/mmakaay/toml/parsekit" // TODO move into parsekit func endOfFile(p *parsekit.P) { p.Expects("end of file") - if p.On(c.EndOfFile()).Stay() { + if p.On(c.EndOfFile()).Stay().End() { p.Emit(parsekit.ItemEOF, "EOF") } } diff --git a/parser/keyvaluepair.go b/parser/keyvaluepair.go index d4be537..4a33b27 100644 --- a/parser/keyvaluepair.go +++ b/parser/keyvaluepair.go @@ -29,15 +29,14 @@ var ( // This allows for grouping similar properties together. Whitespace // around dot-separated parts is ignored, however, best practice is to // not use any extraneous whitespace. - keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace) + keySeparatorDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace) ) func startKeyValuePair(p *parsekit.P) { - p.On(whitespaceOrNewlines).Skip() switch { - case p.On(hash).Stay(): - p.RouteTo(startComment).ThenReturnHere() - case p.On(startOfKey).RouteTo(startKey): + case p.On(whitespaceOrNewlines).Skip().RouteRepeat().End(): + case p.On(hash).RouteTo(startComment).ThenReturnHere().End(): + case p.On(startOfKey).RouteTo(startKey).End(): default: p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler. 
} @@ -50,14 +49,14 @@ func startKey(p *parsekit.P) { func startBareKey(p *parsekit.P) { p.Expects("a bare key name") - if p.On(bareKey).Accept() { + if p.On(bareKey).Accept().End() { p.EmitLiteral(ItemKey) p.RouteTo(endOfKeyOrDot) } } func endOfKeyOrDot(p *parsekit.P) { - if p.On(keySeparatordDot).Skip() { + if p.On(keySeparatorDot).Skip().End() { p.Emit(ItemKeyDot, ".") p.RouteTo(startKey) } else { @@ -67,7 +66,7 @@ func endOfKeyOrDot(p *parsekit.P) { func startAssignment(p *parsekit.P) { p.Expects("a value assignment") - if p.On(keyAssignment).Skip() { + if p.On(keyAssignment).Skip().End() { p.Emit(ItemAssignment, "=") p.RouteTo(startValue) } diff --git a/parser/keyvaluepair_test.go b/parser/keyvaluepair_test.go index 4dc4ad0..75862c5 100644 --- a/parser/keyvaluepair_test.go +++ b/parser/keyvaluepair_test.go @@ -33,3 +33,9 @@ func TestKeyWithAssignmentButNoValue(t *testing.T) { {"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err}, }) } + +func TestKeyWithValue(t *testing.T) { + runStatesTs(t, []statesT{ + {"with string value", " -key- = \"value\" # nice\r\n", "[-key-]=STR(value)#(nice)", ""}, + }) +} diff --git a/parser/toml.go b/parser/toml.go index 8b86a65..28205bb 100644 --- a/parser/toml.go +++ b/parser/toml.go @@ -24,7 +24,7 @@ var ( dot = c.Rune('.') singleQuote = c.Rune('\'') doubleQuote = c.Rune('"') - any = c.Any() + anyRune = c.AnyRune() anyQuote = c.AnyOf(singleQuote, doubleQuote) backslash = c.Rune('\\') asciiLower = c.RuneRange('a', 'z') diff --git a/parser/toml_test.go b/parser/toml_test.go index f983ea6..16122f2 100644 --- a/parser/toml_test.go +++ b/parser/toml_test.go @@ -10,15 +10,13 @@ func TestEmptyInput(t *testing.T) { runStatesT(t, statesT{"empty string", "", "", ""}) } -func TestErrorsIncludeLineAndRowPosition(t *testing.T) { - p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc") +func TestErrorFullIncludesLineAndRowPosition(t *testing.T) { + p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +") _, 
err := parseItemsToArray(p) - t.Logf("Got error: %s", err.Error()) - if err.Row != 4 { - t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4) - } - if err.Column != 6 { - t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6) + actual := err.ErrorFull() + expected := "unexpected character '+' (expected end of file) after line 6, column 3" + if actual != expected { + t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual) } } diff --git a/parser/value_string.go b/parser/value_string.go index 1d3705f..d7dcda8 100644 --- a/parser/value_string.go +++ b/parser/value_string.go @@ -37,29 +37,25 @@ var ( func startString(p *parsekit.P) { p.Expects("a string value") switch { - case p.On(doubleQuote3).RouteTo(startMultiLineBasicString): - case p.On(doubleQuote).RouteTo(startBasicString): + case p.On(doubleQuote3).RouteTo(startMultiLineBasicString).End(): + case p.On(doubleQuote).RouteTo(startBasicString).End(): } } func startBasicString(p *parsekit.P) { p.Expects("a basic string") - if p.On(doubleQuote).Skip() { - p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics) - } + p.On(doubleQuote).Skip().RouteTo(parseBasicString).ThenTo(basicStringSpecifics) } func parseBasicString(p *parsekit.P) { p.Expects("string contents") switch { - case p.On(charThatMustBeEscaped).Stay(): - p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch) - case p.On(validEscape).Accept(): - p.RouteRepeat() - case p.On(backslash).RouteReturn(): - case p.On(doubleQuote).RouteReturn(): - case p.On(any).Accept(): - p.RouteRepeat() + case p.On(charThatMustBeEscaped).End(): + p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch) + case p.On(validEscape).Accept().RouteRepeat().End(): + case p.On(backslash).RouteReturn().End(): + case p.On(doubleQuote).RouteReturn().End(): + case p.On(anyRune).Accept().RouteRepeat().End(): } } @@ -71,20 +67,20 @@ func parseBasicString(p *parsekit.P) { func 
basicStringSpecifics(p *parsekit.P) { p.Expects("string contents") switch { - case p.On(doubleQuote).Skip(): + case p.On(doubleQuote).Skip().End(): if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase? - p.EmitError("Invalid data in string: %s", err) + p.EmitError("invalid data in string: %s", err) } else { p.RouteTo(startKeyValuePair) } - case p.On(backslash).Stay(): - p.EmitError("Invalid escape sequence") + case p.On(backslash).End(): + p.EmitError("invalid escape sequence") } } func startMultiLineBasicString(p *parsekit.P) { p.Expects("a multi-line basic string") - if p.On(doubleQuote3).Skip() { - p.EmitError("Not yet implemented") + if p.On(doubleQuote3).Skip().End() { + p.EmitError("not yet implemented") } } diff --git a/parser/value_tring_test.go b/parser/value_string_test.go similarity index 89% rename from parser/value_tring_test.go rename to parser/value_string_test.go index 62ddf18..b3ff934 100644 --- a/parser/value_tring_test.go +++ b/parser/value_string_test.go @@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) { func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { runStatesTs(t, []statesT{ - {"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`}, - {"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`}, - {"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`}, + {"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`}, + {"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`}, + {"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`}, }) // No need to write all test cases for disallowed characters by hand. 
@@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { name := fmt.Sprintf("control character %x", rune(i)) runStatesT( t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", - fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))}) + fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))}) } } @@ -46,7 +46,7 @@ func TestBasicString(t *testing.T) { } func TestBasicStringWithInvalidEscapeSequence(t *testing.T) { - err := "Invalid escape sequence" + err := "invalid escape sequence" runStatesTs(t, []statesT{ {"invalid escape sequence", `a="\x"`, "[a]=", err}, {"too short \\u UTF8", `a="\u123"`, "[a]=", err},