Backup work.

This commit is contained in:
Maurice Makaay 2019-05-21 12:49:42 +00:00
parent d9d837fe6e
commit 478efe3e25
20 changed files with 345 additions and 277 deletions

View File

@ -1,119 +0,0 @@
package parsekit
// Expects is used to let a state function describe what input it is expecting.
// The description is stored in the parser's expecting field.
// This expectation is used in error messages to make them more descriptive.
//
// Also, when defining an expectation inside a StateHandler, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between the following issues:
//   * there is valid data on input, but it was not accepted by the function
//   * there is an invalid UTF8 character on input
//   * the end of the file was reached.
func (p *P) Expects(description string) {
// Only record the description here; it is looked at later on, when the
// StateHandler did not select a next state.
p.expecting = description
}
// On tests the current input against the provided Matcher and wraps the
// outcome in a MatchAction. The runes that were matched are also stored,
// as a string, in p.LastMatch.
//
// The returned MatchAction is meant to be used in a method chain, for
// example: p.On(...).Accept()
// The methods of the MatchAction return a boolean value, which tells
// whether or not the Matcher matched the input.
func (p *P) On(m Matcher) *MatchAction {
	matched, sizes, found := p.match(m)
	p.LastMatch = string(matched)
	action := MatchAction{p: p, runes: matched, widths: sizes, ok: found}
	return &action
}
// match runs the provided Matcher against the current input, using a fresh
// MatchDialog that is bound to this parser.
//
// It returns the runes and the byte widths that the Matcher collected in
// the MatchDialog, together with the boolean result of the Matcher.
// The original documentation states that the slices are empty when the
// input did not match; that depends on the Matcher implementation.
func (p *P) match(matcher Matcher) ([]rune, []int, bool) {
	dialog := MatchDialog{p: p}
	matched := matcher.Match(&dialog)
	return dialog.runes, dialog.widths, matched
}
// MatchAction holds the outcome of a P.On() call. Its methods let the
// caller decide what the parser must do with the match.
type MatchAction struct {
// p is the parser on which On() was called.
p *P
// runes holds the runes that were collected by the Matcher.
runes []rune
// widths holds the byte width for each of the runes.
widths []int
// ok tells whether or not the Matcher matched the input.
ok bool
}
// Accept consumes a match: every matched rune is written to the parser's
// string buffer and the cursor is moved past it.
// It returns true when a match was found. Without a match, the parser is
// left untouched and false is returned.
func (a *MatchAction) Accept() bool {
	if !a.ok {
		return false
	}
	for idx := range a.runes {
		matched := a.runes[idx]
		a.p.buffer.writeRune(matched)
		a.p.advanceCursor(matched, a.widths[idx])
	}
	return true
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *MatchAction) Skip() bool {
	if a.ok {
		// The cursor is advanced rune by rune, so newline tracking in
		// advanceCursor stays accurate.
		for i, r := range a.runes {
			a.p.advanceCursor(r, a.widths[i])
		}
	}
	return a.ok
}
// Stay tells the parser to not move the cursor after finding a match.
// It does not touch the parser at all; it only reports the match result.
// Returns true in case a match was found, false otherwise.
func (a *MatchAction) Stay() bool {
return a.ok
}
// RouteTo combines p.On(...).Stay() with p.RouteTo(...): when a match was
// found, the parser is routed to the provided StateHandler, without moving
// the cursor. It returns true when a match was found, false otherwise.
func (a *MatchAction) RouteTo(state StateHandler) bool {
	if !a.ok {
		return false
	}
	a.p.RouteTo(state)
	return true
}
// RouteReturn combines p.On(...).Stay() with p.RouteReturn(): when a match
// was found, p.RouteReturn() is called on the parser, without moving the
// cursor. It returns true when a match was found, false otherwise.
func (a *MatchAction) RouteReturn() bool {
	if !a.ok {
		return false
	}
	a.p.RouteReturn()
	return true
}
// advanceCursor moves the cursor past one rune of the input: the byte
// position is increased by the width w of the rune, and the row + column
// bookkeeping (used for reporting positions on error) is updated.
//
// A newline does not change the row right away. It is only flagged, and
// the row is incremented (and the column reset) when the next rune is
// processed.
func (p *P) advanceCursor(r rune, w int) {
	p.pos += w
	if !p.newline {
		p.cursorColumn++
	} else {
		p.cursorRow++
		p.cursorColumn = 0
	}
	p.newline = (r == '\n')
}

View File

@ -10,12 +10,12 @@ import (
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
stack []StateHandler // state function stack, for nested parsing
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorRow int // current row number in the input
cursorLine int // current row number in the input
cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
@ -35,6 +35,8 @@ func New(input string, start StateHandler) *P {
return &P{
input: input,
len: len(input),
cursorLine: 1,
cursorColumn: 1,
nextState: start,
items: make(chan Item, 2),
}
@ -93,8 +95,8 @@ func (p *P) getNextStateHandler() (StateHandler, bool) {
switch {
case p.nextState != nil:
return p.nextState, true
case len(p.stack) > 0:
return p.popState(), true
case len(p.routeStack) > 0:
return p.popRoute(), true
case p.expecting != "":
p.UnexpectedInput()
return nil, false
@ -118,7 +120,7 @@ func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
case i.Type == ItemEOF:
return i, nil, false
case i.Type == ItemError:
p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
p.err = &Error{i.Value, p.cursorLine, p.cursorColumn}
return i, p.err, false
default:
p.item = i

View File

@ -92,7 +92,7 @@ type Matcher interface {
type matcherConstructors struct {
EndOfFile func() MatchEndOfFile
Any func() MatchAny
AnyRune func() MatchAny
Rune func(rune) MatchRune
RuneRange func(rune, rune) MatchRuneRange
Runes func(...rune) MatchAnyOf
@ -121,7 +121,7 @@ var C = matcherConstructors{
EndOfFile: func() MatchEndOfFile {
return MatchEndOfFile{}
},
Any: func() MatchAny {
AnyRune: func() MatchAny {
return MatchAny{}
},
Rune: func(rune rune) MatchRune {
@ -198,13 +198,6 @@ func (c MatchEndOfFile) Match(m *MatchDialog) bool {
return !ok && r == EOF
}
type MatchInvalidRune struct{}
func (c MatchInvalidRune) Match(m *MatchDialog) bool {
r, ok := m.NextRune()
return !ok && r == INVALID
}
type MatchAny struct{}
func (c MatchAny) Match(m *MatchDialog) bool {

View File

@ -13,7 +13,7 @@ const TestItem p.ItemType = 1
func newParser(input string, matcher p.Matcher) *p.P {
stateFn := func(p *p.P) {
p.Expects("MATCH")
if p.On(matcher).Accept() {
if p.On(matcher).Accept().End() {
p.EmitLiteral(TestItem)
p.RouteRepeat()
}
@ -21,8 +21,8 @@ func newParser(input string, matcher p.Matcher) *p.P {
return p.New(input, stateFn)
}
func TestMatchAny(t *testing.T) {
p := newParser("o", c.Any())
func TestMatchAnyRune(t *testing.T) {
p := newParser("o", c.AnyRune())
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s", err)
@ -35,8 +35,8 @@ func TestMatchAny(t *testing.T) {
}
}
func TestMatchAny_AtEndOfFile(t *testing.T) {
p := newParser("", c.Any())
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
p := newParser("", c.AnyRune())
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
@ -47,8 +47,8 @@ func TestMatchAny_AtEndOfFile(t *testing.T) {
}
}
func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", c.Any())
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
p := newParser("\xcd", c.AnyRune())
_, err, ok := p.Next()
if ok {
t.Fatalf("Parsing unexpectedly succeeded")
@ -207,7 +207,7 @@ func TestMatchRepeat(t *testing.T) {
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
@ -254,7 +254,7 @@ func TestMatchOneOrMore(t *testing.T) {
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "xxxxxxxx" {
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
@ -265,7 +265,7 @@ func TestMatchSequence(t *testing.T) {
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "10" {
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
@ -276,7 +276,7 @@ func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "101010" {
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
@ -290,7 +290,7 @@ func TestSequence_WithRepeatedRunes(t *testing.T) {
p := newParser(" == 10", assignment)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != " =" {
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value)
@ -301,7 +301,7 @@ func TestMatchOptional(t *testing.T) {
p := newParser("xyz", c.Optional(c.Rune('x')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "x" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
@ -310,7 +310,7 @@ func TestMatchOptional(t *testing.T) {
p = newParser("xyz", c.Optional(c.Rune('y')))
r, err, ok = p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "" {
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
@ -319,10 +319,10 @@ func TestMatchOptional(t *testing.T) {
func TestMatchDrop(t *testing.T) {
dashes := c.OneOrMore(c.Rune('-'))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.Any(), c.Drop(dashes)))
p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "X" {
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
@ -336,7 +336,7 @@ func TestMatchSeparated(t *testing.T) {
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "1,2;3|44,55|66;777" {
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
@ -352,7 +352,7 @@ func TestMixAndMatch(t *testing.T) {
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != `\x9a\x01\xF0\xfC` {
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)

View File

@ -8,12 +8,10 @@ import (
// ItemType represents the type of a parser Item.
type ItemType int
// TODO private?
// ItemEOF is a built-in parser item type that is used for flagging that the
// end of the input was reached.
const ItemEOF ItemType = -1
// TODO private?
// ItemError is a built-in parser item type that is used for flagging that
// an error has occurred during parsing.
const ItemError ItemType = -2
@ -62,14 +60,22 @@ func (p *P) EmitInterpreted(t ItemType) error {
// error messages to the user.
type Error struct {
Message string
Row int
Line int
Column int
}
// Error returns the bare error message, without position information.
// It panics when it is called on a nil *Error, since that means that no
// error was set.
func (err *Error) Error() string {
	if err != nil {
		return err.Message
	}
	panic("Error method called on the parser, but no error was set")
}
// ErrorFull returns the error message, followed by the line and column
// that are stored in the Error.
func (err *Error) ErrorFull() string {
	const layout = "%s after line %d, column %d"
	// Error() is called first, so its nil check runs before the position
	// fields are read.
	text := err.Error()
	return fmt.Sprintf(layout, text, err.Line, err.Column)
}
// EmitError emits a Parser error item to the client.
func (p *P) EmitError(format string, args ...interface{}) {
message := fmt.Sprintf(format, args...)

View File

@ -0,0 +1,15 @@
package parsekit
// Expects is used to let a state function describe what input it is expecting.
// The description is stored in the parser's expecting field.
// This expectation is used in error messages to make them more descriptive.
//
// Also, when defining an expectation inside a StateHandler, you do not need
// to handle unexpected input yourself. When the end of the function is
// reached without setting the next state, an automatic error will be
// emitted. This error differentiates between the following issues:
//   * there is valid data on input, but it was not accepted by the function
//   * there is an invalid UTF8 character on input
//   * the end of the file was reached.
func (p *P) Expects(description string) {
// Only record the description here; it is looked at later on, when the
// StateHandler did not select a next state.
p.expecting = description
}

View File

@ -0,0 +1,58 @@
package parsekit
// On checks if the current input matches the provided Matcher.
//
// This method is the start of a method chain in which multiple things can
// be arranged in one go:
//
// * Checking whether or not there is a match (this is what On does)
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only move
//   the cursor forward, Accept(): move cursor forward and add the match in
//   the parser string buffer)
// * Deciding where to route to (e.g. using RouteTo() to route to a
//   StateHandler by name)
// * Followup routing after that, when applicable (e.g. using something like
//   RouteTo(...).ThenTo(...))
//
// For every step of this chain, you can end the chain using the
// End() method. This will return a boolean value, indicating whether or
// not the initial On() method found a match in the input.
// End() is not mandatory. It is merely provided as a means to use
// a chain as an expression for a switch/case or if statement (since those
// require a boolean expression).
//
// You can omit "what to do with the match" and go straight into a routing
// method, e.g. On(...).RouteTo(...). This is functionally the same as
// using On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
// p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(matcher Matcher) *MatchAction {
	m := &MatchDialog{p: p}
	ok := matcher.Match(m)

	// Keep track of the last match, to allow parser implementations
	// to access it in an easy way. Typical use would be something like:
	//   if p.On(somethingBad).End() {
	//       p.Errorf("This was bad: %s", p.LastMatch)
	//   }
	p.LastMatch = string(m.runes)

	// Both embedded structs get the same parser and match status. When the
	// embedded RouteAction is left at its zero value, the routing methods
	// that are promoted from it (e.g. RouteRepeat) see ok == false and
	// silently do nothing, even when the input did match.
	chain := ChainAction{p, ok}
	return &MatchAction{
		RouteAction: RouteAction{ChainAction: chain},
		ChainAction: chain,
		runes:       m.runes,
		widths:      m.widths,
	}
}
// ChainAction is used for building method chains for the On() method.
// It is the common base that is embedded by the other chain structs.
type ChainAction struct {
// p is the parser that the chain operates on. Some chain steps set this
// to nil, so check the code that created the ChainAction before using it.
p *P
// ok tells whether or not the On() call at the start of the chain found
// a match in the input.
ok bool
}
// End ends the method chain and returns a boolean indicating whether
// or not a match was found in the input.
// It only reads the ok field and does not use the parser.
func (a *ChainAction) End() bool {
return a.ok
}

View File

@ -0,0 +1,75 @@
package parsekit
// MatchAction is a struct that is used for building On()-method chains.
//
// It embeds the RouteAction struct, to make it possible to go right into
// a route action, which is basically a simple way of aliasing a chain
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
type MatchAction struct {
// RouteAction provides the routing methods for the chain. It carries its
// own embedded ChainAction, one level deeper than the one below.
// NOTE(review): methods that are promoted from RouteAction (such as
// RouteRepeat) read that deeper ChainAction - confirm that it is filled
// in at the place where the MatchAction is created.
RouteAction
// ChainAction holds the parser and the match status. Because it is
// embedded directly, this is the one that the selectors a.p, a.ok and
// a.End() resolve to on a MatchAction.
ChainAction
// runes holds the runes that were collected by the Matcher.
runes []rune
// widths holds the byte width for each of the runes.
widths []int
}
// Accept consumes a match: every matched rune is written to the parser's
// string buffer and the cursor is moved past it.
// Without a match, the parser is left untouched.
// The returned RouteAction carries the parser and the match status, and
// provides the methods for telling the parser what state to go to next.
func (a *MatchAction) Accept() *RouteAction {
	next := &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
	if !a.ok {
		return next
	}
	for idx := range a.runes {
		matched := a.runes[idx]
		a.p.buffer.writeRune(matched)
		a.p.advanceCursor(matched, a.widths[idx])
	}
	return next
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// When no match was found, then no action is taken.
// It returns a RouteAction struct, which carries the parser and the match
// status, and which provides methods that can be used to tell the parser
// what state to go to next.
func (a *MatchAction) Skip() *RouteAction {
	if a.ok {
		// The cursor is advanced rune by rune, so newline tracking in
		// advanceCursor stays accurate.
		for i, r := range a.runes {
			a.p.advanceCursor(r, a.widths[i])
		}
	}
	return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
}
// Stay tells the parser to not move the cursor after finding a match.
// It does not touch the parser. It returns a RouteAction struct that
// carries the parser and the match status, so the chain can be continued
// with a routing method or with End().
func (a *MatchAction) Stay() *RouteAction {
return &RouteAction{ChainAction: ChainAction{a.p, a.ok}}
}
// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...).
// It delegates to the RouteAction that is returned by Stay(), so the
// cursor is not moved and the route is only set when a match was found.
func (a *MatchAction) RouteTo(state StateHandler) *RouteFollowupAction {
return a.Stay().RouteTo(state)
}
// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn(...).
// It delegates to the RouteAction that is returned by Stay(), so the
// cursor is not moved and the routing is only done when a match was found.
func (a *MatchAction) RouteReturn() *ChainAction {
return a.Stay().RouteReturn()
}
// advanceCursor moves the cursor past one rune of the input: the byte
// position is increased by the width w of the rune, and the line + column
// bookkeeping (used for reporting positions on error) is updated.
//
// A newline does not change the line number right away. It is only
// flagged, and the line is incremented (and the column reset to 1) when
// the next rune is processed.
func (p *P) advanceCursor(r rune, w int) {
	p.pos += w
	if !p.newline {
		p.cursorColumn++
	} else {
		p.cursorLine++
		p.cursorColumn = 1
	}
	p.newline = (r == '\n')
}

View File

@ -0,0 +1,59 @@
package parsekit
// RouteAction is a struct that is used for building On() method chains.
// It represents the routing step of a chain: its methods only perform
// their routing when the chain's match status (ok) is true.
type RouteAction struct {
// ChainAction holds the parser and the match status for the chain.
ChainAction
}
// RouteRepeat asks the parser to invoke the current StateHandler again on
// the next parsing cycle, but only when the chain found a match.
// Without a match, nothing is routed and a ChainAction with a false match
// status is returned.
func (a *RouteAction) RouteRepeat() *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	return a.p.RouteRepeat()
}
// RouteTo asks the parser to invoke the provided StateHandler on the next
// parsing cycle, but only when the chain found a match.
// Without a match, nothing is routed and a RouteFollowupAction with a
// false match status is returned.
func (a *RouteAction) RouteTo(state StateHandler) *RouteFollowupAction {
	if !a.ok {
		return &RouteFollowupAction{ChainAction: ChainAction{nil, false}}
	}
	return a.p.RouteTo(state)
}
// RouteReturn asks the parser to invoke the next scheduled route on the
// next parsing cycle, but only when the chain found a match.
// Without a match, nothing is routed and a ChainAction with a false match
// status is returned.
func (a *RouteAction) RouteReturn() *ChainAction {
	if !a.ok {
		return &ChainAction{nil, false}
	}
	return a.p.RouteReturn()
}
// RouteFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// Its methods only schedule a followup route when the match status (ok)
// of the chain is true.
type RouteFollowupAction struct {
// ChainAction holds the parser and the match status for the chain.
ChainAction
}
// ThenTo pushes the provided StateHandler on the route stack, so it gets
// invoked after the RouteTo StateHandler has been completed. This is only
// done when the chain found a match.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
// The returned ChainAction carries the match status, but no parser.
func (a *RouteFollowupAction) ThenTo(state StateHandler) *ChainAction {
	matched := a.ok
	if matched {
		a.p.pushRoute(state)
	}
	return &ChainAction{nil, matched}
}
// ThenReturnHere pushes the current StateHandler of the parser on the
// route stack, so it gets invoked again after the RouteTo StateHandler has
// been completed. This is only done when the chain found a match.
// For example: p.RouteTo(handlerA).ThenReturnHere()
// The returned ChainAction carries the match status, but no parser.
func (a *RouteFollowupAction) ThenReturnHere() *ChainAction {
	matched := a.ok
	if matched {
		current := a.p.state
		a.p.pushRoute(current)
	}
	return &ChainAction{nil, matched}
}

View File

@ -0,0 +1,42 @@
package parsekit
// RouteTo registers the provided StateHandler as the function that the
// parser must invoke in the next parsing cycle.
// The returned RouteFollowupAction (which always has a true match status)
// can be used to schedule a followup route, for example:
// p.RouteTo(handlerA).ThenTo(handlerB)
func (p *P) RouteTo(state StateHandler) *RouteFollowupAction {
	followup := &RouteFollowupAction{ChainAction: ChainAction{p, true}}
	p.nextState = state
	return followup
}
// RouteRepeat indicates that on the next parsing cycle, the current
// StateHandler must be reinvoked.
// It does so by routing to p.state. The returned ChainAction has a true
// match status and carries no parser (nil).
func (p *P) RouteRepeat() *ChainAction {
p.RouteTo(p.state)
return &ChainAction{nil, true}
}
// RouteReturn tells the parser that on the next cycle the last
// StateHandler that was pushed on the route stack must be invoked.
//
// Using this method is optional. When implementing a StateHandler that
// is used as a sort of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()), you can refrain from
// providing an explicit routing decision from that handler. The parser will
// automatically assume a RouteReturn() in that case.
//
// The route is popped unconditionally, so calling this method while the
// route stack is empty makes popRoute panic.
// The returned ChainAction has a true match status and carries no parser.
func (p *P) RouteReturn() *ChainAction {
p.nextState = p.popRoute()
return &ChainAction{nil, true}
}
// pushRoute adds the StateHandler to the route stack.
// This is used for implementing nested parsing.
// The handler is appended at the end of the stack, which is the side
// that popRoute takes handlers from.
func (p *P) pushRoute(state StateHandler) {
p.routeStack = append(p.routeStack, state)
}
// popRoute removes the last pushed StateHandler from the route stack and
// returns it.
//
// Popping from an empty route stack is a programming error (a route is
// returned to, while none was scheduled). It results in a panic with a
// descriptive message, instead of an opaque slice bounds runtime error.
func (p *P) popRoute() StateHandler {
	if len(p.routeStack) == 0 {
		panic("parsekit: popRoute called on an empty route stack")
	}
	last := len(p.routeStack) - 1
	handler := p.routeStack[last]
	p.routeStack = p.routeStack[:last]
	return handler
}

View File

@ -1,60 +0,0 @@
package parsekit
// RouteRepeat indicates that on the next parsing cycle,
// the current StateHandler must be invoked again.
// It does so by routing to p.state.
func (p *P) RouteRepeat() {
p.RouteTo(p.state)
}
// RouteTo registers the provided StateHandler as the function that the
// parser must invoke in the next parsing cycle.
// The returned RouteFollowup can be used to schedule a followup route,
// for example: p.RouteTo(handlerA).ThenTo(handlerB)
func (p *P) RouteTo(state StateHandler) *RouteFollowup {
	followup := &RouteFollowup{p}
	p.nextState = state
	return followup
}
// RouteFollowup chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
type RouteFollowup struct {
// p is the parser for which the followup route is scheduled.
p *P
}
// ThenTo schedules a StateHandler that must be invoked
// after the RouteTo StateHandler has been completed.
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
// The StateHandler is scheduled by pushing it on the parser's state stack.
func (r *RouteFollowup) ThenTo(state StateHandler) {
r.p.pushState(state)
}
// ThenReturnHere schedules the current StateHandler to be
// invoked after the RouteTo StateHandler has been completed.
// For example: p.RouteTo(handlerA).ThenReturnHere()
// The current StateHandler (p.state) is scheduled by pushing it on the
// parser's state stack.
func (r *RouteFollowup) ThenReturnHere() {
r.p.pushState(r.p.state)
}
// RouteReturn tells the parser that on the next cycle the
// next scheduled route must be invoked.
// Using this method is optional. When implementing a
// StateHandler that is used as a sort of subroutine (using
// constructions like p.RouteTo(subroutine).ThenReturnHere()),
// then you can refrain from providing a routing decision
// from that handler. The parser will automatically assume
// a RouteReturn in that case.
//
// The state is popped unconditionally, so calling this method while the
// state stack is empty makes popState panic.
func (p *P) RouteReturn() {
p.nextState = p.popState()
}
// pushState adds the state function to the state stack.
// This is used for implementing nested parsing.
// The function is appended at the end of the stack, which is the side
// that popState takes state functions from.
func (p *P) pushState(state StateHandler) {
p.stack = append(p.stack, state)
}
// popState removes the last pushed state from the state stack and
// returns it.
//
// Popping from an empty state stack is a programming error (a route is
// returned to, while none was scheduled). It results in a panic with a
// descriptive message, instead of an opaque slice bounds runtime error.
func (p *P) popState() StateHandler {
	if len(p.stack) == 0 {
		panic("parsekit: popState called on an empty state stack")
	}
	last := len(p.stack) - 1
	handler := p.stack[last]
	p.stack = p.stack[:last]
	return handler
}

View File

@ -7,19 +7,15 @@ import (
// A '#' hash symbol marks the rest of the line as a comment.
func startComment(p *parsekit.P) {
p.Expects("start of comment")
if p.On(c.OneOrMore(hash)).Skip() {
p.RouteTo(commentContents)
}
p.On(c.OneOrMore(hash)).Skip().RouteTo(commentContents)
}
// All characters up to the end of the line are included in the comment.
func commentContents(p *parsekit.P) {
p.Expects("comment contents")
switch {
case p.On(endOfLine).Skip():
case p.On(endOfLine).Skip().RouteReturn().End():
p.EmitLiteralTrim(ItemComment)
p.RouteReturn()
case p.On(any).Accept():
p.RouteRepeat()
case p.On(anyRune).Accept().RouteRepeat().End():
}
}

View File

@ -6,15 +6,17 @@ import (
func TestComments(t *testing.T) {
runStatesTs(t, []statesT{
{"empty comment", "#", "#()", ""},
// {"empty comment with spaces", "# \t \r\n", `#()`, ""},
// {"basic comment", "#chicken", "#(chicken)", ""},
// {"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
// {"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
// {"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
// {"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
// {"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
// {"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
// {"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
{"empty comment at end of file", "#", "#()", ""},
{"empty comment at end of windows line", "#\r\n", "#()", ""},
{"empty comment at end of unix line", "#\n", "#()", ""},
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
{"basic comment", "#chicken", "#(chicken)", ""},
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
})
}

View File

@ -5,7 +5,7 @@ import "github.com/mmakaay/toml/parsekit"
// TODO move into parsekit
func endOfFile(p *parsekit.P) {
p.Expects("end of file")
if p.On(c.EndOfFile()).Stay() {
if p.On(c.EndOfFile()).Stay().End() {
p.Emit(parsekit.ItemEOF, "EOF")
}
}

View File

@ -29,15 +29,14 @@ var (
// This allows for grouping similar properties together. Whitespace
// around dot-separated parts is ignored, however, best practice is to
// not use any extraneous whitespace.
keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
keySeparatorDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
)
func startKeyValuePair(p *parsekit.P) {
p.On(whitespaceOrNewlines).Skip()
switch {
case p.On(hash).Stay():
p.RouteTo(startComment).ThenReturnHere()
case p.On(startOfKey).RouteTo(startKey):
case p.On(whitespaceOrNewlines).Skip().RouteRepeat().End():
case p.On(hash).RouteTo(startComment).ThenReturnHere().End():
case p.On(startOfKey).RouteTo(startKey).End():
default:
p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
}
@ -50,14 +49,14 @@ func startKey(p *parsekit.P) {
func startBareKey(p *parsekit.P) {
p.Expects("a bare key name")
if p.On(bareKey).Accept() {
if p.On(bareKey).Accept().End() {
p.EmitLiteral(ItemKey)
p.RouteTo(endOfKeyOrDot)
}
}
func endOfKeyOrDot(p *parsekit.P) {
if p.On(keySeparatordDot).Skip() {
if p.On(keySeparatorDot).Skip().End() {
p.Emit(ItemKeyDot, ".")
p.RouteTo(startKey)
} else {
@ -67,7 +66,7 @@ func endOfKeyOrDot(p *parsekit.P) {
func startAssignment(p *parsekit.P) {
p.Expects("a value assignment")
if p.On(keyAssignment).Skip() {
if p.On(keyAssignment).Skip().End() {
p.Emit(ItemAssignment, "=")
p.RouteTo(startValue)
}

View File

@ -33,3 +33,9 @@ func TestKeyWithAssignmentButNoValue(t *testing.T) {
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
})
}
func TestKeyWithValue(t *testing.T) {
runStatesTs(t, []statesT{
{"with string value", " -key- = \"value\" # nice\r\n", "[-key-]=STR(value)#(nice)", ""},
})
}

View File

@ -24,7 +24,7 @@ var (
dot = c.Rune('.')
singleQuote = c.Rune('\'')
doubleQuote = c.Rune('"')
any = c.Any()
anyRune = c.AnyRune()
anyQuote = c.AnyOf(singleQuote, doubleQuote)
backslash = c.Rune('\\')
asciiLower = c.RuneRange('a', 'z')

View File

@ -10,15 +10,13 @@ func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""})
}
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc")
func TestErrorFullIncludesLineAndRowPosition(t *testing.T) {
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\n +")
_, err := parseItemsToArray(p)
t.Logf("Got error: %s", err.Error())
if err.Row != 4 {
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
}
if err.Column != 6 {
t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6)
actual := err.ErrorFull()
expected := "unexpected character '+' (expected end of file) after line 6, column 3"
if actual != expected {
t.Errorf("Unexpected error message:\nexpected: %s\nactual: %s\n", expected, actual)
}
}

View File

@ -37,29 +37,25 @@ var (
func startString(p *parsekit.P) {
p.Expects("a string value")
switch {
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
case p.On(doubleQuote).RouteTo(startBasicString):
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString).End():
case p.On(doubleQuote).RouteTo(startBasicString).End():
}
}
func startBasicString(p *parsekit.P) {
p.Expects("a basic string")
if p.On(doubleQuote).Skip() {
p.RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
p.On(doubleQuote).Skip().RouteTo(parseBasicString).ThenTo(basicStringSpecifics)
}
func parseBasicString(p *parsekit.P) {
p.Expects("string contents")
switch {
case p.On(charThatMustBeEscaped).Stay():
p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch)
case p.On(validEscape).Accept():
p.RouteRepeat()
case p.On(backslash).RouteReturn():
case p.On(doubleQuote).RouteReturn():
case p.On(any).Accept():
p.RouteRepeat()
case p.On(charThatMustBeEscaped).End():
p.EmitError("invalid character in basic string: %q (must be escaped)", p.LastMatch)
case p.On(validEscape).Accept().RouteRepeat().End():
case p.On(backslash).RouteReturn().End():
case p.On(doubleQuote).RouteReturn().End():
case p.On(anyRune).Accept().RouteRepeat().End():
}
}
@ -71,20 +67,20 @@ func parseBasicString(p *parsekit.P) {
func basicStringSpecifics(p *parsekit.P) {
p.Expects("string contents")
switch {
case p.On(doubleQuote).Skip():
case p.On(doubleQuote).Skip().End():
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
p.EmitError("Invalid data in string: %s", err)
p.EmitError("invalid data in string: %s", err)
} else {
p.RouteTo(startKeyValuePair)
}
case p.On(backslash).Stay():
p.EmitError("Invalid escape sequence")
case p.On(backslash).End():
p.EmitError("invalid escape sequence")
}
}
func startMultiLineBasicString(p *parsekit.P) {
p.Expects("a multi-line basic string")
if p.On(doubleQuote3).Skip() {
p.EmitError("Not yet implemented")
if p.On(doubleQuote3).Skip().End() {
p.EmitError("not yet implemented")
}
}

View File

@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) {
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
runStatesTs(t, []statesT{
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`},
{"null char", "a=\"\u0000\"", "[a]=", `invalid character in basic string: "\x00" (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `invalid character in basic string: "\n" (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `invalid character in basic string: "\u007f" (must be escaped)`},
})
// No need to write all test cases for disallowed characters by hand.
@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
name := fmt.Sprintf("control character %x", rune(i))
runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
fmt.Sprintf(`invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
}
}
@ -46,7 +46,7 @@ func TestBasicString(t *testing.T) {
}
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
err := "Invalid escape sequence"
err := "invalid escape sequence"
runStatesTs(t, []statesT{
{"invalid escape sequence", `a="\x"`, "[a]=", err},
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},