Phew, that was quite the update. I've now got a working implementation of a parser/combinator-like matching API, which saves us from having to specify everything in state functions. Those are way too low-level for a lot of things. I'd rather have parser/combinator-style definitions for chunks of the input and keep the state functions for higher-level document structure parsing.
This commit is contained in:
parent
55e23874f7
commit
e3e408dfdb
|
@ -3,6 +3,7 @@ package parsekit
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Emit passes a Parser item to the client, including the provided string.
|
// Emit passes a Parser item to the client, including the provided string.
|
||||||
|
@ -51,8 +52,16 @@ func (p *P) EmitError(format string, args ...interface{}) {
|
||||||
func (p *P) UnexpectedInput(expected string) {
|
func (p *P) UnexpectedInput(expected string) {
|
||||||
// next() takes care of error messages in cases where ok == false.
|
// next() takes care of error messages in cases where ok == false.
|
||||||
// Therefore, we only provide an error message for the ok case here.
|
// Therefore, we only provide an error message for the ok case here.
|
||||||
if r, ok := p.next(); ok {
|
r, _, ok := p.peek(0)
|
||||||
p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
switch {
|
||||||
|
case ok:
|
||||||
|
p.EmitError("unexpected character %q (expected %s)", r, expected)
|
||||||
|
case r == EOF:
|
||||||
|
p.EmitError("unexpected end of file (expected %s)", expected)
|
||||||
|
case r == utf8.RuneError:
|
||||||
|
p.EmitError("invalid UTF8 character in input (expected %s)", expected)
|
||||||
|
default:
|
||||||
|
panic("Unhandled output from peek()")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,32 +4,13 @@ import (
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// next returns the next rune from the input and a boolean indicating if
|
|
||||||
// reading the input was successful.
|
|
||||||
// When the end of input is reached, or an invalid UTF8 character is
|
|
||||||
// read, then false is returned. Both are considered error cases,
|
|
||||||
// and for that reason these automatically emit an error to the client.
|
|
||||||
func (p *P) next() (rune, bool) {
|
|
||||||
r, w, ok := p.peek(0)
|
|
||||||
if ok {
|
|
||||||
p.advanceCursor(r, w)
|
|
||||||
return r, true
|
|
||||||
}
|
|
||||||
if r == utf8.RuneError && w == 0 {
|
|
||||||
p.EmitError("unexpected end of file")
|
|
||||||
} else {
|
|
||||||
p.EmitError("invalid UTF8 character")
|
|
||||||
}
|
|
||||||
return r, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
// (end of data or invalid UTF8 character).
|
// (end of data or invalid UTF8 character).
|
||||||
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
||||||
peeked, width := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
||||||
return peeked, width, peeked != utf8.RuneError
|
return handleRuneError(r, w)
|
||||||
}
|
}
|
||||||
|
|
||||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||||
|
@ -43,13 +24,12 @@ func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
|
||||||
offset := 0
|
offset := 0
|
||||||
for i := 0; i < amount; i++ {
|
for i := 0; i < amount; i++ {
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
||||||
switch {
|
r, w, ok := handleRuneError(r, w)
|
||||||
case r == utf8.RuneError:
|
runes = append(runes, r)
|
||||||
|
widths = append(widths, w)
|
||||||
|
offset += w
|
||||||
|
if !ok {
|
||||||
return runes, widths, false
|
return runes, widths, false
|
||||||
default:
|
|
||||||
offset += w
|
|
||||||
runes = append(runes, r)
|
|
||||||
widths = append(widths, w)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return runes, widths, true
|
return runes, widths, true
|
||||||
|
@ -86,3 +66,21 @@ func (p *P) advanceCursor(r rune, w int) {
|
||||||
}
|
}
|
||||||
p.newline = r == '\n'
|
p.newline = r == '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleRuneError is used to normale rune value in case of errors.
|
||||||
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||||
|
// This can however indicate one of two situations:
|
||||||
|
// * w == 0: end of file is reached
|
||||||
|
// * w == 1: invalid UTF character on input
|
||||||
|
// This function lets these two cases return respectively the
|
||||||
|
// package's own EOF or INVALID runes, to make it easy for client
|
||||||
|
// code to distinct between these two cases.
|
||||||
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||||
|
if r == utf8.RuneError {
|
||||||
|
if w == 0 {
|
||||||
|
return EOF, 0, false
|
||||||
|
}
|
||||||
|
return INVALID, w, false
|
||||||
|
}
|
||||||
|
return r, w, true
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,218 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import "unicode/utf8"
|
||||||
|
|
||||||
|
// Not in need of it myself, but nice to have I guess:
|
||||||
|
// - NotFollowedBy
|
||||||
|
// - Discard
|
||||||
|
// - Separated
|
||||||
|
|
||||||
|
type MatchDialog struct {
|
||||||
|
p *P
|
||||||
|
runes []rune
|
||||||
|
widths []int
|
||||||
|
offset int
|
||||||
|
curRune rune
|
||||||
|
curWidth int
|
||||||
|
forked bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MatchDialog) Fork() *MatchDialog {
|
||||||
|
fork := &MatchDialog{
|
||||||
|
p: m.p,
|
||||||
|
offset: m.offset,
|
||||||
|
forked: true,
|
||||||
|
}
|
||||||
|
return fork
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MatchDialog) Join(fork *MatchDialog) bool {
|
||||||
|
if !fork.forked {
|
||||||
|
panic("Cannot join a non-forked MatchDialog")
|
||||||
|
}
|
||||||
|
m.runes = append(m.runes, fork.runes...)
|
||||||
|
m.widths = append(m.widths, fork.widths...)
|
||||||
|
m.offset = fork.offset
|
||||||
|
fork.runes = []rune{}
|
||||||
|
fork.widths = []int{}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||||
|
if m.curRune == utf8.RuneError {
|
||||||
|
panic("Matcher must not call NextRune() after it returned false")
|
||||||
|
}
|
||||||
|
r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
|
||||||
|
m.offset += w
|
||||||
|
m.curRune = r
|
||||||
|
m.curWidth = w
|
||||||
|
m.runes = append(m.runes, r)
|
||||||
|
m.widths = append(m.widths, w)
|
||||||
|
return r, r != EOF && r != INVALID
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matcher is the interface that can be implemented to provide
|
||||||
|
// a matching strategy for the match() function.
|
||||||
|
// A MatchDialog is provided as input. This implements a
|
||||||
|
// specific set of methods that a Matcher needs to retrieve data
|
||||||
|
// from the parser and to report back results.
|
||||||
|
type Matcher interface {
|
||||||
|
Match(*MatchDialog) bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatcherConstructors struct {
|
||||||
|
Any func() MatchAny
|
||||||
|
Rune func(rune rune) MatchRune
|
||||||
|
RuneRange func(start rune, end rune) MatchRuneRange
|
||||||
|
Runes func(runes ...rune) MatchAnyOf
|
||||||
|
AnyOf func(matchers ...Matcher) MatchAnyOf
|
||||||
|
Repeat func(count int, matcher Matcher) MatchRepeat
|
||||||
|
Sequence func(matchers ...Matcher) MatchSequence
|
||||||
|
ZeroOrMore func(matcher Matcher) MatchZeroOrMore
|
||||||
|
OneOrMore func(matcher Matcher) MatchOneOrMore
|
||||||
|
Optional func(matcher Matcher) MatchOptional
|
||||||
|
}
|
||||||
|
|
||||||
|
var C = MatcherConstructors{
|
||||||
|
Any: func() MatchAny {
|
||||||
|
return MatchAny{}
|
||||||
|
},
|
||||||
|
Rune: func(rune rune) MatchRune {
|
||||||
|
return MatchRune{rune}
|
||||||
|
},
|
||||||
|
RuneRange: func(start rune, end rune) MatchRuneRange {
|
||||||
|
return MatchRuneRange{start, end}
|
||||||
|
},
|
||||||
|
Runes: func(runes ...rune) MatchAnyOf {
|
||||||
|
m := make([]Matcher, len(runes))
|
||||||
|
for i, r := range runes {
|
||||||
|
m[i] = MatchRune{r}
|
||||||
|
}
|
||||||
|
return MatchAnyOf{m}
|
||||||
|
},
|
||||||
|
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
||||||
|
return MatchAnyOf{matchers}
|
||||||
|
},
|
||||||
|
Repeat: func(count int, matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{count, matcher}
|
||||||
|
},
|
||||||
|
Sequence: func(matchers ...Matcher) MatchSequence {
|
||||||
|
return MatchSequence{matchers}
|
||||||
|
},
|
||||||
|
OneOrMore: func(matcher Matcher) MatchOneOrMore {
|
||||||
|
return MatchOneOrMore{matcher}
|
||||||
|
},
|
||||||
|
ZeroOrMore: func(matcher Matcher) MatchZeroOrMore {
|
||||||
|
return MatchZeroOrMore{matcher}
|
||||||
|
},
|
||||||
|
Optional: func(matcher Matcher) MatchOptional {
|
||||||
|
return MatchOptional{matcher}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchAny struct{}
|
||||||
|
|
||||||
|
func (c MatchAny) Match(m *MatchDialog) bool {
|
||||||
|
_, ok := m.NextRune()
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchRune struct {
|
||||||
|
match rune
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchRune) Match(m *MatchDialog) bool {
|
||||||
|
r, ok := m.NextRune()
|
||||||
|
return ok && r == c.match
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchRuneRange struct {
|
||||||
|
start rune
|
||||||
|
end rune
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchRuneRange) Match(m *MatchDialog) bool {
|
||||||
|
r, ok := m.NextRune()
|
||||||
|
return ok && r >= c.start && r <= c.end
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchAnyOf struct {
|
||||||
|
matcher []Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
||||||
|
for _, matcher := range c.matcher {
|
||||||
|
mc := m.Fork()
|
||||||
|
if matcher.Match(mc) {
|
||||||
|
return m.Join(mc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchRepeat struct {
|
||||||
|
count int
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
||||||
|
mc := m.Fork()
|
||||||
|
for i := 0; i < c.count; i++ {
|
||||||
|
if !c.matcher.Match(mc) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.Join(mc)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchSequence struct {
|
||||||
|
matchers []Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchSequence) Match(m *MatchDialog) bool {
|
||||||
|
mPart := m.Fork()
|
||||||
|
for _, matcher := range c.matchers {
|
||||||
|
if !matcher.Match(mPart) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.Join(mPart)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchOneOrMore struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
||||||
|
mc := m.Fork()
|
||||||
|
for c.matcher.Match(mc) {
|
||||||
|
m.Join(mc)
|
||||||
|
}
|
||||||
|
return len(m.runes) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchZeroOrMore struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
||||||
|
mc := m.Fork()
|
||||||
|
for c.matcher.Match(mc) {
|
||||||
|
m.Join(mc)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchOptional struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchOptional) Match(m *MatchDialog) bool {
|
||||||
|
mc := m.Fork()
|
||||||
|
if c.matcher.Match(mc) {
|
||||||
|
m.Join(mc)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
|
@ -0,0 +1,260 @@
|
||||||
|
package parsekit_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
p "github.com/mmakaay/toml/parsekit"
|
||||||
|
)
|
||||||
|
|
||||||
|
var c = p.C
|
||||||
|
|
||||||
|
const TestItem p.ItemType = 1
|
||||||
|
|
||||||
|
func newParser(input string, matcher p.Matcher) *p.P {
|
||||||
|
stateFn := func(p *p.P) {
|
||||||
|
if p.On(matcher).Accept() {
|
||||||
|
p.EmitLiteral(TestItem)
|
||||||
|
p.Repeat()
|
||||||
|
} else {
|
||||||
|
p.UnexpectedInput("MATCH")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p.New(input, stateFn)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchAny(t *testing.T) {
|
||||||
|
p := newParser("o", c.Any())
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Type != TestItem {
|
||||||
|
t.Error("Parser item type not expected TestTitem")
|
||||||
|
}
|
||||||
|
if r.Value != "o" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchAny_AtEndOfFile(t *testing.T) {
|
||||||
|
p := newParser("", c.Any())
|
||||||
|
_, err, ok := p.Next()
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Parsing unexpectedly succeeded")
|
||||||
|
}
|
||||||
|
expected := "unexpected end of file (expected MATCH)"
|
||||||
|
if err.Error() != expected {
|
||||||
|
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
|
||||||
|
p := newParser("\xcd", c.Any())
|
||||||
|
_, err, ok := p.Next()
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Parsing unexpectedly succeeded")
|
||||||
|
}
|
||||||
|
expected := "invalid UTF8 character in input (expected MATCH)"
|
||||||
|
if err.Error() != expected {
|
||||||
|
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRune(t *testing.T) {
|
||||||
|
p := newParser("xxx", c.Rune('x'))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Type != TestItem {
|
||||||
|
t.Error("Parser item type not expected TestTitem")
|
||||||
|
}
|
||||||
|
if r.Value != "x" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRune_OnMismatch(t *testing.T) {
|
||||||
|
p := newParser("x ", c.Rune(' '))
|
||||||
|
_, err, ok := p.Next()
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Parsing did not fail unexpectedly")
|
||||||
|
}
|
||||||
|
expected := "unexpected character 'x' (expected MATCH)"
|
||||||
|
if err.Error() != expected {
|
||||||
|
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRuneRange(t *testing.T) {
|
||||||
|
m := c.RuneRange('b', 'y')
|
||||||
|
s := "mnopqrstuvwxybcdefghijkl"
|
||||||
|
p := newParser(s, m)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if s[i] != r.Value[0] {
|
||||||
|
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, _, ok := newParser("a", m).Next(); ok {
|
||||||
|
t.Fatalf("Unexpected parse success for input 'a'")
|
||||||
|
}
|
||||||
|
if _, _, ok := newParser("z", m).Next(); ok {
|
||||||
|
t.Fatalf("Unexpected parse success for input 'z'")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRunes(t *testing.T) {
|
||||||
|
m := c.Runes('+', '-', '*', '/')
|
||||||
|
s := "-+/*+++"
|
||||||
|
p := newParser(s, m)
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if s[i] != r.Value[0] {
|
||||||
|
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, _, ok := newParser("^", m).Next(); ok {
|
||||||
|
t.Fatalf("Unexpected parse success for input '^'")
|
||||||
|
}
|
||||||
|
if _, _, ok := newParser("x", m).Next(); ok {
|
||||||
|
t.Fatalf("Unexpected parse success for input 'x'")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchAnyOf(t *testing.T) {
|
||||||
|
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Type != TestItem {
|
||||||
|
t.Error("Parser item type not expected TestTitem")
|
||||||
|
}
|
||||||
|
if r.Value != "a" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||||
|
}
|
||||||
|
|
||||||
|
r, err, ok = p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Type != TestItem {
|
||||||
|
t.Error("Parser item type not expected TestTitem")
|
||||||
|
}
|
||||||
|
if r.Value != "b" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRepeat(t *testing.T) {
|
||||||
|
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "xxxx" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRepeat_Mismatch(t *testing.T) {
|
||||||
|
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||||
|
_, err, ok := p.Next()
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Parsing did not fail unexpectedly")
|
||||||
|
}
|
||||||
|
expected := "unexpected character 'x' (expected MATCH)"
|
||||||
|
if err.Error() != expected {
|
||||||
|
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchOneOrMore(t *testing.T) {
|
||||||
|
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "xxxxxxxx" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchSequence(t *testing.T) {
|
||||||
|
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "10" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
|
||||||
|
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "101010" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSequence_WithRepeatedRunes(t *testing.T) {
|
||||||
|
whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
|
||||||
|
equal := c.Rune('=')
|
||||||
|
assignment := c.Sequence(whitespace, equal, whitespace)
|
||||||
|
p := newParser(" == 10", assignment)
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != " =" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchOptional(t *testing.T) {
|
||||||
|
p := newParser("xyz", c.Optional(c.Rune('x')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "x" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||||
|
}
|
||||||
|
|
||||||
|
p = newParser("xyz", c.Optional(c.Rune('y')))
|
||||||
|
r, err, ok = p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMixAndMatch(t *testing.T) {
|
||||||
|
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||||
|
backslash := c.Rune('\\')
|
||||||
|
x := c.Rune('x')
|
||||||
|
hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
|
||||||
|
|
||||||
|
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != `\x9a\x01\xF0\xfC` {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
|
||||||
|
}
|
||||||
|
}
|
|
@ -64,6 +64,13 @@ func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
|
||||||
return runes, widths, false
|
return runes, widths, false
|
||||||
}
|
}
|
||||||
switch pattern := pattern.(type) {
|
switch pattern := pattern.(type) {
|
||||||
|
case Matcher:
|
||||||
|
m := &MatchDialog{p: p}
|
||||||
|
if pattern.Match(m) {
|
||||||
|
return m.runes, m.widths, true
|
||||||
|
} else {
|
||||||
|
return m.runes, m.widths, false
|
||||||
|
}
|
||||||
case []interface{}:
|
case []interface{}:
|
||||||
rs, ws, matched := p.match(offset, pattern...)
|
rs, ws, matched := p.match(offset, pattern...)
|
||||||
for i, r := range rs {
|
for i, r := range rs {
|
||||||
|
@ -98,17 +105,6 @@ func (p *P) Upcoming(patterns ...interface{}) bool {
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// AcceptAny adds the next rune from the input to the string buffer.
|
|
||||||
// If no rune could be read (end of file or invalid UTF8 data),
|
|
||||||
// then false is returned.
|
|
||||||
func (p *P) AcceptAny() bool {
|
|
||||||
if r, ok := p.next(); ok {
|
|
||||||
p.buffer.writeRune(r)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
type action struct {
|
type action struct {
|
||||||
p *P
|
p *P
|
||||||
runes []rune
|
runes []rune
|
||||||
|
@ -129,6 +125,10 @@ func (a *action) Accept() bool {
|
||||||
func (a *action) Skip() bool {
|
func (a *action) Skip() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
for i, r := range a.runes {
|
for i, r := range a.runes {
|
||||||
|
type C struct {
|
||||||
|
Rune MatchRune
|
||||||
|
}
|
||||||
|
|
||||||
a.p.advanceCursor(r, a.widths[i])
|
a.p.advanceCursor(r, a.widths[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -159,20 +159,10 @@ func (p *P) On(patterns ...interface{}) *action {
|
||||||
// AcceptMatching adds the next runes to the string buffer, but only
|
// AcceptMatching adds the next runes to the string buffer, but only
|
||||||
// if the upcoming runes satisfy the provided patterns.
|
// if the upcoming runes satisfy the provided patterns.
|
||||||
// When runes were added then true is returned, false otherwise.
|
// When runes were added then true is returned, false otherwise.
|
||||||
func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
// TODO not needed anymore
|
||||||
return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
// func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
||||||
}
|
// return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
||||||
|
// }
|
||||||
// AcceptConsecutive adds consecutive runes from the input to the string
|
|
||||||
// buffer, as long as they exist in the pattern.
|
|
||||||
// If any runes were added then true is returned, false otherwise.
|
|
||||||
func (p *P) AcceptConsecutive(pattern string) bool {
|
|
||||||
accepted := false
|
|
||||||
for p.AcceptMatching(pattern) {
|
|
||||||
accepted = true
|
|
||||||
}
|
|
||||||
return accepted
|
|
||||||
}
|
|
||||||
|
|
||||||
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
||||||
// Returns true when one or more runes were skipped.
|
// Returns true when one or more runes were skipped.
|
||||||
|
@ -185,13 +175,3 @@ func (p *P) SkipMatching(patterns ...interface{}) bool {
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// SkipConsecutive skips consecutive runes from the provided pattern.
|
|
||||||
// Returns true when one or more runes were skipped.
|
|
||||||
func (p *P) SkipConsecutive(pattern string) bool {
|
|
||||||
didSkip := false
|
|
||||||
for p.SkipMatching(pattern) {
|
|
||||||
didSkip = true
|
|
||||||
}
|
|
||||||
return didSkip
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
// P holds the internal state of the parser.
|
// P holds the internal state of the parser.
|
||||||
type P struct {
|
type P struct {
|
||||||
state StateFn // the function that handles the current state
|
state StateFn // the function that handles the current state
|
||||||
|
@ -50,3 +54,14 @@ type Error struct {
|
||||||
func (err *Error) Error() string {
|
func (err *Error) Error() string {
|
||||||
return err.Message
|
return err.Message
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EOF is a special rune, which is used to indicate an end of file when
|
||||||
|
// reading a character from the input.
|
||||||
|
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||||
|
// say 'I now expect the end of the file' is using something like:
|
||||||
|
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||||
|
const EOF rune = -1
|
||||||
|
|
||||||
|
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||||
|
// rune on the input.
|
||||||
|
const INVALID rune = utf8.RuneError
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/mmakaay/toml/parsekit"
|
"github.com/mmakaay/toml/parsekit"
|
||||||
lexer "github.com/mmakaay/toml/parser"
|
"github.com/mmakaay/toml/parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
type statesT struct {
|
type statesT struct {
|
||||||
|
@ -23,7 +23,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func runStatesT(t *testing.T, c statesT) {
|
func runStatesT(t *testing.T, c statesT) {
|
||||||
l, err := lexer.NewParser(c.in).ToArray()
|
l, err := parser.NewParser(c.in).ToArray()
|
||||||
if err == nil && c.err != "" {
|
if err == nil && c.err != "" {
|
||||||
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
||||||
}
|
}
|
||||||
|
@ -36,12 +36,12 @@ func runStatesT(t *testing.T, c statesT) {
|
||||||
switch expected := c.out.(type) {
|
switch expected := c.out.(type) {
|
||||||
case []string:
|
case []string:
|
||||||
if len(expected) != len(l) {
|
if len(expected) != len(l) {
|
||||||
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
||||||
}
|
}
|
||||||
for i, e := range expected {
|
for i, e := range expected {
|
||||||
v := ParserItemToString(l[i])
|
v := ParserItemToString(l[i])
|
||||||
if v != e {
|
if v != e {
|
||||||
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
|
t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case string:
|
case string:
|
||||||
|
@ -51,7 +51,7 @@ func runStatesT(t *testing.T, c statesT) {
|
||||||
}
|
}
|
||||||
actual := strings.Join(a, "")
|
actual := strings.Join(a, "")
|
||||||
if actual != expected {
|
if actual != expected {
|
||||||
t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
|
t.Errorf("[%s] Unexpected parser output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,15 +59,15 @@ func runStatesT(t *testing.T, c statesT) {
|
||||||
// ParserItemToString returns a string representation of the parsekit.Item.
|
// ParserItemToString returns a string representation of the parsekit.Item.
|
||||||
func ParserItemToString(i parsekit.Item) string {
|
func ParserItemToString(i parsekit.Item) string {
|
||||||
switch i.Type {
|
switch i.Type {
|
||||||
case lexer.ItemComment:
|
case parser.ItemComment:
|
||||||
return fmt.Sprintf("#(%s)", i.Value)
|
return fmt.Sprintf("#(%s)", i.Value)
|
||||||
case lexer.ItemKey:
|
case parser.ItemKey:
|
||||||
return fmt.Sprintf("[%s]", i.Value)
|
return fmt.Sprintf("[%s]", i.Value)
|
||||||
case lexer.ItemString:
|
case parser.ItemString:
|
||||||
return fmt.Sprintf("STR(%s)", i.Value)
|
return fmt.Sprintf("STR(%s)", i.Value)
|
||||||
case lexer.ItemKeyDot:
|
case parser.ItemKeyDot:
|
||||||
return "."
|
return "."
|
||||||
case lexer.ItemAssignment:
|
case parser.ItemAssignment:
|
||||||
return "="
|
return "="
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
|
panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
|
||||||
|
|
|
@ -11,40 +11,28 @@ const (
|
||||||
ItemString // A value of type string
|
ItemString // A value of type string
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
whitespace string = " \t"
|
|
||||||
carriageReturn string = "\r"
|
|
||||||
newline string = "\n"
|
|
||||||
hash string = "#"
|
|
||||||
equal string = "="
|
|
||||||
lower string = "abcdefghijklmnopqrstuvwxyz"
|
|
||||||
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
||||||
digits string = "0123456789"
|
|
||||||
hex string = digits + "abcdefABCDEF"
|
|
||||||
dot string = "."
|
|
||||||
underscore string = "_"
|
|
||||||
dash string = "-"
|
|
||||||
singleQuote string = "'"
|
|
||||||
doubleQuote string = "\""
|
|
||||||
backslash string = "\\"
|
|
||||||
quoteChars string = singleQuote + doubleQuote
|
|
||||||
bareKeyChars string = lower + upper + digits + underscore + dash
|
|
||||||
startOfKey string = bareKeyChars + quoteChars
|
|
||||||
validEscapeChars string = `btnfr"\`
|
|
||||||
mustBeEscaped string = "" +
|
|
||||||
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
|
||||||
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
|
||||||
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
|
||||||
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
|
||||||
"\u007F"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
keySeparatorDot = []interface{}{whitespace, dot, whitespace}
|
c = parsekit.C
|
||||||
doubleQuote3 = []interface{}{doubleQuote, doubleQuote, doubleQuote}
|
space = c.Rune(' ')
|
||||||
hex4 = []interface{}{hex, hex, hex, hex}
|
tab = c.Rune('\t')
|
||||||
shortUtf8Match = []interface{}{backslash, 'u', hex4}
|
carriageReturn = c.Rune('\r')
|
||||||
longUtf8Match = []interface{}{backslash, 'U', hex4, hex4}
|
lineFeed = c.Rune('\n')
|
||||||
|
hash = c.Rune('#')
|
||||||
|
underscore = c.Rune('_')
|
||||||
|
dash = c.Rune('-')
|
||||||
|
equal = c.Rune('=')
|
||||||
|
dot = c.Rune('.')
|
||||||
|
singleQuote = c.Rune('\'')
|
||||||
|
doubleQuote = c.Rune('"')
|
||||||
|
any = c.Any()
|
||||||
|
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
||||||
|
backslash = c.Rune('\\')
|
||||||
|
lower = c.RuneRange('a', 'z')
|
||||||
|
upper = c.RuneRange('A', 'Z')
|
||||||
|
digit = c.RuneRange('0', '9')
|
||||||
|
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
||||||
|
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
||||||
|
optionalWhitespace = c.Optional(whitespace)
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewParser creates a new parser, using the provided input string
|
// NewParser creates a new parser, using the provided input string
|
||||||
|
|
|
@ -6,6 +6,10 @@ import (
|
||||||
"github.com/mmakaay/toml/parser"
|
"github.com/mmakaay/toml/parser"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestEmptyInput(t *testing.T) {
|
||||||
|
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||||
|
}
|
||||||
|
|
||||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||||
_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
||||||
t.Logf("Got error: %s", err.Error())
|
t.Logf("Got error: %s", err.Error())
|
||||||
|
@ -17,18 +21,13 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEmptyInput(t *testing.T) {
|
|
||||||
runStatesT(t, statesT{"empty string", "", "", ""})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInvalidUtf8Data(t *testing.T) {
|
func TestInvalidUtf8Data(t *testing.T) {
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"inside comment", "# \xbc", "", "invalid UTF8 character"},
|
{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
|
||||||
{"bare key 1", "\xbc", "", "invalid UTF8 character"},
|
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
|
||||||
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
|
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
|
||||||
{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
|
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
|
||||||
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
|
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
|
||||||
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,7 @@ import (
|
||||||
|
|
||||||
// A '#' hash symbol marks the rest of the line as a comment.
|
// A '#' hash symbol marks the rest of the line as a comment.
|
||||||
func startComment(p *parsekit.P) {
|
func startComment(p *parsekit.P) {
|
||||||
p.SkipConsecutive(hash)
|
p.On(c.OneOrMore(hash)).Skip()
|
||||||
p.RouteTo(commentContents)
|
p.RouteTo(commentContents)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,8 +16,9 @@ func commentContents(p *parsekit.P) {
|
||||||
case p.AtEndOfLine():
|
case p.AtEndOfLine():
|
||||||
p.EmitLiteralTrim(ItemComment)
|
p.EmitLiteralTrim(ItemComment)
|
||||||
p.RouteReturn()
|
p.RouteReturn()
|
||||||
default:
|
case p.On(any).Accept():
|
||||||
p.AcceptAny()
|
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
|
default:
|
||||||
|
p.UnexpectedInput("comment contents")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,65 +0,0 @@
|
||||||
package parser
|
|
||||||
|
|
||||||
import "github.com/mmakaay/toml/parsekit"
|
|
||||||
|
|
||||||
// The primary building block of a TOML document is the key/value pair.
|
|
||||||
func startKeyValuePair(p *parsekit.P) {
|
|
||||||
switch {
|
|
||||||
case p.On(whitespace + carriageReturn + newline).Skip():
|
|
||||||
p.Repeat()
|
|
||||||
case p.On(hash).Stay():
|
|
||||||
p.RouteTo(startComment).ThenReturnHere()
|
|
||||||
case p.On(startOfKey).RouteTo(startKey):
|
|
||||||
default:
|
|
||||||
p.RouteTo(endOfFile)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// A key may be either bare, quoted or dotted.
|
|
||||||
func startKey(p *parsekit.P) {
|
|
||||||
switch {
|
|
||||||
case p.On(bareKeyChars).RouteTo(startBareKey):
|
|
||||||
default:
|
|
||||||
p.UnexpectedInput("a valid key name")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
|
||||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
|
||||||
// keys are allowed to be composed of only ASCII digits,
|
|
||||||
// e.g. 1234, but are always interpreted as strings.
|
|
||||||
func startBareKey(p *parsekit.P) {
|
|
||||||
p.AcceptConsecutive(bareKeyChars) // TODO make a plan for adding this to After()
|
|
||||||
p.EmitLiteral(ItemKey)
|
|
||||||
p.RouteTo(endOfKeyOrDot)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
|
||||||
// This allows for grouping similar properties together:
|
|
||||||
func endOfKeyOrDot(p *parsekit.P) {
|
|
||||||
// Whitespace around dot-separated parts is ignored, however,
|
|
||||||
// best practice is to not use any extraneous whitespace.
|
|
||||||
p.SkipConsecutive(whitespace)
|
|
||||||
if p.On(dot).Accept() {
|
|
||||||
p.SkipConsecutive(whitespace)
|
|
||||||
p.EmitLiteral(ItemKeyDot)
|
|
||||||
p.RouteTo(startKey)
|
|
||||||
} else {
|
|
||||||
p.RouteTo(startKeyAssignment)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keys are on the left of the equals sign and values are on the right.
|
|
||||||
// Whitespace is ignored around key names and values. The key, equals
|
|
||||||
// sign, and value must be on the same line (though some values can
|
|
||||||
// be broken over multiple lines).
|
|
||||||
func startKeyAssignment(p *parsekit.P) {
|
|
||||||
p.SkipConsecutive(whitespace)
|
|
||||||
if p.On(equal).Accept() {
|
|
||||||
p.EmitLiteral(ItemAssignment)
|
|
||||||
p.SkipConsecutive(whitespace)
|
|
||||||
p.RouteTo(startValue)
|
|
||||||
} else {
|
|
||||||
p.UnexpectedInput("a value assignment")
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import "github.com/mmakaay/toml/parsekit"
|
||||||
|
|
||||||
|
// The primary building block of a TOML document is the key/value pair.
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Keys are on the left of the equals sign and values are on the right.
|
||||||
|
// Whitespace is ignored around key names and values. The key, equals
|
||||||
|
// sign, and value must be on the same line (though some values can be
|
||||||
|
// broken over multiple lines).
|
||||||
|
keyAssignment = c.Sequence(optionalWhitespace, equal, optionalWhitespace)
|
||||||
|
|
||||||
|
// A key may be either bare, quoted or dotted.
|
||||||
|
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||||
|
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||||
|
// keys are allowed to be composed of only ASCII digits,
|
||||||
|
// e.g. 1234, but are always interpreted as strings.
|
||||||
|
bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
|
||||||
|
bareKey = c.OneOrMore(bareKeyRune)
|
||||||
|
|
||||||
|
// Quoted keys follow the exact same rules as either basic
|
||||||
|
// strings or literal strings and allow you to use a much broader
|
||||||
|
// set of key names. Best practice is to use bare keys except
|
||||||
|
// when absolutely necessary.
|
||||||
|
// A bare key must be non-empty, but an empty quoted key is
|
||||||
|
// allowed (though discouraged).
|
||||||
|
startOfKey = c.AnyOf(bareKeyRune, anyQuote)
|
||||||
|
|
||||||
|
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
||||||
|
// This allows for grouping similar properties together.
|
||||||
|
// Whitespace around dot-separated parts is ignored, however, best
|
||||||
|
// practice is to not use any extraneous whitespace.
|
||||||
|
keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
|
||||||
|
)
|
||||||
|
|
||||||
|
func startKeyValuePair(p *parsekit.P) {
|
||||||
|
p.On(whitespaceOrNewlines).Skip()
|
||||||
|
switch {
|
||||||
|
case p.On(hash).Stay():
|
||||||
|
p.RouteTo(startComment).ThenReturnHere()
|
||||||
|
case p.On(startOfKey).RouteTo(startKey):
|
||||||
|
default:
|
||||||
|
p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func startKey(p *parsekit.P) {
|
||||||
|
switch {
|
||||||
|
case p.On(bareKeyRune).RouteTo(startBareKey):
|
||||||
|
default:
|
||||||
|
p.UnexpectedInput("a valid key name")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func startBareKey(p *parsekit.P) {
|
||||||
|
p.On(bareKey).Accept()
|
||||||
|
p.EmitLiteral(ItemKey)
|
||||||
|
p.RouteTo(endOfKeyOrDot)
|
||||||
|
}
|
||||||
|
|
||||||
|
func endOfKeyOrDot(p *parsekit.P) {
|
||||||
|
if p.On(keySeparatordDot).Skip() {
|
||||||
|
p.Emit(ItemKeyDot, ".")
|
||||||
|
p.RouteTo(startKey)
|
||||||
|
} else {
|
||||||
|
p.RouteTo(startKeyAssignment)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func startKeyAssignment(p *parsekit.P) {
|
||||||
|
if p.On(keyAssignment).Skip() {
|
||||||
|
p.Emit(ItemAssignment, "=")
|
||||||
|
p.RouteTo(startValue)
|
||||||
|
} else {
|
||||||
|
p.UnexpectedInput("a value assignment")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||||
|
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
||||||
|
func startValue(p *parsekit.P) {
|
||||||
|
switch {
|
||||||
|
case p.On(anyQuote).RouteTo(startString):
|
||||||
|
default:
|
||||||
|
p.UnexpectedInput("a value")
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,9 +5,9 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestKeyWithoutAssignment(t *testing.T) {
|
func TestKeyWithoutAssignment(t *testing.T) {
|
||||||
err := "unexpected end of file"
|
err := "unexpected end of file (expected a value assignment)"
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"bare with whitespace", " a ", "[a]", err},
|
{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"},
|
||||||
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
|
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
|
||||||
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
|
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
|
||||||
{"bare numbers", "0123456789", "[0123456789]", err},
|
{"bare numbers", "0123456789", "[0123456789]", err},
|
||||||
|
@ -18,15 +18,14 @@ func TestKeyWithoutAssignment(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDottedKey(t *testing.T) {
|
func TestDottedKey(t *testing.T) {
|
||||||
err := "unexpected end of file"
|
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"bare dotted", "a._.c", "[a].[_].[c]", err},
|
{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"},
|
||||||
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
||||||
err := "unexpected end of file"
|
err := "unexpected end of file (expected a value)"
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"bare", "a=", "[a]=", err},
|
{"bare", "a=", "[a]=", err},
|
||||||
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
|
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
|
|
@ -2,10 +2,36 @@ package parser
|
||||||
|
|
||||||
import "github.com/mmakaay/toml/parsekit"
|
import "github.com/mmakaay/toml/parsekit"
|
||||||
|
|
||||||
// There are four ways to express strings: basic, multi-line basic, literal,
|
var (
|
||||||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
// There are four ways to express strings: basic, multi-line basic, literal,
|
||||||
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||||
// * Basic strings are surrounded by quotation marks.
|
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
||||||
|
// * Basic strings are surrounded by quotation marks.
|
||||||
|
doubleQuote3 = c.Repeat(3, doubleQuote)
|
||||||
|
|
||||||
|
// Any Unicode character may be used except those that must be escaped:
|
||||||
|
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||||
|
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
|
||||||
|
|
||||||
|
// For convenience, some popular characters have a compact escape sequence.
|
||||||
|
//
|
||||||
|
// \b - backspace (U+0008)
|
||||||
|
// \t - tab (U+0009)
|
||||||
|
// \n - linefeed (U+000A)
|
||||||
|
// \f - form feed (U+000C)
|
||||||
|
// \r - carriage return (U+000D)
|
||||||
|
// \" - quote (U+0022)
|
||||||
|
// \\ - backslash (U+005C)
|
||||||
|
// \uXXXX - unicode (U+XXXX)
|
||||||
|
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||||
|
validEscapeChar = c.AnyOf(c.Runes('b', 't', 'n', 'f', 'r'), doubleQuote, backslash)
|
||||||
|
shortEscape = c.Sequence(backslash, validEscapeChar)
|
||||||
|
hex = c.AnyOf(digit, c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||||
|
shortUtf8Escape = c.Sequence(backslash, c.Rune('u'), c.Repeat(4, hex))
|
||||||
|
longUtf8Escape = c.Sequence(backslash, c.Rune('U'), c.Repeat(8, hex))
|
||||||
|
validEscape = c.AnyOf(shortEscape, shortUtf8Escape, longUtf8Escape)
|
||||||
|
)
|
||||||
|
|
||||||
func startString(p *parsekit.P) {
|
func startString(p *parsekit.P) {
|
||||||
switch {
|
switch {
|
||||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
||||||
|
@ -15,36 +41,21 @@ func startString(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// For convenience, some popular characters have a compact escape sequence.
|
|
||||||
//
|
|
||||||
// \b - backspace (U+0008)
|
|
||||||
// \t - tab (U+0009)
|
|
||||||
// \n - linefeed (U+000A)
|
|
||||||
// \f - form feed (U+000C)
|
|
||||||
// \r - carriage return (U+000D)
|
|
||||||
// \" - quote (U+0022)
|
|
||||||
// \\ - backslash (U+005C)
|
|
||||||
// \uXXXX - unicode (U+XXXX)
|
|
||||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
|
||||||
//
|
|
||||||
// Any Unicode character may be used except those that must be escaped:
|
|
||||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
|
||||||
func parseBasicString(p *parsekit.P) {
|
func parseBasicString(p *parsekit.P) {
|
||||||
switch {
|
switch {
|
||||||
case p.AtEndOfFile():
|
case p.On(parsekit.EOF).Stay():
|
||||||
p.UnexpectedEndOfFile("basic string token")
|
p.UnexpectedEndOfFile("basic string token")
|
||||||
case p.On(backslash, validEscapeChars).Accept() ||
|
case p.On(validEscape).Accept():
|
||||||
p.On(shortUtf8Match).Accept() ||
|
|
||||||
p.On(longUtf8Match).Accept():
|
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
case p.On(mustBeEscaped).Stay():
|
case p.On(charThatMustBeEscaped).Stay():
|
||||||
r, _, _ := p.Match(mustBeEscaped)
|
r, _, _ := p.Match(charThatMustBeEscaped)
|
||||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||||
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
||||||
p.RouteReturn()
|
p.RouteReturn()
|
||||||
default:
|
case p.On(any).Accept():
|
||||||
p.AcceptAny()
|
|
||||||
p.Repeat()
|
p.Repeat()
|
||||||
|
default:
|
||||||
|
p.UnexpectedInput("string contents")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,7 +80,7 @@ func basicStringSpecifics(p *parsekit.P) {
|
||||||
case p.On(backslash).Stay():
|
case p.On(backslash).Stay():
|
||||||
p.EmitError("Invalid escape sequence")
|
p.EmitError("Invalid escape sequence")
|
||||||
default:
|
default:
|
||||||
p.RouteTo(startBasicString)
|
panic("String parsing should not have ended up here")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,8 +33,8 @@ func TestEmptyBasicString(t *testing.T) {
|
||||||
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
|
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
|
||||||
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
|
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
|
||||||
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
|
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
|
||||||
{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
||||||
{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
package parser
|
|
||||||
|
|
||||||
import "github.com/mmakaay/toml/parsekit"
|
|
||||||
|
|
||||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
|
||||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
|
|
||||||
func startValue(p *parsekit.P) {
|
|
||||||
p.SkipConsecutive(whitespace)
|
|
||||||
if p.Upcoming(quoteChars) {
|
|
||||||
p.RouteTo(startString)
|
|
||||||
} else {
|
|
||||||
p.UnexpectedInput("a value")
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue