Phew, that was quite the update. I've now got a working implementation of a parser-combinator-like matching API, which saves us from having to specify everything in state functions. Those are way too low-level for a lot of things. I'd rather use parser-combinator-style definitions for chunks of the input and keep the state functions for higher-level document structure parsing.
This commit is contained in:
parent
55e23874f7
commit
e3e408dfdb
|
@ -3,6 +3,7 @@ package parsekit
|
|||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Emit passes a Parser item to the client, including the provided string.
|
||||
|
@ -51,8 +52,16 @@ func (p *P) EmitError(format string, args ...interface{}) {
|
|||
func (p *P) UnexpectedInput(expected string) {
|
||||
// next() takes care of error messages in cases where ok == false.
|
||||
// Therefore, we only provide an error message for the ok case here.
|
||||
if r, ok := p.next(); ok {
|
||||
p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
||||
r, _, ok := p.peek(0)
|
||||
switch {
|
||||
case ok:
|
||||
p.EmitError("unexpected character %q (expected %s)", r, expected)
|
||||
case r == EOF:
|
||||
p.EmitError("unexpected end of file (expected %s)", expected)
|
||||
case r == utf8.RuneError:
|
||||
p.EmitError("invalid UTF8 character in input (expected %s)", expected)
|
||||
default:
|
||||
panic("Unhandled output from peek()")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -4,32 +4,13 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// next returns the next rune from the input and a boolean indicating if
|
||||
// reading the input was successful.
|
||||
// When the end of input is reached, or an invalid UTF8 character is
|
||||
// read, then false is returned. Both are considered error cases,
|
||||
// and for that reason these automatically emit an error to the client.
|
||||
func (p *P) next() (rune, bool) {
|
||||
r, w, ok := p.peek(0)
|
||||
if ok {
|
||||
p.advanceCursor(r, w)
|
||||
return r, true
|
||||
}
|
||||
if r == utf8.RuneError && w == 0 {
|
||||
p.EmitError("unexpected end of file")
|
||||
} else {
|
||||
p.EmitError("invalid UTF8 character")
|
||||
}
|
||||
return r, false
|
||||
}
|
||||
|
||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
||||
// Returns the rune, its width in bytes and a boolean.
|
||||
// The boolean will be false in case no upcoming rune can be peeked
|
||||
// (end of data or invalid UTF8 character).
|
||||
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
||||
peeked, width := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
||||
return peeked, width, peeked != utf8.RuneError
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
||||
return handleRuneError(r, w)
|
||||
}
|
||||
|
||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
||||
|
@ -43,13 +24,12 @@ func (p *P) peekMulti(amount int) ([]rune, []int, bool) {
|
|||
offset := 0
|
||||
for i := 0; i < amount; i++ {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offset:])
|
||||
switch {
|
||||
case r == utf8.RuneError:
|
||||
r, w, ok := handleRuneError(r, w)
|
||||
runes = append(runes, r)
|
||||
widths = append(widths, w)
|
||||
offset += w
|
||||
if !ok {
|
||||
return runes, widths, false
|
||||
default:
|
||||
offset += w
|
||||
runes = append(runes, r)
|
||||
widths = append(widths, w)
|
||||
}
|
||||
}
|
||||
return runes, widths, true
|
||||
|
@ -86,3 +66,21 @@ func (p *P) advanceCursor(r rune, w int) {
|
|||
}
|
||||
p.newline = r == '\n'
|
||||
}
|
||||
|
||||
// handleRuneError is used to normale rune value in case of errors.
|
||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||
// This can however indicate one of two situations:
|
||||
// * w == 0: end of file is reached
|
||||
// * w == 1: invalid UTF character on input
|
||||
// This function lets these two cases return respectively the
|
||||
// package's own EOF or INVALID runes, to make it easy for client
|
||||
// code to distinct between these two cases.
|
||||
func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||
if r == utf8.RuneError {
|
||||
if w == 0 {
|
||||
return EOF, 0, false
|
||||
}
|
||||
return INVALID, w, false
|
||||
}
|
||||
return r, w, true
|
||||
}
|
||||
|
|
|
@ -0,0 +1,218 @@
|
|||
package parsekit
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Not in need of it myself, but nice to have I guess:
|
||||
// - NotFollowedBy
|
||||
// - Discard
|
||||
// - Separated
|
||||
|
||||
type MatchDialog struct {
|
||||
p *P
|
||||
runes []rune
|
||||
widths []int
|
||||
offset int
|
||||
curRune rune
|
||||
curWidth int
|
||||
forked bool
|
||||
}
|
||||
|
||||
func (m *MatchDialog) Fork() *MatchDialog {
|
||||
fork := &MatchDialog{
|
||||
p: m.p,
|
||||
offset: m.offset,
|
||||
forked: true,
|
||||
}
|
||||
return fork
|
||||
}
|
||||
|
||||
func (m *MatchDialog) Join(fork *MatchDialog) bool {
|
||||
if !fork.forked {
|
||||
panic("Cannot join a non-forked MatchDialog")
|
||||
}
|
||||
m.runes = append(m.runes, fork.runes...)
|
||||
m.widths = append(m.widths, fork.widths...)
|
||||
m.offset = fork.offset
|
||||
fork.runes = []rune{}
|
||||
fork.widths = []int{}
|
||||
return true
|
||||
}
|
||||
|
||||
func (m *MatchDialog) NextRune() (rune, bool) {
|
||||
if m.curRune == utf8.RuneError {
|
||||
panic("Matcher must not call NextRune() after it returned false")
|
||||
}
|
||||
r, w := utf8.DecodeRuneInString(m.p.input[m.p.pos+m.offset:])
|
||||
m.offset += w
|
||||
m.curRune = r
|
||||
m.curWidth = w
|
||||
m.runes = append(m.runes, r)
|
||||
m.widths = append(m.widths, w)
|
||||
return r, r != EOF && r != INVALID
|
||||
}
|
||||
|
||||
// Matcher is the interface that can be implemented to provide
|
||||
// a matching stategy for the match() function.
|
||||
// A MatchDialog is provided as input. This implements a
|
||||
// specific set of methods that a Matcher needs to retrieve data
|
||||
// from the parser and to report back results.
|
||||
type Matcher interface {
|
||||
Match(*MatchDialog) bool
|
||||
}
|
||||
|
||||
type MatcherConstructors struct {
|
||||
Any func() MatchAny
|
||||
Rune func(rune rune) MatchRune
|
||||
RuneRange func(start rune, end rune) MatchRuneRange
|
||||
Runes func(runes ...rune) MatchAnyOf
|
||||
AnyOf func(matchers ...Matcher) MatchAnyOf
|
||||
Repeat func(count int, matcher Matcher) MatchRepeat
|
||||
Sequence func(matchers ...Matcher) MatchSequence
|
||||
ZeroOrMore func(matcher Matcher) MatchZeroOrMore
|
||||
OneOrMore func(matcher Matcher) MatchOneOrMore
|
||||
Optional func(matcher Matcher) MatchOptional
|
||||
}
|
||||
|
||||
var C = MatcherConstructors{
|
||||
Any: func() MatchAny {
|
||||
return MatchAny{}
|
||||
},
|
||||
Rune: func(rune rune) MatchRune {
|
||||
return MatchRune{rune}
|
||||
},
|
||||
RuneRange: func(start rune, end rune) MatchRuneRange {
|
||||
return MatchRuneRange{start, end}
|
||||
},
|
||||
Runes: func(runes ...rune) MatchAnyOf {
|
||||
m := make([]Matcher, len(runes))
|
||||
for i, r := range runes {
|
||||
m[i] = MatchRune{r}
|
||||
}
|
||||
return MatchAnyOf{m}
|
||||
},
|
||||
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
||||
return MatchAnyOf{matchers}
|
||||
},
|
||||
Repeat: func(count int, matcher Matcher) MatchRepeat {
|
||||
return MatchRepeat{count, matcher}
|
||||
},
|
||||
Sequence: func(matchers ...Matcher) MatchSequence {
|
||||
return MatchSequence{matchers}
|
||||
},
|
||||
OneOrMore: func(matcher Matcher) MatchOneOrMore {
|
||||
return MatchOneOrMore{matcher}
|
||||
},
|
||||
ZeroOrMore: func(matcher Matcher) MatchZeroOrMore {
|
||||
return MatchZeroOrMore{matcher}
|
||||
},
|
||||
Optional: func(matcher Matcher) MatchOptional {
|
||||
return MatchOptional{matcher}
|
||||
},
|
||||
}
|
||||
|
||||
type MatchAny struct{}
|
||||
|
||||
func (c MatchAny) Match(m *MatchDialog) bool {
|
||||
_, ok := m.NextRune()
|
||||
return ok
|
||||
}
|
||||
|
||||
type MatchRune struct {
|
||||
match rune
|
||||
}
|
||||
|
||||
func (c MatchRune) Match(m *MatchDialog) bool {
|
||||
r, ok := m.NextRune()
|
||||
return ok && r == c.match
|
||||
}
|
||||
|
||||
type MatchRuneRange struct {
|
||||
start rune
|
||||
end rune
|
||||
}
|
||||
|
||||
func (c MatchRuneRange) Match(m *MatchDialog) bool {
|
||||
r, ok := m.NextRune()
|
||||
return ok && r >= c.start && r <= c.end
|
||||
}
|
||||
|
||||
type MatchAnyOf struct {
|
||||
matcher []Matcher
|
||||
}
|
||||
|
||||
func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
||||
for _, matcher := range c.matcher {
|
||||
mc := m.Fork()
|
||||
if matcher.Match(mc) {
|
||||
return m.Join(mc)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type MatchRepeat struct {
|
||||
count int
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
for i := 0; i < c.count; i++ {
|
||||
if !c.matcher.Match(mc) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
m.Join(mc)
|
||||
return true
|
||||
}
|
||||
|
||||
type MatchSequence struct {
|
||||
matchers []Matcher
|
||||
}
|
||||
|
||||
func (c MatchSequence) Match(m *MatchDialog) bool {
|
||||
mPart := m.Fork()
|
||||
for _, matcher := range c.matchers {
|
||||
if !matcher.Match(mPart) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
m.Join(mPart)
|
||||
return true
|
||||
}
|
||||
|
||||
type MatchOneOrMore struct {
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
for c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
}
|
||||
return len(m.runes) > 0
|
||||
}
|
||||
|
||||
type MatchZeroOrMore struct {
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
for c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type MatchOptional struct {
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
func (c MatchOptional) Match(m *MatchDialog) bool {
|
||||
mc := m.Fork()
|
||||
if c.matcher.Match(mc) {
|
||||
m.Join(mc)
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,260 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
p "github.com/mmakaay/toml/parsekit"
|
||||
)
|
||||
|
||||
var c = p.C
|
||||
|
||||
const TestItem p.ItemType = 1
|
||||
|
||||
func newParser(input string, matcher p.Matcher) *p.P {
|
||||
stateFn := func(p *p.P) {
|
||||
if p.On(matcher).Accept() {
|
||||
p.EmitLiteral(TestItem)
|
||||
p.Repeat()
|
||||
} else {
|
||||
p.UnexpectedInput("MATCH")
|
||||
}
|
||||
}
|
||||
return p.New(input, stateFn)
|
||||
}
|
||||
|
||||
func TestMatchAny(t *testing.T) {
|
||||
p := newParser("o", c.Any())
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "o" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"o\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAny_AtEndOfFile(t *testing.T) {
|
||||
p := newParser("", c.Any())
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
}
|
||||
expected := "unexpected end of file (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAny_AtInvalidUtf8Rune(t *testing.T) {
|
||||
p := newParser("\xcd", c.Any())
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing unexpectedly succeeded")
|
||||
}
|
||||
expected := "invalid UTF8 character in input (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRune(t *testing.T) {
|
||||
p := newParser("xxx", c.Rune('x'))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "x" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRune_OnMismatch(t *testing.T) {
|
||||
p := newParser("x ", c.Rune(' '))
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing did not fail unexpectedly")
|
||||
}
|
||||
expected := "unexpected character 'x' (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRuneRange(t *testing.T) {
|
||||
m := c.RuneRange('b', 'y')
|
||||
s := "mnopqrstuvwxybcdefghijkl"
|
||||
p := newParser(s, m)
|
||||
for i := 0; i < len(s); i++ {
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if s[i] != r.Value[0] {
|
||||
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||
}
|
||||
}
|
||||
if _, _, ok := newParser("a", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'a'")
|
||||
}
|
||||
if _, _, ok := newParser("z", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'z'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRunes(t *testing.T) {
|
||||
m := c.Runes('+', '-', '*', '/')
|
||||
s := "-+/*+++"
|
||||
p := newParser(s, m)
|
||||
for i := 0; i < len(s); i++ {
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if s[i] != r.Value[0] {
|
||||
t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", i+1, s[i], r.Value[0])
|
||||
}
|
||||
}
|
||||
if _, _, ok := newParser("^", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input '^'")
|
||||
}
|
||||
if _, _, ok := newParser("x", m).Next(); ok {
|
||||
t.Fatalf("Unexpected parse success for input 'x'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchAnyOf(t *testing.T) {
|
||||
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "a" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||
}
|
||||
|
||||
r, err, ok = p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if r.Type != TestItem {
|
||||
t.Error("Parser item type not expected TestTitem")
|
||||
}
|
||||
if r.Value != "b" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat(t *testing.T) {
|
||||
p := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "xxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxx\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchRepeat_Mismatch(t *testing.T) {
|
||||
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||
_, err, ok := p.Next()
|
||||
if ok {
|
||||
t.Fatalf("Parsing did not fail unexpectedly")
|
||||
}
|
||||
expected := "unexpected character 'x' (expected MATCH)"
|
||||
if err.Error() != expected {
|
||||
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchOneOrMore(t *testing.T) {
|
||||
p := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "xxxxxxxx" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSequence(t *testing.T) {
|
||||
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "10" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
|
||||
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "101010" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSequence_WithRepeatedRunes(t *testing.T) {
|
||||
whitespace := c.Optional(c.OneOrMore(c.Rune(' ')))
|
||||
equal := c.Rune('=')
|
||||
assignment := c.Sequence(whitespace, equal, whitespace)
|
||||
p := newParser(" == 10", assignment)
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != " =" {
|
||||
t.Errorf("Parser item value is %q instead of expected \" =\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchOptional(t *testing.T) {
|
||||
p := newParser("xyz", c.Optional(c.Rune('x')))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "x" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||
}
|
||||
|
||||
p = newParser("xyz", c.Optional(c.Rune('y')))
|
||||
r, err, ok = p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != "" {
|
||||
t.Errorf("Parser item value is %q instead of expected \"\"", r.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMixAndMatch(t *testing.T) {
|
||||
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||
backslash := c.Rune('\\')
|
||||
x := c.Rune('x')
|
||||
hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))
|
||||
|
||||
p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
|
||||
r, err, ok := p.Next()
|
||||
if !ok {
|
||||
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||
}
|
||||
if r.Value != `\x9a\x01\xF0\xfC` {
|
||||
t.Errorf("Parser item value is %q instead of expected \"%q\"", r.Value, `\x9a\x01\xF0\xfC`)
|
||||
}
|
||||
}
|
|
@ -64,6 +64,13 @@ func (p *P) match(offset int, patterns ...interface{}) ([]rune, []int, bool) {
|
|||
return runes, widths, false
|
||||
}
|
||||
switch pattern := pattern.(type) {
|
||||
case Matcher:
|
||||
m := &MatchDialog{p: p}
|
||||
if pattern.Match(m) {
|
||||
return m.runes, m.widths, true
|
||||
} else {
|
||||
return m.runes, m.widths, false
|
||||
}
|
||||
case []interface{}:
|
||||
rs, ws, matched := p.match(offset, pattern...)
|
||||
for i, r := range rs {
|
||||
|
@ -98,17 +105,6 @@ func (p *P) Upcoming(patterns ...interface{}) bool {
|
|||
return ok
|
||||
}
|
||||
|
||||
// AcceptAny adds the next rune from the input to the string buffer.
|
||||
// If no rune could be read (end of file or invalid UTF8 data),
|
||||
// then false is returned.
|
||||
func (p *P) AcceptAny() bool {
|
||||
if r, ok := p.next(); ok {
|
||||
p.buffer.writeRune(r)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type action struct {
|
||||
p *P
|
||||
runes []rune
|
||||
|
@ -129,6 +125,10 @@ func (a *action) Accept() bool {
|
|||
func (a *action) Skip() bool {
|
||||
if a.ok {
|
||||
for i, r := range a.runes {
|
||||
type C struct {
|
||||
Rune MatchRune
|
||||
}
|
||||
|
||||
a.p.advanceCursor(r, a.widths[i])
|
||||
}
|
||||
}
|
||||
|
@ -159,20 +159,10 @@ func (p *P) On(patterns ...interface{}) *action {
|
|||
// AcceptMatching adds the next runes to the string buffer, but only
|
||||
// if the upcoming runes satisfy the provided patterns.
|
||||
// When runes were added then true is returned, false otherwise.
|
||||
func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
||||
return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
||||
}
|
||||
|
||||
// AcceptConsecutive adds consecutive runes from the input to the string
|
||||
// buffer, as long as they exist in the pattern.
|
||||
// If any runes were added then true is returned, false otherwise.
|
||||
func (p *P) AcceptConsecutive(pattern string) bool {
|
||||
accepted := false
|
||||
for p.AcceptMatching(pattern) {
|
||||
accepted = true
|
||||
}
|
||||
return accepted
|
||||
}
|
||||
// TODO not needed anymore
|
||||
// func (p *P) AcceptMatching(patterns ...interface{}) bool {
|
||||
// return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
|
||||
// }
|
||||
|
||||
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
||||
// Returns true when one or more runes were skipped.
|
||||
|
@ -185,13 +175,3 @@ func (p *P) SkipMatching(patterns ...interface{}) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// SkipConsecutive skips consecutive runes from the provided pattern.
|
||||
// Returns true when one or more runes were skipped.
|
||||
func (p *P) SkipConsecutive(pattern string) bool {
|
||||
didSkip := false
|
||||
for p.SkipMatching(pattern) {
|
||||
didSkip = true
|
||||
}
|
||||
return didSkip
|
||||
}
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// P holds the internal state of the parser.
|
||||
type P struct {
|
||||
state StateFn // the function that handles the current state
|
||||
|
@ -50,3 +54,14 @@ type Error struct {
|
|||
func (err *Error) Error() string {
|
||||
return err.Message
|
||||
}
|
||||
|
||||
const (
	// EOF is a special rune, which is used to indicate an end of file when
	// reading a character from the input.
	// It can be treated as a rune when writing parsing rules, so a valid way
	// to say 'I now expect the end of the file' is using something like:
	//
	//	if (p.On(c.Rune(EOF)).Skip()) { ... }
	EOF rune = -1

	// INVALID is a special rune, which is used to indicate an invalid UTF8
	// rune on the input.
	INVALID rune = utf8.RuneError
)
|
||||
|
|
|
@ -6,7 +6,7 @@ import (
|
|||
"testing"
|
||||
|
||||
"github.com/mmakaay/toml/parsekit"
|
||||
lexer "github.com/mmakaay/toml/parser"
|
||||
"github.com/mmakaay/toml/parser"
|
||||
)
|
||||
|
||||
type statesT struct {
|
||||
|
@ -23,7 +23,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
|
|||
}
|
||||
|
||||
func runStatesT(t *testing.T, c statesT) {
|
||||
l, err := lexer.NewParser(c.in).ToArray()
|
||||
l, err := parser.NewParser(c.in).ToArray()
|
||||
if err == nil && c.err != "" {
|
||||
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
||||
}
|
||||
|
@ -36,12 +36,12 @@ func runStatesT(t *testing.T, c statesT) {
|
|||
switch expected := c.out.(type) {
|
||||
case []string:
|
||||
if len(expected) != len(l) {
|
||||
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
||||
t.Errorf("[%s] Unexpected number of parser items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
|
||||
}
|
||||
for i, e := range expected {
|
||||
v := ParserItemToString(l[i])
|
||||
if v != e {
|
||||
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
|
||||
t.Errorf("[%s] Unexpected parser item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
|
||||
}
|
||||
}
|
||||
case string:
|
||||
|
@ -51,7 +51,7 @@ func runStatesT(t *testing.T, c statesT) {
|
|||
}
|
||||
actual := strings.Join(a, "")
|
||||
if actual != expected {
|
||||
t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
|
||||
t.Errorf("[%s] Unexpected parser output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,15 +59,15 @@ func runStatesT(t *testing.T, c statesT) {
|
|||
// ParserItemToString returns a string representation of the parsekit.Item.
|
||||
func ParserItemToString(i parsekit.Item) string {
|
||||
switch i.Type {
|
||||
case lexer.ItemComment:
|
||||
case parser.ItemComment:
|
||||
return fmt.Sprintf("#(%s)", i.Value)
|
||||
case lexer.ItemKey:
|
||||
case parser.ItemKey:
|
||||
return fmt.Sprintf("[%s]", i.Value)
|
||||
case lexer.ItemString:
|
||||
case parser.ItemString:
|
||||
return fmt.Sprintf("STR(%s)", i.Value)
|
||||
case lexer.ItemKeyDot:
|
||||
case parser.ItemKeyDot:
|
||||
return "."
|
||||
case lexer.ItemAssignment:
|
||||
case parser.ItemAssignment:
|
||||
return "="
|
||||
default:
|
||||
panic(fmt.Sprintf("No string representation available for parsekit.Item id %d", i.Type))
|
||||
|
|
|
@ -11,40 +11,28 @@ const (
|
|||
ItemString // A value of type string
|
||||
)
|
||||
|
||||
const (
|
||||
whitespace string = " \t"
|
||||
carriageReturn string = "\r"
|
||||
newline string = "\n"
|
||||
hash string = "#"
|
||||
equal string = "="
|
||||
lower string = "abcdefghijklmnopqrstuvwxyz"
|
||||
upper string = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
digits string = "0123456789"
|
||||
hex string = digits + "abcdefABCDEF"
|
||||
dot string = "."
|
||||
underscore string = "_"
|
||||
dash string = "-"
|
||||
singleQuote string = "'"
|
||||
doubleQuote string = "\""
|
||||
backslash string = "\\"
|
||||
quoteChars string = singleQuote + doubleQuote
|
||||
bareKeyChars string = lower + upper + digits + underscore + dash
|
||||
startOfKey string = bareKeyChars + quoteChars
|
||||
validEscapeChars string = `btnfr"\`
|
||||
mustBeEscaped string = "" +
|
||||
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
|
||||
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
|
||||
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
|
||||
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
|
||||
"\u007F"
|
||||
)
|
||||
|
||||
var (
|
||||
keySeparatorDot = []interface{}{whitespace, dot, whitespace}
|
||||
doubleQuote3 = []interface{}{doubleQuote, doubleQuote, doubleQuote}
|
||||
hex4 = []interface{}{hex, hex, hex, hex}
|
||||
shortUtf8Match = []interface{}{backslash, 'u', hex4}
|
||||
longUtf8Match = []interface{}{backslash, 'U', hex4, hex4}
|
||||
c = parsekit.C
|
||||
space = c.Rune(' ')
|
||||
tab = c.Rune('\t')
|
||||
carriageReturn = c.Rune('\r')
|
||||
lineFeed = c.Rune('\n')
|
||||
hash = c.Rune('#')
|
||||
underscore = c.Rune('_')
|
||||
dash = c.Rune('-')
|
||||
equal = c.Rune('=')
|
||||
dot = c.Rune('.')
|
||||
singleQuote = c.Rune('\'')
|
||||
doubleQuote = c.Rune('"')
|
||||
any = c.Any()
|
||||
anyQuote = c.AnyOf(singleQuote, doubleQuote)
|
||||
backslash = c.Rune('\\')
|
||||
lower = c.RuneRange('a', 'z')
|
||||
upper = c.RuneRange('A', 'Z')
|
||||
digit = c.RuneRange('0', '9')
|
||||
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
||||
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
||||
optionalWhitespace = c.Optional(whitespace)
|
||||
)
|
||||
|
||||
// NewParser creates a new parser, using the provided input string
|
||||
|
|
|
@ -6,6 +6,10 @@ import (
|
|||
"github.com/mmakaay/toml/parser"
|
||||
)
|
||||
|
||||
func TestEmptyInput(t *testing.T) {
|
||||
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||
}
|
||||
|
||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||
_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
||||
t.Logf("Got error: %s", err.Error())
|
||||
|
@ -17,18 +21,13 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestEmptyInput(t *testing.T) {
|
||||
runStatesT(t, statesT{"empty string", "", "", ""})
|
||||
}
|
||||
|
||||
func TestInvalidUtf8Data(t *testing.T) {
|
||||
runStatesTs(t, []statesT{
|
||||
{"inside comment", "# \xbc", "", "invalid UTF8 character"},
|
||||
{"bare key 1", "\xbc", "", "invalid UTF8 character"},
|
||||
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
|
||||
{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
|
||||
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
|
||||
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
|
||||
{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
|
||||
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
|
||||
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
|
||||
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
|
||||
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character in input (expected string contents)"},
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ import (
|
|||
|
||||
// A '#' hash symbol marks the rest of the line as a comment.
|
||||
func startComment(p *parsekit.P) {
|
||||
p.SkipConsecutive(hash)
|
||||
p.On(c.OneOrMore(hash)).Skip()
|
||||
p.RouteTo(commentContents)
|
||||
}
|
||||
|
||||
|
@ -16,8 +16,9 @@ func commentContents(p *parsekit.P) {
|
|||
case p.AtEndOfLine():
|
||||
p.EmitLiteralTrim(ItemComment)
|
||||
p.RouteReturn()
|
||||
default:
|
||||
p.AcceptAny()
|
||||
case p.On(any).Accept():
|
||||
p.Repeat()
|
||||
default:
|
||||
p.UnexpectedInput("comment contents")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
package parser
|
||||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// The primary building block of a TOML document is the key/value pair.
|
||||
func startKeyValuePair(p *parsekit.P) {
|
||||
switch {
|
||||
case p.On(whitespace + carriageReturn + newline).Skip():
|
||||
p.Repeat()
|
||||
case p.On(hash).Stay():
|
||||
p.RouteTo(startComment).ThenReturnHere()
|
||||
case p.On(startOfKey).RouteTo(startKey):
|
||||
default:
|
||||
p.RouteTo(endOfFile)
|
||||
}
|
||||
}
|
||||
|
||||
// A key may be either bare, quoted or dotted.
|
||||
func startKey(p *parsekit.P) {
|
||||
switch {
|
||||
case p.On(bareKeyChars).RouteTo(startBareKey):
|
||||
default:
|
||||
p.UnexpectedInput("a valid key name")
|
||||
}
|
||||
}
|
||||
|
||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||
// keys are allowed to be composed of only ASCII digits,
|
||||
// e.g. 1234, but are always interpreted as strings.
|
||||
func startBareKey(p *parsekit.P) {
|
||||
p.AcceptConsecutive(bareKeyChars) // TODO make a plan for adding this to After()
|
||||
p.EmitLiteral(ItemKey)
|
||||
p.RouteTo(endOfKeyOrDot)
|
||||
}
|
||||
|
||||
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
||||
// This allows for grouping similar properties together:
|
||||
func endOfKeyOrDot(p *parsekit.P) {
|
||||
// Whitespace around dot-separated parts is ignored, however,
|
||||
// best practice is to not use any extraneous whitespace.
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.On(dot).Accept() {
|
||||
p.SkipConsecutive(whitespace)
|
||||
p.EmitLiteral(ItemKeyDot)
|
||||
p.RouteTo(startKey)
|
||||
} else {
|
||||
p.RouteTo(startKeyAssignment)
|
||||
}
|
||||
}
|
||||
|
||||
// Keys are on the left of the equals sign and values are on the right.
|
||||
// Whitespace is ignored around key names and values. The key, equals
|
||||
// sign, and value must be on the same line (though some values can
|
||||
// be broken over multiple lines).
//
// startKeyAssignment skips whitespace and then expects an equal sign. When
// found, an ItemAssignment is emitted, trailing whitespace is skipped and
// parsing continues in startValue. Otherwise the input is reported through
// UnexpectedInput.
|
||||
func startKeyAssignment(p *parsekit.P) {
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.On(equal).Accept() {
|
||||
p.EmitLiteral(ItemAssignment)
|
||||
p.SkipConsecutive(whitespace)
|
||||
p.RouteTo(startValue)
|
||||
} else {
|
||||
p.UnexpectedInput("a value assignment")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
package parser
|
||||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// The primary building block of a TOML document is the key/value pair.
|
||||
|
||||
// Matcher definitions for keys and key assignment, built with the combinator
// helpers reachable through c (c itself is declared outside this block).
var (
|
||||
// Keys are on the left of the equals sign and values are on the right.
|
||||
// Whitespace is ignored around key names and values. The key, equals
|
||||
// sign, and value must be on the same line (though some values can be
|
||||
// broken over multiple lines).
|
||||
keyAssignment = c.Sequence(optionalWhitespace, equal, optionalWhitespace)
|
||||
|
||||
// A key may be either bare, quoted or dotted.
|
||||
// Bare keys may only contain ASCII letters, ASCII digits,
|
||||
// underscores, and dashes (A-Za-z0-9_-). Note that bare
|
||||
// keys are allowed to be composed of only ASCII digits,
|
||||
// e.g. 1234, but are always interpreted as strings.
|
||||
bareKeyRune = c.AnyOf(lower, upper, digit, underscore, dash)
|
||||
// A bare key consists of one or more bare key runes.
bareKey = c.OneOrMore(bareKeyRune)
|
||||
|
||||
// Quoted keys follow the exact same rules as either basic
|
||||
// strings or literal strings and allow you to use a much broader
|
||||
// set of key names. Best practice is to use bare keys except
|
||||
// when absolutely necessary.
|
||||
// A bare key must be non-empty, but an empty quoted key is
|
||||
// allowed (though discouraged).
// startOfKey is built from the bare key rune matcher and anyQuote, i.e.
// the input with which a bare or a quoted key can start.
|
||||
startOfKey = c.AnyOf(bareKeyRune, anyQuote)
|
||||
|
||||
// Dotted keys are a sequence of bare or quoted keys joined with a dot.
|
||||
// This allows for grouping similar properties together.
|
||||
// Whitespace around dot-separated parts is ignored, however, best
|
||||
// practice is to not use any extraneous whitespace.
// NOTE(review): the identifier is spelled "keySeparatordDot" (stray 'd');
// renaming it requires updating every user of the name as well.
|
||||
keySeparatordDot = c.Sequence(optionalWhitespace, dot, optionalWhitespace)
|
||||
)
|
||||
|
||||
// startKeyValuePair is the state function that decides how to continue at
// the start of a (possible) key/value pair. It first skips input matching
// whitespaceOrNewlines and then dispatches on the upcoming input:
//   - a hash: hand over to startComment, asking to return here afterwards;
//   - the start of a key: hand over to startKey;
//   - anything else: hand over to endOfFile.
func startKeyValuePair(p *parsekit.P) {
|
||||
// The result of Skip() is deliberately unused: leading whitespace and
// newlines are optional here.
p.On(whitespaceOrNewlines).Skip()
|
||||
switch {
|
||||
case p.On(hash).Stay():
|
||||
p.RouteTo(startComment).ThenReturnHere()
|
||||
// The routing is done inside the case expression itself, so this case
// intentionally has no body.
case p.On(startOfKey).RouteTo(startKey):
|
||||
default:
|
||||
p.RouteTo(endOfFile) // TODO Make end of file a Matcher, so this can be simpler.
|
||||
}
|
||||
}
|
||||
|
||||
// startKey is the state function for the start of a key. Only input that
// matches bareKeyRune is handled here (routed to startBareKey); any other
// input is reported through UnexpectedInput.
func startKey(p *parsekit.P) {
|
||||
switch {
|
||||
// The routing is done inside the case expression itself, so this case
// intentionally has no body.
case p.On(bareKeyRune).RouteTo(startBareKey):
|
||||
default:
|
||||
p.UnexpectedInput("a valid key name")
|
||||
}
|
||||
}
|
||||
|
||||
// startBareKey accepts the input matching bareKey, emits it as an ItemKey
// via EmitLiteral and then routes to endOfKeyOrDot.
func startBareKey(p *parsekit.P) {
|
||||
// The result of Accept() is not checked here.
// NOTE(review): presumably safe because startKey only routes here when a
// bare key rune is upcoming - confirm.
p.On(bareKey).Accept()
|
||||
p.EmitLiteral(ItemKey)
|
||||
p.RouteTo(endOfKeyOrDot)
|
||||
}
|
||||
|
||||
// endOfKeyOrDot decides what follows a key part: when the key separator
// (a dot with optional surrounding whitespace) is found, an ItemKeyDot is
// emitted and parsing continues with the next key part in startKey;
// otherwise parsing continues in startKeyAssignment.
func endOfKeyOrDot(p *parsekit.P) {
|
||||
if p.On(keySeparatordDot).Skip() {
|
||||
// The separator is skipped rather than accepted, so the item is
// emitted with a fixed "." value instead of the matched input.
p.Emit(ItemKeyDot, ".")
|
||||
p.RouteTo(startKey)
|
||||
} else {
|
||||
p.RouteTo(startKeyAssignment)
|
||||
}
|
||||
}
|
||||
|
||||
// startKeyAssignment expects the key assignment (an equal sign with
// optional surrounding whitespace). When found, an ItemAssignment is
// emitted and parsing continues in startValue. Otherwise the input is
// reported through UnexpectedInput.
func startKeyAssignment(p *parsekit.P) {
|
||||
if p.On(keyAssignment).Skip() {
|
||||
// The assignment is skipped rather than accepted, so the item is
// emitted with a fixed "=" value instead of the matched input.
p.Emit(ItemAssignment, "=")
|
||||
p.RouteTo(startValue)
|
||||
} else {
|
||||
p.UnexpectedInput("a value assignment")
|
||||
}
|
||||
}
|
||||
|
||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
//
// startValue is the state function for the start of a value. Only input
// matching anyQuote is handled here (routed to startString); any other
// input is reported through UnexpectedInput.
|
||||
func startValue(p *parsekit.P) {
|
||||
switch {
|
||||
// The routing is done inside the case expression itself, so this case
// intentionally has no body.
case p.On(anyQuote).RouteTo(startString):
|
||||
default:
|
||||
p.UnexpectedInput("a value")
|
||||
}
|
||||
}
|
|
@ -5,9 +5,9 @@ import (
|
|||
)
|
||||
|
||||
func TestKeyWithoutAssignment(t *testing.T) {
|
||||
err := "unexpected end of file"
|
||||
err := "unexpected end of file (expected a value assignment)"
|
||||
runStatesTs(t, []statesT{
|
||||
{"bare with whitespace", " a ", "[a]", err},
|
||||
{"bare with whitespace", " a ", "[a]", "unexpected character ' ' (expected a value assignment)"},
|
||||
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
|
||||
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
|
||||
{"bare numbers", "0123456789", "[0123456789]", err},
|
||||
|
@ -18,15 +18,14 @@ func TestKeyWithoutAssignment(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestDottedKey(t *testing.T) {
|
||||
err := "unexpected end of file"
|
||||
runStatesTs(t, []statesT{
|
||||
{"bare dotted", "a._.c", "[a].[_].[c]", err},
|
||||
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
|
||||
{"bare dotted", "a._.c", "[a].[_].[c]", "unexpected end of file (expected a value assignment)"},
|
||||
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", `unexpected character '\t' (expected a value assignment)`},
|
||||
})
|
||||
}
|
||||
|
||||
func TestKeyWithAssignmentButNoValue(t *testing.T) {
|
||||
err := "unexpected end of file"
|
||||
err := "unexpected end of file (expected a value)"
|
||||
runStatesTs(t, []statesT{
|
||||
{"bare", "a=", "[a]=", err},
|
||||
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
|
|
@ -2,10 +2,36 @@ package parser
|
|||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// There are four ways to express strings: basic, multi-line basic, literal,
|
||||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
||||
// * Basic strings are surrounded by quotation marks.
|
||||
// Matcher definitions for string parsing, built with the combinator helpers
// reachable through c (c itself is declared outside this block).
var (
|
||||
// There are four ways to express strings: basic, multi-line basic, literal,
|
||||
// and multi-line literal. All strings must contain only valid UTF-8 characters.
|
||||
// * Multi-line basic strings are surrounded by three quotation marks on each side.
|
||||
// * Basic strings are surrounded by quotation marks.
|
||||
doubleQuote3 = c.Repeat(3, doubleQuote)
|
||||
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
// NOTE(review): this matcher only lists the control characters (U+0000 to
// U+001F and U+007F); the quotation mark and backslash are not part of it.
|
||||
charThatMustBeEscaped = c.AnyOf(c.RuneRange('\u0000', '\u001F'), c.Rune('\u007F'))
|
||||
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
//
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
//
// validEscapeChar lists the characters that may follow the backslash in a
// compact escape sequence.
|
||||
validEscapeChar = c.AnyOf(c.Runes('b', 't', 'n', 'f', 'r'), doubleQuote, backslash)
|
||||
// A compact escape: a backslash followed by a valid escape character.
shortEscape = c.Sequence(backslash, validEscapeChar)
|
||||
// A hexadecimal digit, in either lower or upper case.
hex = c.AnyOf(digit, c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||
// The \uXXXX escape: backslash, 'u' and 4 hexadecimal digits.
shortUtf8Escape = c.Sequence(backslash, c.Rune('u'), c.Repeat(4, hex))
|
||||
// The \UXXXXXXXX escape: backslash, 'U' and 8 hexadecimal digits.
longUtf8Escape = c.Sequence(backslash, c.Rune('U'), c.Repeat(8, hex))
|
||||
// Any of the escape sequences defined above.
validEscape = c.AnyOf(shortEscape, shortUtf8Escape, longUtf8Escape)
|
||||
)
|
||||
|
||||
func startString(p *parsekit.P) {
|
||||
switch {
|
||||
case p.On(doubleQuote3).RouteTo(startMultiLineBasicString):
|
||||
|
@ -15,36 +41,21 @@ func startString(p *parsekit.P) {
|
|||
}
|
||||
}
|
||||
|
||||
// For convenience, some popular characters have a compact escape sequence.
|
||||
//
|
||||
// \b - backspace (U+0008)
|
||||
// \t - tab (U+0009)
|
||||
// \n - linefeed (U+000A)
|
||||
// \f - form feed (U+000C)
|
||||
// \r - carriage return (U+000D)
|
||||
// \" - quote (U+0022)
|
||||
// \\ - backslash (U+005C)
|
||||
// \uXXXX - unicode (U+XXXX)
|
||||
// \UXXXXXXXX - unicode (U+XXXXXXXX)
|
||||
//
|
||||
// Any Unicode character may be used except those that must be escaped:
|
||||
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
|
||||
func parseBasicString(p *parsekit.P) {
|
||||
switch {
|
||||
case p.AtEndOfFile():
|
||||
case p.On(parsekit.EOF).Stay():
|
||||
p.UnexpectedEndOfFile("basic string token")
|
||||
case p.On(backslash, validEscapeChars).Accept() ||
|
||||
p.On(shortUtf8Match).Accept() ||
|
||||
p.On(longUtf8Match).Accept():
|
||||
case p.On(validEscape).Accept():
|
||||
p.Repeat()
|
||||
case p.On(mustBeEscaped).Stay():
|
||||
r, _, _ := p.Match(mustBeEscaped)
|
||||
case p.On(charThatMustBeEscaped).Stay():
|
||||
r, _, _ := p.Match(charThatMustBeEscaped)
|
||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
||||
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
||||
p.RouteReturn()
|
||||
default:
|
||||
p.AcceptAny()
|
||||
case p.On(any).Accept():
|
||||
p.Repeat()
|
||||
default:
|
||||
p.UnexpectedInput("string contents")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,7 +80,7 @@ func basicStringSpecifics(p *parsekit.P) {
|
|||
case p.On(backslash).Stay():
|
||||
p.EmitError("Invalid escape sequence")
|
||||
default:
|
||||
p.RouteTo(startBasicString)
|
||||
panic("String parsing should not have ended up here")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@ func TestEmptyBasicString(t *testing.T) {
|
|||
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
|
||||
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
|
||||
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
|
||||
{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
||||
{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
||||
{"multiple on same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
|
||||
{"multiple on multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
package parser
|
||||
|
||||
import "github.com/mmakaay/toml/parsekit"
|
||||
|
||||
// Values must be of the following types: String, Integer, Float, Boolean,
|
||||
// Datetime, Array, or Inline Table. Unspecified values are invalid.
//
// startValue skips consecutive whitespace and then checks the upcoming
// input: on quoteChars parsing continues in startString; any other input
// is reported through UnexpectedInput.
|
||||
func startValue(p *parsekit.P) {
|
||||
p.SkipConsecutive(whitespace)
|
||||
if p.Upcoming(quoteChars) {
|
||||
p.RouteTo(startString)
|
||||
} else {
|
||||
p.UnexpectedInput("a value")
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue