Split off the parsekit code into a separate repository and Go module.

This commit is contained in:
Maurice Makaay 2019-05-21 15:25:06 +00:00
parent a3947feea7
commit 9a13e0dd7a
29 changed files with 84 additions and 1403 deletions

42
.gitignore vendored
View File

@ -1,2 +1,40 @@
.vscode
*-workspace
# ---> Vim
# Swap
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
# Temporary
.netrwhist
*~
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
# ---> Go
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, build with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# ---> VisualStudioCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json

3
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,3 @@
{
"go.formatTool": "goimports"
}

View File

@ -1,3 +0,0 @@
test:
@cd parsekit && go test
@cd parser && go test

View File

@ -1,7 +1,7 @@
package parser
import (
"github.com/mmakaay/toml/parsekit"
"git.makaay.nl/mauricem/go-parsekit"
)
// A '#' hash symbol marks the rest of the line as a comment.

View File

@ -1,6 +1,6 @@
package parser
import "github.com/mmakaay/toml/parsekit"
import "git.makaay.nl/mauricem/go-parsekit"
// TODO move into parsekit
func endOfFile(p *parsekit.P) {

6
go.mod
View File

@ -1,3 +1,7 @@
module github.com/mmakaay/toml
module git.makaay.nl/mauricem/go-toml
go 1.12
require git.makaay.nl/mauricem/go-parsekit v0.0.0
replace git.makaay.nl/mauricem/go-parsekit v0.0.0 => ../go-parsekit

3
go.sum Normal file
View File

@ -0,0 +1,3 @@
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939 h1:cMBHhfSJR2BZgVN7NmP+c2agNlXDef4Iz6+XQp5AqdU=
git.makaay.nl/mauricem/go-parsekit v0.0.0-20190521150537-747456517939/go.mod h1:/mo+aM5Im5rkBqBvXTAsVR0//OfsAAiFyvuxxcxGGlU=
github.com/mmakaay/toml v0.3.1 h1:2uKRPvA/smKM8YuYGxWnW4KximMkWOMfunJOXgM5Zos=

View File

@ -5,8 +5,8 @@ import (
"strings"
"testing"
"github.com/mmakaay/toml/parsekit"
"github.com/mmakaay/toml/parser"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-toml"
)
type statesT struct {

View File

@ -1,6 +1,6 @@
package parser
import "github.com/mmakaay/toml/parsekit"
import "git.makaay.nl/mauricem/go-parsekit"
// The primary building block of a TOML document is the key/value pair.

View File

@ -36,6 +36,11 @@ func TestKeyWithAssignmentButNoValue(t *testing.T) {
// TestKeyWithValue runs a table of key/value inputs through runStatesTs.
// Each row presumably holds: name, input, expected output, expected
// error - confirm against the statesT definition.
// NOTE(review): the first two rows share the same name and input; they look
// like the old and new layout of a single row - confirm only one is wanted.
func TestKeyWithValue(t *testing.T) {
runStatesTs(t, []statesT{
{"with string value", " -key- = \"value\" # nice\r\n", "[-key-]=STR(value)#(nice)", ""},
{"with string value",
" -key- = \"value\" # nice\r\n",
"[-key-]=STR(value)#(nice)", ""},
{"multiple string values",
"key = \"value1\"\nbare_key = \"value2\"\n# More coming up!\nbare-key = \"value3\"\n1234 = \"value4\"\n",
"[key]=STR(value1)[bare_key]=STR(value2)#(More coming up!)[bare-key]=STR(value3)[1234]=STR(value4)", ""},
})
}

View File

@ -1,7 +0,0 @@
package main
import "fmt"
// main prints a fixed notice. According to that notice, this file only
// exists so that 'go get' finds a buildable package.
func main() {
	const notice = "Just a file to make 'go get' happy"
	fmt.Println(notice)
}

View File

@ -1,129 +0,0 @@
package parsekit
import (
"fmt"
"reflect"
"runtime"
)
// P holds the internal state of the parser.
// A P is created through New() and is advanced by calling Next(), which
// runs StateHandler functions until an Item is available on the items
// channel.
type P struct {
state StateHandler // the function that handles the current state
nextState StateHandler // the function that will handle the next state
routeStack []StateHandler // route stack, for handling nested parsing
input string // the scanned input
len int // the total length of the input in bytes
pos int // current byte scanning position in the input
newline bool // keep track of when we have scanned a newline
cursorLine int // current row number in the input
cursorColumn int // current column position in the input
expecting string // a description of what the current state expects to find
buffer stringBuffer // an efficient buffer, used to build string values
LastMatch string // a string representation of the last matched input data
items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when parsing failed, retrieved by Error()
}
// StateHandler defines the type of function that can be used to
// handle a parser state. The handler receives the parser (*P) that
// it is invoked for.
type StateHandler func(*P)
// New creates a parser for the provided input string.
// The start StateHandler is registered as the first state to run.
// The cursor starts at line 1, column 1 and the items channel is
// buffered for two items.
func New(input string, start StateHandler) *P {
	p := &P{input: input, nextState: start}
	p.len = len(input)
	p.cursorLine, p.cursorColumn = 1, 1
	p.items = make(chan Item, 2)
	return p
}
// Next retrieves the next parsed item.
//
// The boolean result is true when a valid item was found. It is false
// on error and when the end of the input was reached successfully.
// The *Error result is set when an error occurred, nil otherwise.
func (p *P) Next() (Item, *Error, bool) {
	for {
		// Non-blocking check for an item that is ready for delivery.
		select {
		case item := <-p.items:
			return p.makeReturnValues(item)
		default:
		}
		// Nothing available yet: run the state machine one step further.
		p.runStatusHandler()
	}
}
// runStatusHandler advances the parser (a state machine) by one step.
// It asks getNextStateHandler for the StateHandler to run and invokes
// it. When no handler is provided, nothing is invoked.
func (p *P) runStatusHandler() {
	handler, found := p.getNextStateHandler()
	if !found {
		return
	}
	p.invokeNextStatusHandler(handler)
}
// getNextStateHandler determines the next StateHandler to invoke in order
// to move the parsing state machine one step further.
//
// Every StateHandler invocation must result in a routing decision.
// The decision is looked up in this order:
//
// * An explicit route: the handler registered the next state during its
//   invocation (p.nextState is set).
//   For example: p.RouteTo(nextStatus)
//
// * An implicit route: an earlier handler pushed a followup route on the
//   route stack, which is popped here.
//   For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
//
// * An expectation: the handler called p.Expects("a cool thing") and
//   returned without routing. An "unexpected input" error is emitted
//   and (nil, false) is returned.
//
// When none of these apply, this is considered a bug in the state handler
// and the parser panics, naming the offending handler function.
func (p *P) getNextStateHandler() (StateHandler, bool) {
	if p.nextState != nil {
		return p.nextState, true
	}
	if len(p.routeStack) > 0 {
		return p.popRoute(), true
	}
	if p.expecting != "" {
		p.UnexpectedInput()
		return nil, false
	}
	// Resolve the function name of the current handler for the panic message.
	handlerPC := reflect.ValueOf(p.state).Pointer()
	handlerName := runtime.FuncForPC(handlerPC).Name()
	panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", handlerName))
}
// invokeNextStatusHandler makes the provided StateHandler the current
// state and runs it. The pending next state and the expectation are
// cleared first, so the handler has to provide its own routing decision.
func (p *P) invokeNextStatusHandler(state StateHandler) {
	p.expecting = ""
	p.nextState = nil
	p.state = state
	state(p)
}
// makeReturnValues translates an Item from the items channel into the
// return values for Next():
// * ItemEOF: the item, no error, false
// * ItemError: the item, an *Error with the current cursor position
//   (also stored in p.err), false
// * any other type: the item (also stored in p.item), no error, true
func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
	switch i.Type {
	case ItemEOF:
		return i, nil, false
	case ItemError:
		p.err = &Error{Message: i.Value, Line: p.cursorLine, Column: p.cursorColumn}
		return i, p.err, false
	}
	p.item = i
	return i, nil, true
}

View File

@ -1,342 +0,0 @@
package parsekit
import (
"unicode"
"unicode/utf8"
)
// Not in need of it myself, but nice to have I guess:
// - LookAhead
// MatchDialog is used by Matcher implementations as a means
// to retrieve data to match against and to report back
// successful matches.
type MatchDialog struct {
p *P // the parser that input is peeked from
runes []rune // the runes read through NextRune()
widths []int // the byte width for each rune in runes
offset int // byte offset passed to peek, advanced by NextRune()
curRune rune // the rune from the last NextRune() call
curWidth int // the byte width of curRune
parent *MatchDialog // set by Fork(), nil for a non-forked MatchDialog
}
// Fork creates a child MatchDialog that shares the parser and the
// offset of its parent, while all other data starts out empty.
// A Matcher can work freely on the child without affecting the parent.
// Once the Matcher decides that a match was found, it can call Merge()
// on the child to move the child's matching data into the parent.
func (m *MatchDialog) Fork() *MatchDialog {
	return &MatchDialog{p: m.p, offset: m.offset, parent: m}
}
// Merge merges the data of a forked child MatchDialog back
// into its parent:
// * the runes (and their widths) that are accumulated in the child are
//   appended to the parent's runes and widths
// * the parent's offset is set to the child's offset
// After a Merge, the child MatchDialog is cleared so it can
// immediately be reused for performing another match.
//
// Merge always returns true, which allows a Matcher to end with
// "return child.Merge()". It panics when called on a MatchDialog
// that was not created through Fork().
func (m *MatchDialog) Merge() bool {
	if m.parent == nil {
		panic("Cannot call Merge on a non-forked MatchDialog")
	}
	m.parent.runes = append(m.parent.runes, m.runes...)
	m.parent.widths = append(m.parent.widths, m.widths...)
	m.parent.offset = m.offset
	m.Clear()
	return true
}
// NextRune can be called by a Matcher on a MatchDialog in order
// to receive the next rune from the input.
// The rune and its width are always appended to the MatchDialog's
// runes and widths, also when the boolean result is false.
// Returns the rune and a boolean. The boolean will be false in
// case an invalid UTF8 rune or the end of the file was encountered.
//
// NOTE(review): the guard below only recognizes utf8.RuneError; peek
// reports end of input as EOF, so a call after end of input does not
// appear to trigger the panic - confirm whether that is intended.
func (m *MatchDialog) NextRune() (rune, bool) {
	if m.curRune == utf8.RuneError {
		panic("Matcher must not call NextRune() after it returned false")
	}
	next, width, ok := m.p.peek(m.offset)
	m.curRune, m.curWidth = next, width
	m.runes = append(m.runes, next)
	m.widths = append(m.widths, width)
	m.offset += width
	return next, ok
}
// Clear drops the runes and widths that were accumulated in the
// MatchDialog. The offset is left untouched.
func (m *MatchDialog) Clear() {
	m.runes, m.widths = []rune{}, []int{}
}
// Matcher is the interface that must be implemented to provide
// a matching strategy for the On() method of the parser.
// A MatchDialog is provided as input. This implements a
// specific set of methods that a Matcher needs to retrieve data
// from the parser and to report back results.
// Match returns true when the Matcher found a match.
type Matcher interface {
Match(*MatchDialog) bool
}
// MatcherConstructors bundles constructor functions for the Matcher
// implementations of this package. The package-level variable C holds
// the populated instance.
type MatcherConstructors struct {
EndOfFile func() MatchEndOfFile
AnyRune func() MatchAny
Rune func(rune) MatchRune
RuneRange func(rune, rune) MatchRuneRange
Runes func(...rune) MatchAnyOf
String func(string) MatchSequence
StringNoCase func(string) MatchSequence
AnyOf func(...Matcher) MatchAnyOf
Not func(Matcher) MatchNot
Optional func(Matcher) MatchOptional
Sequence func(...Matcher) MatchSequence
Repeat func(int, Matcher) MatchRepeat
Min func(int, Matcher) MatchRepeat
Max func(int, Matcher) MatchRepeat
Bounded func(int, int, Matcher) MatchRepeat
ZeroOrMore func(Matcher) MatchRepeat
OneOrMore func(Matcher) MatchRepeat
Separated func(Matcher, Matcher) MatchSeparated
Drop func(Matcher) MatchDrop
}
// C provides access to a wide range of parser/combinator
// constructors that can be used to build matching expressions.
// When using C in your own parser, it is advised to create
// an alias in your own package for easy reference:
//   var c = parsekit.C
//
// For the repeat constructors, a min or max of -1 means "not specified".
var C = MatcherConstructors{
	EndOfFile: func() MatchEndOfFile { return MatchEndOfFile{} },
	AnyRune:   func() MatchAny { return MatchAny{} },
	Rune:      func(r rune) MatchRune { return MatchRune{match: r} },
	RuneRange: func(first, last rune) MatchRuneRange {
		return MatchRuneRange{start: first, end: last}
	},
	// Runes matches any single one of the provided runes.
	Runes: func(runes ...rune) MatchAnyOf {
		alternatives := make([]Matcher, 0, len(runes))
		for _, r := range runes {
			alternatives = append(alternatives, MatchRune{match: r})
		}
		return MatchAnyOf{Matcher: alternatives}
	},
	// String matches the runes of the provided string, in order.
	String: func(s string) MatchSequence {
		steps := make([]Matcher, 0, len(s))
		for _, r := range s {
			steps = append(steps, MatchRune{match: r})
		}
		return MatchSequence{Matchers: steps}
	},
	// StringNoCase accepts, for every rune of the provided string,
	// either its upper case or its lower case form.
	StringNoCase: func(s string) MatchSequence {
		steps := make([]Matcher, 0, len(s))
		for _, r := range s {
			upper := MatchRune{match: unicode.ToUpper(r)}
			lower := MatchRune{match: unicode.ToLower(r)}
			steps = append(steps, MatchAnyOf{Matcher: []Matcher{upper, lower}})
		}
		return MatchSequence{Matchers: steps}
	},
	AnyOf:    func(alternatives ...Matcher) MatchAnyOf { return MatchAnyOf{Matcher: alternatives} },
	Not:      func(matcher Matcher) MatchNot { return MatchNot{Matcher: matcher} },
	Optional: func(matcher Matcher) MatchOptional { return MatchOptional{Matcher: matcher} },
	Sequence: func(steps ...Matcher) MatchSequence { return MatchSequence{Matchers: steps} },
	Repeat: func(count int, matcher Matcher) MatchRepeat {
		return MatchRepeat{min: count, max: count, Matcher: matcher}
	},
	Min: func(min int, matcher Matcher) MatchRepeat {
		return MatchRepeat{min: min, max: -1, Matcher: matcher}
	},
	Max: func(max int, matcher Matcher) MatchRepeat {
		return MatchRepeat{min: -1, max: max, Matcher: matcher}
	},
	Bounded: func(min, max int, matcher Matcher) MatchRepeat {
		return MatchRepeat{min: min, max: max, Matcher: matcher}
	},
	ZeroOrMore: func(matcher Matcher) MatchRepeat {
		return MatchRepeat{min: 0, max: -1, Matcher: matcher}
	},
	OneOrMore: func(matcher Matcher) MatchRepeat {
		return MatchRepeat{min: 1, max: -1, Matcher: matcher}
	},
	Separated: func(separator Matcher, matcher Matcher) MatchSeparated {
		return MatchSeparated{separator: separator, Matcher: matcher}
	},
	Drop: func(matcher Matcher) MatchDrop { return MatchDrop{Matcher: matcher} },
}
// MatchEndOfFile is a Matcher that matches the end of the input.
type MatchEndOfFile struct{}

// Match reads the next rune and reports true only when reading failed
// and the rune that was returned is EOF.
func (c MatchEndOfFile) Match(m *MatchDialog) bool {
	r, ok := m.NextRune()
	if ok {
		return false
	}
	return r == EOF
}
// MatchAny is a Matcher that matches any single rune that can be read
// successfully from the input.
type MatchAny struct{}

// Match reads the next rune and reports whether reading succeeded.
func (c MatchAny) Match(m *MatchDialog) bool {
	_, readable := m.NextRune()
	return readable
}
// MatchNot is a Matcher that matches when the wrapped Matcher does not.
type MatchNot struct {
	Matcher Matcher
}

// Match runs the wrapped Matcher on a forked MatchDialog. When the
// wrapped Matcher fails, the data it consumed is merged into m and
// true is returned. When it succeeds, false is returned.
func (c MatchNot) Match(m *MatchDialog) bool {
	child := m.Fork()
	if c.Matcher.Match(child) {
		return false
	}
	child.Merge()
	return true
}
// MatchOptional is a Matcher that always matches, with or without the
// input of the wrapped Matcher.
type MatchOptional struct {
	Matcher Matcher
}

// Match runs the wrapped Matcher on a forked MatchDialog and merges its
// data into m when it matched. The result is true in either case.
func (c MatchOptional) Match(m *MatchDialog) bool {
	if child := m.Fork(); c.Matcher.Match(child) {
		child.Merge()
	}
	return true
}
// MatchRune is a Matcher that matches one specific rune.
type MatchRune struct {
	match rune
}

// Match reads the next rune and reports whether it could be read and
// is equal to the rune to match.
func (c MatchRune) Match(m *MatchDialog) bool {
	r, ok := m.NextRune()
	if !ok {
		return false
	}
	return r == c.match
}
// MatchRuneRange is a Matcher that matches one rune that lies within
// the range start to end (both inclusive).
type MatchRuneRange struct {
	start rune
	end   rune
}

// Match reads the next rune and reports whether it could be read and
// lies within the range.
func (c MatchRuneRange) Match(m *MatchDialog) bool {
	r, ok := m.NextRune()
	if !ok {
		return false
	}
	return c.start <= r && r <= c.end
}
// MatchAnyOf is a Matcher that matches when at least one of its
// Matchers matches. The Matchers are tried in the provided order.
type MatchAnyOf struct {
	Matcher []Matcher
}

// Match tries every Matcher on a fresh fork of m. The data of the
// first one that matches is merged into m and true is returned.
// When none of them matches, false is returned.
func (c MatchAnyOf) Match(m *MatchDialog) bool {
	for i := range c.Matcher {
		child := m.Fork()
		if !c.Matcher[i].Match(child) {
			continue
		}
		return child.Merge()
	}
	return false
}
// MatchRepeat is a Matcher that applies the wrapped Matcher repeatedly.
// A negative min or max means that the bound is not specified.
type MatchRepeat struct {
	min     int
	max     int
	Matcher Matcher
}

// Match first requires min matches of the wrapped Matcher and returns
// false when these are not all found. After that, it greedily includes
// further matches: without limit when max is negative, otherwise up to
// a total of max attempts. It panics when both bounds are specified
// and max is lower than min.
func (c MatchRepeat) Match(m *MatchDialog) bool {
	if c.min >= 0 && c.max >= 0 && c.max < c.min {
		panic("MatchRepeat definition error: max must not be < min")
	}
	child := m.Fork()

	// The required part: nothing is merged until all of it is found.
	count := 0
	for ; count < c.min; count++ {
		if !c.Matcher.Match(child) {
			return false
		}
	}
	child.Merge()

	// No upper bound: take every further match that is available.
	if c.max < 0 {
		for c.Matcher.Match(child) {
			child.Merge()
		}
		return true
	}

	// Upper bound: take further matches until max is reached.
	for ; count < c.max; count++ {
		if !c.Matcher.Match(child) {
			break
		}
		child.Merge()
	}
	return true
}
// MatchSequence is a Matcher that matches when all of its Matchers
// match one after another.
type MatchSequence struct {
	Matchers []Matcher
}

// Match runs all Matchers in order on one forked MatchDialog. Only when
// every one of them matches is the collected data merged into m.
func (c MatchSequence) Match(m *MatchDialog) bool {
	child := m.Fork()
	for i := 0; i < len(c.Matchers); i++ {
		if !c.Matchers[i].Match(child) {
			return false
		}
	}
	child.Merge()
	return true
}
// MatchSeparated is a Matcher that matches one or more occurrences of
// Matcher, separated from each other by the separator Matcher.
type MatchSeparated struct {
	separator Matcher
	Matcher   Matcher
}

// Match builds the expression "Matcher (separator Matcher)*" from the
// constructors in C and applies it to m.
func (c MatchSeparated) Match(m *MatchDialog) bool {
	tail := C.ZeroOrMore(C.Sequence(c.separator, c.Matcher))
	return C.Sequence(c.Matcher, tail).Match(m)
}
// MatchDrop is a Matcher that matches like the wrapped Matcher, but
// that leaves the matched runes out of the result.
type MatchDrop struct {
	Matcher Matcher
}

// Match runs the wrapped Matcher on a forked MatchDialog. On a match,
// the collected runes are cleared before merging, so only the offset
// is carried over to m.
func (c MatchDrop) Match(m *MatchDialog) bool {
	child := m.Fork()
	if !c.Matcher.Match(child) {
		return false
	}
	child.Clear()
	child.Merge()
	return true
}

View File

@ -1,360 +0,0 @@
package parsekit_test
import (
"testing"
p "github.com/mmakaay/toml/parsekit"
)
// c is a short local alias for the parsekit matcher constructors.
var c = p.C
// TestItem is the item type that is emitted by the parsers that are
// built by newParser.
const TestItem p.ItemType = 1
// newParser builds a parser for the input with a single state: it
// expects "MATCH" and, as long as the Matcher matches, accepts the
// match, emits it as a literal TestItem and routes to the same state
// again.
func newParser(input string, Matcher p.Matcher) *p.P {
	return p.New(input, func(parser *p.P) {
		parser.Expects("MATCH")
		if !parser.On(Matcher).Accept().End() {
			return
		}
		parser.EmitLiteral(TestItem)
		parser.RouteRepeat()
	})
}
// TestMatchAnyRune checks that AnyRune matches a single rune of input.
func TestMatchAnyRune(t *testing.T) {
	parser := newParser("o", c.AnyRune())
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if item.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if got := item.Value; got != "o" {
		t.Errorf("Parser item value is %q instead of expected \"o\"", got)
	}
}
// TestMatchAnyRune_AtEndOfFile checks the error for AnyRune on empty input.
func TestMatchAnyRune_AtEndOfFile(t *testing.T) {
	const expected = "unexpected end of file (expected MATCH)"
	_, err, ok := newParser("", c.AnyRune()).Next()
	if ok {
		t.Fatalf("Parsing unexpectedly succeeded")
	}
	if actual := err.Error(); actual != expected {
		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, actual)
	}
}
// TestMatchAnyRune_AtInvalidUtf8Rune checks the error for AnyRune on
// input that starts with an invalid UTF8 byte.
func TestMatchAnyRune_AtInvalidUtf8Rune(t *testing.T) {
	const expected = "invalid UTF8 character in input (expected MATCH)"
	_, err, ok := newParser("\xcd", c.AnyRune()).Next()
	if ok {
		t.Fatalf("Parsing unexpectedly succeeded")
	}
	if actual := err.Error(); actual != expected {
		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, actual)
	}
}
// TestMatchRune checks that Rune matches exactly one occurrence of its rune.
func TestMatchRune(t *testing.T) {
	parser := newParser("xxx", c.Rune('x'))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if item.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if got := item.Value; got != "x" {
		t.Errorf("Parser item value is %q instead of expected \"x\"", got)
	}
}
// TestMatchRune_OnMismatch checks the error that is reported when the
// input does not start with the rune to match.
func TestMatchRune_OnMismatch(t *testing.T) {
	p := newParser("x ", c.Rune(' '))
	_, err, ok := p.Next()
	if ok {
		// Same wording as the sibling tests use for an unwanted success.
		t.Fatalf("Parsing unexpectedly succeeded")
	}
	expected := "unexpected character 'x' (expected MATCH)"
	if err.Error() != expected {
		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
	}
}
// TestMatchRuneRange checks that RuneRange accepts every rune from 'b'
// up to 'y' and rejects the neighbouring runes 'a' and 'z'.
func TestMatchRuneRange(t *testing.T) {
	inRange := c.RuneRange('b', 'y')
	input := "mnopqrstuvwxybcdefghijkl"
	parser := newParser(input, inRange)
	for cycle := 1; cycle <= len(input); cycle++ {
		item, err, ok := parser.Next()
		if !ok {
			t.Fatalf("Parsing failed: %s", err)
		}
		want, got := input[cycle-1], item.Value[0]
		if want != got {
			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", cycle, want, got)
		}
	}
	_, _, okBelow := newParser("a", inRange).Next()
	if okBelow {
		t.Fatalf("Unexpected parse success for input 'a'")
	}
	_, _, okAbove := newParser("z", inRange).Next()
	if okAbove {
		t.Fatalf("Unexpected parse success for input 'z'")
	}
}
// TestMatchString checks that String matches a literal prefix of the input.
func TestMatchString(t *testing.T) {
	parser := newParser("Hello, world!", c.String("Hello"))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if item.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if got := item.Value; got != "Hello" {
		t.Errorf("Parser item value is %q instead of expected \"Hello\"", got)
	}
}
// TestMatchStringNoCase checks that StringNoCase ignores case, also for
// a non-ASCII rune, and yields the input as it was written.
func TestMatchStringNoCase(t *testing.T) {
	parser := newParser("HellÖ, world!", c.StringNoCase("hellö"))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if item.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if got := item.Value; got != "HellÖ" {
		t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", got)
	}
}
// TestMatchRunes checks that Runes accepts each of the listed runes and
// rejects runes that are not listed.
func TestMatchRunes(t *testing.T) {
	operators := c.Runes('+', '-', '*', '/')
	input := "-+/*+++"
	parser := newParser(input, operators)
	for cycle := 1; cycle <= len(input); cycle++ {
		item, err, ok := parser.Next()
		if !ok {
			t.Fatalf("Parsing failed: %s", err)
		}
		want, got := input[cycle-1], item.Value[0]
		if want != got {
			t.Fatalf("Unexpected parse output on cycle %d:\nexpected: %q\nactual: %q\n", cycle, want, got)
		}
	}
	_, _, okCaret := newParser("^", operators).Next()
	if okCaret {
		t.Fatalf("Unexpected parse success for input '^'")
	}
	_, _, okLetter := newParser("x", operators).Next()
	if okLetter {
		t.Fatalf("Unexpected parse success for input 'x'")
	}
}
// TestMatchNot checks that Not matches a rune that the wrapped Matcher
// does not match.
func TestMatchNot(t *testing.T) {
	parser := newParser("aabc", c.Not(c.Rune('b')))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if got := item.Value; got != "a" {
		t.Errorf("Parser item value is %q instead of expected \"a\"", got)
	}
}
// TestMatchNot_Mismatch checks the error that is reported when the
// Matcher that is wrapped by Not does match the input.
func TestMatchNot_Mismatch(t *testing.T) {
	const expected = "unexpected character 'a' (expected MATCH)"
	_, err, ok := newParser("aabc", c.Not(c.Rune('a'))).Next()
	if ok {
		t.Fatalf("Parsing unexpectedly succeeded")
	}
	if actual := err.Error(); actual != expected {
		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, actual)
	}
}
// TestMatchAnyOf checks that AnyOf matches either alternative: the
// first two items of input "abc" must be "a" and then "b".
func TestMatchAnyOf(t *testing.T) {
	p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
	r, err, ok := p.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if r.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if r.Value != "a" {
		t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
	}

	r, err, ok = p.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s", err)
	}
	if r.Type != TestItem {
		t.Error("Parser item type not expected TestTitem")
	}
	if r.Value != "b" {
		// The failure message names the value that is actually checked.
		t.Errorf("Parser item value is %q instead of expected \"b\"", r.Value)
	}
}
// TestMatchRepeat checks that Repeat matches exactly the requested
// number of occurrences.
func TestMatchRepeat(t *testing.T) {
	parser := newParser("xxxxyyyy", c.Repeat(4, c.Rune('x')))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := item.Value; got != "xxxx" {
		t.Errorf("Parser item value is %q instead of expected \"xxxx\"", got)
	}
}
// TestMatchRepeat_Min checks that Min takes all available matches once
// the minimum is reached.
func TestMatchRepeat_Min(t *testing.T) {
	item, _, _ := newParser("1111112345", c.Min(4, c.Rune('1'))).Next()
	if got := item.Value; got != "111111" {
		t.Errorf("Parser item value is %q instead of expected \"111111\"", got)
	}
}
// TestMatchRepeat_Max checks that Max stops at the maximum number of
// matches, even when more are available.
func TestMatchRepeat_Max(t *testing.T) {
	item, _, _ := newParser("1111112345", c.Max(4, c.Rune('1'))).Next()
	if got := item.Value; got != "1111" {
		t.Errorf("Parser item value is %q instead of expected \"1111\"", got)
	}
}
// TestMatchRepeat_Bounded checks that Bounded takes matches up to the
// upper bound.
func TestMatchRepeat_Bounded(t *testing.T) {
	item, _, _ := newParser("1111112345", c.Bounded(3, 5, c.Rune('1'))).Next()
	if got := item.Value; got != "11111" {
		t.Errorf("Parser item value is %q instead of expected \"11111\"", got)
	}
}
// TestMatchRepeat_Mismatch checks the error that is reported when the
// input holds fewer occurrences than Repeat requires.
func TestMatchRepeat_Mismatch(t *testing.T) {
	p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
	_, err, ok := p.Next()
	if ok {
		// Same wording as the sibling tests use for an unwanted success.
		t.Fatalf("Parsing unexpectedly succeeded")
	}
	expected := "unexpected character 'x' (expected MATCH)"
	if err.Error() != expected {
		t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
	}
}
// TestMatchOneOrMore checks that OneOrMore takes all consecutive matches.
func TestMatchOneOrMore(t *testing.T) {
	parser := newParser("xxxxxxxxyyyy", c.OneOrMore(c.Rune('x')))
	item, err, ok := parser.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := item.Value; got != "xxxxxxxx" {
		t.Errorf("Parser item value is %q instead of expected \"xxxxxxxx\"", got)
	}
}
func TestMatchSequence(t *testing.T) {
p := newParser("10101", c.Sequence(c.Rune('1'), c.Rune('0')))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "10" {
t.Errorf("Parser item value is %q instead of expected \"10\"", r.Value)
}
}
func TestMatchSequence_CombinedWithOneOrMore(t *testing.T) {
p := newParser("101010987", c.OneOrMore(c.Sequence(c.Rune('1'), c.Rune('0'))))
r, err, ok := p.Next()
if !ok {
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
}
if r.Value != "101010" {
t.Errorf("Parser item value is %q instead of expected \"101010\"", r.Value)
}
}
// TestSequence_WithRepeatedRunes checks a Sequence that is built from
// optional whitespace around an equal sign.
func TestSequence_WithRepeatedRunes(t *testing.T) {
	spaces := c.Optional(c.OneOrMore(c.Rune(' ')))
	assignment := c.Sequence(spaces, c.Rune('='), spaces)
	item, err, ok := newParser(" == 10", assignment).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := item.Value; got != " =" {
		t.Errorf("Parser item value is %q instead of expected \" =\"", got)
	}
}
// TestMatchOptional checks that Optional yields the match when the
// wrapped Matcher matches, and an empty match when it does not.
func TestMatchOptional(t *testing.T) {
	present, err, ok := newParser("xyz", c.Optional(c.Rune('x'))).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := present.Value; got != "x" {
		t.Errorf("Parser item value is %q instead of expected \"x\"", got)
	}

	absent, err, ok := newParser("xyz", c.Optional(c.Rune('y'))).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := absent.Value; got != "" {
		t.Errorf("Parser item value is %q instead of expected \"\"", got)
	}
}
// TestMatchDrop checks that input matched through Drop is consumed,
// but left out of the emitted value.
func TestMatchDrop(t *testing.T) {
	dashes := c.OneOrMore(c.Rune('-'))
	p := newParser("---X---", c.Sequence(c.Drop(dashes), c.AnyRune(), c.Drop(dashes)))
	r, err, ok := p.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if r.Value != "X" {
		// The failure message names the value that is actually checked.
		t.Errorf("Parser item value is %q instead of expected \"X\"", r.Value)
	}
}
// TestMatchSeparated checks that Separated matches a list of numbers
// with mixed separators and stops before the first non-number.
func TestMatchSeparated(t *testing.T) {
	digits := c.Bounded(1, 3, c.RuneRange('0', '9'))
	list := c.Separated(c.Runes('|', ';', ','), digits)
	item, err, ok := newParser("1,2;3|44,55|66;777,abc", list).Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if got := item.Value; got != "1,2;3|44,55|66;777" {
		t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", got)
	}
}
// TestMixAndMatch combines several Matchers to match four hexadecimal
// escapes of the form \xHH at the start of the input.
func TestMixAndMatch(t *testing.T) {
	hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
	backslash := c.Rune('\\')
	x := c.Rune('x')
	hexbyte := c.Sequence(backslash, x, c.Repeat(2, hex))

	p := newParser(`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.Repeat(4, hexbyte))
	r, err, ok := p.Next()
	if !ok {
		t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Line, err.Column)
	}
	if r.Value != `\x9a\x01\xF0\xfC` {
		// %q adds the quotes itself; extra quotes would double them.
		t.Errorf("Parser item value is %q instead of expected %q", r.Value, `\x9a\x01\xF0\xfC`)
	}
}

View File

@ -1,43 +0,0 @@
package parsekit
import (
"unicode/utf8"
)
// peek decodes the rune at the given byte offset from the current
// position, without moving the cursor.
// Returns the rune, its width in bytes and a boolean.
// The boolean is false when no rune could be peeked (end of data or
// an invalid UTF8 character), as reported by handleRuneError.
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
	remaining := p.input[p.pos+offsetInBytes:]
	return handleRuneError(utf8.DecodeRuneInString(remaining))
}
// handleRuneError normalizes a decoded rune and width in case of errors.
// A decoding error shows up as utf8.RuneError in the rune, which can
// mean one of two things:
// * w == 0: the end of the file was reached
// * otherwise: there is an invalid UTF8 character on input
// For these cases, the package's own EOF or INVALID rune is returned
// with a false boolean, so client code can tell them apart easily.
// Any other rune is returned as-is, with a true boolean.
//
// NOTE(review): utf8.DecodeRuneInString also yields RuneError (with
// w == 3) for a correctly encoded U+FFFD; such input is reported as
// INVALID here - confirm that this is intended.
func handleRuneError(r rune, w int) (rune, int, bool) {
	switch {
	case r != utf8.RuneError:
		return r, w, true
	case w == 0:
		return EOF, 0, false
	default:
		return INVALID, w, false
	}
}

const (
	// EOF is a special rune, which is used to indicate an end of file
	// when reading a character from the input.
	// It can be treated as a rune when writing parsing rules, so a valid
	// way to say 'I now expect the end of the file' is something like:
	//   if (p.On(c.Rune(EOF)).Skip()) { ... }
	EOF rune = -1

	// INVALID is a special rune, which is used to indicate an invalid
	// UTF8 rune on the input.
	INVALID rune = utf8.RuneError
)

View File

@ -1,107 +0,0 @@
package parsekit
import (
"fmt"
"strings"
)
// ItemType represents the type of a parser Item.
type ItemType int

const (
	// ItemEOF is a built-in parser item type that flags that the end
	// of the input was reached.
	ItemEOF ItemType = -1

	// ItemError is a built-in parser item type that flags that an
	// error has occurred during parsing.
	ItemError ItemType = -2
)

// Item represents an item that can be emitted from the parser:
// an item type combined with a string value.
type Item struct {
	Type  ItemType
	Value string
}
// Emit sends an Item with the provided type and string value to the
// items channel, and resets the string buffer afterwards.
func (p *P) Emit(t ItemType, s string) {
	item := Item{Type: t, Value: s}
	p.items <- item
	p.buffer.reset()
}
// EmitLiteral emits a Parser item of the provided type, with the
// accumulated string buffer data as a literal string for its value.
func (p *P) EmitLiteral(t ItemType) {
	literal := p.buffer.asLiteralString()
	p.Emit(t, literal)
}
// EmitLiteralTrim emits a Parser item of the provided type, with the
// accumulated string buffer data as a literal string for its value,
// after trimming leading and trailing whitespace from it.
func (p *P) EmitLiteralTrim(t ItemType) {
	literal := p.buffer.asLiteralString()
	p.Emit(t, strings.TrimSpace(literal))
}
// EmitInterpreted emits a Parser item of the provided type, with the
// accumulated string buffer data interpreted as a Go double quoted
// string (handling escape codes like \n, \t, \uXXXX, etc.)
// When the buffer data cannot be interpreted, the error is returned
// and no item is emitted.
func (p *P) EmitInterpreted(t ItemType) error {
	interpreted, err := p.buffer.asInterpretedString()
	if err != nil {
		return err
	}
	p.Emit(t, interpreted)
	return nil
}
// Error is used as the error type when parsing errors occur.
// Besides the message, it carries the line and column of the cursor,
// to allow for useful error messages to the user.
type Error struct {
	Message string
	Line    int
	Column  int
}

// Error returns the error message, without position information.
// It panics when it is called on a nil *Error.
func (err *Error) Error() string {
	if err != nil {
		return err.Message
	}
	panic("Error method called on the parser, but no error was set")
}

// ErrorFull returns the error message, followed by the line and
// column that are stored in the error.
func (err *Error) ErrorFull() string {
	return fmt.Sprintf("%s after line %d, column %d", err.Error(), err.Line, err.Column)
}
// EmitError emits a Parser item of type ItemError to the client. The
// value of the item is built from the format and args, Sprintf-style.
func (p *P) EmitError(format string, args ...interface{}) {
	p.Emit(ItemError, fmt.Sprintf(format, args...))
}
// UnexpectedInput is used by a parser implementation to emit an
// error item that tells the client that unexpected input was
// encountered. The upcoming rune is peeked to pick the message:
// an unexpected character, an unexpected end of file or an invalid
// UTF8 character. The active expectation, if any, is appended.
func (p *P) UnexpectedInput() {
	r, _, ok := p.peek(0)
	if ok {
		p.EmitError("unexpected character %q%s", r, fmtExpects(p))
		return
	}
	switch r {
	case EOF:
		p.EmitError("unexpected end of file%s", fmtExpects(p))
	case INVALID:
		p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
	default:
		panic("Unhandled output from peek()")
	}
}
// fmtExpects formats the active expectation of the parser as a suffix
// for error messages. It returns an empty string when no expectation
// is set.
func fmtExpects(p *P) string {
	if p.expecting != "" {
		return fmt.Sprintf(" (expected %s)", p.expecting)
	}
	return ""
}

View File

@ -1,15 +0,0 @@
package parsekit
// Expects lets a state function describe what input it is expecting.
// The description is used in error messages to make them more
// descriptive.
//
// With an expectation in place, a StateHandler does not need to handle
// unexpected input itself. When the handler returns without setting the
// next state, an error is emitted automatically. That error tells
// these situations apart:
// * there is valid data on input, but it was not accepted by the function
// * there is an invalid UTF8 character on input
// * the end of the file was reached
func (p *P) Expects(description string) {
	p.expecting = description
}

View File

@ -1,58 +0,0 @@
package parsekit
// On checks if the current input matches the provided Matcher.
//
// This method is the start of a method chain, in which multiple things
// can be arranged in one go:
//
// * Checking whether or not there is a match (this is what On does)
// * Deciding what to do with the match (Stay(): do nothing, Skip(): only
//   move the cursor forward, Accept(): move the cursor forward and add
//   the match to the parser string buffer)
// * Deciding where to route to (e.g. using RouteTo() to route to a
//   StateHandler by name)
// * Followup routing after that, when applicable (e.g. using something
//   like RouteTo(...).ThenTo(...))
//
// Every step of this chain can be ended using the End() method, which
// returns a boolean that tells whether or not the initial On() found a
// match in the input. End() is not mandatory. It is merely a means to
// use a chain as the boolean expression of a switch/case or if statement.
//
// The "what to do with the match" step can be omitted, by going straight
// into a routing method, e.g. On(...).RouteTo(...). This is functionally
// the same as On(...).Stay().RouteTo(...).
//
// Here's a complete example chain:
//   p.On(something).Accept().RouteTo(stateB).ThenTo(stateC).End()
func (p *P) On(Matcher Matcher) *matchAction {
	dialog := &MatchDialog{p: p}
	matched := Matcher.Match(dialog)

	// Remember the last match, so parser implementations can get to it
	// in an easy way. Typical use would be something like:
	//   if p.On(somethingBad).End() {
	//       p.EmitError("This was bad: %s", p.LastMatch)
	//   }
	p.LastMatch = string(dialog.runes)

	action := &matchAction{runes: dialog.runes, widths: dialog.widths}
	action.routeAction = routeAction{chainAction{p, matched}}
	return action
}
// chainAction is the base building block for the method chains that
// start with the On() method. It holds the parser and the outcome of
// the match.
type chainAction struct {
	p  *P
	ok bool
}

// End closes the method chain. It returns true when the On() method
// that started the chain found a match in the input, false otherwise.
func (a *chainAction) End() bool {
	matched := a.ok
	return matched
}

View File

@ -1,64 +0,0 @@
package parsekit
// matchAction is a struct that is used for building On()-method chains.
//
// It embeds the routeAction struct, to make it possible to go right into
// a route action, which is basically a simple way of aliasing a chain
// like p.On(...).Stay().RouteTo(...) into p.On(...).RouteTo(...).
type matchAction struct {
routeAction
// runes holds the runes that On() took over from the MatchDialog.
runes []rune
// widths holds, for every rune in runes, the amount by which the input
// position is advanced for that rune (widths[i] belongs to runes[i]).
widths []int
}
// Accept consumes the match that was found: every matched rune is written
// to the parser string buffer and the cursor is moved past it.
// Without a match, the parser is left untouched.
// The returned routeAction carries the match result along and provides the
// methods for telling the parser what state to go to next.
func (a *matchAction) Accept() *routeAction {
	if !a.ok {
		return &routeAction{chainAction: chainAction{a.p, false}}
	}
	for i, r := range a.runes {
		a.p.buffer.writeRune(r)
		a.p.advanceCursor(r, a.widths[i])
	}
	return &routeAction{chainAction: chainAction{a.p, true}}
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the string buffer.
// When no match was found, then no action is taken.
// It returns a routeAction struct, which carries the match result along and
// provides methods that can be used to tell the parser what state to go to
// next.
func (a *matchAction) Skip() *routeAction {
	if a.ok {
		// Only the cursor is advanced; unlike Accept(), the runes are not
		// written to the string buffer.
		for i, r := range a.runes {
			a.p.advanceCursor(r, a.widths[i])
		}
	}
	return &routeAction{chainAction: chainAction{a.p, a.ok}}
}
// Stay leaves the cursor where it is after finding a match: nothing is
// consumed and nothing is stored in the string buffer.
// It returns a routeAction struct that carries the match result along, so
// the chain can continue with a routing decision.
func (a *matchAction) Stay() *routeAction {
	unchanged := chainAction{p: a.p, ok: a.ok}
	return &routeAction{chainAction: unchanged}
}
// advanceCursor moves the cursor past one rune of the input data.
// The input position is increased by the provided width w, and the
// line + column bookkeeping is updated for reporting positions on error.
//
// The line counter lags one rune behind on purpose: consuming a '\n' only
// raises the newline flag. The line number is incremented (and the column
// reset to 1) when the rune that follows the newline is consumed.
func (p *P) advanceCursor(r rune, w int) {
	p.pos += w
	switch {
	case p.newline:
		// The previous rune ended a line, so this rune starts a new one.
		p.cursorLine++
		p.cursorColumn = 1
	default:
		p.cursorColumn++
	}
	p.newline = (r == '\n')
}

View File

@ -1,59 +0,0 @@
package parsekit
// routeAction is a struct that is used for building On() method chains.
// It represents the routing step of a chain: its methods RouteTo(),
// RouteRepeat() and RouteReturn() only forward to the parser when the
// embedded chainAction reports a match (ok is true).
type routeAction struct {
chainAction
}
// RouteRepeat schedules the current StateHandler to be invoked again on the
// next parsing cycle, but only when the chain found a match.
// Without a match, no routing is done and the returned chainAction reports
// false from End().
func (a *routeAction) RouteRepeat() *chainAction {
	if !a.ok {
		return &chainAction{nil, false}
	}
	return a.p.RouteRepeat()
}
// RouteTo schedules the provided StateHandler function for the next
// parsing cycle, but only when the chain found a match.
// Without a match, no routing is done and the returned routeFollowupAction
// is inactive: its followup methods do nothing and End() reports false.
func (a *routeAction) RouteTo(state StateHandler) *routeFollowupAction {
	if !a.ok {
		return &routeFollowupAction{chainAction: chainAction{nil, false}}
	}
	return a.p.RouteTo(state)
}
// RouteReturn tells the parser to invoke the next scheduled route on the
// next cycle, but only when the chain found a match.
// Without a match, no routing is done and the returned chainAction reports
// false from End().
func (a *routeAction) RouteReturn() *chainAction {
	if !a.ok {
		return &chainAction{nil, false}
	}
	return a.p.RouteReturn()
}
// routeFollowupAction chains parsing routes.
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
// Its followup methods only schedule a route when the embedded chainAction
// has ok set to true.
type routeFollowupAction struct {
chainAction
}
// ThenTo pushes a StateHandler on the route stack, to have it invoked after
// the StateHandler from the preceding RouteTo has been completed.
// Nothing is scheduled when the chain is inactive (no match was found).
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
func (a *routeFollowupAction) ThenTo(state StateHandler) *chainAction {
	active := a.ok
	if active {
		a.p.pushRoute(state)
	}
	return &chainAction{p: nil, ok: active}
}
// ThenReturnHere pushes the current StateHandler on the route stack, to
// have it invoked again after the StateHandler from the preceding RouteTo
// has been completed.
// Nothing is scheduled when the chain is inactive (no match was found).
// For example: p.RouteTo(handlerA).ThenReturnHere()
func (a *routeFollowupAction) ThenReturnHere() *chainAction {
	active := a.ok
	if active {
		a.p.pushRoute(a.p.state)
	}
	return &chainAction{p: nil, ok: active}
}

View File

@ -1,42 +0,0 @@
package parsekit
// RouteTo registers the StateHandler function that must be invoked in the
// next parsing cycle.
// The returned routeFollowupAction is always active, and can be used to
// schedule a followup route (e.g. p.RouteTo(handlerA).ThenTo(handlerB)).
func (p *P) RouteTo(state StateHandler) *routeFollowupAction {
	p.nextState = state
	chain := chainAction{p: p, ok: true}
	return &routeFollowupAction{chainAction: chain}
}
// RouteRepeat routes to the current StateHandler, so that it is invoked
// once more on the next parsing cycle.
// The returned chainAction always reports true from End().
func (p *P) RouteRepeat() *chainAction {
	current := p.state
	p.RouteTo(current)
	return &chainAction{p: nil, ok: true}
}
// RouteReturn pops the StateHandler that was most recently pushed on the
// route stack, and schedules it for the next parsing cycle.
//
// Calling this method is optional. A StateHandler that is used as a sort
// of subroutine (using constructions like
// p.RouteTo(subroutine).ThenReturnHere()) may leave out an explicit routing
// decision. The parser is expected to assume a RouteReturn() in that case
// (that fallback is handled outside of this method).
//
// The returned chainAction always reports true from End().
func (p *P) RouteReturn() *chainAction {
	scheduled := p.popRoute()
	p.nextState = scheduled
	return &chainAction{p: nil, ok: true}
}
// pushRoute adds the StateHandler to the route stack.
// This is used for implementing nested parsing.
func (p *P) pushRoute(state StateHandler) {
// The top of the stack is the end of the slice, which is where popRoute
// takes its StateHandler from.
p.routeStack = append(p.routeStack, state)
}
// popRoute removes the most recently pushed StateHandler from the route
// stack and returns it.
//
// NOTE: the route stack must not be empty. Popping from an empty stack
// results in a runtime panic, because of the out of range slice bound.
func (p *P) popRoute() StateHandler {
	top := len(p.routeStack) - 1
	remaining := p.routeStack[:top]
	handler := p.routeStack[top]
	p.routeStack = remaining
	return handler
}

View File

@ -1,62 +0,0 @@
package parsekit
import (
"bytes"
"strconv"
"strings"
)
// stringBuffer accumulates runes from the parser input, to eventually turn
// them into a string. The accumulated data can be retrieved either as-is
// (literal) or with escape sequences processed (interpreted).
// Retrieving a string does not modify the buffer.
type stringBuffer struct {
	buffer bytes.Buffer
}

// reset empties the string buffer, in order to start building a new string.
// It returns the stringBuffer itself, to allow for call chaining.
func (b *stringBuffer) reset() *stringBuffer {
	b.buffer.Reset()
	return b
}

// writeString appends the input string to the string buffer, one rune
// at a time. It returns the stringBuffer itself, to allow for call chaining.
func (b *stringBuffer) writeString(s string) *stringBuffer {
	for _, r := range s {
		b.buffer.WriteRune(r)
	}
	return b
}

// writeRune appends a single rune to the string buffer.
// It returns the stringBuffer itself, to allow for call chaining.
func (b *stringBuffer) writeRune(r rune) *stringBuffer {
	b.buffer.WriteRune(r)
	return b
}

// asLiteralString returns the contents of the string buffer exactly as they
// were written to it. No escape sequences are processed.
func (b *stringBuffer) asLiteralString() string {
	return b.buffer.String()
}

// asInterpretedString returns the contents of the string buffer with escape
// sequences processed in the way that Go would process them inside a double
// quoted string. Escape sequences like "\n", "\t", "\uXXXX" and
// "\UXXXXXXXX" are translated into the characters that they represent.
//
// The buffer might hold input that cannot be interpreted (an invalid escape
// sequence or a bare double quote). In that case an error is returned,
// together with the string as far as it could be interpreted.
func (b *stringBuffer) asInterpretedString() (string, error) {
	var out strings.Builder
	for rest := b.buffer.String(); rest != ""; {
		r, _, remaining, err := strconv.UnquoteChar(rest, '"')
		if err != nil {
			return out.String(), err
		}
		out.WriteRune(r)
		rest = remaining
	}
	return out.String(), nil
}

View File

@ -1,88 +0,0 @@
package parsekit
import (
"testing"
)
// TestGeneratingStringDoesNotResetBuffer checks that retrieving a string
// from the stringBuffer leaves the buffer contents intact: after retrieving
// the interpreted string, the literal string must still be available.
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
	var b stringBuffer
	s1, err := b.writeString(`hi\nthere`).asInterpretedString()
	if err != nil {
		t.Fatalf("Unexpected error for try 1: %s", err)
	}
	s2 := b.asLiteralString()
	if s1 != "hi\nthere" {
		t.Fatalf("Did not get expected string %q for try 1, but %q", "hi\nthere", s1)
	}
	if s2 != "hi\\nthere" {
		t.Fatalf("Did not get expected string %q for try 2, but %q", "hi\\nthere", s2)
	}
}
// TestResetResetsBuffer checks that reset() discards the data that was
// written to the stringBuffer before.
func TestResetResetsBuffer(t *testing.T) {
	var b stringBuffer
	b.writeRune('X')
	b.reset()
	if s := b.asLiteralString(); s != "" {
		t.Fatalf("Did not get expected empty string, but %q", s)
	}
}
func TestAsLiteralString(t *testing.T) {
b := stringBuffer{}
for _, c := range []stringbufT{
{"empty string", ``, ``, OK},
{"simple string", `Simple string!`, `Simple string!`, OK},
{"single quote", `'`, `'`, OK},
{"double quote", `"`, `"`, OK},
{"escaped single quote", `\'`, `\'`, OK},
{"escaped double quote", `\"`, `\"`, OK},
{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
} {
s := b.reset().writeString(c.in).asLiteralString()
if s != c.out {
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
}
}
}
func TestAsInterpretedString(t *testing.T) {
b := stringBuffer{}
for _, c := range []stringbufT{
{"empty string", "", "", OK},
{"one character", "Simple string!", "Simple string!", OK},
{"escaped single quote", `\'`, "", FAIL},
{"escaped double quote", `\"`, `"`, OK},
{"bare single quote", `'`, "'", OK},
{"string in single quotes", `'Hello'`, `'Hello'`, OK},
{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
{"example from spec",
`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
} {
s, err := b.reset().writeString(c.in).asInterpretedString()
if c.isSuccessCase && err != nil {
t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
}
if !c.isSuccessCase && err == nil {
t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
}
if s != c.out && c.isSuccessCase {
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
}
}
}
// stringbufT describes a single case for the table-driven stringBuffer
// tests in this file.
type stringbufT struct {
// name identifies the test case in failure messages.
name string
// in is the string that is written to the stringBuffer.
in string
// out is the string that is expected to be returned for the input.
out string
// isSuccessCase tells whether the case must succeed (OK) or must result
// in an error (FAIL).
isSuccessCase bool
}
// OK and FAIL are readable values for the isSuccessCase field of stringbufT.
const (
// OK marks a test case that must succeed.
OK bool = true
// FAIL marks a test case that must result in an error.
FAIL bool = false
)

7
toml.code-workspace Normal file
View File

@ -0,0 +1,7 @@
{
"folders": [
{
"path": "."
}
]
}

View File

@ -1,6 +1,6 @@
package parser
import "github.com/mmakaay/toml/parsekit"
import "git.makaay.nl/mauricem/go-parsekit"
// Item types that are produced by this parser.
const (
@ -13,20 +13,20 @@ const (
var (
c = parsekit.C
space = c.Rune(' ')
tab = c.Rune('\t')
carriageReturn = c.Rune('\r')
lineFeed = c.Rune('\n')
hash = c.Rune('#')
underscore = c.Rune('_')
dash = c.Rune('-')
equal = c.Rune('=')
dot = c.Rune('.')
singleQuote = c.Rune('\'')
doubleQuote = c.Rune('"')
space = parsekit.MatchRune(' ')
tab = parsekit.MatchRune('\t')
carriageReturn = parsekit.MatchRune('\r')
lineFeed = parsekit.MatchRune('\n')
hash = parsekit.MatchRune('#')
underscore = parsekit.MatchRune('_')
dash = parsekit.MatchRune('-')
equal = parsekit.MatchRune('=')
dot = parsekit.MatchRune('.')
singleQuote = parsekit.MatchRune('\'')
doubleQuote = parsekit.MatchRune('"')
anyRune = c.AnyRune()
anyQuote = c.AnyOf(singleQuote, doubleQuote)
backslash = c.Rune('\\')
backslash = parsekit.MatchRune('\\')
asciiLower = c.RuneRange('a', 'z')
asciiUpper = c.RuneRange('A', 'Z')
digit = c.RuneRange('0', '9')

View File

@ -3,7 +3,7 @@ package parser_test
import (
"testing"
"github.com/mmakaay/toml/parser"
"git.makaay.nl/mauricem/go-toml"
)
func TestEmptyInput(t *testing.T) {

View File

@ -1,6 +1,6 @@
package parser
import "github.com/mmakaay/toml/parsekit"
import "git.makaay.nl/mauricem/go-parsekit"
var (
// There are four ways to express strings: basic, multi-line basic,