Added a load of parser/combinator implementation, the system seems feasible!
This commit is contained in:
parent
3677ab18cb
commit
d9d837fe6e
|
@ -5,26 +5,45 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ItemType represents the type of a parser Item.
|
||||||
|
type ItemType int
|
||||||
|
|
||||||
|
// TODO private?
|
||||||
|
// ItemEOF is a built-in parser item type that is used for flagging that the
|
||||||
|
// end of the input was reached.
|
||||||
|
const ItemEOF ItemType = -1
|
||||||
|
|
||||||
|
// TODO private?
|
||||||
|
// ItemError is a built-in parser item type that is used for flagging that
|
||||||
|
// an error has occurred during parsing.
|
||||||
|
const ItemError ItemType = -2
|
||||||
|
|
||||||
|
// Item represents an item that can be emitted from the parser.
|
||||||
|
type Item struct {
|
||||||
|
Type ItemType
|
||||||
|
Value string
|
||||||
|
}
|
||||||
|
|
||||||
// Emit passes a Parser item to the client, including the provided string.
|
// Emit passes a Parser item to the client, including the provided string.
|
||||||
func (p *P) Emit(t ItemType, s string) {
|
func (p *P) Emit(t ItemType, s string) {
|
||||||
p.items <- Item{t, s}
|
p.items <- Item{t, s}
|
||||||
p.buffer.reset()
|
p.buffer.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitLiteral passes a Parser item to the client, including the accumulated
|
// EmitLiteral passes a Parser item to the client, including accumulated
|
||||||
// string buffer data as a literal string.
|
// string buffer data as a literal string.
|
||||||
func (p *P) EmitLiteral(t ItemType) {
|
func (p *P) EmitLiteral(t ItemType) {
|
||||||
p.Emit(t, p.buffer.asLiteralString())
|
p.Emit(t, p.buffer.asLiteralString())
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitLiteralTrim passes a Parser item to the client, including the
|
// EmitLiteralTrim passes a Parser item to the client, including
|
||||||
// accumulated string buffer data as a literal string with whitespace
|
// accumulated string buffer data as a literal string with whitespace
|
||||||
// trimmed from it.
|
// trimmed from it.
|
||||||
func (p *P) EmitLiteralTrim(t ItemType) {
|
func (p *P) EmitLiteralTrim(t ItemType) {
|
||||||
p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
|
p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// EmitInterpreted passes a Parser item to the client, including the
|
// EmitInterpreted passes a Parser item to the client, including
|
||||||
// accumulated string buffer data as a Go double quoted interpreted string
|
// accumulated string buffer data as a Go double quoted interpreted string
|
||||||
// (handling escape codes like \n, \t, \uXXXX, etc.)
|
// (handling escape codes like \n, \t, \uXXXX, etc.)
|
||||||
// This method might return an error, in case there is data in the
|
// This method might return an error, in case there is data in the
|
||||||
|
@ -38,6 +57,19 @@ func (p *P) EmitInterpreted(t ItemType) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Error is used as the error type when parsing errors occur.
|
||||||
|
// The error includes some extra meta information to allow for useful
|
||||||
|
// error messages to the user.
|
||||||
|
type Error struct {
|
||||||
|
Message string
|
||||||
|
Row int
|
||||||
|
Column int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *Error) Error() string {
|
||||||
|
return err.Message
|
||||||
|
}
|
||||||
|
|
||||||
// EmitError emits a Parser error item to the client.
|
// EmitError emits a Parser error item to the client.
|
||||||
func (p *P) EmitError(format string, args ...interface{}) {
|
func (p *P) EmitError(format string, args ...interface{}) {
|
||||||
message := fmt.Sprintf(format, args...)
|
message := fmt.Sprintf(format, args...)
|
||||||
|
@ -51,17 +83,17 @@ func (p *P) UnexpectedInput() {
|
||||||
r, _, ok := p.peek(0)
|
r, _, ok := p.peek(0)
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case ok:
|
||||||
p.EmitError("unexpected character %q%s", r, p.fmtExpects())
|
p.EmitError("unexpected character %q%s", r, fmtExpects(p))
|
||||||
case r == EOF:
|
case r == EOF:
|
||||||
p.EmitError("unexpected end of file%s", p.fmtExpects())
|
p.EmitError("unexpected end of file%s", fmtExpects(p))
|
||||||
case r == INVALID:
|
case r == INVALID:
|
||||||
p.EmitError("invalid UTF8 character in input%s", p.fmtExpects())
|
p.EmitError("invalid UTF8 character in input%s", fmtExpects(p))
|
||||||
default:
|
default:
|
||||||
panic("Unhandled output from peek()")
|
panic("Unhandled output from peek()")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) fmtExpects() string {
|
func fmtExpects(p *P) string {
|
||||||
if p.expecting == "" {
|
if p.expecting == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// P holds the internal state of the parser.
|
|
||||||
type P struct {
|
|
||||||
state StateFn // the function that handles the current state
|
|
||||||
nextState StateFn // the function that will handle the next state
|
|
||||||
stack []StateFn // state function stack, for nested parsing
|
|
||||||
input string // the scanned input
|
|
||||||
len int // the total length of the input in bytes
|
|
||||||
pos int // current byte scanning position in the input
|
|
||||||
newline bool // keep track of when we have scanned a newline
|
|
||||||
cursorRow int // current row number in the input
|
|
||||||
cursorColumn int // current column position in the input
|
|
||||||
expecting string // a description of what the current state expects to find
|
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values
|
|
||||||
items chan Item // channel of resulting Parser items
|
|
||||||
item Item // the current item as reached by Next() and retrieved by Get()
|
|
||||||
err *Error // an error when lexing failed, retrieved by Error()
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune(s) in the input.
|
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
|
||||||
// (end of data or invalid UTF8 character).
|
|
||||||
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
|
||||||
return handleRuneError(r, w)
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleRuneError is used to normalize the rune value in case of errors.
|
|
||||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
|
||||||
// This can however indicate one of two situations:
|
|
||||||
// * w == 0: end of file is reached
|
|
||||||
// * w == 1: invalid UTF character on input
|
|
||||||
// This function lets these two cases return respectively the
|
|
||||||
// package's own EOF or INVALID runes, to make it easy for client
|
|
||||||
// code to distinguish between these two cases.
|
|
||||||
func handleRuneError(r rune, w int) (rune, int, bool) {
|
|
||||||
if r == utf8.RuneError {
|
|
||||||
if w == 0 {
|
|
||||||
return EOF, 0, false
|
|
||||||
}
|
|
||||||
return INVALID, w, false
|
|
||||||
}
|
|
||||||
return r, w, true
|
|
||||||
}
|
|
||||||
|
|
||||||
// EOF is a special rune, which is used to indicate an end of file when
|
|
||||||
// reading a character from the input.
|
|
||||||
// It can be treated as a rune when writing parsing rules, so a valid way to
|
|
||||||
// say 'I now expect the end of the file' is using something like:
|
|
||||||
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
|
||||||
const EOF rune = -1
|
|
||||||
|
|
||||||
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
|
||||||
// rune on the input.
|
|
||||||
const INVALID rune = utf8.RuneError
|
|
||||||
|
|
||||||
// StateFn defines the type of function that can be used to
|
|
||||||
// handle a parser state.
|
|
||||||
type StateFn func(*P)
|
|
||||||
|
|
||||||
// ItemType represents the type of a parser Item.
|
|
||||||
type ItemType int
|
|
||||||
|
|
||||||
// ItemEOF is a built-in parser item type that is used for flagging that the
|
|
||||||
// end of the input was reached.
|
|
||||||
const ItemEOF ItemType = -1
|
|
||||||
|
|
||||||
// ItemError is a built-in parser item type that is used for flagging that
|
|
||||||
// an error has occurred during parsing.
|
|
||||||
const ItemError ItemType = -2
|
|
||||||
|
|
||||||
// Item represents an item returned from the parser.
|
|
||||||
type Item struct {
|
|
||||||
Type ItemType
|
|
||||||
Value string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Error is used as the error type when parsing errors occur.
|
|
||||||
// The error includes some extra meta information to allow for useful
|
|
||||||
// error messages to the user.
|
|
||||||
type Error struct {
|
|
||||||
Message string
|
|
||||||
Row int
|
|
||||||
Column int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (err *Error) Error() string {
|
|
||||||
return err.Message
|
|
||||||
}
|
|
|
@ -6,8 +6,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// Not in need of it myself, but nice to have I guess:
|
// Not in need of it myself, but nice to have I guess:
|
||||||
// - NotFollowedBy
|
// - LookAhead
|
||||||
// - Separated
|
|
||||||
|
|
||||||
// MatchDialog is used by Matcher implementations as a means
|
// MatchDialog is used by Matcher implementations as a means
|
||||||
// to retrieve data to match against and to report back
|
// to retrieve data to match against and to report back
|
||||||
|
@ -92,6 +91,7 @@ type Matcher interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type matcherConstructors struct {
|
type matcherConstructors struct {
|
||||||
|
EndOfFile func() MatchEndOfFile
|
||||||
Any func() MatchAny
|
Any func() MatchAny
|
||||||
Rune func(rune) MatchRune
|
Rune func(rune) MatchRune
|
||||||
RuneRange func(rune, rune) MatchRuneRange
|
RuneRange func(rune, rune) MatchRuneRange
|
||||||
|
@ -99,20 +99,28 @@ type matcherConstructors struct {
|
||||||
String func(string) MatchSequence
|
String func(string) MatchSequence
|
||||||
StringNoCase func(string) MatchSequence
|
StringNoCase func(string) MatchSequence
|
||||||
AnyOf func(...Matcher) MatchAnyOf
|
AnyOf func(...Matcher) MatchAnyOf
|
||||||
Repeat func(int, Matcher) MatchRepeat
|
Not func(Matcher) MatchNot
|
||||||
Sequence func(...Matcher) MatchSequence
|
|
||||||
ZeroOrMore func(Matcher) MatchZeroOrMore
|
|
||||||
OneOrMore func(Matcher) MatchOneOrMore
|
|
||||||
Optional func(Matcher) MatchOptional
|
Optional func(Matcher) MatchOptional
|
||||||
|
Sequence func(...Matcher) MatchSequence
|
||||||
|
Repeat func(int, Matcher) MatchRepeat
|
||||||
|
Min func(int, Matcher) MatchRepeat
|
||||||
|
Max func(int, Matcher) MatchRepeat
|
||||||
|
Bounded func(int, int, Matcher) MatchRepeat
|
||||||
|
ZeroOrMore func(Matcher) MatchRepeat
|
||||||
|
OneOrMore func(Matcher) MatchRepeat
|
||||||
|
Separated func(Matcher, Matcher) MatchSeparated
|
||||||
Drop func(Matcher) MatchDrop
|
Drop func(Matcher) MatchDrop
|
||||||
}
|
}
|
||||||
|
|
||||||
// C provides access to a wide range of parser/combinator
|
// C provides access to a wide range of parser/combinator
|
||||||
// constructors that can be used to build matching expressions.
|
// constructors that can be used to build matching expressions.
|
||||||
// When using C in your own parser, then it is advised to create
|
// When using C in your own parser, then it is advised to create
|
||||||
// an alias in your own package for easy reference:
|
// an alias in your own package for easy reference:
|
||||||
// var c = parsekit.C
|
// var c = parsekit.C
|
||||||
var C = matcherConstructors{
|
var C = matcherConstructors{
|
||||||
|
EndOfFile: func() MatchEndOfFile {
|
||||||
|
return MatchEndOfFile{}
|
||||||
|
},
|
||||||
Any: func() MatchAny {
|
Any: func() MatchAny {
|
||||||
return MatchAny{}
|
return MatchAny{}
|
||||||
},
|
},
|
||||||
|
@ -130,44 +138,73 @@ var C = matcherConstructors{
|
||||||
return MatchAnyOf{m}
|
return MatchAnyOf{m}
|
||||||
},
|
},
|
||||||
String: func(s string) MatchSequence {
|
String: func(s string) MatchSequence {
|
||||||
m := make([]Matcher, len(s))
|
var m = []Matcher{}
|
||||||
for i, r := range s {
|
for _, r := range s {
|
||||||
m[i] = MatchRune{r}
|
m = append(m, MatchRune{r})
|
||||||
}
|
}
|
||||||
return MatchSequence{m}
|
return MatchSequence{m}
|
||||||
},
|
},
|
||||||
StringNoCase: func(s string) MatchSequence {
|
StringNoCase: func(s string) MatchSequence {
|
||||||
m := make([]Matcher, len(s))
|
var m = []Matcher{}
|
||||||
for i, r := range s {
|
for _, r := range s {
|
||||||
u := MatchRune{unicode.ToUpper(r)}
|
u := MatchRune{unicode.ToUpper(r)}
|
||||||
l := MatchRune{unicode.ToLower(r)}
|
l := MatchRune{unicode.ToLower(r)}
|
||||||
m[i] = MatchAnyOf{[]Matcher{u, l}}
|
m = append(m, MatchAnyOf{[]Matcher{u, l}})
|
||||||
}
|
}
|
||||||
return MatchSequence{m}
|
return MatchSequence{m}
|
||||||
},
|
},
|
||||||
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
|
||||||
return MatchAnyOf{matchers}
|
|
||||||
},
|
|
||||||
Repeat: func(count int, matcher Matcher) MatchRepeat {
|
|
||||||
return MatchRepeat{count, matcher}
|
|
||||||
},
|
|
||||||
Sequence: func(matchers ...Matcher) MatchSequence {
|
|
||||||
return MatchSequence{matchers}
|
|
||||||
},
|
|
||||||
OneOrMore: func(matcher Matcher) MatchOneOrMore {
|
|
||||||
return MatchOneOrMore{matcher}
|
|
||||||
},
|
|
||||||
ZeroOrMore: func(matcher Matcher) MatchZeroOrMore {
|
|
||||||
return MatchZeroOrMore{matcher}
|
|
||||||
},
|
|
||||||
Optional: func(matcher Matcher) MatchOptional {
|
Optional: func(matcher Matcher) MatchOptional {
|
||||||
return MatchOptional{matcher}
|
return MatchOptional{matcher}
|
||||||
},
|
},
|
||||||
|
Not: func(matcher Matcher) MatchNot {
|
||||||
|
return MatchNot{matcher}
|
||||||
|
},
|
||||||
|
AnyOf: func(matchers ...Matcher) MatchAnyOf {
|
||||||
|
return MatchAnyOf{matchers}
|
||||||
|
},
|
||||||
|
Sequence: func(matchers ...Matcher) MatchSequence {
|
||||||
|
return MatchSequence{matchers}
|
||||||
|
},
|
||||||
|
Repeat: func(count int, matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{count, count, matcher}
|
||||||
|
},
|
||||||
|
Min: func(min int, matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{min, -1, matcher}
|
||||||
|
},
|
||||||
|
Max: func(max int, matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{-1, max, matcher}
|
||||||
|
},
|
||||||
|
Bounded: func(min int, max int, matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{min, max, matcher}
|
||||||
|
},
|
||||||
|
OneOrMore: func(matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{1, -1, matcher}
|
||||||
|
},
|
||||||
|
ZeroOrMore: func(matcher Matcher) MatchRepeat {
|
||||||
|
return MatchRepeat{0, -1, matcher}
|
||||||
|
},
|
||||||
|
Separated: func(separator Matcher, matcher Matcher) MatchSeparated {
|
||||||
|
return MatchSeparated{separator, matcher}
|
||||||
|
},
|
||||||
Drop: func(matcher Matcher) MatchDrop {
|
Drop: func(matcher Matcher) MatchDrop {
|
||||||
return MatchDrop{matcher}
|
return MatchDrop{matcher}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MatchEndOfFile struct{}
|
||||||
|
|
||||||
|
func (c MatchEndOfFile) Match(m *MatchDialog) bool {
|
||||||
|
r, ok := m.NextRune()
|
||||||
|
return !ok && r == EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchInvalidRune struct{}
|
||||||
|
|
||||||
|
func (c MatchInvalidRune) Match(m *MatchDialog) bool {
|
||||||
|
r, ok := m.NextRune()
|
||||||
|
return !ok && r == INVALID
|
||||||
|
}
|
||||||
|
|
||||||
type MatchAny struct{}
|
type MatchAny struct{}
|
||||||
|
|
||||||
func (c MatchAny) Match(m *MatchDialog) bool {
|
func (c MatchAny) Match(m *MatchDialog) bool {
|
||||||
|
@ -175,6 +212,31 @@ func (c MatchAny) Match(m *MatchDialog) bool {
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MatchNot struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchNot) Match(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if !c.matcher.Match(child) {
|
||||||
|
child.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type MatchOptional struct {
|
||||||
|
matcher Matcher
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c MatchOptional) Match(m *MatchDialog) bool {
|
||||||
|
child := m.Fork()
|
||||||
|
if c.matcher.Match(child) {
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
type MatchRune struct {
|
type MatchRune struct {
|
||||||
match rune
|
match rune
|
||||||
}
|
}
|
||||||
|
@ -209,18 +271,41 @@ func (c MatchAnyOf) Match(m *MatchDialog) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
type MatchRepeat struct {
|
type MatchRepeat struct {
|
||||||
count int
|
min int
|
||||||
|
max int
|
||||||
matcher Matcher
|
matcher Matcher
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
func (c MatchRepeat) Match(m *MatchDialog) bool {
|
||||||
child := m.Fork()
|
child := m.Fork()
|
||||||
for i := 0; i < c.count; i++ {
|
if c.min >= 0 && c.max >= 0 && c.min > c.max {
|
||||||
|
panic("MatchRepeat definition error: max must not be < min")
|
||||||
|
}
|
||||||
|
total := 0
|
||||||
|
// Specified min: check for the minimal required amount of matches.
|
||||||
|
for total < c.min {
|
||||||
|
total++
|
||||||
if !c.matcher.Match(child) {
|
if !c.matcher.Match(child) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// No specified max: include the rest of the available matches.
|
||||||
|
if c.max < 0 {
|
||||||
|
child.Merge()
|
||||||
|
for c.matcher.Match(child) {
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Specified max: include the rest of the available matches, up to the max.
|
||||||
child.Merge()
|
child.Merge()
|
||||||
|
for total < c.max {
|
||||||
|
total++
|
||||||
|
if !c.matcher.Match(child) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
child.Merge()
|
||||||
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,40 +324,14 @@ func (c MatchSequence) Match(m *MatchDialog) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
type MatchOneOrMore struct {
|
type MatchSeparated struct {
|
||||||
matcher Matcher
|
separator Matcher
|
||||||
|
matcher Matcher
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c MatchOneOrMore) Match(m *MatchDialog) bool {
|
func (c MatchSeparated) Match(m *MatchDialog) bool {
|
||||||
child := m.Fork()
|
seq := C.Sequence(c.matcher, C.ZeroOrMore(C.Sequence(c.separator, c.matcher)))
|
||||||
for c.matcher.Match(child) {
|
return seq.Match(m)
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return len(m.runes) > 0
|
|
||||||
}
|
|
||||||
|
|
||||||
type MatchZeroOrMore struct {
|
|
||||||
matcher Matcher
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c MatchZeroOrMore) Match(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
for c.matcher.Match(child) {
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
type MatchOptional struct {
|
|
||||||
matcher Matcher
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c MatchOptional) Match(m *MatchDialog) bool {
|
|
||||||
child := m.Fork()
|
|
||||||
if c.matcher.Match(child) {
|
|
||||||
child.Merge()
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MatchDrop struct {
|
type MatchDrop struct {
|
||||||
|
|
|
@ -15,7 +15,7 @@ func newParser(input string, matcher p.Matcher) *p.P {
|
||||||
p.Expects("MATCH")
|
p.Expects("MATCH")
|
||||||
if p.On(matcher).Accept() {
|
if p.On(matcher).Accept() {
|
||||||
p.EmitLiteral(TestItem)
|
p.EmitLiteral(TestItem)
|
||||||
p.Repeat()
|
p.RouteRepeat()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return p.New(input, stateFn)
|
return p.New(input, stateFn)
|
||||||
|
@ -120,20 +120,19 @@ func TestMatchString(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO
|
func TestMatchStringNoCase(t *testing.T) {
|
||||||
// func TestMatchStringNoCase(t *testing.T) {
|
p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
|
||||||
// p := newParser("HellÖ, world!", c.StringNoCase("hellö"))
|
r, err, ok := p.Next()
|
||||||
// r, err, ok := p.Next()
|
if !ok {
|
||||||
// if !ok {
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
// t.Fatalf("Parsing failed: %s", err)
|
}
|
||||||
// }
|
if r.Type != TestItem {
|
||||||
// if r.Type != TestItem {
|
t.Error("Parser item type not expected TestTitem")
|
||||||
// t.Error("Parser item type not expected TestTitem")
|
}
|
||||||
// }
|
if r.Value != "HellÖ" {
|
||||||
// if r.Value != "Hello" {
|
t.Errorf("Parser item value is %q instead of expected \"HellÖ\"", r.Value)
|
||||||
// t.Errorf("Parser item value is %q instead of expected \"Hello\"", r.Value)
|
}
|
||||||
// }
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
func TestMatchRunes(t *testing.T) {
|
func TestMatchRunes(t *testing.T) {
|
||||||
m := c.Runes('+', '-', '*', '/')
|
m := c.Runes('+', '-', '*', '/')
|
||||||
|
@ -156,6 +155,29 @@ func TestMatchRunes(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMatchNot(t *testing.T) {
|
||||||
|
p := newParser("aabc", c.Not(c.Rune('b')))
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s", err)
|
||||||
|
}
|
||||||
|
if r.Value != "a" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"a\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchNot_Mismatch(t *testing.T) {
|
||||||
|
p := newParser("aabc", c.Not(c.Rune('a')))
|
||||||
|
_, err, ok := p.Next()
|
||||||
|
if ok {
|
||||||
|
t.Fatalf("Parsing unexpectedly succeeded")
|
||||||
|
}
|
||||||
|
expected := "unexpected character 'a' (expected MATCH)"
|
||||||
|
if err.Error() != expected {
|
||||||
|
t.Fatalf("Unexpected error from parser:\nexpectd: %s\nactual: %s\n", expected, err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestMatchAnyOf(t *testing.T) {
|
func TestMatchAnyOf(t *testing.T) {
|
||||||
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
|
p := newParser("abc", c.AnyOf(c.Rune('a'), c.Rune('b')))
|
||||||
r, err, ok := p.Next()
|
r, err, ok := p.Next()
|
||||||
|
@ -192,6 +214,30 @@ func TestMatchRepeat(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMatchRepeat_Min(t *testing.T) {
|
||||||
|
p := newParser("1111112345", c.Min(4, c.Rune('1')))
|
||||||
|
r, _, _ := p.Next()
|
||||||
|
if r.Value != "111111" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"111111\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRepeat_Max(t *testing.T) {
|
||||||
|
p := newParser("1111112345", c.Max(4, c.Rune('1')))
|
||||||
|
r, _, _ := p.Next()
|
||||||
|
if r.Value != "1111" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"1111\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMatchRepeat_Bounded(t *testing.T) {
|
||||||
|
p := newParser("1111112345", c.Bounded(3, 5, c.Rune('1')))
|
||||||
|
r, _, _ := p.Next()
|
||||||
|
if r.Value != "11111" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"11111\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestMatchRepeat_Mismatch(t *testing.T) {
|
func TestMatchRepeat_Mismatch(t *testing.T) {
|
||||||
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
|
p := newParser("xxxyyyy", c.Repeat(4, c.Rune('x')))
|
||||||
_, err, ok := p.Next()
|
_, err, ok := p.Next()
|
||||||
|
@ -282,6 +328,21 @@ func TestMatchDrop(t *testing.T) {
|
||||||
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
t.Errorf("Parser item value is %q instead of expected \"x\"", r.Value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMatchSeparated(t *testing.T) {
|
||||||
|
number := c.Bounded(1, 3, c.RuneRange('0', '9'))
|
||||||
|
separators := c.Runes('|', ';', ',')
|
||||||
|
separated_numbers := c.Separated(separators, number)
|
||||||
|
p := newParser("1,2;3|44,55|66;777,abc", separated_numbers)
|
||||||
|
r, err, ok := p.Next()
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("Parsing failed: %s at row: %d, column %d\n", err, err.Row, err.Column)
|
||||||
|
}
|
||||||
|
if r.Value != "1,2;3|44,55|66;777" {
|
||||||
|
t.Errorf("Parser item value is %q instead of expected \"1,2;3|44,55|66;777\"", r.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestMixAndMatch(t *testing.T) {
|
func TestMixAndMatch(t *testing.T) {
|
||||||
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
hex := c.AnyOf(c.RuneRange('0', '9'), c.RuneRange('a', 'f'), c.RuneRange('A', 'F'))
|
||||||
backslash := c.Rune('\\')
|
backslash := c.Rune('\\')
|
||||||
|
|
|
@ -3,7 +3,7 @@ package parsekit
|
||||||
// Expects is used to let a state function describe what input it is expecting.
|
// Expects is used to let a state function describe what input it is expecting.
|
||||||
// This expectation is used in error messages to make them more descriptive.
|
// This expectation is used in error messages to make them more descriptive.
|
||||||
//
|
//
|
||||||
// Also, when defining an expectation inside a StateFn, you do not need
|
// Also, when defining an expectation inside a StateHandler, you do not need
|
||||||
// to handle unexpected input yourself. When the end of the function is
|
// to handle unexpected input yourself. When the end of the function is
|
||||||
// reached without setting the next state, an automatic error will be
|
// reached without setting the next state, an automatic error will be
|
||||||
// emitted. This error differentiates between issues:
|
// emitted. This error differentiates between issues:
|
||||||
|
@ -14,47 +14,18 @@ func (p *P) Expects(description string) {
|
||||||
p.expecting = description
|
p.expecting = description
|
||||||
}
|
}
|
||||||
|
|
||||||
// AtEndOfFile returns true when there is no more data available in the input.
|
// On checks if the current input matches the provided Matcher.
|
||||||
func (p *P) AtEndOfFile() bool {
|
// It returns a MatchAction struct, which provides methods that
|
||||||
return p.pos >= p.len
|
// can be used to tell the parser what to do with a match.
|
||||||
}
|
//
|
||||||
|
// The intended way to use this, is by chaining some methods,
|
||||||
// AtEndOfLine returns true when the cursor is either at the end of the line
|
// for example: p.On(...).Accept()
|
||||||
// or at the end of the file. The cursor is not moved to a new position
|
// The chained methods will as a whole return a boolean value,
|
||||||
// by this method.
|
// indicating whether or not a match was found and processed.
|
||||||
func (p *P) AtEndOfLine() bool {
|
func (p *P) On(m Matcher) *MatchAction {
|
||||||
return p.AtEndOfFile() ||
|
runes, widths, ok := p.match(m)
|
||||||
p.On(C.String("\r\n")).Stay() ||
|
p.LastMatch = string(runes)
|
||||||
p.On(C.Rune('\n')).Stay()
|
return &MatchAction{
|
||||||
}
|
|
||||||
|
|
||||||
// SkipEndOfLine returns true when the cursor is either at the end of the line
|
|
||||||
// or at the end of the file. Additionally, when not at the end of the file,
|
|
||||||
// the cursor is moved forward to beyond the newline.
|
|
||||||
func (p *P) SkipEndOfLine() bool {
|
|
||||||
return p.AtEndOfFile() ||
|
|
||||||
p.On(C.String("\r\n")).Skip() ||
|
|
||||||
p.On(C.Rune('\n')).Skip()
|
|
||||||
}
|
|
||||||
|
|
||||||
// AcceptEndOfLine returns true when the cursor is either at the end of the line
|
|
||||||
// or at the end of the file. When not at the end of the file, a normalized
|
|
||||||
// newline (only a '\n' character, even with '\r\n' on the input)
|
|
||||||
// is added to the string buffer.
|
|
||||||
func (p *P) AcceptEndOfLine() bool {
|
|
||||||
if p.AtEndOfFile() {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
if p.SkipEndOfLine() {
|
|
||||||
p.buffer.writeRune('\n')
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *P) On(m Matcher) *action {
|
|
||||||
runes, widths, ok := p.Match(m)
|
|
||||||
return &action{
|
|
||||||
p: p,
|
p: p,
|
||||||
runes: runes,
|
runes: runes,
|
||||||
widths: widths,
|
widths: widths,
|
||||||
|
@ -62,24 +33,29 @@ func (p *P) On(m Matcher) *action {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) Match(matcher Matcher) ([]rune, []int, bool) {
|
// Match checks if the provided Matcher matches the current input.
|
||||||
return p.match(0, matcher)
|
// Returns a slice of matching runes, a slice of their respective
|
||||||
}
|
// byte widths and a boolean.
|
||||||
|
// The boolean will be false and the slices will be empty in case
|
||||||
func (p *P) match(offset int, matcher Matcher) ([]rune, []int, bool) {
|
// the input did not match.
|
||||||
|
func (p *P) match(matcher Matcher) ([]rune, []int, bool) {
|
||||||
m := &MatchDialog{p: p}
|
m := &MatchDialog{p: p}
|
||||||
ok := matcher.Match(m)
|
ok := matcher.Match(m)
|
||||||
return m.runes, m.widths, ok
|
return m.runes, m.widths, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
type action struct {
|
type MatchAction struct {
|
||||||
p *P
|
p *P
|
||||||
runes []rune
|
runes []rune
|
||||||
widths []int
|
widths []int
|
||||||
ok bool
|
ok bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *action) Accept() bool {
|
// Accept tells the parser to move the cursor past a match that was found,
|
||||||
|
// and to store the input that matched in the string buffer.
|
||||||
|
// Returns true in case a match was found.
|
||||||
|
// When no match was found, then no action is taken and false is returned.
|
||||||
|
func (a *MatchAction) Accept() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
for i, r := range a.runes {
|
for i, r := range a.runes {
|
||||||
a.p.buffer.writeRune(r)
|
a.p.buffer.writeRune(r)
|
||||||
|
@ -89,7 +65,11 @@ func (a *action) Accept() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *action) Skip() bool {
|
// Skip tells the parser to move the cursor past a match that was found,
|
||||||
|
// without storing the actual match in the string buffer.
|
||||||
|
// Returns true in case a match was found.
|
||||||
|
// When no match was found, then no action is taken and false is returned.
|
||||||
|
func (a *MatchAction) Skip() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
for i, r := range a.runes {
|
for i, r := range a.runes {
|
||||||
type C struct {
|
type C struct {
|
||||||
|
@ -102,13 +82,31 @@ func (a *action) Skip() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *action) Stay() bool {
|
// Stay tells the parser to not move the cursor after finding a match.
|
||||||
|
// Returns true in case a match was found, false otherwise.
|
||||||
|
func (a *MatchAction) Stay() bool {
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the
|
// RouteTo is a shortcut for p.On(...).Stay() + p.RouteTo(...).
|
||||||
// input data. While doing so, it keeps track of newlines,
|
func (a *MatchAction) RouteTo(state StateHandler) bool {
|
||||||
// so we can report on row + column positions on error.
|
if a.ok {
|
||||||
|
a.p.RouteTo(state)
|
||||||
|
}
|
||||||
|
return a.ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// RouteReturn is a shortcut for p.On(...).Stay() + p.RouteReturn().
|
||||||
|
func (a *MatchAction) RouteReturn() bool {
|
||||||
|
if a.ok {
|
||||||
|
a.p.RouteReturn()
|
||||||
|
}
|
||||||
|
return a.ok
|
||||||
|
}
|
||||||
|
|
||||||
|
// advanceCursor advances the rune cursor one position in the input data.
|
||||||
|
// While doing so, it keeps track of newlines, so we can report on
|
||||||
|
// row + column positions on error.
|
||||||
func (p *P) advanceCursor(r rune, w int) {
|
func (p *P) advanceCursor(r rune, w int) {
|
||||||
p.pos += w
|
p.pos += w
|
||||||
if p.newline {
|
if p.newline {
|
||||||
|
@ -119,17 +117,3 @@ func (p *P) advanceCursor(r rune, w int) {
|
||||||
}
|
}
|
||||||
p.newline = r == '\n'
|
p.newline = r == '\n'
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *action) RouteTo(state StateFn) bool {
|
|
||||||
if a.ok {
|
|
||||||
a.p.RouteTo(state)
|
|
||||||
}
|
|
||||||
return a.ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *action) RouteReturn() bool {
|
|
||||||
if a.ok {
|
|
||||||
a.p.RouteReturn()
|
|
||||||
}
|
|
||||||
return a.ok
|
|
||||||
}
|
|
||||||
|
|
|
@ -6,13 +6,36 @@ import (
|
||||||
"runtime"
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// P holds the internal state of the parser.
|
||||||
|
type P struct {
|
||||||
|
state StateHandler // the function that handles the current state
|
||||||
|
nextState StateHandler // the function that will handle the next state
|
||||||
|
stack []StateHandler // state function stack, for nested parsing
|
||||||
|
input string // the scanned input
|
||||||
|
len int // the total length of the input in bytes
|
||||||
|
pos int // current byte scanning position in the input
|
||||||
|
newline bool // keep track of when we have scanned a newline
|
||||||
|
cursorRow int // current row number in the input
|
||||||
|
cursorColumn int // current column position in the input
|
||||||
|
expecting string // a description of what the current state expects to find
|
||||||
|
buffer stringBuffer // an efficient buffer, used to build string values
|
||||||
|
LastMatch string // a string representation of the last matched input data
|
||||||
|
items chan Item // channel of resulting Parser items
|
||||||
|
item Item // the current item as reached by Next() and retrieved by Get()
|
||||||
|
err *Error // an error when lexing failed, retrieved by Error()
|
||||||
|
}
|
||||||
|
|
||||||
|
// StateHandler defines the type of function that can be used to
|
||||||
|
// handle a parser state.
|
||||||
|
type StateHandler func(*P)
|
||||||
|
|
||||||
// New takes an input string and a start state,
|
// New takes an input string and a start state,
|
||||||
// and initializes the parser for it.
|
// and initializes the parser for it.
|
||||||
func New(input string, startState StateFn) *P {
|
func New(input string, start StateHandler) *P {
|
||||||
return &P{
|
return &P{
|
||||||
input: input,
|
input: input,
|
||||||
len: len(input),
|
len: len(input),
|
||||||
nextState: startState,
|
nextState: start,
|
||||||
items: make(chan Item, 2),
|
items: make(chan Item, 2),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,51 +48,80 @@ func (p *P) Next() (Item, *Error, bool) {
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case i := <-p.items:
|
case i := <-p.items:
|
||||||
switch {
|
return p.makeReturnValues(i)
|
||||||
case i.Type == ItemEOF:
|
|
||||||
return i, nil, false
|
|
||||||
case i.Type == ItemError:
|
|
||||||
p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
|
|
||||||
return i, p.err, false
|
|
||||||
default:
|
|
||||||
p.item = i
|
|
||||||
return i, nil, true
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
// When implementing a parser, a state function must provide
|
p.runStatusHandler()
|
||||||
// a routing decision in every state function execution.
|
|
||||||
// When no route is specified, then it is considered a but
|
|
||||||
// in the parser implementation.
|
|
||||||
// An exception is when a function specified its expectation
|
|
||||||
// using the Expects() method. In that case, an unexpected
|
|
||||||
// input error is emitted.
|
|
||||||
if p.nextState == nil {
|
|
||||||
if p.expecting != "" {
|
|
||||||
p.UnexpectedInput()
|
|
||||||
continue
|
|
||||||
} else {
|
|
||||||
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
|
|
||||||
panic(fmt.Sprintf("StateFn implementation bug: %s did not set next state or input expectation", name))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p.state = p.nextState
|
|
||||||
p.nextState = nil
|
|
||||||
p.expecting = ""
|
|
||||||
p.state(p)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToArray returns Parser items as an array (mainly intended for testing purposes)
|
// runStatusHandler moves the parser, which is basically a state machine,
|
||||||
// When an error occurs during scanning, a partial result will be
|
// to its next status. It does so by invoking a function of the
|
||||||
// returned, accompanied by the error that occurred.
|
// type StateHandler. This function represents the current status.
|
||||||
func (p *P) ToArray() ([]Item, *Error) {
|
func (p *P) runStatusHandler() {
|
||||||
var items []Item
|
if state, ok := p.getNextStateHandler(); ok {
|
||||||
for {
|
p.invokeNextStatusHandler(state)
|
||||||
item, err, more := p.Next()
|
}
|
||||||
if !more {
|
}
|
||||||
return items, err
|
|
||||||
}
|
// getNextStateHandler determines the next StateHandler to invoke in order
|
||||||
items = append(items, item)
|
// to move the parsing state machine one step further.
|
||||||
|
//
|
||||||
|
// When implementing a parser, the StateHandler functions must provide
|
||||||
|
// a routing decision in every invocation. A routing decision is one
|
||||||
|
// of the following:
|
||||||
|
//
|
||||||
|
// * A route is specified explicitly, which means that the next StateHandler
|
||||||
|
// function to invoke is registered during the StateHandler function
|
||||||
|
// invocation. For example: p.RouteTo(nextStatus)
|
||||||
|
//
|
||||||
|
// * A route is specified implicitly, which means that a previous StateHandler
|
||||||
|
// invocation has registered the followup route for the current state.
|
||||||
|
// For example: p.RouteTo(nextStatus).ThenTo(otherStatus)
|
||||||
|
// In this example, the nextStatus StateHandler will not have to specify
|
||||||
|
// a route explicitly, but otherStatus will be used implicitly after
|
||||||
|
// the nextStatus function has returned.
|
||||||
|
//
|
||||||
|
// * An expectation is registered by the StateHandler.
|
||||||
|
// For example: p.Expects("a cool thing")
|
||||||
|
// When the StateHandler returns without having specified a route, this
|
||||||
|
// expectation is used to generate an "unexpected input" error message.
|
||||||
|
//
|
||||||
|
// When no routing decision is provided by a StateHandler, then this is
|
||||||
|
// considered a bug in the state handler, and the parser will panic.
|
||||||
|
func (p *P) getNextStateHandler() (StateHandler, bool) {
|
||||||
|
switch {
|
||||||
|
case p.nextState != nil:
|
||||||
|
return p.nextState, true
|
||||||
|
case len(p.stack) > 0:
|
||||||
|
return p.popState(), true
|
||||||
|
case p.expecting != "":
|
||||||
|
p.UnexpectedInput()
|
||||||
|
return nil, false
|
||||||
|
default:
|
||||||
|
name := runtime.FuncForPC(reflect.ValueOf(p.state).Pointer()).Name()
|
||||||
|
panic(fmt.Sprintf("StateHandler %s did not provide a routing decision", name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// invokeNextStatusHandler moves the parser state to the provided state
|
||||||
|
// and invokes the StateHandler function.
|
||||||
|
func (p *P) invokeNextStatusHandler(state StateHandler) {
|
||||||
|
p.state = state
|
||||||
|
p.nextState = nil
|
||||||
|
p.expecting = ""
|
||||||
|
p.state(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *P) makeReturnValues(i Item) (Item, *Error, bool) {
|
||||||
|
switch {
|
||||||
|
case i.Type == ItemEOF:
|
||||||
|
return i, nil, false
|
||||||
|
case i.Type == ItemError:
|
||||||
|
p.err = &Error{i.Value, p.cursorRow, p.cursorColumn}
|
||||||
|
return i, p.err, false
|
||||||
|
default:
|
||||||
|
p.item = i
|
||||||
|
return i, nil, true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// peek returns the rune at the given byte offset past the cursor, without advancing the cursor.
|
||||||
|
// Returns the rune, its width in bytes and a boolean.
|
||||||
|
// The boolean will be false in case no upcoming rune can be peeked
|
||||||
|
// (end of data or invalid UTF8 character).
|
||||||
|
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
|
||||||
|
r, w := utf8.DecodeRuneInString(p.input[p.pos+offsetInBytes:])
|
||||||
|
return handleRuneError(r, w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleRuneError is used to normalize the rune value in case of errors.
|
||||||
|
// When an error occurs, then utf8.RuneError will be in the rune.
|
||||||
|
// This can however indicate one of two situations:
|
||||||
|
// * w == 0: end of file is reached
|
||||||
|
// * w == 1: invalid UTF8 character on input
|
||||||
|
// This function lets these two cases return respectively the
|
||||||
|
// package's own EOF or INVALID runes, to make it easy for client
|
||||||
|
// code to distinguish between these two cases.
|
||||||
|
func handleRuneError(r rune, w int) (rune, int, bool) {
|
||||||
|
if r == utf8.RuneError {
|
||||||
|
if w == 0 {
|
||||||
|
return EOF, 0, false
|
||||||
|
}
|
||||||
|
return INVALID, w, false
|
||||||
|
}
|
||||||
|
return r, w, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// EOF is a special rune, which is used to indicate an end of file when
|
||||||
|
// reading a character from the input.
|
||||||
|
// It can be treated as a rune when writing parsing rules, so a valid way to
|
||||||
|
// say 'I now expect the end of the file' is using something like:
|
||||||
|
// if (p.On(c.Rune(EOF)).Skip()) { ... }
|
||||||
|
const EOF rune = -1
|
||||||
|
|
||||||
|
// INVALID is a special rune, which is used to indicate an invalid UTF8
|
||||||
|
// rune on the input.
|
||||||
|
const INVALID rune = utf8.RuneError
|
|
@ -1,40 +1,58 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
func (p *P) Repeat() {
|
// RouteRepeat indicates that on the next parsing cycle,
|
||||||
p.nextState = p.state
|
// the current StateHandler must be invoked again.
|
||||||
return
|
func (p *P) RouteRepeat() {
|
||||||
|
p.RouteTo(p.state)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *P) RouteTo(state StateFn) *routeFollowup {
|
// RouteTo tells the parser what StateHandler function to invoke
|
||||||
|
// in the next parsing cycle.
|
||||||
|
func (p *P) RouteTo(state StateHandler) *RouteFollowup {
|
||||||
p.nextState = state
|
p.nextState = state
|
||||||
return &routeFollowup{p}
|
return &RouteFollowup{p}
|
||||||
}
|
}
|
||||||
|
|
||||||
type routeFollowup struct {
|
// RouteFollowup chains parsing routes.
|
||||||
|
// It allows for routing code like p.RouteTo(handlerA).ThenTo(handlerB).
|
||||||
|
type RouteFollowup struct {
|
||||||
p *P
|
p *P
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *routeFollowup) ThenTo(state StateFn) *routeFollowup {
|
// ThenTo schedules a StateHandler that must be invoked
|
||||||
|
// after the RouteTo StateHandler has been completed.
|
||||||
|
// For example: p.RouteTo(handlerA).ThenTo(handlerB)
|
||||||
|
func (r *RouteFollowup) ThenTo(state StateHandler) {
|
||||||
r.p.pushState(state)
|
r.p.pushState(state)
|
||||||
return r
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *routeFollowup) ThenReturnHere() {
|
// ThenReturnHere schedules the current StateHandler to be
|
||||||
|
// invoked after the RouteTo StateHandler has been completed.
|
||||||
|
// For example: p.RouteTo(handlerA).ThenReturnHere()
|
||||||
|
func (r *RouteFollowup) ThenReturnHere() {
|
||||||
r.p.pushState(r.p.state)
|
r.p.pushState(r.p.state)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RouteReturn tells the parser that on the next cycle the
|
||||||
|
// next scheduled route must be invoked.
|
||||||
|
// Using this method is optional. When implementing a
|
||||||
|
// StateHandler that is used as a sort of subroutine (using
|
||||||
|
// constructions like p.RouteTo(subroutine).ThenReturnHere()),
|
||||||
|
// then you can refrain from providing a routing decision
|
||||||
|
// from that handler. The parser will automatically assume
|
||||||
|
// a RouteReturn in that case.
|
||||||
func (p *P) RouteReturn() {
|
func (p *P) RouteReturn() {
|
||||||
p.nextState = p.popState()
|
p.nextState = p.popState()
|
||||||
}
|
}
|
||||||
|
|
||||||
// PushState adds the state function to the state stack.
|
// PushState adds the state function to the state stack.
|
||||||
// This is used for implementing nested parsing.
|
// This is used for implementing nested parsing.
|
||||||
func (p *P) pushState(state StateFn) {
|
func (p *P) pushState(state StateHandler) {
|
||||||
p.stack = append(p.stack, state)
|
p.stack = append(p.stack, state)
|
||||||
}
|
}
|
||||||
|
|
||||||
// PopState pops the last pushed state from the state stack.
|
// PopState pops the last pushed state from the state stack.
|
||||||
func (p *P) popState() StateFn {
|
func (p *P) popState() StateHandler {
|
||||||
last := len(p.stack) - 1
|
last := len(p.stack) - 1
|
||||||
head, tail := p.stack[:last], p.stack[last]
|
head, tail := p.stack[:last], p.stack[last]
|
||||||
p.stack = head
|
p.stack = head
|
||||||
|
|
|
@ -6,18 +6,20 @@ import (
|
||||||
|
|
||||||
// A '#' hash symbol marks the rest of the line as a comment.
|
// A '#' hash symbol marks the rest of the line as a comment.
|
||||||
func startComment(p *parsekit.P) {
|
func startComment(p *parsekit.P) {
|
||||||
p.On(c.OneOrMore(hash)).Skip()
|
p.Expects("start of comment")
|
||||||
p.RouteTo(commentContents)
|
if p.On(c.OneOrMore(hash)).Skip() {
|
||||||
|
p.RouteTo(commentContents)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// All characters up to the end of the line are included in the comment.
|
// All characters up to the end of the line are included in the comment.
|
||||||
func commentContents(p *parsekit.P) {
|
func commentContents(p *parsekit.P) {
|
||||||
p.Expects("comment contents")
|
p.Expects("comment contents")
|
||||||
switch {
|
switch {
|
||||||
case p.AtEndOfLine() || p.On(endOfLine).Skip(): // TODO drop AtEndOfLine support
|
case p.On(endOfLine).Skip():
|
||||||
p.EmitLiteralTrim(ItemComment)
|
p.EmitLiteralTrim(ItemComment)
|
||||||
p.RouteReturn()
|
p.RouteReturn()
|
||||||
case p.On(any).Accept():
|
case p.On(any).Accept():
|
||||||
p.Repeat()
|
p.RouteRepeat()
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -2,9 +2,10 @@ package parser
|
||||||
|
|
||||||
import "github.com/mmakaay/toml/parsekit"
|
import "github.com/mmakaay/toml/parsekit"
|
||||||
|
|
||||||
|
// TODO move into parsekit
|
||||||
func endOfFile(p *parsekit.P) {
|
func endOfFile(p *parsekit.P) {
|
||||||
p.Expects("end of file")
|
p.Expects("end of file")
|
||||||
if p.AtEndOfFile() {
|
if p.On(c.EndOfFile()).Stay() {
|
||||||
p.Emit(parsekit.ItemEOF, "EOF")
|
p.Emit(parsekit.ItemEOF, "EOF")
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -22,8 +22,23 @@ func runStatesTs(t *testing.T, tests []statesT) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseItemsToArray returns Parser items as an array.
|
||||||
|
// When an error occurs during scanning, a partial result will be
|
||||||
|
// returned, accompanied by the error that occurred.
|
||||||
|
func parseItemsToArray(p *parsekit.P) ([]parsekit.Item, *parsekit.Error) {
|
||||||
|
var items []parsekit.Item
|
||||||
|
for {
|
||||||
|
item, err, more := p.Next()
|
||||||
|
if !more {
|
||||||
|
return items, err
|
||||||
|
}
|
||||||
|
items = append(items, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func runStatesT(t *testing.T, c statesT) {
|
func runStatesT(t *testing.T, c statesT) {
|
||||||
l, err := parser.NewParser(c.in).ToArray()
|
p := parser.NewParser(c.in)
|
||||||
|
l, err := parseItemsToArray(p)
|
||||||
if err == nil && c.err != "" {
|
if err == nil && c.err != "" {
|
||||||
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,7 +33,7 @@ var (
|
||||||
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
whitespace = c.OneOrMore(c.AnyOf(space, tab))
|
||||||
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
whitespaceOrNewlines = c.OneOrMore(c.AnyOf(space, tab, carriageReturn, lineFeed))
|
||||||
optionalWhitespace = c.Optional(whitespace)
|
optionalWhitespace = c.Optional(whitespace)
|
||||||
endOfLine = c.AnyOf(lineFeed, c.Rune(parsekit.EOF))
|
endOfLine = c.AnyOf(lineFeed, c.EndOfFile())
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewParser creates a new parser, using the provided input string
|
// NewParser creates a new parser, using the provided input string
|
|
@ -11,7 +11,8 @@ func TestEmptyInput(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||||
_, err := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
|
p := parser.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc")
|
||||||
|
_, err := parseItemsToArray(p)
|
||||||
t.Logf("Got error: %s", err.Error())
|
t.Logf("Got error: %s", err.Error())
|
||||||
if err.Row != 4 {
|
if err.Row != 4 {
|
||||||
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
|
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
|
||||||
|
@ -23,7 +24,7 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
|
||||||
|
|
||||||
func TestInvalidUtf8Data(t *testing.T) {
|
func TestInvalidUtf8Data(t *testing.T) {
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected comment contents)"},
|
{"inside comment", "# \xbc", "", "invalid UTF8 character in input (expected end of file)"},
|
||||||
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
|
{"bare key 1", "\xbc", "", "invalid UTF8 character in input (expected end of file)"},
|
||||||
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
|
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character in input (expected a value assignment)"},
|
||||||
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
|
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character in input (expected a value)"},
|
|
@ -42,21 +42,6 @@ func startString(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseBasicString(p *parsekit.P) {
|
|
||||||
p.Expects("string contents")
|
|
||||||
switch {
|
|
||||||
case p.On(charThatMustBeEscaped).Stay():
|
|
||||||
r, _, _ := p.Match(charThatMustBeEscaped)
|
|
||||||
p.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
|
|
||||||
case p.On(validEscape).Accept():
|
|
||||||
p.Repeat()
|
|
||||||
case p.On(backslash).Stay() || p.On(doubleQuote).Stay():
|
|
||||||
p.RouteReturn()
|
|
||||||
case p.On(any).Accept():
|
|
||||||
p.Repeat()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func startBasicString(p *parsekit.P) {
|
func startBasicString(p *parsekit.P) {
|
||||||
p.Expects("a basic string")
|
p.Expects("a basic string")
|
||||||
if p.On(doubleQuote).Skip() {
|
if p.On(doubleQuote).Skip() {
|
||||||
|
@ -64,12 +49,27 @@ func startBasicString(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseBasicString(p *parsekit.P) {
|
||||||
|
p.Expects("string contents")
|
||||||
|
switch {
|
||||||
|
case p.On(charThatMustBeEscaped).Stay():
|
||||||
|
p.EmitError("Invalid character in basic string: %q (must be escaped)", p.LastMatch)
|
||||||
|
case p.On(validEscape).Accept():
|
||||||
|
p.RouteRepeat()
|
||||||
|
case p.On(backslash).RouteReturn():
|
||||||
|
case p.On(doubleQuote).RouteReturn():
|
||||||
|
case p.On(any).Accept():
|
||||||
|
p.RouteRepeat()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Specific handling of input for basic strings.
|
// Specific handling of input for basic strings.
|
||||||
// * A double quote ends the string
|
// * A double quote ends the string
|
||||||
// * No additional \escape sequences are allowed. What the spec say about this:
|
// * No additional \escape sequences are allowed. What the spec say about this:
|
||||||
// "All other escape sequences [..] are reserved and, if used, TOML should
|
// "All other escape sequences [..] are reserved and, if used, TOML should
|
||||||
// produce an error.""
|
// produce an error.""
|
||||||
func basicStringSpecifics(p *parsekit.P) {
|
func basicStringSpecifics(p *parsekit.P) {
|
||||||
|
p.Expects("string contents")
|
||||||
switch {
|
switch {
|
||||||
case p.On(doubleQuote).Skip():
|
case p.On(doubleQuote).Skip():
|
||||||
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
|
if err := p.EmitInterpreted(ItemString); err != nil { // TODO testcase?
|
||||||
|
@ -79,8 +79,6 @@ func basicStringSpecifics(p *parsekit.P) {
|
||||||
}
|
}
|
||||||
case p.On(backslash).Stay():
|
case p.On(backslash).Stay():
|
||||||
p.EmitError("Invalid escape sequence")
|
p.EmitError("Invalid escape sequence")
|
||||||
default:
|
|
||||||
panic("String parsing should not have ended up here")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,9 +13,9 @@ func TestUnterminatedBasicString(t *testing.T) {
|
||||||
|
|
||||||
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||||
runStatesTs(t, []statesT{
|
runStatesTs(t, []statesT{
|
||||||
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
|
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: "\x00" (must be escaped)`},
|
||||||
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
|
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: "\n" (must be escaped)`},
|
||||||
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
|
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: "\u007f" (must be escaped)`},
|
||||||
})
|
})
|
||||||
|
|
||||||
// No need to write all test cases for disallowed characters by hand.
|
// No need to write all test cases for disallowed characters by hand.
|
||||||
|
@ -23,7 +23,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
|
||||||
name := fmt.Sprintf("control character %x", rune(i))
|
name := fmt.Sprintf("control character %x", rune(i))
|
||||||
runStatesT(
|
runStatesT(
|
||||||
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
|
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
|
||||||
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
|
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, string(rune(i)))})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue