Backup a load of work on typed token support, making it easy to produce tokens directly from parser/combinator-based parsing rules.
This commit is contained in:
parent
21f1aa597c
commit
4580962fb8
|
@ -0,0 +1,19 @@
|
|||
package assert
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Equal reports a test error when expected and actual are not equal.
//
// The values are compared with Go's != operator on interface values, which
// means that they are only considered equal when both their dynamic types
// and their values match. Comparing values of an uncomparable type (e.g.
// slices or maps) results in a run-time panic.
//
// forWhat is a short description of the value under test. It is included in
// the error message to make clear which check has failed.
func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	// Mark this function as a test helper, so a failure is attributed to the
	// line of the calling test instead of to the t.Errorf() line below.
	t.Helper()
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}
|
||||
|
||||
// NotEqual reports a test error when actual is equal to notExpected.
//
// The values are compared with Go's == operator on interface values, which
// means that they are only considered equal when both their dynamic types
// and their values match. Comparing values of an uncomparable type (e.g.
// slices or maps) results in a run-time panic.
//
// forWhat is a short description of the value under test. It is included in
// the error message to make clear which check has failed.
func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
	// Mark this function as a test helper, so a failure is attributed to the
	// line of the calling test instead of to the t.Errorf() line below.
	t.Helper()
	if notExpected == actual {
		t.Errorf("Unexpected value for %s: %q", forWhat, actual)
	}
}
|
|
@ -0,0 +1,34 @@
|
|||
package assert
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// PanicT describes a single panic expectation for use with Panic().
type PanicT struct {
	// Function is the function that is expected to panic when called.
	Function func()
	// Expect is the expected panic message. When Regexp is true, it is
	// used as a regular expression instead of as a literal message.
	Expect string
	// Regexp tells whether Expect must be matched as a regular expression
	// (true) or compared literally (false).
	Regexp bool
}

// Panic calls p.Function and reports a test error when the function does not
// panic, or when it panics with a message that does not match p.Expect.
//
// The panic value may be a string or an error (in which case its Error()
// message is used). Any other type of panic value is reported as a test
// error, since it cannot be compared against the expected message.
//
// When p.Regexp is true, p.Expect is compiled using regexp.MustCompile, so
// an invalid regular expression results in a panic of its own.
func Panic(t *testing.T, p PanicT) {
	t.Helper()
	defer func() {
		// The deferred function is a separate function for the testing
		// package, so it has to be marked as a helper too.
		t.Helper()

		r := recover()
		if r == nil {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
			return
		}

		// Extract the panic message. A blind r.(string) assertion would
		// cause a fresh, confusing panic for non-string panic values.
		var message string
		switch v := r.(type) {
		case string:
			message = v
		case error:
			message = v.Error()
		default:
			t.Errorf("Code did panic, but with a non-string panic value of type %T: %v", r, r)
			return
		}

		var mismatch bool
		if p.Regexp {
			mismatch = !regexp.MustCompile(p.Expect).MatchString(message)
		} else {
			mismatch = p.Expect != message
		}
		if mismatch {
			t.Errorf(
				"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
				p.Expect, message)
		}
	}()
	p.Function()
}
|
|
@ -0,0 +1,29 @@
|
|||
package parsekit
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Cursor keeps track of a position in the input, expressed in a number of
// different units. All fields start at zero.
type Cursor struct {
	Byte   int // The cursor offset in bytes
	Rune   int // The cursor offset in UTF8 runes
	Column int // The column at which the cursor is (0-indexed)
	Line   int // The line at which the cursor is (0-indexed)
}

// String produces a human-readable description of the cursor position.
// The 0-indexed line and column are presented as 1-indexed numbers.
func (c *Cursor) String() string {
	line, column := c.Line+1, c.Column+1
	return fmt.Sprintf("line %d, column %d", line, column)
}

// move advances the cursor over the provided input string. The byte offset
// is increased by the length of the input in bytes. The other fields are
// updated rune by rune: a newline moves the cursor to the first column of
// the next line, every other rune moves it one column to the right.
func (c *Cursor) move(input string) {
	c.Byte += len(input)
	for _, r := range input {
		c.Rune++
		if r != '\n' {
			c.Column++
			continue
		}
		c.Line++
		c.Column = 0
	}
}
|
|
@ -0,0 +1,42 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
name string
|
||||
input []string
|
||||
byte int
|
||||
rune int
|
||||
line int
|
||||
column int
|
||||
}{
|
||||
{"No input at all", []string{""}, 0, 0, 0, 0},
|
||||
{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
|
||||
{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
|
||||
{"One newline", []string{"\n"}, 1, 1, 1, 0},
|
||||
{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
|
||||
{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
|
||||
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
|
||||
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
|
||||
} {
|
||||
c := Cursor{}
|
||||
for _, s := range test.input {
|
||||
c.move(s)
|
||||
}
|
||||
if c.Byte != test.byte {
|
||||
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
|
||||
}
|
||||
if c.Rune != test.rune {
|
||||
t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
|
||||
}
|
||||
if c.Line != test.line {
|
||||
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
|
||||
}
|
||||
if c.Column != test.column {
|
||||
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -9,7 +9,6 @@ package parsekit_test
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
@ -28,7 +27,6 @@ func Example_basicCalculator1() {
|
|||
{"+", 0},
|
||||
{"10.8 + 12", 0},
|
||||
{"42+ ", 0},
|
||||
{"9999999999999999999 + 8888888", 0},
|
||||
} {
|
||||
output, err := ComputeSimple(c.input)
|
||||
if err != nil {
|
||||
|
@ -47,7 +45,6 @@ func Example_basicCalculator1() {
|
|||
// Input: "+", got error: unexpected character '+' (expected integer number)
|
||||
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
|
||||
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
|
||||
// Input: "9999999999999999999 + 8888888", got error: invalid value: strconv.ParseInt: parsing "9999999999999999999": value out of range
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -72,23 +69,16 @@ type simpleCalculator struct {
|
|||
op int64 // represents operation for next term (+1 = add, -1 = subtract)
|
||||
}
|
||||
|
||||
// A definition of bareInteger, which conveniently drops surrounding whitespace.
|
||||
// A definition of an int64, which conveniently drops surrounding whitespace.
|
||||
var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
|
||||
var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
|
||||
var int64Token = parsekit.T.Int64(nil, bareInteger)
|
||||
|
||||
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
||||
if p.On(bareInteger).Accept() {
|
||||
value, err := strconv.ParseInt(p.BufLiteral(), 10, 64)
|
||||
p.BufClear()
|
||||
if err != nil {
|
||||
p.Error("invalid value: %s", err)
|
||||
} else {
|
||||
c.Result += c.op * value
|
||||
p.Handle(c.operatorOrEndOfFile)
|
||||
}
|
||||
} else {
|
||||
p.Expects("integer number")
|
||||
p.UnexpectedInput()
|
||||
p.Expects("integer number")
|
||||
if p.On(int64Token).Accept() {
|
||||
c.Result += c.op * p.Result().Value(0).(int64)
|
||||
p.Handle(c.operatorOrEndOfFile)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@ package parsekit_test
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
@ -97,8 +96,8 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
|
|||
|
||||
var pc, a = parsekit.C, parsekit.A
|
||||
if p.Handle(c.term) {
|
||||
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
|
||||
op := p.LastMatch
|
||||
for p.On(pc.Any(a.Add, a.Subtract)).Accept() {
|
||||
op := p.Result().Rune(0)
|
||||
if !p.Handle(c.term) {
|
||||
return
|
||||
}
|
||||
|
@ -115,8 +114,8 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
|
|||
|
||||
var pc, a = parsekit.C, parsekit.A
|
||||
if p.Handle(c.factor) {
|
||||
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
|
||||
op := p.LastMatch
|
||||
for p.On(pc.Any(a.Multiply, a.Divide)).Accept() {
|
||||
op := p.Result().Rune(0)
|
||||
if !p.Handle(c.factor) {
|
||||
return
|
||||
}
|
||||
|
@ -130,19 +129,12 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
|
|||
// <space> = (<space> (SPACE|TAB) | "")
|
||||
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
||||
func (c *calculator) factor(p *parsekit.ParseAPI) {
|
||||
var pc, a = parsekit.C, parsekit.A
|
||||
var a, tok = parsekit.A, parsekit.T
|
||||
p.On(a.Whitespace).Skip()
|
||||
switch {
|
||||
case p.On(pc.Signed(a.Float)).Accept():
|
||||
floatStr := p.BufLiteral()
|
||||
p.BufClear()
|
||||
value, err := strconv.ParseFloat(floatStr, 64)
|
||||
if err != nil {
|
||||
p.Error("invalid number %s: %s", floatStr, err)
|
||||
return
|
||||
} else {
|
||||
c.interpreter.pushValue(value)
|
||||
}
|
||||
case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept():
|
||||
value := p.Result().Value(0).(float64)
|
||||
c.interpreter.pushValue(value)
|
||||
case p.On(a.LeftParen).Skip():
|
||||
if !p.Handle(c.expr) {
|
||||
return
|
||||
|
@ -194,16 +186,16 @@ func (i *interpreter) pushValue(value float64) {
|
|||
i.top.a, i.top.b = i.top.b, value
|
||||
}
|
||||
|
||||
func (i *interpreter) eval(op string) float64 {
|
||||
func (i *interpreter) eval(op rune) float64 {
|
||||
value := i.top.a
|
||||
switch op {
|
||||
case "+":
|
||||
case '+':
|
||||
value += i.top.b
|
||||
case "-":
|
||||
case '-':
|
||||
value -= i.top.b
|
||||
case "*":
|
||||
case '*':
|
||||
value *= i.top.b
|
||||
case "/":
|
||||
case '/':
|
||||
value /= i.top.b
|
||||
}
|
||||
i.top.b = value
|
||||
|
|
|
@ -36,11 +36,11 @@ func Example_dutchPostcodeUsingMatcher() {
|
|||
// [1] Input: "2233Ab" Output: 2233 AB
|
||||
// [2] Input: "1001\t\tab" Output: 1001 AB
|
||||
// [3] Input: "1818ab" Output: 1818 AB
|
||||
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
|
||||
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
|
||||
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
|
||||
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
|
||||
// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
|
||||
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
|
||||
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file
|
||||
// [8] Input: "\xcd2222AB" Error: unexpected character '<27>' (expected a Dutch postcode) at start of file
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -57,7 +57,7 @@ func createPostcodeMatcher() *parsekit.Matcher {
|
|||
// - A space between letters and digits is optional.
|
||||
// - It is good form to write the letters in upper case.
|
||||
// - It is good form to use a single space between digits and letters.
|
||||
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
||||
digitNotZero := c.Except(a.Rune('0'), a.Digit)
|
||||
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
//
|
||||
// This implementation uses a state-based Parser for it, and it does not
|
||||
// implement any custom parser/combinator TokenHandler functions. Note that
|
||||
// things are much easier to implement using custom TokenHandlers (see the other
|
||||
// HelloWorldUsingMatcher example for this). Doing this fully parser-based
|
||||
// things are much easier to implement using custom TokenHandlers (see the
|
||||
// helloParserCombinator example for this). Doing this fully parser-based
|
||||
// implementation is mainly for your learning pleasure.
|
||||
//
|
||||
// One big difference between the Matcher-based example and this one, is that
|
||||
// this parser reports errors much more fine-grained. This might or might not be
|
||||
// useful for your specific use case. If you need error reporting like this,
|
||||
// then also take a look at the HelloWorldUsingParser2 example, which does the
|
||||
// same thing as this version, only more concise.
|
||||
// One big difference between the parser/combinator-based example and this one,
|
||||
// is that this parser reports errors much more fine-grained. This might or
|
||||
// might not be useful for your specific use case. If you need error reporting
|
||||
// like this, then also take a look at the helloSingleState example, which does
|
||||
// the same thing as this version, only more concise.
|
||||
|
||||
package parsekit_test
|
||||
|
||||
|
@ -56,11 +56,11 @@ func Example_helloWorldUsingParser1() {
|
|||
// [6] Input: "hello" Error: unexpected end of file (expected comma)
|
||||
// [7] Input: "hello," Error: unexpected end of file (expected name)
|
||||
// [8] Input: "hello , " Error: unexpected end of file (expected name)
|
||||
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected name)
|
||||
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected exclamation)
|
||||
// [10] Input: "hello , Droopy!" Output: Droopy
|
||||
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
|
||||
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
|
||||
// [13] Input: "hello,!" Error: The name cannot be empty
|
||||
// [13] Input: "hello,!" Error: unexpected character '!' (expected name)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -78,9 +78,9 @@ func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
|
|||
}
|
||||
|
||||
func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
||||
c := parsekit.C
|
||||
a := parsekit.A
|
||||
p.Expects("hello")
|
||||
if p.On(c.StrNoCase("hello")).Skip() {
|
||||
if p.On(a.StrNoCase("hello")).Skip() {
|
||||
p.Handle(h.comma)
|
||||
}
|
||||
}
|
||||
|
@ -88,20 +88,42 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
|||
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
p.Expects("comma")
|
||||
p.On(a.Whitespace).Skip()
|
||||
if p.On(a.Comma).Skip() {
|
||||
switch {
|
||||
case p.On(a.Whitespace).Skip():
|
||||
p.Handle(h.comma)
|
||||
case p.On(a.Comma).Skip():
|
||||
p.Handle(h.startName)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
|
||||
c, a := parsekit.C, parsekit.A
|
||||
p.Expects("name")
|
||||
switch {
|
||||
case p.On(a.Whitespace).Skip():
|
||||
p.Handle(h.startName)
|
||||
case p.On(c.Not(a.Excl)).Stay():
|
||||
p.Handle(h.name)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *helloparser1) name(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
c, a := parsekit.C, parsekit.A
|
||||
p.Expects("name")
|
||||
switch {
|
||||
case p.On(a.Excl).Skip():
|
||||
p.Handle(h.end)
|
||||
case p.On(a.AnyRune).Accept():
|
||||
case p.On(c.Not(a.Excl)).Accept():
|
||||
h.greetee += p.Result().String()
|
||||
p.Handle(h.name)
|
||||
default:
|
||||
p.Handle(h.exclamation)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
p.Expects("exclamation")
|
||||
if p.On(a.Excl).Accept() {
|
||||
p.Handle(h.end)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,7 +137,7 @@ func (h *helloparser1) end(p *parsekit.ParseAPI) {
|
|||
return
|
||||
}
|
||||
|
||||
h.greetee = strings.TrimSpace(p.BufLiteral())
|
||||
h.greetee = strings.TrimSpace(h.greetee)
|
||||
if h.greetee == "" {
|
||||
p.Error("The name cannot be empty")
|
||||
} else {
|
|
@ -4,7 +4,7 @@
|
|||
// The implementation uses only parser/combinator TokenHandler functions and does
|
||||
// not implement a full-fledged state-based Parser for it. If you want to see the
|
||||
// same kind of functionality, implemented using a Parser, take a look at the
|
||||
// HelloWorldUsingParser examples.
|
||||
// other hello examples.
|
||||
package parsekit_test
|
||||
|
||||
import (
|
||||
|
@ -37,9 +37,9 @@ func Example_helloWorldUsingMatcher() {
|
|||
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||
// [3] Input: "hello Pizza!" Output: Pizza
|
||||
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1
|
||||
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
|
||||
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
|
||||
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at start of file
|
||||
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at start of file
|
||||
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at start of file
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -53,7 +53,7 @@ func createHelloMatcher() *parsekit.Matcher {
|
|||
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
||||
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
||||
// drops all but the name from it.
|
||||
hello := c.StrNoCase("hello")
|
||||
hello := a.StrNoCase("hello")
|
||||
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||
separator := c.Any(comma, a.Whitespace)
|
||||
name := c.OneOrMore(c.Not(a.Excl))
|
|
@ -1,15 +1,15 @@
|
|||
// This is the same as the example HelloWorldUsingParser1, except that in this
|
||||
// This is the same as the other hello examples, except that in this
|
||||
// implementation the state machine is implemented using a combination of some
|
||||
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
|
||||
// are combined to do all the work in one go.
|
||||
//
|
||||
// Note that things are much easier to implement using custom TokenHandlers (see
|
||||
// the other HelloWorldUsingMatcher example for this). Doing this implementation
|
||||
// the other helloParserCombinator example for this). Doing this implementation
|
||||
// is mainly for your learning pleasure.
|
||||
//
|
||||
// One big difference between the Matcher-based example and this one, is that
|
||||
// this parser reports errors much more fine-grained. This might or might not be
|
||||
// useful for your specific use case.:0
|
||||
// One big difference between the parser/combinator-based example and this one,
|
||||
// is that this parser reports errors much more fine-grained. This might or
|
||||
// might not be useful for your specific use case.
|
||||
|
||||
package parsekit_test
|
||||
|
||||
|
@ -80,21 +80,29 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
|
|||
|
||||
func (h *helloparser2) start(p *parsekit.ParseAPI) {
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
if !p.On(c.StrNoCase("hello")).Skip() {
|
||||
if !p.On(a.StrNoCase("hello")).Skip() {
|
||||
p.Error("the greeting is not being friendly")
|
||||
} else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
|
||||
return
|
||||
}
|
||||
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
|
||||
p.Error("the greeting is not properly separated")
|
||||
} else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
|
||||
return
|
||||
}
|
||||
if p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
|
||||
h.greetee = p.Result().String()
|
||||
if h.greetee == "" {
|
||||
p.Error("the name cannot be empty")
|
||||
return
|
||||
}
|
||||
} else {
|
||||
p.Error("the greeting is targeted at thin air")
|
||||
} else if !p.On(a.Excl).Skip() {
|
||||
return
|
||||
}
|
||||
if !p.On(a.Excl).Skip() {
|
||||
p.Error("the greeting is not loud enough")
|
||||
} else if !p.On(a.EndOfFile).Stay() {
|
||||
p.Error("too much stuff going on after the closing '!'")
|
||||
} else {
|
||||
h.greetee = p.BufLiteral()
|
||||
if h.greetee == "" {
|
||||
p.Error("the name cannot be empty")
|
||||
}
|
||||
p.Stop()
|
||||
}
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
// In this example, we show that any type can be extended into a parser,
|
||||
// filling that type with data from the ParseHandler methods.
|
||||
//
|
||||
// Here, we create a custom type 'letterCollection', which is an alias
|
||||
// Here, we create a custom type 'Chunks', which is an alias
|
||||
// for []string. We add a ParseHandler method directly to that type
|
||||
// and let the parsing code fill the slice with strings during parsing.
|
||||
|
||||
|
@ -21,8 +21,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
|
|||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(chunkOfRunes).Accept() {
|
||||
*l = append(*l, p.BufLiteral())
|
||||
p.BufClear()
|
||||
*l = append(*l, p.Result().String())
|
||||
}
|
||||
})
|
||||
return parser.Execute(s)
|
||||
|
@ -30,10 +29,10 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
|
|||
|
||||
func Example_usingSliceAsParserState() {
|
||||
chunks := &Chunks{}
|
||||
chunks.AddChopped("This string will", 4)
|
||||
chunks.AddChopped("be cut to bits!!!!!!", 8)
|
||||
chunks.AddChopped("123412341234xxx", 4)
|
||||
chunks.AddChopped("1234567812345678xxxxx", 8)
|
||||
|
||||
fmt.Printf("Matches = %q", *chunks)
|
||||
// Output:
|
||||
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
|
||||
// Matches = ["1234" "1234" "1234" "xxx" "12345678" "12345678" "xxxxx"]
|
||||
}
|
||||
|
|
|
@ -48,23 +48,47 @@ func ExampleError_Full() {
|
|||
// it broke down at line 10, column 42
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune() {
|
||||
func ExampleMatchAnyRune_usingAcceptedRunes() {
|
||||
// Easy access to the parsekit definitions.
|
||||
a := parsekit.A
|
||||
|
||||
matches := []string{}
|
||||
|
||||
stateHandler := func(p *parsekit.ParseAPI) {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(a.AnyRune).Accept() {
|
||||
matches = append(matches, p.BufLiteral())
|
||||
p.BufClear()
|
||||
matches = append(matches, p.Result().String())
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
}
|
||||
parser := parsekit.NewParser(stateHandler)
|
||||
})
|
||||
err := parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
|
||||
// Output:
|
||||
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune_usingTokens() {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, tok := parsekit.C, parsekit.A, parsekit.T
|
||||
|
||||
var tokens []*parsekit.Token
|
||||
var accepted string
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
|
||||
tokens = p.Result().Tokens()
|
||||
accepted = p.Result().String()
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
})
|
||||
parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Runes accepted: %q\n", accepted)
|
||||
fmt.Printf("Token values: ")
|
||||
for _, t := range tokens {
|
||||
fmt.Printf("%c ", t.Value)
|
||||
}
|
||||
// Output:
|
||||
// Runes accepted: "¡Any will dö!"
|
||||
// Token values: ¡ A n y w i l l d ö !
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||
// ParseHandler methods to communicate with the parser.
|
||||
type ParseAPI struct {
|
||||
tokenAPI *TokenAPI // the input reader
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
expecting string // a description of what the current state expects to find (see Expects())
|
||||
result *Result // TokenHandler result, as received from On(...).Accept()
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
}
|
||||
|
||||
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
||||
// or when it has been stopped. It is used from the ParseAPI methods, to
|
||||
// prevent further calls to the ParseAPI on these occasions.
|
||||
//
|
||||
// Basically, this guard ensures proper coding of parsers, making sure
|
||||
// that clean routes are followed. You can consider this check a runtime
|
||||
// unit test.
|
||||
func (p *ParseAPI) panicWhenStoppedOrInError() {
|
||||
if !p.isStoppedOrInError() {
|
||||
return
|
||||
}
|
||||
|
||||
called, _ := p.getCaller(1)
|
||||
parts := strings.Split(called, ".")
|
||||
calledShort := parts[len(parts)-1]
|
||||
caller, filepos := p.getCaller(2)
|
||||
|
||||
after := "Error()"
|
||||
if p.stopped {
|
||||
after = "Stop()"
|
||||
}
|
||||
|
||||
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
|
||||
}
|
||||
|
||||
func (p *ParseAPI) isStoppedOrInError() bool {
|
||||
return p.stopped || p.err != nil
|
||||
}
|
||||
|
||||
func (p *ParseAPI) initLoopCheck() {
|
||||
p.loopCheck = map[string]bool{}
|
||||
}
|
||||
|
||||
func (p *ParseAPI) checkForLoops() {
|
||||
caller, filepos := p.getCaller(2)
|
||||
if _, ok := p.loopCheck[filepos]; ok {
|
||||
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
|
||||
}
|
||||
p.loopCheck[filepos] = true
|
||||
}
|
||||
|
||||
// TODO delete this one
|
||||
func (p *ParseAPI) getCaller(depth int) (string, string) {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
return caller.Name(), filepos
|
||||
}
|
107
parsehandler.go
107
parsehandler.go
|
@ -1,12 +1,5 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ParseHandler defines the type of function that must be implemented to handle
|
||||
// a parsing state in a Parser state machine.
|
||||
//
|
||||
|
@ -14,103 +7,3 @@ import (
|
|||
// all the internal state for the parsing state machine and provides the
|
||||
// interface that the ParseHandler uses to interact with the parser.
|
||||
type ParseHandler func(*ParseAPI)
|
||||
|
||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||
// ParseHandler methods to communicate with the parser.
|
||||
type ParseAPI struct {
|
||||
input string // the input that is being scanned by the parser
|
||||
inputPos int // current byte cursor position in the input
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
cursorLine int // current rune cursor row number in the input
|
||||
cursorColumn int // current rune cursor column position in the input
|
||||
len int // the total length of the input in bytes
|
||||
newline bool // keep track of when we have scanned a newline
|
||||
expecting string // a description of what the current state expects to find (see P.Expects())
|
||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
|
||||
LastMatch string // a string representation of the last matched input data
|
||||
}
|
||||
|
||||
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
||||
// or when it has been stopped. It is used from the ParseAPI methods, to
|
||||
// prevent further calls to the ParseAPI on these occasions.
|
||||
//
|
||||
// Basically, this guard ensures proper coding of parsers, making sure
|
||||
// that clean routes are followed. You can consider this check a runtime
|
||||
// unit test.
|
||||
func (p *ParseAPI) panicWhenStoppedOrInError() {
|
||||
if !p.isStoppedOrInError() {
|
||||
return
|
||||
}
|
||||
|
||||
called, _ := p.getCaller(1)
|
||||
parts := strings.Split(called, ".")
|
||||
calledShort := parts[len(parts)-1]
|
||||
caller, filepos := p.getCaller(2)
|
||||
|
||||
after := "Error()"
|
||||
if p.stopped {
|
||||
after = "Stop()"
|
||||
}
|
||||
|
||||
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
|
||||
}
|
||||
|
||||
func (p *ParseAPI) isStoppedOrInError() bool {
|
||||
return p.stopped || p.err != nil
|
||||
}
|
||||
|
||||
func (p *ParseAPI) checkForLoops() {
|
||||
caller, filepos := p.getCaller(2)
|
||||
if _, ok := p.loopCheck[filepos]; ok {
|
||||
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
|
||||
}
|
||||
p.loopCheck[filepos] = true
|
||||
}
|
||||
|
||||
// peek returns but does not advance the cursor to the next rune in the input.
|
||||
// Returns the rune, its width in bytes and a boolean.
|
||||
//
|
||||
// The boolean will be false in case no upcoming rune can be peeked
|
||||
// (end of data or invalid UTF8 character). In this case, the returned rune
|
||||
// will be one of eofRune or invalidRune.
|
||||
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
||||
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
||||
return handleRuneError(r, w)
|
||||
}
|
||||
|
||||
// eofRune is a special rune that is used to indicate an end of file when
// reading a character from the input.
const eofRune rune = -1

// invalidRune is a special rune that is used to indicate an invalid UTF8
// rune on the input.
const invalidRune rune = utf8.RuneError

// handleRuneError translates the result of a UTF8 decoding operation into
// the rune values that are used by this package. It returns the rune, its
// width in bytes and a boolean that tells whether or not it is a valid rune.
//
// When decoding fails, the decoded rune is utf8.RuneError. The width tells
// what went wrong:
//
//	1) w == 0: end of file is reached
//	2) w == 1: invalid UTF8 character on input
//
// These two cases respectively return the package's own eofRune and
// invalidRune (both with a false boolean), to make it easy for calling code
// to distinguish between them.
//
// A utf8.RuneError with any other width is not an error: it is the Unicode
// replacement character U+FFFD, validly encoded in the input. It is returned
// as a normal rune, like all other successfully decoded runes.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch w {
	case 0:
		return eofRune, 0, false
	case 1:
		return invalidRune, w, false
	default:
		return r, w, true
	}
}
|
||||
|
||||
func (p *ParseAPI) getCaller(depth int) (string, string) {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
return caller.Name(), filepos
|
||||
}
|
||||
|
|
|
@ -20,7 +20,11 @@ func (err *Error) Error() string {
|
|||
// Full returns the current error message, including information about
|
||||
// the position in the input where the error occurred.
|
||||
func (err *Error) Full() string {
|
||||
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
|
||||
if err.Line == 0 {
|
||||
return fmt.Sprintf("%s at start of file", err)
|
||||
} else {
|
||||
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
|
||||
}
|
||||
}
|
||||
|
||||
// Error sets the error message in the parser API. This error message
|
||||
|
@ -29,5 +33,5 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
|
|||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||
// set a different error message when needed.
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.err = &Error{message, p.cursorLine, p.cursorColumn}
|
||||
p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package parsekit
|
||||
|
||||
import "fmt"
|
||||
|
||||
// On checks if the input at the current cursor position matches the provided
|
||||
// TokenHandler. On must be chained with another method that tells the parser
|
||||
// what action to perform when a match was found:
|
||||
|
@ -17,7 +19,7 @@ package parsekit
|
|||
// The chain as a whole returns a boolean that indicates whether or not a match
|
||||
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||
// will have no effect. Because of this, typical use of an On() chain is as
|
||||
// expression for a conditional expression (if, switch/case, for). E.g.:
|
||||
// expression for a conditional statement (if, switch/case, for). E.g.:
|
||||
//
|
||||
// // Skip multiple exclamation marks.
|
||||
// for p.On(parsekit.A.Excl).Skip() { }
|
||||
|
@ -32,70 +34,71 @@ package parsekit
|
|||
// p.RouteTo(stateHandlerC)
|
||||
// }
|
||||
//
|
||||
// // When there's a "hi" on input, emit a parser item for it.
|
||||
// // When there's a "hi" on input, then say hello.
|
||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||
// p.Emit(SomeItemType, p.BufLiteral())
|
||||
// fmt.Println("Hello!")
|
||||
// }
|
||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.checkForLoops()
|
||||
|
||||
// Perform the matching operation.
|
||||
m := &TokenAPI{p: p}
|
||||
if tokenHandler == nil {
|
||||
panic("ParseHandler bug: tokenHandler argument for On() is nil")
|
||||
}
|
||||
ok := tokenHandler(m)
|
||||
|
||||
// Keep track of the last match, to allow parser implementations
|
||||
// to access it in an easy way. Typical use would be something like:
|
||||
//
|
||||
// if p.On(somethingBad).End() {
|
||||
// p.Errorf("This was bad: %s", p.LastMatch)
|
||||
// }
|
||||
p.LastMatch = string(m.input)
|
||||
p.result = nil
|
||||
p.tokenAPI.result = NewResult()
|
||||
fork := p.tokenAPI.Fork()
|
||||
ok := tokenHandler(fork)
|
||||
|
||||
return &ParseAPIOnAction{
|
||||
p: p,
|
||||
parseAPI: p,
|
||||
tokenAPI: fork,
|
||||
ok: ok,
|
||||
input: m.input,
|
||||
output: m.output,
|
||||
inputPos: p.inputPos + m.inputOffset,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
|
||||
// The On() method will return an initialized struct of this type.
|
||||
type ParseAPIOnAction struct {
|
||||
p *ParseAPI
|
||||
parseAPI *ParseAPI
|
||||
tokenAPI *TokenAPI
|
||||
ok bool
|
||||
input []rune
|
||||
output []rune
|
||||
inputPos int
|
||||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to store the input that matched in the parser's string buffer.
|
||||
// When no match was found, then no action is taken.
|
||||
// and to make the TokenHandler results available in the ParseAPI through
|
||||
// the Result() method.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Accept() bool {
|
||||
if a.ok {
|
||||
a.p.buffer.writeString(string(a.output))
|
||||
a.advanceCursor()
|
||||
a.tokenAPI.Merge()
|
||||
a.parseAPI.result = a.tokenAPI.root.result
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
if a.tokenAPI.offset > 0 {
|
||||
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without storing the actual match in the parser's string buffer.
|
||||
// without making the results available through the ParseAPI.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Skip() bool {
|
||||
if a.ok {
|
||||
a.advanceCursor()
|
||||
a.tokenAPI.root.cursor = a.tokenAPI.cursor
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
if a.tokenAPI.offset > 0 {
|
||||
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
@ -103,25 +106,23 @@ func (a *ParseAPIOnAction) Skip() bool {
|
|||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *ParseAPIOnAction) Stay() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// advanceCursor advances the input position in the input data.
|
||||
// While doing so, it keeps tracks of newlines that are encountered, so we
|
||||
// can report on line + column positions on error.
|
||||
func (a *ParseAPIOnAction) advanceCursor() {
|
||||
if a.p.inputPos == a.inputPos {
|
||||
return
|
||||
}
|
||||
a.p.loopCheck = map[string]bool{}
|
||||
a.p.inputPos = a.inputPos
|
||||
for _, r := range a.input {
|
||||
if a.p.newline {
|
||||
a.p.cursorLine++
|
||||
a.p.cursorColumn = 1
|
||||
} else {
|
||||
a.p.cursorColumn++
|
||||
}
|
||||
a.p.newline = r == '\n'
|
||||
// Result returns a Result struct, containing results as produced by the
|
||||
// last ParseAPI.On() call.
|
||||
func (p *ParseAPI) Result() *Result {
|
||||
result := p.result
|
||||
if p.result == nil {
|
||||
caller, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.ParseAPI.Result(): Result() called without calling "+
|
||||
"ParseAPI.Accept() on beforehand to make the result available "+
|
||||
"from %s at %s", caller, filepos))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
package parsekit
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Handle is used to execute other ParseHandler functions from within your
|
||||
// ParseHandler function.
|
||||
|
@ -77,14 +80,14 @@ func (p *ParseAPI) ExpectEndOfFile() {
|
|||
// expectation is included in the error message.
|
||||
func (p *ParseAPI) UnexpectedInput() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
r, _, ok := p.peek(0)
|
||||
r, err := p.tokenAPI.NextRune()
|
||||
switch {
|
||||
case ok:
|
||||
case err == nil:
|
||||
p.Error("unexpected character %q%s", r, fmtExpects(p))
|
||||
case r == eofRune:
|
||||
case err == io.EOF:
|
||||
p.Error("unexpected end of file%s", fmtExpects(p))
|
||||
case r == invalidRune:
|
||||
p.Error("invalid UTF8 character in input%s", fmtExpects(p))
|
||||
default:
|
||||
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
// BufLiteral retrieves the contents of the parser's string buffer (all the
|
||||
// runes that were added to it using ParseAPI.Accept()) as a literal string.
|
||||
//
|
||||
// Literal means that if the input had for example the subsequent runes '\' and
|
||||
// 'n' in it, then the literal string would have a backslash and an 'n' it in,
|
||||
// not a linefeed (ASCII char 10).
|
||||
//
|
||||
// Retrieving the buffer contents will not affect the buffer itself. New runes
|
||||
// can still be added to it. Only when calling P.BufClear(), the buffer will be
|
||||
// cleared.
|
||||
func (p *ParseAPI) BufLiteral() string {
|
||||
return p.buffer.asLiteralString()
|
||||
}
|
||||
|
||||
// BufInterpreted retrieves the contents of the parser's string buffer (all the
|
||||
// runes that were added to it using ParseAPI.Accept()) as an interpreted
|
||||
// string.
|
||||
//
|
||||
// Interpreted means that the contents are treated as a Go double quoted
|
||||
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
|
||||
// input had for example the subsequent runes '\' and 'n' in it, then the
|
||||
// interpreted string would have an actual linefeed (ASCII char 10) in it.
|
||||
//
|
||||
// This method returns a boolean value, indicating whether or not the string
|
||||
// interpretation was successful. On invalid string data, an error will
|
||||
// automatically be emitted and the boolean return value will be false.
|
||||
//
|
||||
// Retrieving the buffer contents will not affect the buffer itself. New runes
|
||||
// can still be added to it. Only when calling P.BufClear(), the buffer will be
|
||||
// cleared.
|
||||
func (p *ParseAPI) BufInterpreted() (string, bool) {
|
||||
s, err := p.buffer.asInterpretedString()
|
||||
if err != nil {
|
||||
p.Error(
|
||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
||||
p.buffer.asLiteralString(), err)
|
||||
return "", false
|
||||
}
|
||||
return s, true
|
||||
}
|
||||
|
||||
// BufClear clears the contents of the parser's string buffer.
|
||||
func (p *ParseAPI) BufClear() {
|
||||
p.buffer.reset()
|
||||
}
|
|
@ -43,35 +43,6 @@ func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
|
|||
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString(t *testing.T) {
|
||||
var interpreted string
|
||||
var literal string
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
|
||||
literal = p.BufLiteral()
|
||||
interpreted, _ = p.BufInterpreted()
|
||||
})
|
||||
p.Execute(`This\tis\ta\tcool\tstring`)
|
||||
|
||||
if literal != `This\tis\ta\tcool\tstring` {
|
||||
t.Fatal("literal string is incorrect")
|
||||
}
|
||||
if interpreted != "This\tis\ta\tcool\tstring" {
|
||||
t.Fatal("interpreted string is incorrect")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
|
||||
p.BufInterpreted()
|
||||
})
|
||||
err := p.Execute(`This \is wrongly escaped`)
|
||||
if err.Error() != `invalid string: This \is wrongly escaped (invalid syntax, forgot to escape a double quote or backslash maybe?)` {
|
||||
t.Fatalf("Got unexpected error: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
type parserWithLoop struct {
|
||||
loopCounter int
|
||||
}
|
||||
|
@ -119,7 +90,6 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
|||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(c.Max(5, a.AnyRune)).Accept() {
|
||||
p.BufClear()
|
||||
}
|
||||
p.Stop()
|
||||
})
|
||||
|
|
28
parsekit.go
28
parsekit.go
|
@ -1,5 +1,9 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Parser is the top-level struct that holds the configuration for a parser.
|
||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||
type Parser struct {
|
||||
|
@ -22,11 +26,8 @@ func NewParser(startHandler ParseHandler) *Parser {
|
|||
// When an error occurs during parsing, then this error is returned. Nil otherwise.
|
||||
func (p *Parser) Execute(input string) *Error {
|
||||
api := &ParseAPI{
|
||||
input: input,
|
||||
len: len(input),
|
||||
cursorLine: 1,
|
||||
cursorColumn: 1,
|
||||
loopCheck: map[string]bool{},
|
||||
tokenAPI: NewTokenAPI(strings.NewReader(input)),
|
||||
loopCheck: map[string]bool{},
|
||||
}
|
||||
api.Handle(p.startHandler)
|
||||
if !api.stopped && api.err == nil {
|
||||
|
@ -39,12 +40,10 @@ func (p *Parser) Execute(input string) *Error {
|
|||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
||||
// method.
|
||||
//
|
||||
// To match input data against the wrapped Matcher function, use the method
|
||||
// Matcher.Parse().
|
||||
// TODO Rename to Tokenizer
|
||||
type Matcher struct {
|
||||
parser *Parser
|
||||
match string
|
||||
result *Result
|
||||
}
|
||||
|
||||
// NewMatcher instantiates a new Matcher.
|
||||
|
@ -55,11 +54,12 @@ type Matcher struct {
|
|||
//
|
||||
// The 'expects' parameter is used for creating an error message in case parsed
|
||||
// input does not match the TokenHandler.
|
||||
// TODO Rename to NewTokenizer, and make Matcher a Tokenizer. Also see if we can use a Reader straight away, without a ParseAPI.
|
||||
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||
matcher := &Matcher{}
|
||||
matcher.parser = NewParser(func(p *ParseAPI) {
|
||||
if p.On(tokenHandler).Accept() {
|
||||
matcher.match = p.BufLiteral()
|
||||
matcher.result = p.Result()
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expects(expects)
|
||||
|
@ -70,9 +70,9 @@ func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
|||
}
|
||||
|
||||
// Execute feeds the input to the wrapped TokenHandler function.
|
||||
// It returns the matched input string and an error. When an error
|
||||
// occurred during parsing, the error will be set, nil otherwise.
|
||||
func (m *Matcher) Execute(input string) (string, *Error) {
|
||||
// It returns the TokenHandler's results. When an error occurred during parsing,
|
||||
// the error will be set, nil otherwise.
|
||||
func (m *Matcher) Execute(input string) (*Result, *Error) {
|
||||
err := m.parser.Execute(input)
|
||||
return m.match, err
|
||||
return m.result, err
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ import (
|
|||
)
|
||||
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T
|
||||
|
||||
type TokenHandlerTest struct {
|
||||
Input string
|
||||
|
@ -27,11 +27,11 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
|
|||
}
|
||||
|
||||
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||
output, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if test.MustMatch {
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else if output != test.Expected {
|
||||
} else if output := result.String(); output != test.Expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
||||
}
|
||||
} else {
|
||||
|
@ -41,6 +41,41 @@ func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
|||
}
|
||||
}
|
||||
|
||||
type TokenMakerTest struct {
|
||||
Input string
|
||||
TokenHandler parsekit.TokenHandler
|
||||
Expected []parsekit.Token
|
||||
}
|
||||
|
||||
func RunTokenMakerTest(t *testing.T, test TokenMakerTest) {
|
||||
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else {
|
||||
if len(result.Tokens()) != len(test.Expected) {
|
||||
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
|
||||
}
|
||||
for i, expected := range test.Expected {
|
||||
actual := result.Token(i)
|
||||
if expected.Type != actual.Type {
|
||||
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
|
||||
}
|
||||
if string(expected.Runes) != string(actual.Runes) {
|
||||
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
|
||||
}
|
||||
if expected.Value != actual.Value {
|
||||
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) {
|
||||
for _, test := range testSet {
|
||||
RunTokenMakerTest(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
type PanicTest struct {
|
||||
function func()
|
||||
expected string
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Reader wraps around an io.Reader and provides buffering that allows us to
// read the same runes over and over again. This is useful for implementing a
// parser that must be able to do lookahead on the input, returning to the
// original input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the buffer when there
// is no more need to go back to previously read runes.
type Reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// NewReader initializes a new Reader struct, wrapped around the provided io.Reader.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError. One special read
// fail is actually a normal situation: end of file reached. In that case,
// the returned error will be io.EOF.
//
// The offset must not be negative.
func (r *Reader) RuneAt(offset int) (rune, error) {
	// As long as the rune at the provided offset is not yet available in the
	// input buffer, read runes until there are enough to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip a byte order mark, but only when it is the very first rune of
		// the input (nothing buffered and nothing flushed so far). Further
		// on in the input, U+FEFF is regular data that must be preserved.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// RunesAt reads a slice of runes of length 'len', starting from offset 'start'.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// When an error is encountered during reading (EOF or other error), then the
// error return value will be set. In case of an error, any runes that could be
// successfully read are returned along with the error. When all requested
// runes are available, the error is nil, also when the last requested rune is
// the last rune of the input.
//
// The returned slice shares its storage with the reader's buffer, so it must
// be treated as read-only. The start offset must not be negative.
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
	if len <= 0 {
		return r.buffer[0:0], nil
	}
	end := start + len

	// Make sure that the last requested rune (offset end-1) is buffered.
	// Asking for offset 'end' would read one rune more than needed, and would
	// wrongly report io.EOF when the request ends exactly at end of input.
	if _, err := r.RuneAt(end - 1); err != nil {
		// Clamp the range to what is actually available in the buffer.
		if end > r.bufferLen {
			end = r.bufferLen
		}
		if start > end {
			start = end
		}
		return r.buffer[start:end], err
	}
	return r.buffer[start:end], nil
}

// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
//
// Flush panics when the number of runes is negative or when it exceeds the
// number of runes that are currently in the buffer.
func (r *Reader) Flush(numberOfRunes int) {
	// Validate before touching any state, so a recovered panic does not
	// leave the reader with inconsistent bookkeeping.
	if numberOfRunes < 0 {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"must not be negative", numberOfRunes))
	}
	if numberOfRunes > r.bufferLen {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
	}
	r.bufferOffset += numberOfRunes
	r.bufferLen -= numberOfRunes
	r.buffer = r.buffer[numberOfRunes:]
}
|
|
@ -0,0 +1,134 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func ExampleNewReader() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
|
||||
// Output:
|
||||
// H!
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
fmt.Printf("%c", at(7))
|
||||
fmt.Printf("%c", at(0))
|
||||
|
||||
// Output:
|
||||
// H!wH
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt_endOfFile() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
|
||||
rn, err := r.RuneAt(13)
|
||||
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||
|
||||
rn, err = r.RuneAt(20)
|
||||
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||
|
||||
// Output:
|
||||
// '<27>' EOF true
|
||||
// '<27>' EOF true
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt_invalidRune() {
|
||||
in := strings.NewReader("Hello, \xcdworld!")
|
||||
r := NewReader(in)
|
||||
|
||||
rn, err := r.RuneAt(6)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(7)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(8)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(9)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
|
||||
// Output:
|
||||
// ' ' true
|
||||
// '<27>' true
|
||||
// 'w' true
|
||||
// 'o' true
|
||||
}
|
||||
|
||||
func ExampleReader_RunesAt() {
|
||||
in := strings.NewReader("Hello, \xcdworld!")
|
||||
r := NewReader(in)
|
||||
|
||||
rs, err := r.RunesAt(4, 6)
|
||||
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||
rs, err = r.RunesAt(4, 0)
|
||||
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||
rs, err = r.RunesAt(8, 100)
|
||||
fmt.Printf("%q %t\n", string(rs), err == io.EOF)
|
||||
|
||||
// Output:
|
||||
// "o, <20>wo" true
|
||||
// "" true
|
||||
// "world!" true
|
||||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
in := strings.NewReader("\uFEFFBommetje!")
|
||||
r := NewReader(in)
|
||||
b, _ := r.RuneAt(0)
|
||||
o, _ := r.RuneAt(1)
|
||||
m, _ := r.RuneAt(2)
|
||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
assert.Equal(t, "Bom", bom, "first three runes")
|
||||
}
|
||||
|
||||
func ExampleReader_Flush() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
fmt.Printf("%c\n", at(7))
|
||||
|
||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||
r.Flush(4)
|
||||
|
||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||
// We can continue reading from there.
|
||||
fmt.Printf("%s", string(rb(0, 8)))
|
||||
|
||||
// Output:
|
||||
// w
|
||||
// o, world
|
||||
}
|
||||
|
||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.RuneAt(10)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() { r.Flush(12) },
|
||||
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
|
||||
})
|
||||
}
|
62
stringbuf.go
62
stringbuf.go
|
@ -1,62 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// stringBuffer is a string buffer implementation that is used by the parser
|
||||
// to efficiently accumulate runes from the input and eventually turn these
|
||||
// into a string, either literal or interpreted.
|
||||
type stringBuffer struct {
|
||||
buffer bytes.Buffer
|
||||
}
|
||||
|
||||
// reset resets the string buffer, in order to build a new string.
|
||||
func (b *stringBuffer) reset() *stringBuffer {
|
||||
b.buffer.Reset()
|
||||
return b
|
||||
}
|
||||
|
||||
// writeString adds the runes of the input string to the string buffer.
|
||||
func (b *stringBuffer) writeString(s string) *stringBuffer {
|
||||
for _, r := range s {
|
||||
b.writeRune(r)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// writeRune adds a single rune to the string buffer.
|
||||
func (b *stringBuffer) writeRune(r rune) *stringBuffer {
|
||||
b.buffer.WriteRune(r)
|
||||
return b
|
||||
}
|
||||
|
||||
// asLiteralString returns the string buffer as a literal string.
|
||||
// Literal means that no escape sequences are processed.
|
||||
func (b *stringBuffer) asLiteralString() string {
|
||||
return b.buffer.String()
|
||||
}
|
||||
|
||||
// asInterpretedString returns the string in its interpreted form.
|
||||
// Interpreted means that escape sequences are handled in the way that Go would
|
||||
// have, had it been inside double quotes. It translates for example escape
|
||||
// sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string
|
||||
// representations.
|
||||
// Since the input might contain invalid escape sequences, this method
|
||||
// also returns an error. When an error is returned, the returned string will
|
||||
// contain the string as far as it could be interpreted.
|
||||
func (b *stringBuffer) asInterpretedString() (string, error) {
|
||||
var sb strings.Builder
|
||||
tail := b.buffer.String()
|
||||
for len(tail) > 0 {
|
||||
r, _, newtail, err := strconv.UnquoteChar(tail, '"')
|
||||
if err != nil {
|
||||
return sb.String(), err
|
||||
}
|
||||
tail = newtail
|
||||
sb.WriteRune(r)
|
||||
}
|
||||
return sb.String(), nil
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
|
||||
var b stringBuffer
|
||||
s1, _ := b.writeString(`hi\nthere`).asInterpretedString()
|
||||
s2 := b.asLiteralString()
|
||||
if s1 != "hi\nthere" {
|
||||
t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1)
|
||||
}
|
||||
if s2 != "hi\\nthere" {
|
||||
t.Fatalf("Did not get expected string\"X\" for try 2, but %q", s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResetResetsBuffer(t *testing.T) {
|
||||
var b stringBuffer
|
||||
s := b.writeRune('X').reset().asLiteralString()
|
||||
if s != "" {
|
||||
t.Fatalf("Did not get expected empty string, but %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAsLiteralString(t *testing.T) {
|
||||
b := stringBuffer{}
|
||||
for _, c := range []stringbufT{
|
||||
{"empty string", ``, ``, OK},
|
||||
{"simple string", `Simple string!`, `Simple string!`, OK},
|
||||
{"single quote", `'`, `'`, OK},
|
||||
{"double quote", `"`, `"`, OK},
|
||||
{"escaped single quote", `\'`, `\'`, OK},
|
||||
{"escaped double quote", `\"`, `\"`, OK},
|
||||
{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
|
||||
{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
|
||||
{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
|
||||
} {
|
||||
s := b.reset().writeString(c.in).asLiteralString()
|
||||
if s != c.out {
|
||||
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAsInterpretedString(t *testing.T) {
|
||||
b := stringBuffer{}
|
||||
for _, c := range []stringbufT{
|
||||
{"empty string", "", "", OK},
|
||||
{"one character", "Simple string!", "Simple string!", OK},
|
||||
{"escaped single quote", `\'`, "", FAIL},
|
||||
{"escaped double quote", `\"`, `"`, OK},
|
||||
{"bare single quote", `'`, "'", OK},
|
||||
{"string in single quotes", `'Hello'`, `'Hello'`, OK},
|
||||
{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
|
||||
{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
|
||||
{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
|
||||
{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
|
||||
{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
|
||||
{"example from spec",
|
||||
`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
|
||||
"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
|
||||
} {
|
||||
s, err := b.reset().writeString(c.in).asInterpretedString()
|
||||
if c.isSuccessCase && err != nil {
|
||||
t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
|
||||
}
|
||||
if !c.isSuccessCase && err == nil {
|
||||
t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
|
||||
}
|
||||
if s != c.out && c.isSuccessCase {
|
||||
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type stringbufT struct {
|
||||
name string
|
||||
in string
|
||||
out string
|
||||
isSuccessCase bool
|
||||
}
|
||||
|
||||
const (
|
||||
OK bool = true
|
||||
FAIL bool = false
|
||||
)
|
|
@ -0,0 +1,188 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and
|
||||
// to report back results. For easy lookahead support, a forking strategy is
|
||||
// provided.
|
||||
//
|
||||
// BASIC OPERATION:
|
||||
//
|
||||
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
|
||||
//
|
||||
// When the rune is to be accepted as input, call the method Accept(). The rune
|
||||
// is then added to the result buffer of the TokenAPI struct.
|
||||
// It is mandatory to call Accept() after retrieving a rune, before calling
|
||||
// NextRune() again. Failing to do so will result in a panic.
|
||||
//
|
||||
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
|
||||
// with as many runes as needed.
|
||||
//
|
||||
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
||||
//
|
||||
// Sometimes, we must be able to perform a lookahead, which might either
|
||||
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
|
||||
// brought back to the original state, so we can try a different route.
|
||||
//
|
||||
// The way in which this is supported, is by forking a TokenAPI struct by calling
|
||||
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
|
||||
// but using the same input cursor position as the forked parent.
|
||||
//
|
||||
// After forking, the same interface as described for BASIC OPERATION can be
|
||||
// used to fill the result buffer. When the lookahead was successful, then
|
||||
// Merge() can be called on the forked child to append the child's result
|
||||
// buffer to the parent's result buffer, and to move the input cursor position
|
||||
// to that of the child.
|
||||
//
|
||||
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
|
||||
// be discarded. The parent TokenAPI was never modified, so it can safely be used
|
||||
// as if the lookahead never happened.
|
||||
//
|
||||
// Note:
|
||||
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||
// peeks and by moving the input cursor position back and forth, or by putting
|
||||
// read input back on the input stream. That often leads to code that is
|
||||
// efficient, however, in my opinion, not very intuitive to read.
|
||||
type TokenAPI struct {
|
||||
reader *Reader
|
||||
cursor *Cursor // current read cursor position, rel. to the input start
|
||||
offset int // current rune offset rel. to the Reader's sliding window
|
||||
result *Result // results as produced by a TokenHandler (runes, Tokens)
|
||||
root *TokenAPI // the root TokenAPI
|
||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||
}
|
||||
|
||||
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||
func NewTokenAPI(r io.Reader) *TokenAPI {
|
||||
input := &TokenAPI{
|
||||
reader: NewReader(r),
|
||||
cursor: &Cursor{},
|
||||
result: NewResult(),
|
||||
}
|
||||
input.root = input
|
||||
return input
|
||||
}
|
||||
|
||||
// NextRune returns the rune at the current read offset.
|
||||
//
|
||||
// When an invalid UTF8 rune is encountered on the input, it is replaced with
|
||||
// the utf.RuneError rune. It's up to the caller to handle this as an error
|
||||
// when needed.
|
||||
//
|
||||
// After reading a rune it must be Accept()-ed to move the read cursor forward
|
||||
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
|
||||
// without explicitly accepting, this method will panic.
|
||||
func (i *TokenAPI) NextRune() (rune, error) {
|
||||
if i.result.lastRune != nil {
|
||||
caller, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+
|
||||
"to Accept() from %s at %s", caller, linepos))
|
||||
}
|
||||
i.detachChilds()
|
||||
|
||||
readRune, err := i.reader.RuneAt(i.offset)
|
||||
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||
return readRune, err
|
||||
}
|
||||
|
||||
// Accept the last rune as read by NextRune() into the result buffer and move
|
||||
// the cursor forward.
|
||||
//
|
||||
// It is not allowed to call Accept() when the previous call to NextRune()
|
||||
// returned an error. Calling Accept() in such case will result in a panic.
|
||||
func (i *TokenAPI) Accept() {
|
||||
if i.result.lastRune == nil {
|
||||
caller, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.Accept(): Accept() called without first "+
|
||||
"calling NextRune() from %s at %s", caller, linepos))
|
||||
} else if i.result.lastRune.err != nil {
|
||||
caller, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.Accept(): Accept() called while the previous "+
|
||||
"call to NextRune() failed from %s at %s", caller, linepos))
|
||||
}
|
||||
i.result.runes = append(i.result.runes, i.result.lastRune.r)
|
||||
i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
|
||||
i.offset++
|
||||
i.result.lastRune = nil
|
||||
}
|
||||
|
||||
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
|
||||
// read cursor position, but for the rest this is a fresh TokenAPI.
|
||||
func (i *TokenAPI) Fork() *TokenAPI {
|
||||
i.detachChilds()
|
||||
|
||||
// Create the new fork.
|
||||
child := &TokenAPI{
|
||||
reader: i.reader,
|
||||
cursor: &Cursor{},
|
||||
offset: i.offset,
|
||||
root: i.root,
|
||||
parent: i,
|
||||
}
|
||||
child.result = NewResult()
|
||||
*child.cursor = *i.cursor
|
||||
i.child = child
|
||||
i.result.lastRune = nil
|
||||
return child
|
||||
}
|
||||
|
||||
// Merge adds the data of the forked child TokenAPI that Merge() is called on to the
|
||||
// data of its parent (results and read cursor position).
|
||||
func (i *TokenAPI) Merge() {
|
||||
if i.parent == nil {
|
||||
panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI")
|
||||
}
|
||||
|
||||
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
|
||||
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
|
||||
i.parent.offset = i.offset
|
||||
i.parent.cursor = i.cursor
|
||||
|
||||
i.detachChilds()
|
||||
i.result = NewResult()
|
||||
}
|
||||
|
||||
// Result returns the result data for the TokenAPI. The returned struct
|
||||
// can be used to retrieve and modify the result data.
|
||||
func (i *TokenAPI) Result() *Result {
|
||||
return i.result
|
||||
}
|
||||
|
||||
// Cursor retrieves the current read cursor data.
|
||||
// TODO make this and offset part of Result struct?
|
||||
func (i *TokenAPI) Cursor() Cursor {
|
||||
return *i.cursor
|
||||
}
|
||||
|
||||
// FlushReaderBuffer delegates to the Flush() method of the contained
|
||||
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
|
||||
// reader cache.
|
||||
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
|
||||
if i != i.root {
|
||||
panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
|
||||
}
|
||||
i.detachChilds()
|
||||
i.reader.Flush(numberOfRunes)
|
||||
i.offset = 0
|
||||
}
|
||||
|
||||
func (i *TokenAPI) detachChilds() {
|
||||
if i.child != nil {
|
||||
i.child.detachChildsRecurse()
|
||||
i.child = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (i *TokenAPI) detachChildsRecurse() {
|
||||
if i.child != nil {
|
||||
i.child.detachChildsRecurse()
|
||||
}
|
||||
i.child = nil
|
||||
i.parent = nil
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Result holds results as produced by a TokenHandler.
type Result struct {
	lastRune *runeInfo // the rune that was last read using NextRune(), nil when none is pending
	runes    []rune    // the runes that were collected
	tokens   []*Token  // the tokens that were collected
}

// runeInfo couples a rune with the error that was returned when reading it.
type runeInfo struct {
	r   rune
	err error
}

// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
	Type  interface{} // token type, can be any type that a parser author sees fit
	Runes []rune      // the runes that make up the token
	Value interface{} // an optional value of any type
}

// NewResult creates a Result that holds no runes and no tokens. Both
// collections are initialized as empty, non-nil slices.
func NewResult() *Result {
	res := new(Result)
	res.runes = make([]rune, 0)
	res.tokens = make([]*Token, 0)
	return res
}

// ClearRunes drops all runes from the Result.
func (res *Result) ClearRunes() {
	res.runes = make([]rune, 0)
}

// SetRunes drops the runes that are currently in the Result and stores the
// provided input instead. See AddRunes for the supported input types.
func (res *Result) SetRunes(s interface{}) {
	res.ClearRunes()
	res.AddRunes(s)
}

// AddRunes appends runes to the Result. The input can be a string, a slice
// of runes or a single rune. Any other type of input results in a panic.
func (res *Result) AddRunes(s interface{}) {
	var extra []rune
	switch input := s.(type) {
	case string:
		extra = []rune(input)
	case []rune:
		extra = input
	case rune:
		extra = []rune{input}
	default:
		panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", input))
	}
	res.runes = append(res.runes, extra...)
}

// Runes returns the runes that are stored in the Result.
func (res *Result) Runes() []rune {
	return res.runes
}

// Rune returns the rune that is stored in the Result at the specified index.
func (res *Result) Rune(idx int) rune {
	collected := res.runes
	return collected[idx]
}

// String returns the runes that are stored in the Result as a string.
func (res *Result) String() string {
	return string(res.runes)
}

// ClearTokens drops all tokens from the Result.
func (res *Result) ClearTokens() {
	res.tokens = make([]*Token, 0)
}

// AddToken appends a Token to the Result.
func (res *Result) AddToken(t *Token) {
	res.tokens = append(res.tokens, t)
}

// Tokens returns the Tokens that are stored in the Result.
func (res *Result) Tokens() []*Token {
	return res.tokens
}

// Token returns the Token that is stored in the Result at the specified index.
func (res *Result) Token(idx int) *Token {
	collected := res.tokens
	return collected[idx]
}

// Values returns a new slice, containing the Value of each Token in the
// Result, in Token order.
func (res *Result) Values() []interface{} {
	values := make([]interface{}, 0, len(res.tokens))
	for _, token := range res.tokens {
		values = append(values, token.Value)
	}
	return values
}

// Value returns the Value of the Token that is stored in the Result at the
// specified index.
func (res *Result) Value(idx int) interface{} {
	return res.Token(idx).Value
}
|
|
@ -0,0 +1,27 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes("string")
|
||||
assert.Equal(t, "string", string(i.Result().String()), "i.Result() with string input")
|
||||
i.Result().SetRunes([]rune("rune slice"))
|
||||
assert.Equal(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
|
||||
i.Result().SetRunes('X')
|
||||
assert.Equal(t, "X", string(i.Result().String()), "i.Result() with rune input")
|
||||
}
|
||||
|
||||
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes(1234567)
|
||||
},
|
||||
Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used",
|
||||
})
|
||||
}
|
|
@ -0,0 +1,288 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
r, _ := mkInput().NextRune()
|
||||
assert.Equal(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||
`a prior call to Accept\(\) from .*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: mkInput().Accept,
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.NextRune()
|
||||
f.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.Fork()
|
||||
f.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||
i := mkInput()
|
||||
f1 := i.Fork()
|
||||
f2 := f1.Fork()
|
||||
f3 := f2.Fork()
|
||||
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||
f5 := f4.Fork()
|
||||
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||
assert.Equal(t, true, i.child == f1, "i.child == f1")
|
||||
assert.Equal(t, true, f1.parent == i, "f1.parent == i")
|
||||
assert.Equal(t, true, f1.child == f4, "f1.child == f4")
|
||||
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
assert.Equal(t, true, f4.parent == f1, "f4.parent == f1")
|
||||
assert.Equal(t, true, f4.child == f5, "f4.child == f5")
|
||||
assert.Equal(t, true, f5.parent == f4, "f5.parent == f4")
|
||||
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||
|
||||
i.NextRune()
|
||||
|
||||
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||
assert.Equal(t, true, i.child == nil, "i.child == nil")
|
||||
assert.Equal(t, true, f1.parent == nil, "f1.parent == nil")
|
||||
assert.Equal(t, true, f1.child == nil, "f1.child == nil")
|
||||
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
assert.Equal(t, true, f4.parent == nil, "f4.parent == nil")
|
||||
assert.Equal(t, true, f4.child == nil, "f4.child == nil")
|
||||
assert.Equal(t, true, f5.parent == nil, "f5.parent == nil")
|
||||
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := mkInput()
|
||||
r, _ := i.NextRune()
|
||||
assert.Equal(t, 'T', r, "result from 1st call to NextRune()")
|
||||
// TODO still (*runeInfo) case needed?
|
||||
assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()")
|
||||
i.Accept()
|
||||
assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()")
|
||||
assert.Equal(t, 1, i.offset, "Input.offset")
|
||||
assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
|
||||
r, _ = i.NextRune()
|
||||
assert.Equal(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||
i := mkInput()
|
||||
for j := 0; j < 7; j++ {
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
||||
assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4")
|
||||
assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()")
|
||||
}
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
assert.Equal(t, "T", i.Result().String(), "accepted rune in input")
|
||||
// Fork
|
||||
f := i.Fork()
|
||||
assert.Equal(t, f, i.child, "Input.child (must be f)")
|
||||
assert.Equal(t, i, f.parent, "Input.parent (must be i)")
|
||||
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Accept two runes via fork.
|
||||
f.NextRune()
|
||||
f.Accept() // e
|
||||
f.NextRune()
|
||||
f.Accept() // s
|
||||
assert.Equal(t, "es", f.Result().String(), "result runes in fork")
|
||||
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Merge fork back into parent
|
||||
f.Merge()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||
assert.Equal(t, 3, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
f1.NextRune()
|
||||
f1.Accept()
|
||||
f2 := f1.Fork()
|
||||
f2.NextRune()
|
||||
f2.Accept()
|
||||
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 1, i.offset, "i.offset")
|
||||
assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 2, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
f2.Merge()
|
||||
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 1, i.offset, "i.offset")
|
||||
assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
f1.Merge()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 3, i.offset, "i.offset")
|
||||
assert.Equal(t, "", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
}
|
||||
|
||||
func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
f.FlushReaderBuffer(1)
|
||||
},
|
||||
Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " +
|
||||
"can only be called on the root TokenAPI, not on a forked child",
|
||||
})
|
||||
}
|
||||
|
||||
func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.FlushReaderBuffer(2)
|
||||
assert.Equal(t, "Te", i.Result().String(), "i.Result()")
|
||||
assert.Equal(t, 0, i.offset, "i.offset")
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
assert.Equal(t, 2, i.offset, "i.offset")
|
||||
i.FlushReaderBuffer(2)
|
||||
assert.Equal(t, "Test", i.Result().String(), "i.Result()")
|
||||
assert.Equal(t, 0, i.offset, "i.offset")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
assert.Equal(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
f := i.Fork()
|
||||
f.NextRune()
|
||||
f.Accept()
|
||||
r, err := f.NextRune()
|
||||
assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
r, err = i.NextRune()
|
||||
assert.Equal(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
assert.Equal(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func mkInput() *TokenAPI {
|
||||
return NewTokenAPI(strings.NewReader("Testing"))
|
||||
}
|
224
tokenhandler.go
224
tokenhandler.go
|
@ -2,113 +2,55 @@ package parsekit
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// TokenHandler is the function type that is involved in turning a low level
|
||||
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
|
||||
// data matches some kind of pattern and to report back the match.
|
||||
//
|
||||
// A TokenHandler is to be used in conjunction with parsekit.P.On() or
|
||||
// parsekit.Matcher().
|
||||
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
||||
// data matches some kind of pattern and to report back the token(s).
|
||||
//
|
||||
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||
// for retrieving input data to match against and for reporting back results.
|
||||
type TokenHandler func(t *TokenAPI) bool
|
||||
|
||||
// TokenAPI is used by TokenHandler functions to retrieve runes from the
|
||||
// input to match against and to report back results.
|
||||
//
|
||||
// Basic operation:
|
||||
//
|
||||
// To retrieve the next rune from the input, the TokenHandler function can call
|
||||
// the TokenAPI.NextRune() method.
|
||||
//
|
||||
// The TokenHandler function can then evaluate the retrieved rune and either
|
||||
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
|
||||
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
|
||||
// the rune will not be added to the output. It is mandatory for a TokenHandler
|
||||
// to call either Accept() or Skip() after retrieving a rune, before calling
|
||||
// NextRune() again.
|
||||
//
|
||||
// Eventually, the TokenHandler function must return a boolean value, indicating
|
||||
// whether or not a match was found. When true, then the calling code will
|
||||
// use the runes that were accepted into the TokenAPI's resulting output.
|
||||
//
|
||||
// Forking operation for easy lookahead support:
|
||||
//
|
||||
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
|
||||
// might either succeed or fail. In case of a failing lookahead, the state
|
||||
// of the TokenAPI must be brought back to the original state.
|
||||
//
|
||||
// The way in which this is supported, is by forking a TokenAPI by calling
|
||||
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
|
||||
// output buffer, but using the same input cursor position as the forked parent.
|
||||
//
|
||||
// The TokenHandler function can then use the same interface as described for
|
||||
// normal operation to retrieve runes from the input and to fill the resulting
|
||||
// output. When the TokenHandler function decides that the lookahead was successful,
|
||||
// then the method TokenAPI.Merge() can be called on the forked child to
|
||||
// append the resulting output from the child to the parent's resulting output,
|
||||
// and to update the parent input cursor position to that of the child.
|
||||
//
|
||||
// When the TokenHandler function decides that the lookahead was unsuccessful,
|
||||
// then it can simply discard the forked child. The parent TokenAPI was never
|
||||
// modified, so a new match can be safely started using that parent, as if the
|
||||
// lookahead never happened.
|
||||
type TokenAPI struct {
|
||||
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
|
||||
inputOffset int // the byte offset into the input
|
||||
input []rune // a slice of runes that represents all retrieved input runes for the Matcher
|
||||
output []rune // a slice of runes that represents the accepted output runes for the Matcher
|
||||
currRune *runeInfo // hold information for the last rune that was read from the input
|
||||
parent *TokenAPI // the parent MatchDialog, in case this one was forked
|
||||
}
|
||||
|
||||
// runeInfo describes a single rune and its metadata.
|
||||
type runeInfo struct {
|
||||
Rune rune // an UTF8 rune
|
||||
ByteSize int // the number of bytes in the rune
|
||||
OK bool // false when the rune represents an invalid UTF8 rune or EOF
|
||||
}
|
||||
|
||||
// NextRune retrieves the next rune from the input.
|
||||
//
|
||||
// It returns the rune and a boolean. The boolean will be false in case an
|
||||
// invalid UTF8 rune or the end of the file was encountered.
|
||||
//
|
||||
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
||||
// to respectively add the rune to the TokenAPI's resulting output or to
|
||||
// fully ignore it. This way, a TokenHandler has full control over what runes are
|
||||
// significant for the resulting output of that TokenHandler.
|
||||
// After retrieving a rune, Accept() or Skip() can be called to respectively add
|
||||
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
|
||||
// a TokenHandler has full control over what runes are significant for the
|
||||
// resulting output of that TokenHandler.
|
||||
//
|
||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||
// rune is explicitly accepted or skipped as described above.
|
||||
func (t *TokenAPI) NextRune() (rune, bool) {
|
||||
if t.currRune != nil {
|
||||
caller, filepos := t.p.getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"TokenHandler bug: NextRune() was called from %s at %s "+
|
||||
"without accepting or skipping the previously read rune", caller, filepos))
|
||||
}
|
||||
r, w, ok := t.p.peek(t.inputOffset)
|
||||
t.currRune = &runeInfo{r, w, ok}
|
||||
if ok {
|
||||
t.input = append(t.input, r)
|
||||
}
|
||||
return r, ok
|
||||
}
|
||||
// func (t *TokenAPIold) NextRune() (rune, bool) {
|
||||
// if t.lastRune != nil {
|
||||
// caller, filepos := getCaller(1)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: NextRune() was called from %s at %s "+
|
||||
// "without accepting or skipping the previously read rune", caller, filepos))
|
||||
// }
|
||||
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
|
||||
// t.lastRune = &runeInfo{r, w, ok}
|
||||
// if ok {
|
||||
// t.result.Input = append(t.result.Input, r)
|
||||
// }
|
||||
// return r, ok
|
||||
// }
|
||||
|
||||
// Fork splits off a child TokenAPI, containing the same input cursor position
|
||||
// as the parent TokenAPI, but with all other data in a fresh state.
|
||||
// Fork splits off a child TokenAPIold, containing the same input cursor position
|
||||
// as the parent TokenAPIold, but with all other data in a fresh state.
|
||||
//
|
||||
// By forking, a TokenHandler function can freely work with a TokenAPI, without
|
||||
// affecting the parent TokenAPI. This is for example useful when the
|
||||
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
|
||||
// affecting the parent TokenAPIold. This is for example useful when the
|
||||
// TokenHandler function must perform some form of lookahead.
|
||||
//
|
||||
// When a successful match was found, the TokenHandler function can call
|
||||
// TokenAPI.Merge() on the forked child to have the resulting output added
|
||||
// to the parent TokenAPI.
|
||||
// TokenAPIold.Merge() on the forked child to have the resulting output added
|
||||
// to the parent TokenAPIold.
|
||||
//
|
||||
// When no match was found, the forked child can simply be discarded.
|
||||
//
|
||||
|
@ -118,7 +60,7 @@ func (t *TokenAPI) NextRune() (rune, bool) {
|
|||
// case could look like this (yes, it's naive, but it shows the point):
|
||||
// TODO make proper tested example
|
||||
//
|
||||
// func MatchAbcd(t *TokenAPI) bool {
|
||||
// func MatchAbcd(t *TokenAPIold) bool {
|
||||
// child := t.Fork() // fork to keep m from input untouched
|
||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||
// if r, ok := t.NextRune(); !ok || r != letter {
|
||||
|
@ -129,73 +71,69 @@ func (t *TokenAPI) NextRune() (rune, bool) {
|
|||
// child.Merge() // we have a match, add resulting output to parent
|
||||
// return true // and report the successful match
|
||||
// }
|
||||
func (t *TokenAPI) Fork() *TokenAPI {
|
||||
return &TokenAPI{
|
||||
p: t.p,
|
||||
inputOffset: t.inputOffset,
|
||||
parent: t,
|
||||
}
|
||||
}
|
||||
|
||||
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
|
||||
// output of the TokenAPI.
|
||||
func (t *TokenAPI) Accept() {
|
||||
t.checkAllowedCall("Accept()")
|
||||
t.output = append(t.output, t.currRune.Rune)
|
||||
t.inputOffset += t.currRune.ByteSize
|
||||
t.currRune = nil
|
||||
}
|
||||
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
|
||||
// output of the TokenAPIold.
|
||||
// func (t *TokenAPIold) Accept() {
|
||||
// t.checkAllowedCall("Accept()")
|
||||
// t.buffer = append(t.buffer, t.lastRune.Rune)
|
||||
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
|
||||
// t.inputOffset += t.lastRune.ByteSize
|
||||
// t.lastRune = nil
|
||||
// }
|
||||
|
||||
// Skip will ignore the last rune as read by NextRune().
|
||||
func (t *TokenAPI) Skip() {
|
||||
t.checkAllowedCall("Skip()")
|
||||
t.inputOffset += t.currRune.ByteSize
|
||||
t.currRune = nil
|
||||
}
|
||||
// func (t *TokenAPIold) Skip() {
|
||||
// t.checkAllowedCall("Skip()")
|
||||
// t.inputOffset += t.lastRune.ByteSize
|
||||
// t.lastRune = nil
|
||||
// }
|
||||
|
||||
func (t *TokenAPI) checkAllowedCall(name string) {
|
||||
if t.currRune == nil {
|
||||
caller, filepos := t.p.getCaller(2)
|
||||
panic(fmt.Sprintf(
|
||||
"TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
||||
name, caller, filepos))
|
||||
}
|
||||
if !t.currRune.OK {
|
||||
caller, filepos := t.p.getCaller(2)
|
||||
panic(fmt.Sprintf(
|
||||
"TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
||||
"did not return OK (EOF or invalid rune)", name, caller, filepos))
|
||||
}
|
||||
}
|
||||
// func (t *TokenAPIold) checkAllowedCall(name string) {
|
||||
// if t.lastRune == nil {
|
||||
// caller, filepos := getCaller(2)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
||||
// name, caller, filepos))
|
||||
// }
|
||||
// if !t.lastRune.OK {
|
||||
// caller, filepos := getCaller(2)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
||||
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
|
||||
// }
|
||||
// }
|
||||
|
||||
// Merge merges the resulting output from a forked child TokenAPI back into
|
||||
// AddToken is used to add a token to the results of the TokenHandler.
|
||||
// func (t *TokenAPIold) AddToken(tok *Token) {
|
||||
// t.result.Tokens = append(t.result.Tokens, tok)
|
||||
// }
|
||||
|
||||
// Merge merges the resulting output from a forked child TokenAPIold back into
|
||||
// its parent: The runes that are accepted in the child are added to the parent
|
||||
// runes and the parent's input cursor position is advanced to the child's
|
||||
// cursor position.
|
||||
//
|
||||
// After the merge, the child TokenAPI is reset so it can immediately be
|
||||
// After the merge, the child TokenAPIold is reset so it can immediately be
|
||||
// reused for performing another match (all data are cleared, except for the
|
||||
// input offset which is kept at its current position).
|
||||
func (t *TokenAPI) Merge() bool {
|
||||
if t.parent == nil {
|
||||
panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
|
||||
}
|
||||
t.parent.input = append(t.parent.input, t.input...)
|
||||
t.parent.output = append(t.parent.output, t.output...)
|
||||
t.parent.inputOffset = t.inputOffset
|
||||
t.ClearOutput()
|
||||
t.ClearInput()
|
||||
return true
|
||||
}
|
||||
// func (t *TokenAPIold) Merge() bool {
|
||||
// if t.parent == nil {
|
||||
// panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
|
||||
// }
|
||||
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
|
||||
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
|
||||
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
|
||||
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
|
||||
// t.parent.inputOffset = t.inputOffset
|
||||
// t.result = &TokResult{}
|
||||
// return true
|
||||
// }
|
||||
|
||||
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
|
||||
// the input and input offset as-is.
|
||||
func (t *TokenAPI) ClearOutput() {
|
||||
t.output = []rune{}
|
||||
}
|
||||
|
||||
// ClearInput clears the input for the TokenAPI, but it keeps the output
|
||||
// and input offset as-is.
|
||||
func (t *TokenAPI) ClearInput() {
|
||||
t.input = []rune{}
|
||||
// getCaller looks up a frame on the call stack and returns the name of the
// function in that frame, together with the "file:line" position that is
// being executed in it.
//
// A depth of 0 identifies the function that called getCaller, 1 identifies
// the caller of that function, and so on.
//
// When the runtime cannot provide information for the requested depth (for
// example because the stack is not that deep), the placeholders "unknown"
// and "unknown:0" are returned instead of empty values.
func getCaller(depth int) (string, string) {
	// The +1 skips the stack frame of getCaller itself.
	pc, file, line, ok := runtime.Caller(depth + 1)
	if !ok {
		return "unknown", "unknown:0"
	}
	filepos := fmt.Sprintf("%s:%d", file, line)

	caller := runtime.FuncForPC(pc)
	if caller == nil {
		return "unknown", filepos
	}
	return caller.Name(), filepos
}
|
||||
|
|
|
@ -4,34 +4,107 @@ import (
|
|||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput(t *testing.T) {
|
||||
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
for i := 0; i < 33; i++ {
|
||||
for i := 0; i < 20; i++ {
|
||||
t.NextRune()
|
||||
t.Accept()
|
||||
t.NextRune()
|
||||
t.Skip()
|
||||
}
|
||||
return true
|
||||
}, "test")
|
||||
output, _ := parser.Execute("Txhxixsx xsxhxoxuxlxdx xbxexcxoxmxex xqxuxixtxex xrxexaxdxaxbxlxex")
|
||||
if output != "This should become quite readable" {
|
||||
t.Fatalf("Got unexpected output from TokenHandler: %s", output)
|
||||
result, _ := parser.Execute("This is some random data to parse")
|
||||
if result.String() != "This is some random " {
|
||||
t.Fatalf("Got unexpected output from TokenHandler: %s", result.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics(t *testing.T) {
|
||||
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
t.NextRune()
|
||||
t.NextRune()
|
||||
return false
|
||||
t.Result().AddToken(&parsekit.Token{
|
||||
Type: "PI",
|
||||
Runes: []rune("π"),
|
||||
Value: 3.1415,
|
||||
})
|
||||
t.Result().AddToken(&parsekit.Token{
|
||||
Type: nil,
|
||||
Runes: []rune("yes"),
|
||||
Value: true,
|
||||
})
|
||||
return true
|
||||
}, "test")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("input string") },
|
||||
`TokenHandler bug: NextRune\(\) was called from .*NextCallToNextRunePanics.* ` +
|
||||
`at .*/tokenhandler_test\.go:\d+ without accepting or skipping the previously read rune`})
|
||||
result, _ := parser.Execute("doesn't matter")
|
||||
if len(result.Tokens()) != 2 {
|
||||
t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens()))
|
||||
}
|
||||
if result.Token(0).Value != 3.1415 {
|
||||
t.Fatal("Token 0 value not 3.1415")
|
||||
}
|
||||
if string(result.Token(0).Runes) != "π" {
|
||||
t.Fatal("Token 0 runes not \"π\"")
|
||||
}
|
||||
if result.Token(0).Type != "PI" {
|
||||
t.Fatal("Token 0 type not \"PI\"")
|
||||
}
|
||||
if result.Token(1).Value != true {
|
||||
t.Fatal("Token 1 value not true")
|
||||
}
|
||||
if string(result.Token(1).Runes) != "yes" {
|
||||
t.Fatal("Token 1 runes not \"yes\"")
|
||||
}
|
||||
if result.Token(1).Type != nil {
|
||||
t.Fatal("Token 1 type not nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
|
||||
fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII))
|
||||
parser := parsekit.NewMatcher(fooToken, "something")
|
||||
input := "This is fine ASCII Åltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error from parser: %s", err)
|
||||
}
|
||||
if result.String() != "This is fine ASCII " {
|
||||
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
|
||||
fooToken := c.Seq(
|
||||
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||
tok.StrLiteral("COMBI", c.Seq(
|
||||
tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
|
||||
tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
|
||||
)),
|
||||
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||
)
|
||||
parser := parsekit.NewMatcher(fooToken, "something")
|
||||
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
|
||||
output := "This is fine ASCIIÅltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error from parser: %s", err)
|
||||
}
|
||||
if result.String() != output {
|
||||
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||
}
|
||||
if result.Token(0).Type != "COMBI" {
|
||||
t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type)
|
||||
}
|
||||
if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" {
|
||||
t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
if result.Token(1).Value != "This is fine ASCII" {
|
||||
t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
if result.Token(2).Value != "Åltho hère öt endĩt!" {
|
||||
t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
||||
|
@ -39,21 +112,25 @@ func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
|||
t.Accept()
|
||||
return false
|
||||
}, "test")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("input string") },
|
||||
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
|
||||
`at .*/tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` +
|
||||
`calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestGivenNextRuneNotCalled_CallToSkipPanics(t *testing.T) {
|
||||
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
t.Skip()
|
||||
t.NextRune()
|
||||
t.NextRune()
|
||||
return false
|
||||
}, "test")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("input string") },
|
||||
`TokenHandler bug: Skip\(\) was called from .*CallToSkipPanics.* ` +
|
||||
`at .*tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||
`a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`})
|
||||
}
|
||||
|
||||
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
||||
|
@ -62,19 +139,19 @@ func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
|||
t.Accept()
|
||||
return false
|
||||
}, "test")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("\xcd") },
|
||||
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
|
||||
`at .*tokenhandler_test\.go:\d+, but prior call to NextRune\(\) did not ` +
|
||||
`return OK \(EOF or invalid rune\)`})
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() { parser.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` +
|
||||
`NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`})
|
||||
}
|
||||
|
||||
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
a := parsekit.TokenAPI{}
|
||||
a.Merge()
|
||||
},
|
||||
`TokenHandler bug: Cannot call Merge a a non-forked MatchDialog`,
|
||||
Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -2,6 +2,9 @@ package parsekit
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
@ -9,6 +12,11 @@ import (
|
|||
// C provides convenient access to a range of parser/combinators that can be
|
||||
// used to construct TokenHandler functions.
|
||||
//
|
||||
// Parser/combinators are so-called higher-order functions that take in one
|
||||
// or more other TokenHandlers and output a new TokenHandler. They can be
|
||||
// used to combine TokenHandlers in useful ways to create new more complex
|
||||
// TokenHandlers.
|
||||
//
|
||||
// When using C in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
|
@ -16,11 +24,6 @@ import (
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
Rune func(rune) TokenHandler
|
||||
Runes func(...rune) TokenHandler
|
||||
RuneRange func(rune, rune) TokenHandler
|
||||
Str func(string) TokenHandler
|
||||
StrNoCase func(string) TokenHandler
|
||||
Any func(...TokenHandler) TokenHandler
|
||||
Not func(TokenHandler) TokenHandler
|
||||
Opt func(TokenHandler) TokenHandler
|
||||
|
@ -31,15 +34,9 @@ var C = struct {
|
|||
ZeroOrMore func(TokenHandler) TokenHandler
|
||||
OneOrMore func(TokenHandler) TokenHandler
|
||||
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
||||
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
||||
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string?
|
||||
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
||||
Signed func(TokenHandler) TokenHandler
|
||||
}{
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
Opt: MatchOpt,
|
||||
Any: MatchAny,
|
||||
Not: MatchNot,
|
||||
|
@ -52,15 +49,217 @@ var C = struct {
|
|||
MinMax: MatchMinMax,
|
||||
Separated: MatchSeparated,
|
||||
Except: MatchExcept,
|
||||
Signed: MatchSigned,
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||
//
|
||||
// When using A in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var a = parsekit.A
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
Rune func(rune) TokenHandler
|
||||
Runes func(...rune) TokenHandler
|
||||
RuneRange func(rune, rune) TokenHandler
|
||||
Str func(string) TokenHandler
|
||||
StrNoCase func(string) TokenHandler
|
||||
EndOfFile TokenHandler
|
||||
AnyRune TokenHandler
|
||||
Space TokenHandler
|
||||
Tab TokenHandler
|
||||
CR TokenHandler
|
||||
LF TokenHandler
|
||||
CRLF TokenHandler
|
||||
Excl TokenHandler
|
||||
DoubleQuote TokenHandler
|
||||
Hash TokenHandler
|
||||
Dollar TokenHandler
|
||||
Percent TokenHandler
|
||||
Amp TokenHandler
|
||||
SingleQuote TokenHandler
|
||||
RoundOpen TokenHandler
|
||||
LeftParen TokenHandler
|
||||
RoundClose TokenHandler
|
||||
RightParen TokenHandler
|
||||
Asterisk TokenHandler
|
||||
Multiply TokenHandler
|
||||
Plus TokenHandler
|
||||
Add TokenHandler
|
||||
Comma TokenHandler
|
||||
Minus TokenHandler
|
||||
Subtract TokenHandler
|
||||
Dot TokenHandler
|
||||
Slash TokenHandler
|
||||
Divide TokenHandler
|
||||
Colon TokenHandler
|
||||
Semicolon TokenHandler
|
||||
AngleOpen TokenHandler
|
||||
LessThan TokenHandler
|
||||
Equal TokenHandler
|
||||
AngleClose TokenHandler
|
||||
GreaterThan TokenHandler
|
||||
Question TokenHandler
|
||||
At TokenHandler
|
||||
SquareOpen TokenHandler
|
||||
Backslash TokenHandler
|
||||
SquareClose TokenHandler
|
||||
Caret TokenHandler
|
||||
Underscore TokenHandler
|
||||
Backquote TokenHandler
|
||||
CurlyOpen TokenHandler
|
||||
Pipe TokenHandler
|
||||
CurlyClose TokenHandler
|
||||
Tilde TokenHandler
|
||||
Newline TokenHandler
|
||||
Whitespace TokenHandler
|
||||
WhitespaceAndNewlines TokenHandler
|
||||
EndOfLine TokenHandler
|
||||
Digit TokenHandler
|
||||
DigitNotZero TokenHandler
|
||||
Digits TokenHandler
|
||||
Float TokenHandler
|
||||
Boolean TokenHandler
|
||||
Integer TokenHandler
|
||||
Signed func(TokenHandler) TokenHandler
|
||||
IntegerBetween func(min int64, max int64) TokenHandler
|
||||
ASCII TokenHandler
|
||||
ASCIILower TokenHandler
|
||||
ASCIIUpper TokenHandler
|
||||
HexDigit TokenHandler
|
||||
Octet TokenHandler
|
||||
IPv4 TokenHandler
|
||||
IPv4MaskBits TokenHandler
|
||||
}{
|
||||
Rune: MatchRune,
|
||||
Runes: MatchRunes,
|
||||
RuneRange: MatchRuneRange,
|
||||
Str: MatchStr,
|
||||
StrNoCase: MatchStrNoCase,
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
Space: MatchRune(' '),
|
||||
Tab: MatchRune('\t'),
|
||||
CR: MatchRune('\r'),
|
||||
LF: MatchRune('\n'),
|
||||
CRLF: MatchStr("\r\n"),
|
||||
Excl: MatchRune('!'),
|
||||
DoubleQuote: MatchRune('"'),
|
||||
Hash: MatchRune('#'),
|
||||
Dollar: MatchRune('$'),
|
||||
Percent: MatchRune('%'),
|
||||
Amp: MatchRune('&'),
|
||||
SingleQuote: MatchRune('\''),
|
||||
RoundOpen: MatchRune('('),
|
||||
LeftParen: MatchRune('('),
|
||||
RoundClose: MatchRune(')'),
|
||||
RightParen: MatchRune(')'),
|
||||
Asterisk: MatchRune('*'),
|
||||
Multiply: MatchRune('*'),
|
||||
Plus: MatchRune('+'),
|
||||
Add: MatchRune('+'),
|
||||
Comma: MatchRune(','),
|
||||
Minus: MatchRune('-'),
|
||||
Subtract: MatchRune('-'),
|
||||
Dot: MatchRune('.'),
|
||||
Slash: MatchRune('/'),
|
||||
Divide: MatchRune('/'),
|
||||
Colon: MatchRune(':'),
|
||||
Semicolon: MatchRune(';'),
|
||||
AngleOpen: MatchRune('<'),
|
||||
LessThan: MatchRune('<'),
|
||||
Equal: MatchRune('='),
|
||||
AngleClose: MatchRune('>'),
|
||||
GreaterThan: MatchRune('>'),
|
||||
Question: MatchRune('?'),
|
||||
At: MatchRune('@'),
|
||||
SquareOpen: MatchRune('['),
|
||||
Backslash: MatchRune('\\'),
|
||||
SquareClose: MatchRune(']'),
|
||||
Caret: MatchRune('^'),
|
||||
Underscore: MatchRune('_'),
|
||||
Backquote: MatchRune('`'),
|
||||
CurlyOpen: MatchRune('{'),
|
||||
Pipe: MatchRune('|'),
|
||||
CurlyClose: MatchRune('}'),
|
||||
Tilde: MatchRune('~'),
|
||||
Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
|
||||
WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
|
||||
EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
|
||||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
Integer: MatchInteger(),
|
||||
Signed: MatchSigned,
|
||||
IntegerBetween: MatchIntegerBetween,
|
||||
Float: MatchFloat(),
|
||||
Boolean: MatchBoolean(),
|
||||
ASCII: MatchRuneRange('\x00', '\x7F'),
|
||||
ASCIILower: MatchRuneRange('a', 'z'),
|
||||
ASCIIUpper: MatchRuneRange('A', 'Z'),
|
||||
HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
|
||||
Octet: MatchOctet(false),
|
||||
IPv4: MatchIPv4(),
|
||||
IPv4MaskBits: MatchIntegerBetween(0, 32),
|
||||
}
|
||||
|
||||
// T provides convenient access to a range of Token producers (which in their
|
||||
// nature are parser/combinators) that can be used when creating TokenHandler
|
||||
// functions.
|
||||
//
|
||||
// When using T in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var t = parsekit.T
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var T = struct {
|
||||
StrLiteral func(interface{}, TokenHandler) TokenHandler
|
||||
StrInterpreted func(interface{}, TokenHandler) TokenHandler
|
||||
Byte func(interface{}, TokenHandler) TokenHandler
|
||||
Rune func(interface{}, TokenHandler) TokenHandler
|
||||
Int func(interface{}, TokenHandler) TokenHandler
|
||||
Int8 func(interface{}, TokenHandler) TokenHandler
|
||||
Int16 func(interface{}, TokenHandler) TokenHandler
|
||||
Int32 func(interface{}, TokenHandler) TokenHandler
|
||||
Int64 func(interface{}, TokenHandler) TokenHandler
|
||||
Uint func(interface{}, TokenHandler) TokenHandler
|
||||
Uint8 func(interface{}, TokenHandler) TokenHandler
|
||||
Uint16 func(interface{}, TokenHandler) TokenHandler
|
||||
Uint32 func(interface{}, TokenHandler) TokenHandler
|
||||
Uint64 func(interface{}, TokenHandler) TokenHandler
|
||||
Float32 func(interface{}, TokenHandler) TokenHandler
|
||||
Float64 func(interface{}, TokenHandler) TokenHandler
|
||||
Boolean func(interface{}, TokenHandler) TokenHandler
|
||||
ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
|
||||
}{
|
||||
StrLiteral: MakeStrLiteralToken,
|
||||
StrInterpreted: MakeStrInterpretedToken,
|
||||
Byte: MakeByteToken,
|
||||
Rune: MakeRuneToken,
|
||||
Int: MakeIntToken,
|
||||
Int8: MakeInt8Token,
|
||||
Int16: MakeInt16Token,
|
||||
Int32: MakeInt32Token,
|
||||
Int64: MakeInt64Token,
|
||||
Uint: MakeUintToken,
|
||||
Uint8: MakeUint8Token,
|
||||
Uint16: MakeUint16Token,
|
||||
Uint32: MakeUint32Token,
|
||||
Uint64: MakeUint64Token,
|
||||
Float32: MakeFloat32Token,
|
||||
Float64: MakeFloat64Token,
|
||||
Boolean: MakeBooleanToken,
|
||||
ByCallback: MakeTokenByCallback,
|
||||
}
|
||||
|
||||
// MatchRune creates a TokenHandler function that checks if the next rune from
|
||||
// the input matches the provided rune.
|
||||
func MatchRune(expected rune) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
input, ok := t.NextRune()
|
||||
if ok && input == expected {
|
||||
input, err := t.NextRune()
|
||||
if err == nil && input == expected {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
|
@ -73,8 +272,8 @@ func MatchRune(expected rune) TokenHandler {
|
|||
func MatchRunes(expected ...rune) TokenHandler {
|
||||
s := string(expected)
|
||||
return func(t *TokenAPI) bool {
|
||||
input, ok := t.NextRune()
|
||||
if ok {
|
||||
input, err := t.NextRune()
|
||||
if err == nil {
|
||||
if strings.ContainsRune(s, input) {
|
||||
t.Accept()
|
||||
return true
|
||||
|
@ -97,8 +296,8 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
|
|||
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||
}
|
||||
return func(t *TokenAPI) bool {
|
||||
input, ok := t.NextRune()
|
||||
if ok && input >= start && input <= end {
|
||||
input, err := t.NextRune()
|
||||
if err == nil && input >= start && input <= end {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
|
@ -167,7 +366,8 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
|
|||
for _, handler := range handlers {
|
||||
child := t.Fork()
|
||||
if handler(child) {
|
||||
return child.Merge()
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
|
@ -183,8 +383,8 @@ func MatchNot(handler TokenHandler) TokenHandler {
|
|||
if handler(probe) {
|
||||
return false
|
||||
}
|
||||
_, ok := t.NextRune()
|
||||
if ok {
|
||||
_, err := t.NextRune()
|
||||
if err == nil {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
|
@ -311,138 +511,24 @@ func MatchSigned(handler TokenHandler) TokenHandler {
|
|||
return MatchSeq(sign, handler)
|
||||
}
|
||||
|
||||
// A provides convenient access to a range of atoms that can be used to
|
||||
// build TokenHandlers or parser rules.
|
||||
//
|
||||
// In parsekit, an atom is defined as a ready for use TokenHandler function.
|
||||
//
|
||||
// When using A in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var a = parsekit.A
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
EndOfFile TokenHandler
|
||||
AnyRune TokenHandler
|
||||
Space TokenHandler
|
||||
Tab TokenHandler
|
||||
CR TokenHandler
|
||||
LF TokenHandler
|
||||
CRLF TokenHandler
|
||||
Excl TokenHandler
|
||||
DoubleQuote TokenHandler
|
||||
Hash TokenHandler
|
||||
Dollar TokenHandler
|
||||
Percent TokenHandler
|
||||
Amp TokenHandler
|
||||
SingleQuote TokenHandler
|
||||
RoundOpen TokenHandler
|
||||
LeftParen TokenHandler
|
||||
RoundClose TokenHandler
|
||||
RightParen TokenHandler
|
||||
Asterisk TokenHandler
|
||||
Multiply TokenHandler
|
||||
Plus TokenHandler
|
||||
Add TokenHandler
|
||||
Comma TokenHandler
|
||||
Minus TokenHandler
|
||||
Subtract TokenHandler
|
||||
Dot TokenHandler
|
||||
Slash TokenHandler
|
||||
Divide TokenHandler
|
||||
Colon TokenHandler
|
||||
Semicolon TokenHandler
|
||||
AngleOpen TokenHandler
|
||||
LessThan TokenHandler
|
||||
Equal TokenHandler
|
||||
AngleClose TokenHandler
|
||||
GreaterThan TokenHandler
|
||||
Question TokenHandler
|
||||
At TokenHandler
|
||||
SquareOpen TokenHandler
|
||||
Backslash TokenHandler
|
||||
SquareClose TokenHandler
|
||||
Caret TokenHandler
|
||||
Underscore TokenHandler
|
||||
Backquote TokenHandler
|
||||
CurlyOpen TokenHandler
|
||||
Pipe TokenHandler
|
||||
CurlyClose TokenHandler
|
||||
Tilde TokenHandler
|
||||
Newline TokenHandler
|
||||
Whitespace TokenHandler
|
||||
WhitespaceAndNewlines TokenHandler
|
||||
EndOfLine TokenHandler
|
||||
Digit TokenHandler
|
||||
DigitNotZero TokenHandler
|
||||
Digits TokenHandler
|
||||
Float TokenHandler
|
||||
Integer TokenHandler
|
||||
ASCII TokenHandler
|
||||
ASCIILower TokenHandler
|
||||
ASCIIUpper TokenHandler
|
||||
HexDigit TokenHandler
|
||||
}{
|
||||
EndOfFile: MatchEndOfFile(),
|
||||
AnyRune: MatchAnyRune(),
|
||||
Space: C.Rune(' '),
|
||||
Tab: C.Rune('\t'),
|
||||
CR: C.Rune('\r'),
|
||||
LF: C.Rune('\n'),
|
||||
CRLF: C.Str("\r\n"),
|
||||
Excl: C.Rune('!'),
|
||||
DoubleQuote: C.Rune('"'),
|
||||
Hash: C.Rune('#'),
|
||||
Dollar: C.Rune('$'),
|
||||
Percent: C.Rune('%'),
|
||||
Amp: C.Rune('&'),
|
||||
SingleQuote: C.Rune('\''),
|
||||
RoundOpen: C.Rune('('),
|
||||
LeftParen: C.Rune('('),
|
||||
RoundClose: C.Rune(')'),
|
||||
RightParen: C.Rune(')'),
|
||||
Asterisk: C.Rune('*'),
|
||||
Multiply: C.Rune('*'),
|
||||
Plus: C.Rune('+'),
|
||||
Add: C.Rune('+'),
|
||||
Comma: C.Rune(','),
|
||||
Minus: C.Rune('-'),
|
||||
Subtract: C.Rune('-'),
|
||||
Dot: C.Rune('.'),
|
||||
Slash: C.Rune('/'),
|
||||
Divide: C.Rune('/'),
|
||||
Colon: C.Rune(':'),
|
||||
Semicolon: C.Rune(';'),
|
||||
AngleOpen: C.Rune('<'),
|
||||
LessThan: C.Rune('<'),
|
||||
Equal: C.Rune('='),
|
||||
AngleClose: C.Rune('>'),
|
||||
GreaterThan: C.Rune('>'),
|
||||
Question: C.Rune('?'),
|
||||
At: C.Rune('@'),
|
||||
SquareOpen: C.Rune('['),
|
||||
Backslash: C.Rune('\\'),
|
||||
SquareClose: C.Rune(']'),
|
||||
Caret: C.Rune('^'),
|
||||
Underscore: C.Rune('_'),
|
||||
Backquote: C.Rune('`'),
|
||||
CurlyOpen: C.Rune('{'),
|
||||
Pipe: C.Rune('|'),
|
||||
CurlyClose: C.Rune('}'),
|
||||
Tilde: C.Rune('~'),
|
||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
||||
Digit: MatchDigit(),
|
||||
DigitNotZero: MatchDigitNotZero(),
|
||||
Digits: MatchDigits(),
|
||||
Integer: MatchInteger(),
|
||||
Float: MatchFloat(),
|
||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
||||
ASCIILower: C.RuneRange('a', 'z'),
|
||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
||||
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
||||
// MatchIntegerBetween creates a TokenHandler that checks for an integer
|
||||
// value between the provided min and max boundaries (inclusive).
|
||||
// It uses an int64 for checking internally, so you can check values
|
||||
// ranging from -9223372036854775808 to 9223372036854775807.
|
||||
func MatchIntegerBetween(min int64, max int64) TokenHandler {
|
||||
digits := MatchSigned(MatchDigits())
|
||||
return func(t *TokenAPI) bool {
|
||||
fork := t.Fork()
|
||||
if !digits(fork) {
|
||||
return false
|
||||
}
|
||||
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64)
|
||||
if value < min || value > max {
|
||||
return false
|
||||
}
|
||||
fork.Merge()
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
||||
|
@ -451,8 +537,8 @@ var A = struct {
|
|||
func MatchEndOfFile() TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
fork := t.Fork()
|
||||
input, ok := fork.NextRune()
|
||||
return !ok && input == eofRune
|
||||
_, err := fork.NextRune()
|
||||
return err == io.EOF
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -461,8 +547,8 @@ func MatchEndOfFile() TokenHandler {
|
|||
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||
func MatchAnyRune() TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
_, ok := t.NextRune()
|
||||
if ok {
|
||||
_, err := t.NextRune()
|
||||
if err == nil {
|
||||
t.Accept()
|
||||
return true
|
||||
}
|
||||
|
@ -494,7 +580,7 @@ func MatchDigitNotZero() TokenHandler {
|
|||
// hexadecimal.
|
||||
func MatchInteger() TokenHandler {
|
||||
justZero := MatchRune('0')
|
||||
integer := C.Seq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
||||
integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
||||
return MatchAny(integer, justZero)
|
||||
}
|
||||
|
||||
|
@ -506,6 +592,56 @@ func MatchFloat() TokenHandler {
|
|||
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
||||
}
|
||||
|
||||
// MatchBoolean creates a TokenHandler function that checks if a valid boolean
|
||||
// value can be read from the input. It supports the boolean values as understood
|
||||
// by Go's strconv.ParseBool() function.
|
||||
func MatchBoolean() TokenHandler {
|
||||
trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T'))
|
||||
falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F'))
|
||||
return MatchAny(trues, falses)
|
||||
}
|
||||
|
||||
// MatchOctet creates a TokenHandler function that checks if a valid octet value
|
||||
// can be read from the input (octet = byte value representation, with a value
|
||||
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
|
||||
// digits, not if there's a non-digit after it, meaning that "123255" would be
|
||||
// a valid sequence of two octets.
|
||||
//
|
||||
// When the normalize parameter is set to true, then leading zeroes will be
|
||||
// stripped from the octet.
|
||||
func MatchOctet(normalize bool) TokenHandler {
|
||||
digits := MatchMinMax(1, 3, MatchDigit())
|
||||
return func(t *TokenAPI) bool {
|
||||
fork := t.Fork()
|
||||
if !digits(fork) {
|
||||
return false
|
||||
}
|
||||
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
|
||||
if value <= 255 {
|
||||
if normalize {
|
||||
runes := fork.Result().Runes()
|
||||
for len(runes) > 1 && runes[0] == '0' {
|
||||
runes = runes[1:]
|
||||
}
|
||||
fork.Result().SetRunes(runes)
|
||||
}
|
||||
fork.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
|
||||
// IP address value can be read from the input.
|
||||
// It will normalize IP-addresses that look like "192.168.001.012" to
|
||||
// "192.168.1.12".
|
||||
func MatchIPv4() TokenHandler {
|
||||
octet := MatchOctet(true)
|
||||
dot := MatchRune('.')
|
||||
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||||
}
|
||||
|
||||
// M provides convenient access to a range of modifiers (which in their nature are
|
||||
// parser/combinators) that can be used when creating TokenHandler functions.
|
||||
//
|
||||
|
@ -520,25 +656,25 @@ func MatchFloat() TokenHandler {
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var M = struct {
|
||||
Drop func(TokenHandler) TokenHandler
|
||||
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimSpace func(handler TokenHandler) TokenHandler
|
||||
ToLower func(TokenHandler) TokenHandler
|
||||
ToUpper func(TokenHandler) TokenHandler
|
||||
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
||||
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
|
||||
Drop func(TokenHandler) TokenHandler
|
||||
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
|
||||
TrimSpace func(handler TokenHandler) TokenHandler
|
||||
ToLower func(TokenHandler) TokenHandler
|
||||
ToUpper func(TokenHandler) TokenHandler
|
||||
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
||||
ByCallback func(TokenHandler, func(string) string) TokenHandler
|
||||
}{
|
||||
Drop: ModifyDrop,
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
TrimSpace: ModifyTrimSpace,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ModifyByCallback: ModifyByCallback,
|
||||
Drop: ModifyDrop,
|
||||
Trim: ModifyTrim,
|
||||
TrimLeft: ModifyTrimLeft,
|
||||
TrimRight: ModifyTrimRight,
|
||||
TrimSpace: ModifyTrimSpace,
|
||||
ToLower: ModifyToLower,
|
||||
ToUpper: ModifyToUpper,
|
||||
Replace: ModifyReplace,
|
||||
ByCallback: ModifyByCallback,
|
||||
}
|
||||
|
||||
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||
|
@ -635,11 +771,222 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa
|
|||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
if handler(child) {
|
||||
s := modfunc(string(child.output))
|
||||
child.output = []rune(s)
|
||||
s := modfunc(child.Result().String())
|
||||
child.Result().SetRunes(s)
|
||||
child.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||
literal := t.Result().String()
|
||||
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal}
|
||||
})
|
||||
}
|
||||
|
||||
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||
// TODO ERROR HANDLING
|
||||
interpreted, _ := interpretString(t.Result().String())
|
||||
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted}
|
||||
})
|
||||
}
|
||||
|
||||
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||
// TODO ERROR HANDLING --- not a 1 rune input
|
||||
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
|
||||
})
|
||||
}
|
||||
|
||||
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||
// TODO ERROR HANDLING --- not a 1 byte input
|
||||
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
|
||||
})
|
||||
}
|
||||
|
||||
// interpretString interprets the backslash escape sequences in str, following
// the rules for the contents of a Go double-quoted string literal (as
// implemented by strconv.UnquoteChar with quote '"').
//
// It returns the interpreted string. When an invalid escape sequence or an
// unescaped double quote is encountered, the part of the string that was
// interpreted so far is returned, together with the error.
func interpretString(str string) (string, error) {
	var sb strings.Builder
	for len(str) > 0 {
		r, multibyte, remainder, err := strconv.UnquoteChar(str, '"')
		if err != nil {
			return sb.String(), err
		}
		str = remainder
		if multibyte {
			sb.WriteRune(r)
		} else {
			// A non-multibyte value is a plain ASCII character or a single
			// byte from a \xNN or octal escape. It must be written as one
			// raw byte; WriteRune would UTF-8 encode values >= 0x80 into
			// two bytes.
			sb.WriteByte(byte(r))
		}
	}
	return sb.String(), nil
}
|
||||
|
||||
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
|
||||
return strconv.Atoi(s)
|
||||
})
|
||||
}
|
||||
|
||||
// TODO allow other Go types for oct and hex too.
|
||||
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseInt(s, 10, 8)
|
||||
if err == nil {
|
||||
return int8(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseInt(s, 10, 16)
|
||||
if err == nil {
|
||||
return int16(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseInt(s, 10, 32)
|
||||
if err == nil {
|
||||
return int32(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseInt(s, 10, 64)
|
||||
if err == nil {
|
||||
return int64(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseUint(s, 10, 0)
|
||||
if err == nil {
|
||||
return uint(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
// TODO allow other Go types for oct and hex too.
|
||||
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseUint(s, 10, 8)
|
||||
if err == nil {
|
||||
return uint8(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err == nil {
|
||||
return uint16(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseUint(s, 10, 32)
|
||||
if err == nil {
|
||||
return uint32(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseUint(s, 10, 64)
|
||||
if err == nil {
|
||||
return uint64(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseFloat(s, 32)
|
||||
if err == nil {
|
||||
return float32(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseFloat(s, 64)
|
||||
if err == nil {
|
||||
return float64(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||
return makeStrconvToken(toktype, handler,
|
||||
func(s string) (interface{}, error) {
|
||||
value, err := strconv.ParseBool(s)
|
||||
if err == nil {
|
||||
return bool(value), err
|
||||
}
|
||||
return value, err
|
||||
})
|
||||
}
|
||||
|
||||
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
|
||||
pc, _, _, _ := runtime.Caller(1)
|
||||
fullName := runtime.FuncForPC(pc).Name()
|
||||
parts := strings.Split(fullName, ".")
|
||||
name := parts[len(parts)-1]
|
||||
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||
value, err := convert(t.Result().String())
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf(
|
||||
"TokenHandler error: %s cannot handle input %q: %s "+
|
||||
"(only use a type conversion token maker, when the input has been "+
|
||||
"validated on beforehand)", name, t.Result().String(), err))
|
||||
}
|
||||
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
|
||||
})
|
||||
}
|
||||
|
||||
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
fork := t.Fork()
|
||||
if handler(fork) {
|
||||
t.Result().AddToken(callback(fork))
|
||||
fork.Merge()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,72 +9,57 @@ import (
|
|||
|
||||
func TestCombinators(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"xxx", c.Rune('x'), true, "x"},
|
||||
{"x ", c.Rune(' '), false, ""},
|
||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", c.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", c.RuneRange('b', 'e'), true, "c"},
|
||||
{"dd", c.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", c.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", c.RuneRange('b', 'e'), false, ""},
|
||||
{"Hello, world!", c.Str("Hello"), true, "Hello"},
|
||||
{"HellÖ, world!", c.StrNoCase("hellö"), true, "HellÖ"},
|
||||
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"abc", c.Not(c.Rune('b')), true, "a"},
|
||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
||||
{"1010", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), true, "1"},
|
||||
{"2020", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), false, ""},
|
||||
{"abc", c.Any(c.Rune('a'), c.Rune('b')), true, "a"},
|
||||
{"bcd", c.Any(c.Rune('a'), c.Rune('b')), true, "b"},
|
||||
{"cde", c.Any(c.Rune('a'), c.Rune('b')), false, ""},
|
||||
{"ababc", c.Rep(4, c.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc", c.Rep(5, c.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, c.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, c.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
|
||||
{"", c.Max(4, c.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, c.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(c.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(c.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
|
||||
{"Hello, world!X", c.Seq(c.Str("Hello"), a.Comma, a.Space, c.Str("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Seq(c.Rune('1'), c.Rune('0'))), true, "101010"},
|
||||
{"", c.Opt(c.OneOrMore(c.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Opt(c.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
|
||||
{"abc", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
|
||||
{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
|
||||
{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
|
||||
{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
|
||||
{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
|
||||
{"ababc", c.Rep(4, a.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc", c.Rep(5, a.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, a.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, a.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
|
||||
{"", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
|
||||
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
|
||||
{"", c.Opt(c.OneOrMore(a.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Opt(a.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Opt(a.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Opt(c.OneOrMore(a.Rune('f'))), true, "fffff"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
|
@ -83,27 +68,42 @@ func TestCombinators(t *testing.T) {
|
|||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
RunPanicTests(t, []PanicTest{
|
||||
{func() { parsekit.C.RuneRange('z', 'a') },
|
||||
{func() { a.RuneRange('z', 'a') },
|
||||
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
|
||||
{func() { parsekit.C.MinMax(-1, 1, parsekit.A.Space) },
|
||||
{func() { c.MinMax(-1, 1, parsekit.A.Space) },
|
||||
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
|
||||
{func() { parsekit.C.MinMax(1, -1, parsekit.A.Space) },
|
||||
{func() { c.MinMax(1, -1, parsekit.A.Space) },
|
||||
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
|
||||
{func() { parsekit.C.MinMax(10, 5, parsekit.A.Space) },
|
||||
{func() { c.MinMax(10, 5, parsekit.A.Space) },
|
||||
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
|
||||
{func() { parsekit.C.Min(-10, parsekit.A.Space) },
|
||||
{func() { c.Min(-10, parsekit.A.Space) },
|
||||
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
|
||||
{func() { parsekit.C.Max(-42, parsekit.A.Space) },
|
||||
{func() { c.Max(-42, parsekit.A.Space) },
|
||||
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
{"Hello, world!", a.Str("Hello"), true, "Hello"},
|
||||
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
|
||||
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"xxx", a.Rune('x'), true, "x"},
|
||||
{"x ", a.Rune(' '), false, ""},
|
||||
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||
{"", a.EndOfFile, true, ""},
|
||||
{"⌘", a.AnyRune, true, "⌘"},
|
||||
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
|
||||
{"", a.AnyRune, false, ""}, // end of file
|
||||
{"\xbc", a.AnyRune, true, "<22>"}, // invalid UTF8 rune
|
||||
{"", a.AnyRune, false, ""}, // false is for end of file
|
||||
{" ", a.Space, true, " "},
|
||||
{"X", a.Space, false, ""},
|
||||
{"\t", a.Tab, true, "\t"},
|
||||
|
@ -187,32 +187,128 @@ func TestAtoms(t *testing.T) {
|
|||
{"1", a.Integer, true, "1"},
|
||||
{"-10X", a.Integer, false, ""},
|
||||
{"+10X", a.Integer, false, ""},
|
||||
{"-10X", c.Signed(a.Integer), true, "-10"},
|
||||
{"+10X", c.Signed(a.Integer), true, "+10"},
|
||||
{"+10.1X", c.Signed(a.Integer), true, "+10"},
|
||||
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"1X", a.Float, true, "1"},
|
||||
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||
{"123.321X", a.Float, true, "123.321"},
|
||||
{"-3.14X", a.Float, false, ""},
|
||||
{"-3.14X", c.Signed(a.Float), true, "-3.14"},
|
||||
{"-003.0014X", c.Signed(a.Float), true, "-003.0014"},
|
||||
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
||||
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
||||
{"0X", a.Octet, true, "0"},
|
||||
{"00X", a.Octet, true, "00"},
|
||||
{"000X", a.Octet, true, "000"},
|
||||
{"10X", a.Octet, true, "10"},
|
||||
{"010X", a.Octet, true, "010"},
|
||||
{"255123", a.Octet, true, "255"},
|
||||
{"256123", a.Octet, false, ""},
|
||||
{"300", a.Octet, false, ""},
|
||||
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
|
||||
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
|
||||
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
|
||||
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
|
||||
{"256.255.255.255", a.IPv4, false, ""},
|
||||
{"0", a.IPv4MaskBits, true, "0"},
|
||||
{"32", a.IPv4MaskBits, true, "32"},
|
||||
{"33", a.IPv4MaskBits, false, "0"},
|
||||
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||
{"10", a.IntegerBetween(-10, 10), true, "10"},
|
||||
{"11", a.IntegerBetween(0, 10), false, ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
|
||||
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
|
||||
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
|
||||
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
|
||||
{"abcdefghijk", m.ModifyByCallback(c.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
||||
{"NoTaLlUpPeR", m.ToUpper(c.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
||||
{"NoTaLlLoWeR", m.ToLower(c.StrNoCase("NOTALLlower")), true, "notalllower"},
|
||||
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
||||
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
||||
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
|
||||
})
|
||||
}
|
||||
|
||||
// When a TokenMaker encounters an error, this is considered a programmer error.
|
||||
// A TokenMaker should not be called, unless the input is already validated to
|
||||
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||
// input cannot be processed successfully.
|
||||
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||
parser := parsekit.NewMatcher(invalid, "boolean")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("no") },
|
||||
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
|
||||
`invalid syntax \(only use a type conversion token maker, when the input has been validated on beforehand\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestTokenMakers(t *testing.T) {
|
||||
RunTokenMakerTests(t, []TokenMakerTest{
|
||||
{`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)),
|
||||
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
||||
|
||||
{`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)),
|
||||
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
||||
|
||||
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
||||
[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
|
||||
|
||||
{"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
|
||||
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{
|
||||
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
|
||||
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
|
||||
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
|
||||
{Type: "bar", Runes: []rune("K"), Value: byte('K')},
|
||||
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
|
||||
}},
|
||||
|
||||
{"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
|
||||
|
||||
{`2147483647XYZ`, tok.Int("D", a.Integer), []parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
|
||||
{`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
|
||||
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
|
||||
{`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
|
||||
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
|
||||
{`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
|
||||
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
|
||||
|
||||
{`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
|
||||
{`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
|
||||
{`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
|
||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
|
||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
|
||||
|
||||
{`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
|
||||
{`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
|
||||
|
||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||
{Type: "P", Runes: []rune("1"), Value: true},
|
||||
{Type: "P", Runes: []rune("t"), Value: true},
|
||||
{Type: "P", Runes: []rune("T"), Value: true},
|
||||
{Type: "P", Runes: []rune("true"), Value: true},
|
||||
{Type: "P", Runes: []rune("TRUE"), Value: true},
|
||||
{Type: "P", Runes: []rune("True"), Value: true},
|
||||
}},
|
||||
|
||||
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||
{Type: "P", Runes: []rune("0"), Value: false},
|
||||
{Type: "P", Runes: []rune("f"), Value: false},
|
||||
{Type: "P", Runes: []rune("F"), Value: false},
|
||||
{Type: "P", Runes: []rune("false"), Value: false},
|
||||
{Type: "P", Runes: []rune("FALSE"), Value: false},
|
||||
{Type: "P", Runes: []rune("False"), Value: false},
|
||||
}},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -229,7 +325,7 @@ func TestSequenceOfRunes(t *testing.T) {
|
|||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Expects("Sequence of runes")
|
||||
if p.On(sequence).Accept() {
|
||||
output = p.BufLiteral()
|
||||
output = p.Result().String()
|
||||
p.Stop()
|
||||
}
|
||||
})
|
||||
|
@ -250,7 +346,7 @@ func TestCombination(t *testing.T) {
|
|||
c.Seq(
|
||||
c.Opt(a.Whitespace),
|
||||
c.Rep(3, a.AngleClose),
|
||||
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
|
||||
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
|
||||
return fmt.Sprintf("%d", len(s))
|
||||
}),
|
||||
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
|
||||
|
|
Loading…
Reference in New Issue