Back up a load of work on typed token support, making it easy to produce tokens directly from parser/combinator-based parsing rules.
This commit is contained in:
parent
21f1aa597c
commit
4580962fb8
|
@ -0,0 +1,19 @@
|
||||||
|
package assert
|
||||||
|
|
||||||
|
import (
	"reflect"
	"testing"
)
|
||||||
|
|
||||||
|
// Equal reports a test error through t when actual is not equal to expected.
//
// The values are compared with ==. When the dynamic type of the values does
// not support == (slices, maps, functions), the comparison falls back to
// reflect.DeepEqual instead of letting the runtime panic abort the test.
//
// forWhat is a short description of the value under test; it is included in
// the failure message.
func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	t.Helper() // report the failure at the caller's line, not in this package
	if !sameValue(expected, actual) {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

// NotEqual reports a test error through t when actual is equal to
// notExpected. It uses the same comparison rules as Equal.
//
// forWhat is a short description of the value under test; it is included in
// the failure message.
func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
	t.Helper() // report the failure at the caller's line, not in this package
	if sameValue(notExpected, actual) {
		t.Errorf("Unexpected value for %s: %q", forWhat, actual)
	}
}

// sameValue reports whether a and b are equal. It uses == whenever that
// works, so results for comparable values (including pointer identity) are
// unchanged. Comparing two interface values that hold the same uncomparable
// dynamic type makes == panic; only in that case is reflect.DeepEqual used.
func sameValue(a, b interface{}) (same bool) {
	defer func() {
		if r := recover(); r != nil {
			same = reflect.DeepEqual(a, b)
		}
	}()
	return a == b
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
package assert
|
||||||
|
|
||||||
|
import (
|
||||||
|
"regexp"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PanicT describes a panic expectation that can be checked using Panic.
type PanicT struct {
	Function func() // the code that is expected to panic
	Expect   string // the expected panic message (a regular expression when Regexp is true)
	Regexp   bool   // when true, Expect is matched as a regular expression instead of literally
}

// Panic runs p.Function and reports a test error through t unless the
// function panics with a message that matches p.Expect.
//
// The panic message is taken from the recovered value: a string is used as
// is, and for an error its Error() text is used. Any other kind of panic
// value is reported as a test error, rather than crashing the test run on a
// failed type assertion. An invalid regular expression in p.Expect is
// reported as a test error as well.
func Panic(t *testing.T, p PanicT) {
	t.Helper()
	defer func() {
		t.Helper()

		r := recover()
		if r == nil {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
			return
		}

		// Derive the message text from the recovered value.
		var message string
		switch v := r.(type) {
		case string:
			message = v
		case error:
			message = v.Error()
		default:
			t.Errorf("Code did panic, but with a non-string panic value of type %T: %v", r, r)
			return
		}

		matched := message == p.Expect
		if p.Regexp {
			re, err := regexp.Compile(p.Expect)
			if err != nil {
				t.Errorf("Invalid regular expression %q for the expected panic message: %s", p.Expect, err)
				return
			}
			matched = re.MatchString(message)
		}

		if !matched {
			t.Errorf(
				"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
				p.Expect, message)
		}
	}()
	p.Function()
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// Cursor describes a position in the input, expressed in several units.
// All offsets start counting at zero.
type Cursor struct {
	Byte   int // offset of the cursor, counted in bytes
	Rune   int // offset of the cursor, counted in UTF8 runes
	Column int // column of the cursor (0-indexed)
	Line   int // line of the cursor (0-indexed)
}

// String formats the cursor position for human consumption, using 1-indexed
// line and column numbers.
func (c *Cursor) String() string {
	line, column := c.Line+1, c.Column+1
	return fmt.Sprintf("line %d, column %d", line, column)
}

// move advances the cursor over the provided input string. Every rune moves
// the cursor one column to the right, except for a newline, which moves the
// cursor to the start of the next line.
func (c *Cursor) move(input string) {
	c.Byte += len(input)
	for _, char := range input {
		c.Rune++
		if char != '\n' {
			c.Column++
			continue
		}
		c.Line++
		c.Column = 0
	}
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
||||||
|
for _, test := range []struct {
|
||||||
|
name string
|
||||||
|
input []string
|
||||||
|
byte int
|
||||||
|
rune int
|
||||||
|
line int
|
||||||
|
column int
|
||||||
|
}{
|
||||||
|
{"No input at all", []string{""}, 0, 0, 0, 0},
|
||||||
|
{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
|
||||||
|
{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
|
||||||
|
{"One newline", []string{"\n"}, 1, 1, 1, 0},
|
||||||
|
{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
|
||||||
|
{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
|
||||||
|
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
|
||||||
|
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
|
||||||
|
} {
|
||||||
|
c := Cursor{}
|
||||||
|
for _, s := range test.input {
|
||||||
|
c.move(s)
|
||||||
|
}
|
||||||
|
if c.Byte != test.byte {
|
||||||
|
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
|
||||||
|
}
|
||||||
|
if c.Rune != test.rune {
|
||||||
|
t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
|
||||||
|
}
|
||||||
|
if c.Line != test.line {
|
||||||
|
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
|
||||||
|
}
|
||||||
|
if c.Column != test.column {
|
||||||
|
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -9,7 +9,6 @@ package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
@ -28,7 +27,6 @@ func Example_basicCalculator1() {
|
||||||
{"+", 0},
|
{"+", 0},
|
||||||
{"10.8 + 12", 0},
|
{"10.8 + 12", 0},
|
||||||
{"42+ ", 0},
|
{"42+ ", 0},
|
||||||
{"9999999999999999999 + 8888888", 0},
|
|
||||||
} {
|
} {
|
||||||
output, err := ComputeSimple(c.input)
|
output, err := ComputeSimple(c.input)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -47,7 +45,6 @@ func Example_basicCalculator1() {
|
||||||
// Input: "+", got error: unexpected character '+' (expected integer number)
|
// Input: "+", got error: unexpected character '+' (expected integer number)
|
||||||
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
|
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
|
||||||
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
|
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
|
||||||
// Input: "9999999999999999999 + 8888888", got error: invalid value: strconv.ParseInt: parsing "9999999999999999999": value out of range
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
@ -72,23 +69,16 @@ type simpleCalculator struct {
|
||||||
op int64 // represents operation for next term (+1 = add, -1 = subtract)
|
op int64 // represents operation for next term (+1 = add, -1 = subtract)
|
||||||
}
|
}
|
||||||
|
|
||||||
// A definition of bareInteger, which conveniently drops surrounding whitespace.
|
// A definition of an int64, which conveniently drops surrounding whitespace.
|
||||||
var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
|
var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
|
||||||
var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
|
var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
|
||||||
|
var int64Token = parsekit.T.Int64(nil, bareInteger)
|
||||||
|
|
||||||
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
||||||
if p.On(bareInteger).Accept() {
|
|
||||||
value, err := strconv.ParseInt(p.BufLiteral(), 10, 64)
|
|
||||||
p.BufClear()
|
|
||||||
if err != nil {
|
|
||||||
p.Error("invalid value: %s", err)
|
|
||||||
} else {
|
|
||||||
c.Result += c.op * value
|
|
||||||
p.Handle(c.operatorOrEndOfFile)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
p.Expects("integer number")
|
p.Expects("integer number")
|
||||||
p.UnexpectedInput()
|
if p.On(int64Token).Accept() {
|
||||||
|
c.Result += c.op * p.Result().Value(0).(int64)
|
||||||
|
p.Handle(c.operatorOrEndOfFile)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,6 @@ package parsekit_test
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
)
|
)
|
||||||
|
@ -97,8 +96,8 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
|
||||||
|
|
||||||
var pc, a = parsekit.C, parsekit.A
|
var pc, a = parsekit.C, parsekit.A
|
||||||
if p.Handle(c.term) {
|
if p.Handle(c.term) {
|
||||||
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
|
for p.On(pc.Any(a.Add, a.Subtract)).Accept() {
|
||||||
op := p.LastMatch
|
op := p.Result().Rune(0)
|
||||||
if !p.Handle(c.term) {
|
if !p.Handle(c.term) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -115,8 +114,8 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
|
||||||
|
|
||||||
var pc, a = parsekit.C, parsekit.A
|
var pc, a = parsekit.C, parsekit.A
|
||||||
if p.Handle(c.factor) {
|
if p.Handle(c.factor) {
|
||||||
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
|
for p.On(pc.Any(a.Multiply, a.Divide)).Accept() {
|
||||||
op := p.LastMatch
|
op := p.Result().Rune(0)
|
||||||
if !p.Handle(c.factor) {
|
if !p.Handle(c.factor) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -130,19 +129,12 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
|
||||||
// <space> = (<space> (SPACE|TAB) | "")
|
// <space> = (<space> (SPACE|TAB) | "")
|
||||||
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
||||||
func (c *calculator) factor(p *parsekit.ParseAPI) {
|
func (c *calculator) factor(p *parsekit.ParseAPI) {
|
||||||
var pc, a = parsekit.C, parsekit.A
|
var a, tok = parsekit.A, parsekit.T
|
||||||
p.On(a.Whitespace).Skip()
|
p.On(a.Whitespace).Skip()
|
||||||
switch {
|
switch {
|
||||||
case p.On(pc.Signed(a.Float)).Accept():
|
case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept():
|
||||||
floatStr := p.BufLiteral()
|
value := p.Result().Value(0).(float64)
|
||||||
p.BufClear()
|
|
||||||
value, err := strconv.ParseFloat(floatStr, 64)
|
|
||||||
if err != nil {
|
|
||||||
p.Error("invalid number %s: %s", floatStr, err)
|
|
||||||
return
|
|
||||||
} else {
|
|
||||||
c.interpreter.pushValue(value)
|
c.interpreter.pushValue(value)
|
||||||
}
|
|
||||||
case p.On(a.LeftParen).Skip():
|
case p.On(a.LeftParen).Skip():
|
||||||
if !p.Handle(c.expr) {
|
if !p.Handle(c.expr) {
|
||||||
return
|
return
|
||||||
|
@ -194,16 +186,16 @@ func (i *interpreter) pushValue(value float64) {
|
||||||
i.top.a, i.top.b = i.top.b, value
|
i.top.a, i.top.b = i.top.b, value
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *interpreter) eval(op string) float64 {
|
func (i *interpreter) eval(op rune) float64 {
|
||||||
value := i.top.a
|
value := i.top.a
|
||||||
switch op {
|
switch op {
|
||||||
case "+":
|
case '+':
|
||||||
value += i.top.b
|
value += i.top.b
|
||||||
case "-":
|
case '-':
|
||||||
value -= i.top.b
|
value -= i.top.b
|
||||||
case "*":
|
case '*':
|
||||||
value *= i.top.b
|
value *= i.top.b
|
||||||
case "/":
|
case '/':
|
||||||
value /= i.top.b
|
value /= i.top.b
|
||||||
}
|
}
|
||||||
i.top.b = value
|
i.top.b = value
|
||||||
|
|
|
@ -36,11 +36,11 @@ func Example_dutchPostcodeUsingMatcher() {
|
||||||
// [1] Input: "2233Ab" Output: 2233 AB
|
// [1] Input: "2233Ab" Output: 2233 AB
|
||||||
// [2] Input: "1001\t\tab" Output: 1001 AB
|
// [2] Input: "1001\t\tab" Output: 1001 AB
|
||||||
// [3] Input: "1818ab" Output: 1818 AB
|
// [3] Input: "1818ab" Output: 1818 AB
|
||||||
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
|
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||||
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
|
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||||
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
|
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
|
||||||
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
|
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file
|
||||||
// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
|
// [8] Input: "\xcd2222AB" Error: unexpected character '<27>' (expected a Dutch postcode) at start of file
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
@ -57,7 +57,7 @@ func createPostcodeMatcher() *parsekit.Matcher {
|
||||||
// - A space between letters and digits is optional.
|
// - A space between letters and digits is optional.
|
||||||
// - It is good form to write the letters in upper case.
|
// - It is good form to write the letters in upper case.
|
||||||
// - It is good form to use a single space between digits and letters.
|
// - It is good form to use a single space between digits and letters.
|
||||||
digitNotZero := c.Except(c.Rune('0'), a.Digit)
|
digitNotZero := c.Except(a.Rune('0'), a.Digit)
|
||||||
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||||
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||||
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||||
|
|
|
@ -3,15 +3,15 @@
|
||||||
//
|
//
|
||||||
// This implementation uses a state-based Parser for it, and it does not
|
// This implementation uses a state-based Parser for it, and it does not
|
||||||
// implement any custom parser/combinator TokenHandler functions. Note that
|
// implement any custom parser/combinator TokenHandler functions. Note that
|
||||||
// things are much easier to implement using custom TokenHandlers (see the other
|
// things are much easier to implement using custom TokenHandlers (see the
|
||||||
// HelloWorldUsingMatcher example for this). Doing this fully parser-based
|
// helloParserCombinator example for this). Doing this fully parser-based
|
||||||
// implementation is mainly for your learning pleasure.
|
// implementation is mainly for your learning pleasure.
|
||||||
//
|
//
|
||||||
// One big difference between the Matcher-based example and this one, is that
|
// One big difference between the parser/combinator-based example and this one,
|
||||||
// this parser reports errors much more fine-grained. This might or might not be
|
// is that this parser reports errors much more fine-grained. This might or
|
||||||
// useful for your specific use case. If you need error reporting like this,
|
// might not be useful for your specific use case. If you need error reporting
|
||||||
// then also take a look at the HelloWorldUsingParser2 example, which does the
|
// like this, then also take a look at the helloSingleState example, which does
|
||||||
// same thing as this version, only more concise.
|
// the same thing as this version, only more concise.
|
||||||
|
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
|
@ -56,11 +56,11 @@ func Example_helloWorldUsingParser1() {
|
||||||
// [6] Input: "hello" Error: unexpected end of file (expected comma)
|
// [6] Input: "hello" Error: unexpected end of file (expected comma)
|
||||||
// [7] Input: "hello," Error: unexpected end of file (expected name)
|
// [7] Input: "hello," Error: unexpected end of file (expected name)
|
||||||
// [8] Input: "hello , " Error: unexpected end of file (expected name)
|
// [8] Input: "hello , " Error: unexpected end of file (expected name)
|
||||||
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected name)
|
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected exclamation)
|
||||||
// [10] Input: "hello , Droopy!" Output: Droopy
|
// [10] Input: "hello , Droopy!" Output: Droopy
|
||||||
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
|
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
|
||||||
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
|
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
|
||||||
// [13] Input: "hello,!" Error: The name cannot be empty
|
// [13] Input: "hello,!" Error: unexpected character '!' (expected name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
@ -78,9 +78,9 @@ func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
||||||
c := parsekit.C
|
a := parsekit.A
|
||||||
p.Expects("hello")
|
p.Expects("hello")
|
||||||
if p.On(c.StrNoCase("hello")).Skip() {
|
if p.On(a.StrNoCase("hello")).Skip() {
|
||||||
p.Handle(h.comma)
|
p.Handle(h.comma)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,20 +88,42 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
||||||
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
|
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
|
||||||
a := parsekit.A
|
a := parsekit.A
|
||||||
p.Expects("comma")
|
p.Expects("comma")
|
||||||
p.On(a.Whitespace).Skip()
|
switch {
|
||||||
if p.On(a.Comma).Skip() {
|
case p.On(a.Whitespace).Skip():
|
||||||
|
p.Handle(h.comma)
|
||||||
|
case p.On(a.Comma).Skip():
|
||||||
|
p.Handle(h.startName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
|
||||||
|
c, a := parsekit.C, parsekit.A
|
||||||
|
p.Expects("name")
|
||||||
|
switch {
|
||||||
|
case p.On(a.Whitespace).Skip():
|
||||||
|
p.Handle(h.startName)
|
||||||
|
case p.On(c.Not(a.Excl)).Stay():
|
||||||
p.Handle(h.name)
|
p.Handle(h.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *helloparser1) name(p *parsekit.ParseAPI) {
|
func (h *helloparser1) name(p *parsekit.ParseAPI) {
|
||||||
a := parsekit.A
|
c, a := parsekit.C, parsekit.A
|
||||||
p.Expects("name")
|
p.Expects("name")
|
||||||
switch {
|
switch {
|
||||||
case p.On(a.Excl).Skip():
|
case p.On(c.Not(a.Excl)).Accept():
|
||||||
p.Handle(h.end)
|
h.greetee += p.Result().String()
|
||||||
case p.On(a.AnyRune).Accept():
|
|
||||||
p.Handle(h.name)
|
p.Handle(h.name)
|
||||||
|
default:
|
||||||
|
p.Handle(h.exclamation)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
|
||||||
|
a := parsekit.A
|
||||||
|
p.Expects("exclamation")
|
||||||
|
if p.On(a.Excl).Accept() {
|
||||||
|
p.Handle(h.end)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,7 +137,7 @@ func (h *helloparser1) end(p *parsekit.ParseAPI) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
h.greetee = strings.TrimSpace(p.BufLiteral())
|
h.greetee = strings.TrimSpace(h.greetee)
|
||||||
if h.greetee == "" {
|
if h.greetee == "" {
|
||||||
p.Error("The name cannot be empty")
|
p.Error("The name cannot be empty")
|
||||||
} else {
|
} else {
|
|
@ -4,7 +4,7 @@
|
||||||
// The implementation uses only parser/combinator TokenHandler functions and does
|
// The implementation uses only parser/combinator TokenHandler functions and does
|
||||||
// not implement a full-fledged state-based Parser for it. If you want to see the
|
// not implement a full-fledged state-based Parser for it. If you want to see the
|
||||||
// same kind of functionality, implemented using a Parser, take a look at the
|
// same kind of functionality, implemented using a Parser, take a look at the
|
||||||
// HelloWorldUsingParser examples.
|
// other hello examples.
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -37,9 +37,9 @@ func Example_helloWorldUsingMatcher() {
|
||||||
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||||
// [2] Input: "hello , Bob123!" Output: Bob123
|
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||||
// [3] Input: "hello Pizza!" Output: Pizza
|
// [3] Input: "hello Pizza!" Output: Pizza
|
||||||
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1
|
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at start of file
|
||||||
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
|
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at start of file
|
||||||
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
|
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at start of file
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
@ -53,7 +53,7 @@ func createHelloMatcher() *parsekit.Matcher {
|
||||||
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
||||||
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
||||||
// drops all but the name from it.
|
// drops all but the name from it.
|
||||||
hello := c.StrNoCase("hello")
|
hello := a.StrNoCase("hello")
|
||||||
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
|
||||||
separator := c.Any(comma, a.Whitespace)
|
separator := c.Any(comma, a.Whitespace)
|
||||||
name := c.OneOrMore(c.Not(a.Excl))
|
name := c.OneOrMore(c.Not(a.Excl))
|
|
@ -1,15 +1,15 @@
|
||||||
// This is the same as the example HelloWorldUsingParser1, except that in this
|
// This is the same as the other hello examples, except that in this
|
||||||
// implementation the state machine is implemented using a combination of some
|
// implementation the state machine is implemented using a combination of some
|
||||||
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
|
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
|
||||||
// are combined to do all the work in one go.
|
// are combined to do all the work in one go.
|
||||||
//
|
//
|
||||||
// Note that things are much easier to implement using custom TokenHandlers (see
|
// Note that things are much easier to implement using custom TokenHandlers (see
|
||||||
// the other HelloWorldUsingMatcher example for this). Doing this implementation
|
// the other helloParserCombinator example for this). Doing this implementation
|
||||||
// is mainly for your learning pleasure.
|
// is mainly for your learning pleasure.
|
||||||
//
|
//
|
||||||
// One big difference between the Matcher-based example and this one, is that
|
// One big difference between the parser/combinator-based example and this one,
|
||||||
// this parser reports errors much more fine-grained. This might or might not be
|
// is that this parser reports errors much more fine-grained. This might or
|
||||||
// useful for your specific use case.:0
|
// might not be useful for your specific use case.
|
||||||
|
|
||||||
package parsekit_test
|
package parsekit_test
|
||||||
|
|
||||||
|
@ -80,21 +80,29 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
|
||||||
|
|
||||||
func (h *helloparser2) start(p *parsekit.ParseAPI) {
|
func (h *helloparser2) start(p *parsekit.ParseAPI) {
|
||||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||||
if !p.On(c.StrNoCase("hello")).Skip() {
|
if !p.On(a.StrNoCase("hello")).Skip() {
|
||||||
p.Error("the greeting is not being friendly")
|
p.Error("the greeting is not being friendly")
|
||||||
} else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
|
return
|
||||||
|
}
|
||||||
|
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
|
||||||
p.Error("the greeting is not properly separated")
|
p.Error("the greeting is not properly separated")
|
||||||
} else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
|
return
|
||||||
|
}
|
||||||
|
if p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
|
||||||
|
h.greetee = p.Result().String()
|
||||||
|
if h.greetee == "" {
|
||||||
|
p.Error("the name cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
p.Error("the greeting is targeted at thin air")
|
p.Error("the greeting is targeted at thin air")
|
||||||
} else if !p.On(a.Excl).Skip() {
|
return
|
||||||
|
}
|
||||||
|
if !p.On(a.Excl).Skip() {
|
||||||
p.Error("the greeting is not loud enough")
|
p.Error("the greeting is not loud enough")
|
||||||
} else if !p.On(a.EndOfFile).Stay() {
|
} else if !p.On(a.EndOfFile).Stay() {
|
||||||
p.Error("too much stuff going on after the closing '!'")
|
p.Error("too much stuff going on after the closing '!'")
|
||||||
} else {
|
} else {
|
||||||
h.greetee = p.BufLiteral()
|
|
||||||
if h.greetee == "" {
|
|
||||||
p.Error("the name cannot be empty")
|
|
||||||
}
|
|
||||||
p.Stop()
|
p.Stop()
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,7 +1,7 @@
|
||||||
// In this example, we show that any type can be extended into a parser,
|
// In this example, we show that any type can be extended into a parser,
|
||||||
// filling that type with data from the ParseHandler methods.
|
// filling that type with data from the ParseHandler methods.
|
||||||
//
|
//
|
||||||
// Here, we create a custom type 'letterCollection', which is an alias
|
// Here, we create a custom type 'Chunks', which is an alias
|
||||||
// for []string. We add a ParseHandler method directly to that type
|
// for []string. We add a ParseHandler method directly to that type
|
||||||
// and let the parsing code fill the slice with strings during parsing.
|
// and let the parsing code fill the slice with strings during parsing.
|
||||||
|
|
||||||
|
@ -21,8 +21,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
|
||||||
|
|
||||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
for p.On(chunkOfRunes).Accept() {
|
for p.On(chunkOfRunes).Accept() {
|
||||||
*l = append(*l, p.BufLiteral())
|
*l = append(*l, p.Result().String())
|
||||||
p.BufClear()
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
return parser.Execute(s)
|
return parser.Execute(s)
|
||||||
|
@ -30,10 +29,10 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
|
||||||
|
|
||||||
func Example_usingSliceAsParserState() {
|
func Example_usingSliceAsParserState() {
|
||||||
chunks := &Chunks{}
|
chunks := &Chunks{}
|
||||||
chunks.AddChopped("This string will", 4)
|
chunks.AddChopped("123412341234xxx", 4)
|
||||||
chunks.AddChopped("be cut to bits!!!!!!", 8)
|
chunks.AddChopped("1234567812345678xxxxx", 8)
|
||||||
|
|
||||||
fmt.Printf("Matches = %q", *chunks)
|
fmt.Printf("Matches = %q", *chunks)
|
||||||
// Output:
|
// Output:
|
||||||
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
|
// Matches = ["1234" "1234" "1234" "xxx" "12345678" "12345678" "xxxxx"]
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,23 +48,47 @@ func ExampleError_Full() {
|
||||||
// it broke down at line 10, column 42
|
// it broke down at line 10, column 42
|
||||||
}
|
}
|
||||||
|
|
||||||
func ExampleMatchAnyRune() {
|
func ExampleMatchAnyRune_usingAcceptedRunes() {
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
a := parsekit.A
|
a := parsekit.A
|
||||||
|
|
||||||
matches := []string{}
|
matches := []string{}
|
||||||
|
|
||||||
stateHandler := func(p *parsekit.ParseAPI) {
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
for p.On(a.AnyRune).Accept() {
|
for p.On(a.AnyRune).Accept() {
|
||||||
matches = append(matches, p.BufLiteral())
|
matches = append(matches, p.Result().String())
|
||||||
p.BufClear()
|
|
||||||
}
|
}
|
||||||
p.ExpectEndOfFile()
|
p.ExpectEndOfFile()
|
||||||
}
|
})
|
||||||
parser := parsekit.NewParser(stateHandler)
|
|
||||||
err := parser.Execute("¡Any will dö!")
|
err := parser.Execute("¡Any will dö!")
|
||||||
|
|
||||||
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
|
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
|
||||||
// Output:
|
// Output:
|
||||||
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
|
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExampleMatchAnyRune_usingTokens() {
|
||||||
|
// Easy access to the parsekit definitions.
|
||||||
|
c, a, tok := parsekit.C, parsekit.A, parsekit.T
|
||||||
|
|
||||||
|
var tokens []*parsekit.Token
|
||||||
|
var accepted string
|
||||||
|
|
||||||
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
|
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
|
||||||
|
tokens = p.Result().Tokens()
|
||||||
|
accepted = p.Result().String()
|
||||||
|
}
|
||||||
|
p.ExpectEndOfFile()
|
||||||
|
})
|
||||||
|
parser.Execute("¡Any will dö!")
|
||||||
|
|
||||||
|
fmt.Printf("Runes accepted: %q\n", accepted)
|
||||||
|
fmt.Printf("Token values: ")
|
||||||
|
for _, t := range tokens {
|
||||||
|
fmt.Printf("%c ", t.Value)
|
||||||
|
}
|
||||||
|
// Output:
|
||||||
|
// Runes accepted: "¡Any will dö!"
|
||||||
|
// Token values: ¡ A n y w i l l d ö !
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseAPI holds the internal state of a parse run and provides an API to
|
||||||
|
// ParseHandler methods to communicate with the parser.
|
||||||
|
type ParseAPI struct {
|
||||||
|
tokenAPI *TokenAPI // the input reader
|
||||||
|
loopCheck map[string]bool // used for parser loop detection
|
||||||
|
expecting string // a description of what the current state expects to find (see Expects())
|
||||||
|
result *Result // TokenHandler result, as received from On(...).Accept()
|
||||||
|
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||||
|
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||||
|
}
|
||||||
|
|
||||||
|
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
||||||
|
// or when it has been stopped. It is used from the ParseAPI methods, to
|
||||||
|
// prevent further calls to the ParseAPI on these occasions.
|
||||||
|
//
|
||||||
|
// Basically, this guard ensures proper coding of parsers, making sure
|
||||||
|
// that clean routes are followed. You can consider this check a runtime
|
||||||
|
// unit test.
|
||||||
|
func (p *ParseAPI) panicWhenStoppedOrInError() {
|
||||||
|
if !p.isStoppedOrInError() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
called, _ := p.getCaller(1)
|
||||||
|
parts := strings.Split(called, ".")
|
||||||
|
calledShort := parts[len(parts)-1]
|
||||||
|
caller, filepos := p.getCaller(2)
|
||||||
|
|
||||||
|
after := "Error()"
|
||||||
|
if p.stopped {
|
||||||
|
after = "Stop()"
|
||||||
|
}
|
||||||
|
|
||||||
|
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ParseAPI) isStoppedOrInError() bool {
|
||||||
|
return p.stopped || p.err != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ParseAPI) initLoopCheck() {
|
||||||
|
p.loopCheck = map[string]bool{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ParseAPI) checkForLoops() {
|
||||||
|
caller, filepos := p.getCaller(2)
|
||||||
|
if _, ok := p.loopCheck[filepos]; ok {
|
||||||
|
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
|
||||||
|
}
|
||||||
|
p.loopCheck[filepos] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO delete this one
|
||||||
|
func (p *ParseAPI) getCaller(depth int) (string, string) {
|
||||||
|
// No error handling, because we call this method ourselves with safe depth values.
|
||||||
|
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||||
|
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||||
|
caller := runtime.FuncForPC(pc)
|
||||||
|
return caller.Name(), filepos
|
||||||
|
}
|
107
parsehandler.go
107
parsehandler.go
|
@ -1,12 +1,5 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"runtime"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ParseHandler defines the type of function that must be implemented to handle
|
// ParseHandler defines the type of function that must be implemented to handle
|
||||||
// a parsing state in a Parser state machine.
|
// a parsing state in a Parser state machine.
|
||||||
//
|
//
|
||||||
|
@ -14,103 +7,3 @@ import (
|
||||||
// all the internal state for the parsing state machine and provides the
|
// all the internal state for the parsing state machine and provides the
|
||||||
// interface that the ParseHandler uses to interact with the parser.
|
// interface that the ParseHandler uses to interact with the parser.
|
||||||
type ParseHandler func(*ParseAPI)
|
type ParseHandler func(*ParseAPI)
|
||||||
|
|
||||||
// ParseAPI holds the internal state of a parse run and provides an API to
|
|
||||||
// ParseHandler methods to communicate with the parser.
|
|
||||||
type ParseAPI struct {
|
|
||||||
input string // the input that is being scanned by the parser
|
|
||||||
inputPos int // current byte cursor position in the input
|
|
||||||
loopCheck map[string]bool // used for parser loop detection
|
|
||||||
cursorLine int // current rune cursor row number in the input
|
|
||||||
cursorColumn int // current rune cursor column position in the input
|
|
||||||
len int // the total length of the input in bytes
|
|
||||||
newline bool // keep track of when we have scanned a newline
|
|
||||||
expecting string // a description of what the current state expects to find (see P.Expects())
|
|
||||||
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
|
|
||||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
|
||||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
|
||||||
|
|
||||||
LastMatch string // a string representation of the last matched input data
|
|
||||||
}
|
|
||||||
|
|
||||||
// panicWhenStoppedOrInError will panic when the parser has produced an error
|
|
||||||
// or when it has been stopped. It is used from the ParseAPI methods, to
|
|
||||||
// prevent further calls to the ParseAPI on these occasions.
|
|
||||||
//
|
|
||||||
// Basically, this guard ensures proper coding of parsers, making sure
|
|
||||||
// that clean routes are followed. You can consider this check a runtime
|
|
||||||
// unit test.
|
|
||||||
func (p *ParseAPI) panicWhenStoppedOrInError() {
|
|
||||||
if !p.isStoppedOrInError() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
called, _ := p.getCaller(1)
|
|
||||||
parts := strings.Split(called, ".")
|
|
||||||
calledShort := parts[len(parts)-1]
|
|
||||||
caller, filepos := p.getCaller(2)
|
|
||||||
|
|
||||||
after := "Error()"
|
|
||||||
if p.stopped {
|
|
||||||
after = "Stop()"
|
|
||||||
}
|
|
||||||
|
|
||||||
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *ParseAPI) isStoppedOrInError() bool {
|
|
||||||
return p.stopped || p.err != nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *ParseAPI) checkForLoops() {
|
|
||||||
caller, filepos := p.getCaller(2)
|
|
||||||
if _, ok := p.loopCheck[filepos]; ok {
|
|
||||||
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
|
|
||||||
}
|
|
||||||
p.loopCheck[filepos] = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not advance the cursor to the next rune in the input.
|
|
||||||
// Returns the rune, its width in bytes and a boolean.
|
|
||||||
//
|
|
||||||
// The boolean will be false in case no upcoming rune can be peeked
|
|
||||||
// (end of data or invalid UTF8 character). In this case, the returned rune
|
|
||||||
// will be one of eofRune or invalidRune.
|
|
||||||
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
|
|
||||||
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
|
|
||||||
return handleRuneError(r, w)
|
|
||||||
}
|
|
||||||
|
|
||||||
// eofRune is a special rune that is used to indicate an end of file when
|
|
||||||
// reading a character from the input.
|
|
||||||
const eofRune rune = -1
|
|
||||||
|
|
||||||
// invalidRune is a special rune that is used to indicate an invalid UTF8
|
|
||||||
// rune on the input.
|
|
||||||
const invalidRune rune = utf8.RuneError
|
|
||||||
|
|
||||||
// handleRuneError is used to create specific rune value in case of errors.
|
|
||||||
// When an error occurs, then utf8.RuneError will be in the rune.
|
|
||||||
// This can however indicate one of two situations:
|
|
||||||
// 1) w == 0: end of file is reached
|
|
||||||
// 2) w == 1: invalid UTF character on input
|
|
||||||
// This function lets these two cases return respectively the
|
|
||||||
// package's own eofRune or invalidRune, to make it easy for calling code
|
|
||||||
// to distinct between these two cases.
|
|
||||||
func handleRuneError(r rune, w int) (rune, int, bool) {
|
|
||||||
if r == utf8.RuneError {
|
|
||||||
if w == 0 {
|
|
||||||
return eofRune, 0, false
|
|
||||||
}
|
|
||||||
return invalidRune, w, false
|
|
||||||
}
|
|
||||||
return r, w, true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *ParseAPI) getCaller(depth int) (string, string) {
|
|
||||||
// No error handling, because we call this method ourselves with safe depth values.
|
|
||||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
|
||||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
|
||||||
caller := runtime.FuncForPC(pc)
|
|
||||||
return caller.Name(), filepos
|
|
||||||
}
|
|
||||||
|
|
|
@ -20,7 +20,11 @@ func (err *Error) Error() string {
|
||||||
// Full returns the current error message, including information about
|
// Full returns the current error message, including information about
|
||||||
// the position in the input where the error occurred.
|
// the position in the input where the error occurred.
|
||||||
func (err *Error) Full() string {
|
func (err *Error) Full() string {
|
||||||
|
if err.Line == 0 {
|
||||||
|
return fmt.Sprintf("%s at start of file", err)
|
||||||
|
} else {
|
||||||
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
|
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Error sets the error message in the parser API. This error message
|
// Error sets the error message in the parser API. This error message
|
||||||
|
@ -29,5 +33,5 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
|
||||||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||||
// set a different error message when needed.
|
// set a different error message when needed.
|
||||||
message := fmt.Sprintf(format, args...)
|
message := fmt.Sprintf(format, args...)
|
||||||
p.err = &Error{message, p.cursorLine, p.cursorColumn}
|
p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
// On checks if the input at the current cursor position matches the provided
|
// On checks if the input at the current cursor position matches the provided
|
||||||
// TokenHandler. On must be chained with another method that tells the parser
|
// TokenHandler. On must be chained with another method that tells the parser
|
||||||
// what action to perform when a match was found:
|
// what action to perform when a match was found:
|
||||||
|
@ -17,7 +19,7 @@ package parsekit
|
||||||
// The chain as a whole returns a boolean that indicates whether or not a match
|
// The chain as a whole returns a boolean that indicates whether or not a match
|
||||||
// was found. When no match was found, false is returned and Skip() and Accept()
|
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||||
// will have no effect. Because of this, typical use of an On() chain is as
|
// will have no effect. Because of this, typical use of an On() chain is as
|
||||||
// expression for a conditional expression (if, switch/case, for). E.g.:
|
// expression for a conditional statement (if, switch/case, for). E.g.:
|
||||||
//
|
//
|
||||||
// // Skip multiple exclamation marks.
|
// // Skip multiple exclamation marks.
|
||||||
// for p.On(parsekit.A.Excl).Skip() { }
|
// for p.On(parsekit.A.Excl).Skip() { }
|
||||||
|
@ -32,70 +34,71 @@ package parsekit
|
||||||
// p.RouteTo(stateHandlerC)
|
// p.RouteTo(stateHandlerC)
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// // When there's a "hi" on input, emit a parser item for it.
|
// // When there's a "hi" on input, then say hello.
|
||||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||||
// p.Emit(SomeItemType, p.BufLiteral())
|
// fmt.Println("Hello!")
|
||||||
// }
|
// }
|
||||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||||
p.panicWhenStoppedOrInError()
|
p.panicWhenStoppedOrInError()
|
||||||
p.checkForLoops()
|
p.checkForLoops()
|
||||||
|
|
||||||
// Perform the matching operation.
|
|
||||||
m := &TokenAPI{p: p}
|
|
||||||
if tokenHandler == nil {
|
if tokenHandler == nil {
|
||||||
panic("ParseHandler bug: tokenHandler argument for On() is nil")
|
panic("ParseHandler bug: tokenHandler argument for On() is nil")
|
||||||
}
|
}
|
||||||
ok := tokenHandler(m)
|
|
||||||
|
|
||||||
// Keep track of the last match, to allow parser implementations
|
p.result = nil
|
||||||
// to access it in an easy way. Typical use would be something like:
|
p.tokenAPI.result = NewResult()
|
||||||
//
|
fork := p.tokenAPI.Fork()
|
||||||
// if p.On(somethingBad).End() {
|
ok := tokenHandler(fork)
|
||||||
// p.Errorf("This was bad: %s", p.LastMatch)
|
|
||||||
// }
|
|
||||||
p.LastMatch = string(m.input)
|
|
||||||
|
|
||||||
return &ParseAPIOnAction{
|
return &ParseAPIOnAction{
|
||||||
p: p,
|
parseAPI: p,
|
||||||
|
tokenAPI: fork,
|
||||||
ok: ok,
|
ok: ok,
|
||||||
input: m.input,
|
|
||||||
output: m.output,
|
|
||||||
inputPos: p.inputPos + m.inputOffset,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
|
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
|
||||||
// The On() method will return an initialized struct of this type.
|
// The On() method will return an initialized struct of this type.
|
||||||
type ParseAPIOnAction struct {
|
type ParseAPIOnAction struct {
|
||||||
p *ParseAPI
|
parseAPI *ParseAPI
|
||||||
|
tokenAPI *TokenAPI
|
||||||
ok bool
|
ok bool
|
||||||
input []rune
|
|
||||||
output []rune
|
|
||||||
inputPos int
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accept tells the parser to move the cursor past a match that was found,
|
// Accept tells the parser to move the cursor past a match that was found,
|
||||||
// and to store the input that matched in the parser's string buffer.
|
// and to make the TokenHandler results available in the ParseAPI through
|
||||||
// When no match was found, then no action is taken.
|
// the Result() method.
|
||||||
//
|
//
|
||||||
// Returns true in case a match was found.
|
// Returns true in case a match was found.
|
||||||
// When no match was found, then no action is taken and false is returned.
|
// When no match was found, then no action is taken and false is returned.
|
||||||
func (a *ParseAPIOnAction) Accept() bool {
|
func (a *ParseAPIOnAction) Accept() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.p.buffer.writeString(string(a.output))
|
a.tokenAPI.Merge()
|
||||||
a.advanceCursor()
|
a.parseAPI.result = a.tokenAPI.root.result
|
||||||
|
a.tokenAPI.root.result = NewResult()
|
||||||
|
a.tokenAPI.root.detachChilds()
|
||||||
|
if a.tokenAPI.offset > 0 {
|
||||||
|
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||||
|
a.parseAPI.initLoopCheck()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip tells the parser to move the cursor past a match that was found,
|
// Skip tells the parser to move the cursor past a match that was found,
|
||||||
// without storing the actual match in the parser's string buffer.
|
// without making the results available through the ParseAPI.
|
||||||
//
|
//
|
||||||
// Returns true in case a match was found.
|
// Returns true in case a match was found.
|
||||||
// When no match was found, then no action is taken and false is returned.
|
// When no match was found, then no action is taken and false is returned.
|
||||||
func (a *ParseAPIOnAction) Skip() bool {
|
func (a *ParseAPIOnAction) Skip() bool {
|
||||||
if a.ok {
|
if a.ok {
|
||||||
a.advanceCursor()
|
a.tokenAPI.root.cursor = a.tokenAPI.cursor
|
||||||
|
a.tokenAPI.root.result = NewResult()
|
||||||
|
a.tokenAPI.root.detachChilds()
|
||||||
|
if a.tokenAPI.offset > 0 {
|
||||||
|
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||||
|
a.parseAPI.initLoopCheck()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
@ -103,25 +106,23 @@ func (a *ParseAPIOnAction) Skip() bool {
|
||||||
// Stay tells the parser to not move the cursor after finding a match.
|
// Stay tells the parser to not move the cursor after finding a match.
|
||||||
// Returns true in case a match was found, false otherwise.
|
// Returns true in case a match was found, false otherwise.
|
||||||
func (a *ParseAPIOnAction) Stay() bool {
|
func (a *ParseAPIOnAction) Stay() bool {
|
||||||
|
if a.ok {
|
||||||
|
a.tokenAPI.root.result = NewResult()
|
||||||
|
a.tokenAPI.root.detachChilds()
|
||||||
|
}
|
||||||
return a.ok
|
return a.ok
|
||||||
}
|
}
|
||||||
|
|
||||||
// advanceCursor advances the input position in the input data.
|
// Result returns a Result struct, containing results as produced by the
|
||||||
// While doing so, it keeps tracks of newlines that are encountered, so we
|
// last ParseAPI.On() call.
|
||||||
// can report on line + column positions on error.
|
func (p *ParseAPI) Result() *Result {
|
||||||
func (a *ParseAPIOnAction) advanceCursor() {
|
result := p.result
|
||||||
if a.p.inputPos == a.inputPos {
|
if p.result == nil {
|
||||||
return
|
caller, filepos := getCaller(1)
|
||||||
}
|
panic(fmt.Sprintf(
|
||||||
a.p.loopCheck = map[string]bool{}
|
"parsekit.ParseAPI.Result(): Result() called without calling "+
|
||||||
a.p.inputPos = a.inputPos
|
"ParseAPI.Accept() on beforehand to make the result available "+
|
||||||
for _, r := range a.input {
|
"from %s at %s", caller, filepos))
|
||||||
if a.p.newline {
|
|
||||||
a.p.cursorLine++
|
|
||||||
a.p.cursorColumn = 1
|
|
||||||
} else {
|
|
||||||
a.p.cursorColumn++
|
|
||||||
}
|
|
||||||
a.p.newline = r == '\n'
|
|
||||||
}
|
}
|
||||||
|
return result
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
import "fmt"
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
// Handle is used to execute other ParseHandler functions from within your
|
// Handle is used to execute other ParseHandler functions from within your
|
||||||
// ParseHandler function.
|
// ParseHandler function.
|
||||||
|
@ -77,14 +80,14 @@ func (p *ParseAPI) ExpectEndOfFile() {
|
||||||
// expectation is included in the error message.
|
// expectation is included in the error message.
|
||||||
func (p *ParseAPI) UnexpectedInput() {
|
func (p *ParseAPI) UnexpectedInput() {
|
||||||
p.panicWhenStoppedOrInError()
|
p.panicWhenStoppedOrInError()
|
||||||
r, _, ok := p.peek(0)
|
r, err := p.tokenAPI.NextRune()
|
||||||
switch {
|
switch {
|
||||||
case ok:
|
case err == nil:
|
||||||
p.Error("unexpected character %q%s", r, fmtExpects(p))
|
p.Error("unexpected character %q%s", r, fmtExpects(p))
|
||||||
case r == eofRune:
|
case err == io.EOF:
|
||||||
p.Error("unexpected end of file%s", fmtExpects(p))
|
p.Error("unexpected end of file%s", fmtExpects(p))
|
||||||
case r == invalidRune:
|
default:
|
||||||
p.Error("invalid UTF8 character in input%s", fmtExpects(p))
|
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,47 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
// BufLiteral retrieves the contents of the parser's string buffer (all the
|
|
||||||
// runes that were added to it using ParseAPI.Accept()) as a literal string.
|
|
||||||
//
|
|
||||||
// Literal means that if the input had for example the subsequent runes '\' and
|
|
||||||
// 'n' in it, then the literal string would have a backslash and an 'n' it in,
|
|
||||||
// not a linefeed (ASCII char 10).
|
|
||||||
//
|
|
||||||
// Retrieving the buffer contents will not affect the buffer itself. New runes
|
|
||||||
// can still be added to it. Only when calling P.BufClear(), the buffer will be
|
|
||||||
// cleared.
|
|
||||||
func (p *ParseAPI) BufLiteral() string {
|
|
||||||
return p.buffer.asLiteralString()
|
|
||||||
}
|
|
||||||
|
|
||||||
// BufInterpreted retrieves the contents of the parser's string buffer (all the
|
|
||||||
// runes that were added to it using ParseAPI.Accept()) as an interpreted
|
|
||||||
// string.
|
|
||||||
//
|
|
||||||
// Interpreted means that the contents are treated as a Go double quoted
|
|
||||||
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
|
|
||||||
// input had for example the subsequent runes '\' and 'n' in it, then the
|
|
||||||
// interpreted string would have an actual linefeed (ASCII char 10) in it.
|
|
||||||
//
|
|
||||||
// This method returns a boolean value, indicating whether or not the string
|
|
||||||
// interpretation was successful. On invalid string data, an error will
|
|
||||||
// automatically be emitted and the boolean return value will be false.
|
|
||||||
//
|
|
||||||
// Retrieving the buffer contents will not affect the buffer itself. New runes
|
|
||||||
// can still be added to it. Only when calling P.BufClear(), the buffer will be
|
|
||||||
// cleared.
|
|
||||||
func (p *ParseAPI) BufInterpreted() (string, bool) {
|
|
||||||
s, err := p.buffer.asInterpretedString()
|
|
||||||
if err != nil {
|
|
||||||
p.Error(
|
|
||||||
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
|
|
||||||
p.buffer.asLiteralString(), err)
|
|
||||||
return "", false
|
|
||||||
}
|
|
||||||
return s, true
|
|
||||||
}
|
|
||||||
|
|
||||||
// BufClear clears the contents of the parser's string buffer.
|
|
||||||
func (p *ParseAPI) BufClear() {
|
|
||||||
p.buffer.reset()
|
|
||||||
}
|
|
|
@ -43,35 +43,6 @@ func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
|
||||||
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
|
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString(t *testing.T) {
|
|
||||||
var interpreted string
|
|
||||||
var literal string
|
|
||||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
|
||||||
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
|
|
||||||
literal = p.BufLiteral()
|
|
||||||
interpreted, _ = p.BufInterpreted()
|
|
||||||
})
|
|
||||||
p.Execute(`This\tis\ta\tcool\tstring`)
|
|
||||||
|
|
||||||
if literal != `This\tis\ta\tcool\tstring` {
|
|
||||||
t.Fatal("literal string is incorrect")
|
|
||||||
}
|
|
||||||
if interpreted != "This\tis\ta\tcool\tstring" {
|
|
||||||
t.Fatal("interpreted string is incorrect")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *testing.T) {
|
|
||||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
|
||||||
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
|
|
||||||
p.BufInterpreted()
|
|
||||||
})
|
|
||||||
err := p.Execute(`This \is wrongly escaped`)
|
|
||||||
if err.Error() != `invalid string: This \is wrongly escaped (invalid syntax, forgot to escape a double quote or backslash maybe?)` {
|
|
||||||
t.Fatalf("Got unexpected error: %s", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type parserWithLoop struct {
|
type parserWithLoop struct {
|
||||||
loopCounter int
|
loopCounter int
|
||||||
}
|
}
|
||||||
|
@ -119,7 +90,6 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
for p.On(c.Max(5, a.AnyRune)).Accept() {
|
for p.On(c.Max(5, a.AnyRune)).Accept() {
|
||||||
p.BufClear()
|
|
||||||
}
|
}
|
||||||
p.Stop()
|
p.Stop()
|
||||||
})
|
})
|
||||||
|
|
26
parsekit.go
26
parsekit.go
|
@ -1,5 +1,9 @@
|
||||||
package parsekit
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
// Parser is the top-level struct that holds the configuration for a parser.
|
// Parser is the top-level struct that holds the configuration for a parser.
|
||||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||||
type Parser struct {
|
type Parser struct {
|
||||||
|
@ -22,10 +26,7 @@ func NewParser(startHandler ParseHandler) *Parser {
|
||||||
// When an error occurs during parsing, then this error is returned. Nil otherwise.
|
// When an error occurs during parsing, then this error is returned. Nil otherwise.
|
||||||
func (p *Parser) Execute(input string) *Error {
|
func (p *Parser) Execute(input string) *Error {
|
||||||
api := &ParseAPI{
|
api := &ParseAPI{
|
||||||
input: input,
|
tokenAPI: NewTokenAPI(strings.NewReader(input)),
|
||||||
len: len(input),
|
|
||||||
cursorLine: 1,
|
|
||||||
cursorColumn: 1,
|
|
||||||
loopCheck: map[string]bool{},
|
loopCheck: map[string]bool{},
|
||||||
}
|
}
|
||||||
api.Handle(p.startHandler)
|
api.Handle(p.startHandler)
|
||||||
|
@ -39,12 +40,10 @@ func (p *Parser) Execute(input string) *Error {
|
||||||
// a parser that is based solely on a TokenHandler function.
|
// a parser that is based solely on a TokenHandler function.
|
||||||
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
||||||
// method.
|
// method.
|
||||||
//
|
// TODO Rename to Tokenizer
|
||||||
// To match input data against the wrapped Matcher function, use the method
|
|
||||||
// Matcher.Parse().
|
|
||||||
type Matcher struct {
|
type Matcher struct {
|
||||||
parser *Parser
|
parser *Parser
|
||||||
match string
|
result *Result
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMatcher instantiates a new Matcher.
|
// NewMatcher instantiates a new Matcher.
|
||||||
|
@ -55,11 +54,12 @@ type Matcher struct {
|
||||||
//
|
//
|
||||||
// The 'expects' parameter is used for creating an error message in case parsed
|
// The 'expects' parameter is used for creating an error message in case parsed
|
||||||
// input does not match the TokenHandler.
|
// input does not match the TokenHandler.
|
||||||
|
// TODO Rename to NewTokenizer, and make matcher Tokenizer, also see if we can use a Reader straight away, no ParseAPI.
|
||||||
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||||
matcher := &Matcher{}
|
matcher := &Matcher{}
|
||||||
matcher.parser = NewParser(func(p *ParseAPI) {
|
matcher.parser = NewParser(func(p *ParseAPI) {
|
||||||
if p.On(tokenHandler).Accept() {
|
if p.On(tokenHandler).Accept() {
|
||||||
matcher.match = p.BufLiteral()
|
matcher.result = p.Result()
|
||||||
p.Stop()
|
p.Stop()
|
||||||
} else {
|
} else {
|
||||||
p.Expects(expects)
|
p.Expects(expects)
|
||||||
|
@ -70,9 +70,9 @@ func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute feeds the input to the wrapped TokenHandler function.
|
// Execute feeds the input to the wrapped TokenHandler function.
|
||||||
// It returns the matched input string and an error. When an error
|
// It returns the TokenHandler's results. When an error occurred during parsing,
|
||||||
// occurred during parsing, the error will be set, nil otherwise.
|
// the error will be set, nil otherwise.
|
||||||
func (m *Matcher) Execute(input string) (string, *Error) {
|
func (m *Matcher) Execute(input string) (*Result, *Error) {
|
||||||
err := m.parser.Execute(input)
|
err := m.parser.Execute(input)
|
||||||
return m.match, err
|
return m.result, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// Easy access to the parsekit definitions.
|
// Easy access to the parsekit definitions.
|
||||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T
|
||||||
|
|
||||||
type TokenHandlerTest struct {
|
type TokenHandlerTest struct {
|
||||||
Input string
|
Input string
|
||||||
|
@ -27,11 +27,11 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||||
output, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||||
if test.MustMatch {
|
if test.MustMatch {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||||
} else if output != test.Expected {
|
} else if output := result.String(); output != test.Expected {
|
||||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -41,6 +41,41 @@ func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TokenMakerTest describes a single test case for RunTokenMakerTest: the
// Input is matched against the TokenHandler, and the tokens in the result are
// compared to the Expected tokens.
type TokenMakerTest struct {
	Input        string                // the input to feed to the TokenHandler
	TokenHandler parsekit.TokenHandler // the TokenHandler under test
	Expected     []parsekit.Token      // the tokens that must be in the result, in this order
}
|
||||||
|
|
||||||
|
func RunTokenMakerTest(t *testing.T, test TokenMakerTest) {
|
||||||
|
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||||
|
} else {
|
||||||
|
if len(result.Tokens()) != len(test.Expected) {
|
||||||
|
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
|
||||||
|
}
|
||||||
|
for i, expected := range test.Expected {
|
||||||
|
actual := result.Token(i)
|
||||||
|
if expected.Type != actual.Type {
|
||||||
|
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
|
||||||
|
}
|
||||||
|
if string(expected.Runes) != string(actual.Runes) {
|
||||||
|
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
|
||||||
|
}
|
||||||
|
if expected.Value != actual.Value {
|
||||||
|
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) {
|
||||||
|
for _, test := range testSet {
|
||||||
|
RunTokenMakerTest(t, test)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type PanicTest struct {
|
type PanicTest struct {
|
||||||
function func()
|
function func()
|
||||||
expected string
|
expected string
|
||||||
|
|
|
@ -0,0 +1,108 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"unicode/utf8"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reader wraps around an io.Reader and provides buffering that allows us to
// read the same runes over and over again. This is useful for implementing a
// parser that must be able to do lookahead on the input, returning to the
// original input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the buffer when there
// is no more need to go back to previously read runes.
type Reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// NewReader initializes a new Reader struct, wrapped around the provided io.Reader.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// A byte order mark (U+FEFF) at the very start of the input is skipped.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError. One special read
// fail is actually a normal situation: end of file reached. In that case,
// the returned error will be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) {
	// Rune at provided offset is not yet available in the input buffer.
	// Read runes until we have enough runes to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()

		// Handle errors.
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip BOM. Only the very first rune of the input can be a BOM.
		// The buffer offset alone does not tell us that we are at the start
		// of the input, since it remains zero for every rune that is read
		// before the first flush. Therefore, the buffer must be empty as
		// well. A U+FEFF anywhere else in the input is regular data (a zero
		// width no-break space), which must not be dropped.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// RunesAt reads a slice of runes of length 'len', starting from offset 'start'.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// When an error is encountered during reading (EOF or other error), then the
// error return value will be set. In case of an error, any runes that could be
// successfully read are returned along with the error. When not even the
// rune at the start offset could be read, an empty slice is returned.
//
// When all requested runes could be read, the error is nil, even when the
// last requested rune is the last rune of the input.
// TODO Do I actually use this interface?
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
	if len == 0 {
		return r.buffer[0:0], nil
	}
	end := start + len

	// The last rune that is needed for the requested slice is the one at
	// offset end-1. Reading up to offset end would require one rune more
	// than requested, resulting in a bogus io.EOF for a request that ends
	// exactly at the end of the input.
	if _, err := r.RuneAt(end - 1); err != nil {
		// The read failed before the buffer was filled up to offset end-1,
		// so the buffer holds fewer runes than requested. Return what we have.
		end = r.bufferLen
		if start > end {
			start = end
		}
		return r.buffer[start:end], err
	}
	return r.buffer[start:end], nil
}
|
||||||
|
|
||||||
|
// Flush deletes the provided number of runes from the start of the
|
||||||
|
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
|
||||||
|
// will point to the rune that comes after the flushed runes.
|
||||||
|
// So what this basically does is turn the Reader into a sliding window.
|
||||||
|
func (r *Reader) Flush(numberOfRunes int) {
|
||||||
|
if numberOfRunes > r.bufferLen {
|
||||||
|
panic(fmt.Sprintf(
|
||||||
|
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
||||||
|
"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
|
||||||
|
}
|
||||||
|
r.bufferOffset += numberOfRunes
|
||||||
|
r.bufferLen -= numberOfRunes
|
||||||
|
r.buffer = r.buffer[numberOfRunes:]
|
||||||
|
}
|
|
@ -0,0 +1,134 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ExampleNewReader() {
|
||||||
|
in := strings.NewReader("Hello, world!")
|
||||||
|
r := NewReader(in)
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
fmt.Printf("%c", at(0))
|
||||||
|
fmt.Printf("%c", at(12))
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// H!
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_RuneAt() {
|
||||||
|
in := strings.NewReader("Hello, world!")
|
||||||
|
r := NewReader(in)
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
|
||||||
|
// It is possible to go back and forth while reading the input.
|
||||||
|
fmt.Printf("%c", at(0))
|
||||||
|
fmt.Printf("%c", at(12))
|
||||||
|
fmt.Printf("%c", at(7))
|
||||||
|
fmt.Printf("%c", at(0))
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// H!wH
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_RuneAt_endOfFile() {
|
||||||
|
in := strings.NewReader("Hello, world!")
|
||||||
|
r := NewReader(in)
|
||||||
|
|
||||||
|
rn, err := r.RuneAt(13)
|
||||||
|
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||||
|
|
||||||
|
rn, err = r.RuneAt(20)
|
||||||
|
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// '<27>' EOF true
|
||||||
|
// '<27>' EOF true
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_RuneAt_invalidRune() {
|
||||||
|
in := strings.NewReader("Hello, \xcdworld!")
|
||||||
|
r := NewReader(in)
|
||||||
|
|
||||||
|
rn, err := r.RuneAt(6)
|
||||||
|
fmt.Printf("%q %t\n", rn, err == nil)
|
||||||
|
rn, err = r.RuneAt(7)
|
||||||
|
fmt.Printf("%q %t\n", rn, err == nil)
|
||||||
|
rn, err = r.RuneAt(8)
|
||||||
|
fmt.Printf("%q %t\n", rn, err == nil)
|
||||||
|
rn, err = r.RuneAt(9)
|
||||||
|
fmt.Printf("%q %t\n", rn, err == nil)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// ' ' true
|
||||||
|
// '<27>' true
|
||||||
|
// 'w' true
|
||||||
|
// 'o' true
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_RunesAt() {
|
||||||
|
in := strings.NewReader("Hello, \xcdworld!")
|
||||||
|
r := NewReader(in)
|
||||||
|
|
||||||
|
rs, err := r.RunesAt(4, 6)
|
||||||
|
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||||
|
rs, err = r.RunesAt(4, 0)
|
||||||
|
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||||
|
rs, err = r.RunesAt(8, 100)
|
||||||
|
fmt.Printf("%q %t\n", string(rs), err == io.EOF)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// "o, <20>wo" true
|
||||||
|
// "" true
|
||||||
|
// "world!" true
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||||
|
in := strings.NewReader("\uFEFFBommetje!")
|
||||||
|
r := NewReader(in)
|
||||||
|
b, _ := r.RuneAt(0)
|
||||||
|
o, _ := r.RuneAt(1)
|
||||||
|
m, _ := r.RuneAt(2)
|
||||||
|
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||||
|
assert.Equal(t, "Bom", bom, "first three runes")
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExampleReader_Flush() {
|
||||||
|
in := strings.NewReader("Hello, world!")
|
||||||
|
r := NewReader(in)
|
||||||
|
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||||
|
rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r }
|
||||||
|
|
||||||
|
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||||
|
fmt.Printf("%c\n", at(7))
|
||||||
|
|
||||||
|
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||||
|
r.Flush(4)
|
||||||
|
|
||||||
|
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||||
|
// We can continue reading from there.
|
||||||
|
fmt.Printf("%s", string(rb(0, 8)))
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// w
|
||||||
|
// o, world
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||||
|
in := strings.NewReader("Hello, world!")
|
||||||
|
r := NewReader(in)
|
||||||
|
|
||||||
|
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||||
|
r.RuneAt(10)
|
||||||
|
|
||||||
|
// However, we flush 12 runes, which exceeds the buffer size.
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() { r.Flush(12) },
|
||||||
|
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
|
||||||
|
})
|
||||||
|
}
|
62
stringbuf.go
62
stringbuf.go
|
@ -1,62 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// stringBuffer accumulates runes from the input for the parser, and can
// render the accumulated data as a string, either literally or with escape
// sequences interpreted.
type stringBuffer struct {
	buffer bytes.Buffer
}

// reset empties the string buffer, so a new string can be built.
// The buffer itself is returned, to allow for call chaining.
func (b *stringBuffer) reset() *stringBuffer {
	b.buffer.Reset()
	return b
}

// writeString appends all runes from the provided string to the string buffer.
// The buffer itself is returned, to allow for call chaining.
func (b *stringBuffer) writeString(s string) *stringBuffer {
	for _, ch := range []rune(s) {
		b.buffer.WriteRune(ch)
	}
	return b
}

// writeRune appends a single rune to the string buffer.
// The buffer itself is returned, to allow for call chaining.
func (b *stringBuffer) writeRune(r rune) *stringBuffer {
	b.buffer.WriteRune(r)
	return b
}

// asLiteralString returns the contents of the string buffer as-is,
// without processing any escape sequences.
func (b *stringBuffer) asLiteralString() string {
	return b.buffer.String()
}

// asInterpretedString returns the contents of the string buffer with escape
// sequences handled the way Go handles them inside a double quoted string.
// For example "\n", "\t", "\uXXXX" and "\UXXXXXXXX" are translated into the
// characters that they represent.
//
// When an invalid escape sequence is encountered, an error is returned,
// together with the part of the string that could be interpreted up to
// that point.
func (b *stringBuffer) asInterpretedString() (string, error) {
	var interpreted strings.Builder
	for rest := b.buffer.String(); rest != ""; {
		ch, _, remaining, err := strconv.UnquoteChar(rest, '"')
		if err != nil {
			return interpreted.String(), err
		}
		interpreted.WriteRune(ch)
		rest = remaining
	}
	return interpreted.String(), nil
}
|
|
|
@ -1,88 +0,0 @@
|
||||||
package parsekit
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
|
|
||||||
var b stringBuffer
|
|
||||||
s1, _ := b.writeString(`hi\nthere`).asInterpretedString()
|
|
||||||
s2 := b.asLiteralString()
|
|
||||||
if s1 != "hi\nthere" {
|
|
||||||
t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1)
|
|
||||||
}
|
|
||||||
if s2 != "hi\\nthere" {
|
|
||||||
t.Fatalf("Did not get expected string\"X\" for try 2, but %q", s2)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResetResetsBuffer(t *testing.T) {
|
|
||||||
var b stringBuffer
|
|
||||||
s := b.writeRune('X').reset().asLiteralString()
|
|
||||||
if s != "" {
|
|
||||||
t.Fatalf("Did not get expected empty string, but %q", s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAsLiteralString(t *testing.T) {
|
|
||||||
b := stringBuffer{}
|
|
||||||
for _, c := range []stringbufT{
|
|
||||||
{"empty string", ``, ``, OK},
|
|
||||||
{"simple string", `Simple string!`, `Simple string!`, OK},
|
|
||||||
{"single quote", `'`, `'`, OK},
|
|
||||||
{"double quote", `"`, `"`, OK},
|
|
||||||
{"escaped single quote", `\'`, `\'`, OK},
|
|
||||||
{"escaped double quote", `\"`, `\"`, OK},
|
|
||||||
{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
|
|
||||||
{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
|
|
||||||
{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
|
|
||||||
} {
|
|
||||||
s := b.reset().writeString(c.in).asLiteralString()
|
|
||||||
if s != c.out {
|
|
||||||
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAsInterpretedString(t *testing.T) {
|
|
||||||
b := stringBuffer{}
|
|
||||||
for _, c := range []stringbufT{
|
|
||||||
{"empty string", "", "", OK},
|
|
||||||
{"one character", "Simple string!", "Simple string!", OK},
|
|
||||||
{"escaped single quote", `\'`, "", FAIL},
|
|
||||||
{"escaped double quote", `\"`, `"`, OK},
|
|
||||||
{"bare single quote", `'`, "'", OK},
|
|
||||||
{"string in single quotes", `'Hello'`, `'Hello'`, OK},
|
|
||||||
{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
|
|
||||||
{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
|
|
||||||
{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
|
|
||||||
{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
|
|
||||||
{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
|
|
||||||
{"example from spec",
|
|
||||||
`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
|
|
||||||
"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
|
|
||||||
} {
|
|
||||||
s, err := b.reset().writeString(c.in).asInterpretedString()
|
|
||||||
if c.isSuccessCase && err != nil {
|
|
||||||
t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
|
|
||||||
}
|
|
||||||
if !c.isSuccessCase && err == nil {
|
|
||||||
t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
|
|
||||||
}
|
|
||||||
if s != c.out && c.isSuccessCase {
|
|
||||||
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type stringbufT struct {
|
|
||||||
name string
|
|
||||||
in string
|
|
||||||
out string
|
|
||||||
isSuccessCase bool
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
OK bool = true
|
|
||||||
FAIL bool = false
|
|
||||||
)
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and
|
||||||
|
// to report back results. For easy lookahead support, a forking strategy is
|
||||||
|
// provided.
|
||||||
|
//
|
||||||
|
// BASIC OPERATION:
|
||||||
|
//
|
||||||
|
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
|
||||||
|
//
|
||||||
|
// When the rune is to be accepted as input, call the method Accept(). The rune
|
||||||
|
// is then added to the result buffer of the TokenAPI struct.
|
||||||
|
// It is mandatory to call Accept() after retrieving a rune, before calling
|
||||||
|
// NextRune() again. Failing to do so will result in a panic.
|
||||||
|
//
|
||||||
|
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
|
||||||
|
// with as many runes as needed.
|
||||||
|
//
|
||||||
|
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
||||||
|
//
|
||||||
|
// Sometimes, we must be able to perform a lookahead, which might either
|
||||||
|
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
|
||||||
|
// brought back to the original state, so we can try a different route.
|
||||||
|
//
|
||||||
|
// The way in which this is supported, is by forking a TokenAPI struct by calling
|
||||||
|
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
|
||||||
|
// but using the same input cursor position as the forked parent.
|
||||||
|
//
|
||||||
|
// After forking, the same interface as described for BASIC OPERATION can be
|
||||||
|
// used to fill the result buffer. When the lookahead was successful, then
|
||||||
|
// Merge() can be called on the forked child to append the child's result
|
||||||
|
// buffer to the parent's result buffer, and to move the input cursor position
|
||||||
|
// to that of the child.
|
||||||
|
//
|
||||||
|
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
|
||||||
|
// be discarded. The parent TokenAPI was never modified, so it can safely be used
|
||||||
|
// as if the lookahead never happened.
|
||||||
|
//
|
||||||
|
// Note:
|
||||||
|
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||||
|
// peeks and by moving the input cursor position back and forth, or by putting
|
||||||
|
// read input back on the input stream. That often leads to code that is
|
||||||
|
// efficient, however, in my opinion, not very intuitive to read.
|
||||||
|
type TokenAPI struct {
|
||||||
|
reader *Reader
|
||||||
|
cursor *Cursor // current read cursor position, rel. to the input start
|
||||||
|
offset int // current rune offset rel. to the Reader's sliding window
|
||||||
|
result *Result // results as produced by a TokenHandler (runes, Tokens)
|
||||||
|
root *TokenAPI // the root TokenAPI
|
||||||
|
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||||
|
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||||
|
func NewTokenAPI(r io.Reader) *TokenAPI {
|
||||||
|
input := &TokenAPI{
|
||||||
|
reader: NewReader(r),
|
||||||
|
cursor: &Cursor{},
|
||||||
|
result: NewResult(),
|
||||||
|
}
|
||||||
|
input.root = input
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
|
||||||
|
// NextRune returns the rune at the current read offset.
|
||||||
|
//
|
||||||
|
// When an invalid UTF8 rune is encountered on the input, it is replaced with
|
||||||
|
// the utf.RuneError rune. It's up to the caller to handle this as an error
|
||||||
|
// when needed.
|
||||||
|
//
|
||||||
|
// After reading a rune it must be Accept()-ed to move the read cursor forward
|
||||||
|
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
|
||||||
|
// without explicitly accepting, this method will panic.
|
||||||
|
func (i *TokenAPI) NextRune() (rune, error) {
|
||||||
|
if i.result.lastRune != nil {
|
||||||
|
caller, linepos := getCaller(1)
|
||||||
|
panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+
|
||||||
|
"to Accept() from %s at %s", caller, linepos))
|
||||||
|
}
|
||||||
|
i.detachChilds()
|
||||||
|
|
||||||
|
readRune, err := i.reader.RuneAt(i.offset)
|
||||||
|
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||||
|
return readRune, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accept the last rune as read by NextRune() into the result buffer and move
|
||||||
|
// the cursor forward.
|
||||||
|
//
|
||||||
|
// It is not allowed to call Accept() when the previous call to NextRune()
|
||||||
|
// returned an error. Calling Accept() in such case will result in a panic.
|
||||||
|
func (i *TokenAPI) Accept() {
|
||||||
|
if i.result.lastRune == nil {
|
||||||
|
caller, linepos := getCaller(1)
|
||||||
|
panic(fmt.Sprintf(
|
||||||
|
"parsekit.TokenAPI.Accept(): Accept() called without first "+
|
||||||
|
"calling NextRune() from %s at %s", caller, linepos))
|
||||||
|
} else if i.result.lastRune.err != nil {
|
||||||
|
caller, linepos := getCaller(1)
|
||||||
|
panic(fmt.Sprintf(
|
||||||
|
"parsekit.TokenAPI.Accept(): Accept() called while the previous "+
|
||||||
|
"call to NextRune() failed from %s at %s", caller, linepos))
|
||||||
|
}
|
||||||
|
i.result.runes = append(i.result.runes, i.result.lastRune.r)
|
||||||
|
i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
|
||||||
|
i.offset++
|
||||||
|
i.result.lastRune = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
|
||||||
|
// read cursor position, but for the rest this is a fresh TokenAPI.
|
||||||
|
func (i *TokenAPI) Fork() *TokenAPI {
|
||||||
|
i.detachChilds()
|
||||||
|
|
||||||
|
// Create the new fork.
|
||||||
|
child := &TokenAPI{
|
||||||
|
reader: i.reader,
|
||||||
|
cursor: &Cursor{},
|
||||||
|
offset: i.offset,
|
||||||
|
root: i.root,
|
||||||
|
parent: i,
|
||||||
|
}
|
||||||
|
child.result = NewResult()
|
||||||
|
*child.cursor = *i.cursor
|
||||||
|
i.child = child
|
||||||
|
i.result.lastRune = nil
|
||||||
|
return child
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge adds the data of the forked child TokenAPI that Merge() is called on to the
|
||||||
|
// data of its parent (results and read cursor position).
|
||||||
|
func (i *TokenAPI) Merge() {
|
||||||
|
if i.parent == nil {
|
||||||
|
panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI")
|
||||||
|
}
|
||||||
|
|
||||||
|
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
|
||||||
|
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
|
||||||
|
i.parent.offset = i.offset
|
||||||
|
i.parent.cursor = i.cursor
|
||||||
|
|
||||||
|
i.detachChilds()
|
||||||
|
i.result = NewResult()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result returns the result data for the TokenAPI. The returned struct
|
||||||
|
// can be used to retrieve and modify the result data.
|
||||||
|
func (i *TokenAPI) Result() *Result {
|
||||||
|
return i.result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cursor retrieves the current read cursor data.
|
||||||
|
// TODO make this and offset part of Result struct?
|
||||||
|
func (i *TokenAPI) Cursor() Cursor {
|
||||||
|
return *i.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
// FlushReaderBuffer delegates to the Flush() method of the contained
|
||||||
|
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
|
||||||
|
// reader cache.
|
||||||
|
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
|
||||||
|
if i != i.root {
|
||||||
|
panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
|
||||||
|
}
|
||||||
|
i.detachChilds()
|
||||||
|
i.reader.Flush(numberOfRunes)
|
||||||
|
i.offset = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *TokenAPI) detachChilds() {
|
||||||
|
if i.child != nil {
|
||||||
|
i.child.detachChildsRecurse()
|
||||||
|
i.child = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *TokenAPI) detachChildsRecurse() {
|
||||||
|
if i.child != nil {
|
||||||
|
i.child.detachChildsRecurse()
|
||||||
|
}
|
||||||
|
i.child = nil
|
||||||
|
i.parent = nil
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Result holds results as produced by a TokenHandler: the runes that were
// collected and the Tokens that were generated.
type Result struct {
	lastRune *runeInfo // Information about the last rune read using NextRune()
	runes    []rune    // The runes in the result
	tokens   []*Token  // The tokens in the result
}

// runeInfo combines a rune with an error value.
type runeInfo struct {
	r   rune
	err error
}

// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
	Type  interface{} // token type, can be any type that a parser author sees fit
	Runes []rune      // the runes that make up the token
	Value interface{} // an optional value of any type
}

// NewResult initializes an empty result struct.
func NewResult() *Result {
	result := new(Result)
	result.runes = []rune{}
	result.tokens = []*Token{}
	return result
}

// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
	r.runes = make([]rune, 0)
}

// SetRunes replaces the Runes from the Result with the provided input.
// The same input types are supported as for AddRunes().
func (r *Result) SetRunes(s interface{}) {
	r.runes = make([]rune, 0)
	r.AddRunes(s)
}

// AddRunes is used to add runes to the Result.
// The input can be a string, a slice of runes or a single rune.
// For any other type of input, this method panics.
func (r *Result) AddRunes(s interface{}) {
	if str, isString := s.(string); isString {
		r.runes = append(r.runes, []rune(str)...)
		return
	}
	if slice, isRuneSlice := s.([]rune); isRuneSlice {
		r.runes = append(r.runes, slice...)
		return
	}
	if single, isRune := s.(rune); isRune {
		r.runes = append(r.runes, single)
		return
	}
	panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", s))
}

// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
	return r.runes
}

// Rune retrieves a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
	return r.runes[idx]
}

// String returns the Runes from the Result as a string.
func (r *Result) String() string {
	return string(r.Runes())
}

// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
	r.tokens = make([]*Token, 0)
}

// AddToken is used to add a Token to the results.
func (r *Result) AddToken(t *Token) {
	r.tokens = append(r.tokens, t)
}

// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token {
	return r.tokens
}

// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token {
	return r.tokens[idx]
}

// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
	values := make([]interface{}, 0, len(r.tokens))
	for _, token := range r.tokens {
		values = append(values, token.Value)
	}
	return values
}

// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
	return r.Token(idx).Value
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
i.Result().SetRunes("string")
|
||||||
|
assert.Equal(t, "string", string(i.Result().String()), "i.Result() with string input")
|
||||||
|
i.Result().SetRunes([]rune("rune slice"))
|
||||||
|
assert.Equal(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
|
||||||
|
i.Result().SetRunes('X')
|
||||||
|
assert.Equal(t, "X", string(i.Result().String()), "i.Result() with rune input")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
i.Result().SetRunes(1234567)
|
||||||
|
},
|
||||||
|
Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used",
|
||||||
|
})
|
||||||
|
}
|
|
@ -0,0 +1,288 @@
|
||||||
|
package parsekit
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||||
|
r, _ := mkInput().NextRune()
|
||||||
|
assert.Equal(t, 'T', r, "first rune")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.NextRune()
|
||||||
|
},
|
||||||
|
Regexp: true,
|
||||||
|
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||||
|
`a prior call to Accept\(\) from .*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: mkInput().Accept,
|
||||||
|
Regexp: true,
|
||||||
|
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||||
|
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
i.Merge()
|
||||||
|
},
|
||||||
|
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
f := i.Fork()
|
||||||
|
i.NextRune()
|
||||||
|
f.Merge()
|
||||||
|
},
|
||||||
|
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
f := i.Fork()
|
||||||
|
i.Fork()
|
||||||
|
f.Merge()
|
||||||
|
},
|
||||||
|
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
f1 := i.Fork()
|
||||||
|
f2 := f1.Fork()
|
||||||
|
f3 := f2.Fork()
|
||||||
|
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||||
|
f5 := f4.Fork()
|
||||||
|
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||||
|
assert.Equal(t, true, i.child == f1, "i.child == f1")
|
||||||
|
assert.Equal(t, true, f1.parent == i, "f1.parent == i")
|
||||||
|
assert.Equal(t, true, f1.child == f4, "f1.child == f4")
|
||||||
|
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||||
|
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||||
|
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||||
|
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||||
|
assert.Equal(t, true, f4.parent == f1, "f4.parent == f1")
|
||||||
|
assert.Equal(t, true, f4.child == f5, "f4.child == f5")
|
||||||
|
assert.Equal(t, true, f5.parent == f4, "f5.parent == f4")
|
||||||
|
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||||
|
|
||||||
|
i.NextRune()
|
||||||
|
|
||||||
|
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||||
|
assert.Equal(t, true, i.child == nil, "i.child == nil")
|
||||||
|
assert.Equal(t, true, f1.parent == nil, "f1.parent == nil")
|
||||||
|
assert.Equal(t, true, f1.child == nil, "f1.child == nil")
|
||||||
|
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||||
|
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||||
|
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||||
|
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||||
|
assert.Equal(t, true, f4.parent == nil, "f4.parent == nil")
|
||||||
|
assert.Equal(t, true, f4.child == nil, "f4.child == nil")
|
||||||
|
assert.Equal(t, true, f5.parent == nil, "f5.parent == nil")
|
||||||
|
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.Fork()
|
||||||
|
i.Accept()
|
||||||
|
},
|
||||||
|
Regexp: true,
|
||||||
|
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||||
|
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
r, _ := i.NextRune()
|
||||||
|
assert.Equal(t, 'T', r, "result from 1st call to NextRune()")
|
||||||
|
// TODO still (*runeInfo) case needed?
|
||||||
|
assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()")
|
||||||
|
i.Accept()
|
||||||
|
assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()")
|
||||||
|
assert.Equal(t, 1, i.offset, "Input.offset")
|
||||||
|
assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
|
||||||
|
r, _ = i.NextRune()
|
||||||
|
assert.Equal(t, 'e', r, "result from 2nd call to NextRune()")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
for j := 0; j < 7; j++ {
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
}
|
||||||
|
assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
||||||
|
assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||||
|
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||||
|
assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1")
|
||||||
|
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
}
|
||||||
|
assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2")
|
||||||
|
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||||
|
i.Accept()
|
||||||
|
assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3")
|
||||||
|
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
}
|
||||||
|
assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4")
|
||||||
|
assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||||
|
// Create input, accept the first rune.
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept() // T
|
||||||
|
assert.Equal(t, "T", i.Result().String(), "accepted rune in input")
|
||||||
|
// Fork
|
||||||
|
f := i.Fork()
|
||||||
|
assert.Equal(t, f, i.child, "Input.child (must be f)")
|
||||||
|
assert.Equal(t, i, f.parent, "Input.parent (must be i)")
|
||||||
|
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||||
|
assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||||
|
// Accept two runes via fork.
|
||||||
|
f.NextRune()
|
||||||
|
f.Accept() // e
|
||||||
|
f.NextRune()
|
||||||
|
f.Accept() // s
|
||||||
|
assert.Equal(t, "es", f.Result().String(), "result runes in fork")
|
||||||
|
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||||
|
assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||||
|
// Merge fork back into parent
|
||||||
|
f.Merge()
|
||||||
|
assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||||
|
assert.Equal(t, 3, i.cursor.Byte, "i.child.cursor.Byte")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
f1 := i.Fork()
|
||||||
|
f1.NextRune()
|
||||||
|
f1.Accept()
|
||||||
|
f2 := f1.Fork()
|
||||||
|
f2.NextRune()
|
||||||
|
f2.Accept()
|
||||||
|
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||||
|
assert.Equal(t, 1, i.offset, "i.offset")
|
||||||
|
assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||||
|
assert.Equal(t, 2, f1.offset, "f1.offset")
|
||||||
|
assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||||
|
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||||
|
f2.Merge()
|
||||||
|
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||||
|
assert.Equal(t, 1, i.offset, "i.offset")
|
||||||
|
assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||||
|
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||||
|
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||||
|
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||||
|
f1.Merge()
|
||||||
|
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||||
|
assert.Equal(t, 3, i.offset, "i.offset")
|
||||||
|
assert.Equal(t, "", f1.Result().String(), "f1.Result().String()")
|
||||||
|
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||||
|
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||||
|
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) {
|
||||||
|
assert.Panic(t, assert.PanicT{
|
||||||
|
Function: func() {
|
||||||
|
i := mkInput()
|
||||||
|
f := i.Fork()
|
||||||
|
f.FlushReaderBuffer(1)
|
||||||
|
},
|
||||||
|
Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " +
|
||||||
|
"can only be called on the root TokenAPI, not on a forked child",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) {
|
||||||
|
i := mkInput()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
i.FlushReaderBuffer(2)
|
||||||
|
assert.Equal(t, "Te", i.Result().String(), "i.Result()")
|
||||||
|
assert.Equal(t, 0, i.offset, "i.offset")
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
assert.Equal(t, 2, i.offset, "i.offset")
|
||||||
|
i.FlushReaderBuffer(2)
|
||||||
|
assert.Equal(t, "Test", i.Result().String(), "i.Result()")
|
||||||
|
assert.Equal(t, 0, i.offset, "i.offset")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||||
|
i := NewTokenAPI(strings.NewReader("X"))
|
||||||
|
i.NextRune()
|
||||||
|
i.Accept()
|
||||||
|
r, err := i.NextRune()
|
||||||
|
assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||||
|
assert.Equal(t, true, err == io.EOF, "returned error from NextRune()")
|
||||||
|
}
|
||||||
|
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||||
|
i := NewTokenAPI(strings.NewReader("X"))
|
||||||
|
f := i.Fork()
|
||||||
|
f.NextRune()
|
||||||
|
f.Accept()
|
||||||
|
r, err := f.NextRune()
|
||||||
|
assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||||
|
r, err = i.NextRune()
|
||||||
|
assert.Equal(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||||
|
assert.Equal(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||||
|
}
|
||||||
|
|
||||||
|
func mkInput() *TokenAPI {
|
||||||
|
return NewTokenAPI(strings.NewReader("Testing"))
|
||||||
|
}
|
224
tokenhandler.go
224
tokenhandler.go
|
@ -2,113 +2,55 @@ package parsekit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"runtime"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TokenHandler is the function type that is involved in turning a low level
|
// TokenHandler is the function type that is involved in turning a low level
|
||||||
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
|
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
||||||
// data matches some kind of pattern and to report back the match.
|
// data matches some kind of pattern and to report back the token(s).
|
||||||
//
|
|
||||||
// A TokenHandler is to be used in conjunction with parsekit.P.On() or
|
|
||||||
// parsekit.Matcher().
|
|
||||||
//
|
//
|
||||||
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||||
// indicate whether or not it found a match on the input. The TokenAPI is used
|
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||||
// for retrieving input data to match against and for reporting back results.
|
// for retrieving input data to match against and for reporting back results.
|
||||||
type TokenHandler func(t *TokenAPI) bool
|
type TokenHandler func(t *TokenAPI) bool
|
||||||
|
|
||||||
// TokenAPI is used by TokenHandler functions to retrieve runes from the
|
|
||||||
// input to match against and to report back results.
|
|
||||||
//
|
|
||||||
// Basic operation:
|
|
||||||
//
|
|
||||||
// To retrieve the next rune from the input, the TokenHandler function can call
|
|
||||||
// the TokenAPI.NextRune() method.
|
|
||||||
//
|
|
||||||
// The TokenHandler function can then evaluate the retrieved rune and either
|
|
||||||
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
|
|
||||||
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
|
|
||||||
// the rune will not be added to the output. It is mandatory for a TokenHandler
|
|
||||||
// to call either Accept() or Skip() after retrieving a rune, before calling
|
|
||||||
// NextRune() again.
|
|
||||||
//
|
|
||||||
// Eventually, the TokenHandler function must return a boolean value, indicating
|
|
||||||
// whether or not a match was found. When true, then the calling code will
|
|
||||||
// use the runes that were accepted into the TokenAPI's resulting output.
|
|
||||||
//
|
|
||||||
// Forking operation for easy lookahead support:
|
|
||||||
//
|
|
||||||
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
|
|
||||||
// might either succeed or fail. In case of a failing lookahead, the state
|
|
||||||
// of the TokenAPI must be brought back to the original state.
|
|
||||||
//
|
|
||||||
// The way in which this is supported, is by forking a TokenAPI by calling
|
|
||||||
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
|
|
||||||
// output buffer, but using the same input cursor position as the forked parent.
|
|
||||||
//
|
|
||||||
// The TokenHandler function can then use the same interface as described for
|
|
||||||
// normal operation to retrieve runes from the input and to fill the resulting
|
|
||||||
// output. When the TokenHandler function decides that the lookahead was successful,
|
|
||||||
// then the method TokenAPI.Merge() can be called on the forked child to
|
|
||||||
// append the resulting output from the child to the parent's resulting output,
|
|
||||||
// and to update the parent input cursor position to that of the child.
|
|
||||||
//
|
|
||||||
// When the TokenHandler function decides that the lookahead was unsuccessful,
|
|
||||||
// then it can simply discard the forked child. The parent TokenAPI was never
|
|
||||||
// modified, so a new match can be safely started using that parent, as if the
|
|
||||||
// lookahead never happened.
|
|
||||||
type TokenAPI struct {
|
|
||||||
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
|
|
||||||
inputOffset int // the byte offset into the input
|
|
||||||
input []rune // a slice of runes that represents all retrieved input runes for the Matcher
|
|
||||||
output []rune // a slice of runes that represents the accepted output runes for the Matcher
|
|
||||||
currRune *runeInfo // hold information for the last rune that was read from the input
|
|
||||||
parent *TokenAPI // the parent MatchDialog, in case this one was forked
|
|
||||||
}
|
|
||||||
|
|
||||||
// runeInfo describes a single rune and its metadata.
|
|
||||||
type runeInfo struct {
|
|
||||||
Rune rune // an UTF8 rune
|
|
||||||
ByteSize int // the number of bytes in the rune
|
|
||||||
OK bool // false when the rune represents an invalid UTF8 rune or EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
// NextRune retrieves the next rune from the input.
|
// NextRune retrieves the next rune from the input.
|
||||||
//
|
//
|
||||||
// It returns the rune and a boolean. The boolean will be false in case an
|
// It returns the rune and a boolean. The boolean will be false in case an
|
||||||
// invalid UTF8 rune or the end of the file was encountered.
|
// invalid UTF8 rune or the end of the file was encountered.
|
||||||
//
|
//
|
||||||
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
|
// After retrieving a rune, Accept() or Skip() can be called to respectively add
|
||||||
// to respectively add the rune to the TokenAPI's resulting output or to
|
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
|
||||||
// fully ignore it. This way, a TokenHandler has full control over what runes are
|
// a TokenHandler has full control over what runes are significant for the
|
||||||
// significant for the resulting output of that TokenHandler.
|
// resulting output of that TokenHandler.
|
||||||
//
|
//
|
||||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||||
// rune is explicitly accepted or skipped as described above.
|
// rune is explicitly accepted or skipped as described above.
|
||||||
func (t *TokenAPI) NextRune() (rune, bool) {
|
// func (t *TokenAPIold) NextRune() (rune, bool) {
|
||||||
if t.currRune != nil {
|
// if t.lastRune != nil {
|
||||||
caller, filepos := t.p.getCaller(1)
|
// caller, filepos := getCaller(1)
|
||||||
panic(fmt.Sprintf(
|
// panic(fmt.Sprintf(
|
||||||
"TokenHandler bug: NextRune() was called from %s at %s "+
|
// "TokenHandler bug: NextRune() was called from %s at %s "+
|
||||||
"without accepting or skipping the previously read rune", caller, filepos))
|
// "without accepting or skipping the previously read rune", caller, filepos))
|
||||||
}
|
// }
|
||||||
r, w, ok := t.p.peek(t.inputOffset)
|
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
|
||||||
t.currRune = &runeInfo{r, w, ok}
|
// t.lastRune = &runeInfo{r, w, ok}
|
||||||
if ok {
|
// if ok {
|
||||||
t.input = append(t.input, r)
|
// t.result.Input = append(t.result.Input, r)
|
||||||
}
|
// }
|
||||||
return r, ok
|
// return r, ok
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Fork splits off a child TokenAPI, containing the same input cursor position
|
// Fork splits off a child TokenAPIold, containing the same input cursor position
|
||||||
// as the parent TokenAPI, but with all other data in a fresh state.
|
// as the parent TokenAPIold, but with all other data in a fresh state.
|
||||||
//
|
//
|
||||||
// By forking, a TokenHandler function can freely work with a TokenAPI, without
|
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
|
||||||
// affecting the parent TokenAPI. This is for example useful when the
|
// affecting the parent TokenAPIold. This is for example useful when the
|
||||||
// TokenHandler function must perform some form of lookahead.
|
// TokenHandler function must perform some form of lookahead.
|
||||||
//
|
//
|
||||||
// When a successful match was found, the TokenHandler function can call
|
// When a successful match was found, the TokenHandler function can call
|
||||||
// TokenAPI.Merge() on the forked child to have the resulting output added
|
// TokenAPIold.Merge() on the forked child to have the resulting output added
|
||||||
// to the parent TokenAPI.
|
// to the parent TokenAPIold.
|
||||||
//
|
//
|
||||||
// When no match was found, the forked child can simply be discarded.
|
// When no match was found, the forked child can simply be discarded.
|
||||||
//
|
//
|
||||||
|
@ -118,7 +60,7 @@ func (t *TokenAPI) NextRune() (rune, bool) {
|
||||||
// case could look like this (yes, it's naive, but it shows the point):
|
// case could look like this (yes, it's naive, but it shows the point):
|
||||||
// TODO make proper tested example
|
// TODO make proper tested example
|
||||||
//
|
//
|
||||||
// func MatchAbcd(t *TokenAPI) bool {
|
// func MatchAbcd(t *TokenAPIold) bool {
|
||||||
// child := t.Fork() // fork to keep m from input untouched
|
// child := t.Fork() // fork to keep m from input untouched
|
||||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||||
// if r, ok := t.NextRune(); !ok || r != letter {
|
// if r, ok := t.NextRune(); !ok || r != letter {
|
||||||
|
@ -129,73 +71,69 @@ func (t *TokenAPI) NextRune() (rune, bool) {
|
||||||
// child.Merge() // we have a match, add resulting output to parent
|
// child.Merge() // we have a match, add resulting output to parent
|
||||||
// return true // and report the successful match
|
// return true // and report the successful match
|
||||||
// }
|
// }
|
||||||
func (t *TokenAPI) Fork() *TokenAPI {
|
|
||||||
return &TokenAPI{
|
|
||||||
p: t.p,
|
|
||||||
inputOffset: t.inputOffset,
|
|
||||||
parent: t,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
|
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
|
||||||
// output of the TokenAPI.
|
// output of the TokenAPIold.
|
||||||
func (t *TokenAPI) Accept() {
|
// func (t *TokenAPIold) Accept() {
|
||||||
t.checkAllowedCall("Accept()")
|
// t.checkAllowedCall("Accept()")
|
||||||
t.output = append(t.output, t.currRune.Rune)
|
// t.buffer = append(t.buffer, t.lastRune.Rune)
|
||||||
t.inputOffset += t.currRune.ByteSize
|
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
|
||||||
t.currRune = nil
|
// t.inputOffset += t.lastRune.ByteSize
|
||||||
}
|
// t.lastRune = nil
|
||||||
|
// }
|
||||||
|
|
||||||
// Skip will ignore the last rune as read by NextRune().
|
// Skip will ignore the last rune as read by NextRune().
|
||||||
func (t *TokenAPI) Skip() {
|
// func (t *TokenAPIold) Skip() {
|
||||||
t.checkAllowedCall("Skip()")
|
// t.checkAllowedCall("Skip()")
|
||||||
t.inputOffset += t.currRune.ByteSize
|
// t.inputOffset += t.lastRune.ByteSize
|
||||||
t.currRune = nil
|
// t.lastRune = nil
|
||||||
}
|
// }
|
||||||
|
|
||||||
func (t *TokenAPI) checkAllowedCall(name string) {
|
// func (t *TokenAPIold) checkAllowedCall(name string) {
|
||||||
if t.currRune == nil {
|
// if t.lastRune == nil {
|
||||||
caller, filepos := t.p.getCaller(2)
|
// caller, filepos := getCaller(2)
|
||||||
panic(fmt.Sprintf(
|
// panic(fmt.Sprintf(
|
||||||
"TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
||||||
name, caller, filepos))
|
// name, caller, filepos))
|
||||||
}
|
// }
|
||||||
if !t.currRune.OK {
|
// if !t.lastRune.OK {
|
||||||
caller, filepos := t.p.getCaller(2)
|
// caller, filepos := getCaller(2)
|
||||||
panic(fmt.Sprintf(
|
// panic(fmt.Sprintf(
|
||||||
"TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
||||||
"did not return OK (EOF or invalid rune)", name, caller, filepos))
|
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Merge merges the resulting output from a forked child TokenAPI back into
|
// AddToken is used to add a token to the results of the TokenHandler.
|
||||||
|
// func (t *TokenAPIold) AddToken(tok *Token) {
|
||||||
|
// t.result.Tokens = append(t.result.Tokens, tok)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Merge merges the resulting output from a forked child TokenAPIold back into
|
||||||
// its parent: The runes that are accepted in the child are added to the parent
|
// its parent: The runes that are accepted in the child are added to the parent
|
||||||
// runes and the parent's input cursor position is advanced to the child's
|
// runes and the parent's input cursor position is advanced to the child's
|
||||||
// cursor position.
|
// cursor position.
|
||||||
//
|
//
|
||||||
// After the merge, the child TokenAPI is reset so it can immediately be
|
// After the merge, the child TokenAPIold is reset so it can immediately be
|
||||||
// reused for performing another match (all data are cleared, except for the
|
// reused for performing another match (all data are cleared, except for the
|
||||||
// input offset which is kept at its current position).
|
// input offset which is kept at its current position).
|
||||||
func (t *TokenAPI) Merge() bool {
|
// func (t *TokenAPIold) Merge() bool {
|
||||||
if t.parent == nil {
|
// if t.parent == nil {
|
||||||
panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
|
// panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
|
||||||
}
|
// }
|
||||||
t.parent.input = append(t.parent.input, t.input...)
|
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
|
||||||
t.parent.output = append(t.parent.output, t.output...)
|
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
|
||||||
t.parent.inputOffset = t.inputOffset
|
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
|
||||||
t.ClearOutput()
|
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
|
||||||
t.ClearInput()
|
// t.parent.inputOffset = t.inputOffset
|
||||||
return true
|
// t.result = &TokResult{}
|
||||||
}
|
// return true
|
||||||
|
// }
|
||||||
|
|
||||||
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
|
func getCaller(depth int) (string, string) {
|
||||||
// the input and input offset as-is.
|
// No error handling, because we call this method ourselves with safe depth values.
|
||||||
func (t *TokenAPI) ClearOutput() {
|
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||||
t.output = []rune{}
|
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||||
}
|
caller := runtime.FuncForPC(pc)
|
||||||
|
return caller.Name(), filepos
|
||||||
// ClearInput clears the input for the TokenAPI, but it keeps the output
|
|
||||||
// and input offset as-is.
|
|
||||||
func (t *TokenAPI) ClearInput() {
|
|
||||||
t.input = []rune{}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,34 +4,107 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit"
|
"git.makaay.nl/mauricem/go-parsekit"
|
||||||
|
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput(t *testing.T) {
|
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
|
||||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||||
for i := 0; i < 33; i++ {
|
for i := 0; i < 20; i++ {
|
||||||
t.NextRune()
|
t.NextRune()
|
||||||
t.Accept()
|
t.Accept()
|
||||||
t.NextRune()
|
|
||||||
t.Skip()
|
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}, "test")
|
}, "test")
|
||||||
output, _ := parser.Execute("Txhxixsx xsxhxoxuxlxdx xbxexcxoxmxex xqxuxixtxex xrxexaxdxaxbxlxex")
|
result, _ := parser.Execute("This is some random data to parse")
|
||||||
if output != "This should become quite readable" {
|
if result.String() != "This is some random " {
|
||||||
t.Fatalf("Got unexpected output from TokenHandler: %s", output)
|
t.Fatalf("Got unexpected output from TokenHandler: %s", result.String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics(t *testing.T) {
|
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
|
||||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||||
t.NextRune()
|
t.Result().AddToken(&parsekit.Token{
|
||||||
t.NextRune()
|
Type: "PI",
|
||||||
return false
|
Runes: []rune("π"),
|
||||||
|
Value: 3.1415,
|
||||||
|
})
|
||||||
|
t.Result().AddToken(&parsekit.Token{
|
||||||
|
Type: nil,
|
||||||
|
Runes: []rune("yes"),
|
||||||
|
Value: true,
|
||||||
|
})
|
||||||
|
return true
|
||||||
}, "test")
|
}, "test")
|
||||||
RunPanicTest(t, PanicTest{
|
result, _ := parser.Execute("doesn't matter")
|
||||||
func() { parser.Execute("input string") },
|
if len(result.Tokens()) != 2 {
|
||||||
`TokenHandler bug: NextRune\(\) was called from .*NextCallToNextRunePanics.* ` +
|
t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens()))
|
||||||
`at .*/tokenhandler_test\.go:\d+ without accepting or skipping the previously read rune`})
|
}
|
||||||
|
if result.Token(0).Value != 3.1415 {
|
||||||
|
t.Fatal("Token 0 value not 3.1415")
|
||||||
|
}
|
||||||
|
if string(result.Token(0).Runes) != "π" {
|
||||||
|
t.Fatal("Token 0 runes not \"π\"")
|
||||||
|
}
|
||||||
|
if result.Token(0).Type != "PI" {
|
||||||
|
t.Fatal("Token 0 type not \"PI\"")
|
||||||
|
}
|
||||||
|
if result.Token(1).Value != true {
|
||||||
|
t.Fatal("Token 1 value not true")
|
||||||
|
}
|
||||||
|
if string(result.Token(1).Runes) != "yes" {
|
||||||
|
t.Fatal("Token 1 runes not \"yes\"")
|
||||||
|
}
|
||||||
|
if result.Token(1).Type != nil {
|
||||||
|
t.Fatal("Token 1 type not nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
|
||||||
|
fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII))
|
||||||
|
parser := parsekit.NewMatcher(fooToken, "something")
|
||||||
|
input := "This is fine ASCII Åltho hère öt endĩt!"
|
||||||
|
result, err := parser.Execute(input)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unexpected error from parser: %s", err)
|
||||||
|
}
|
||||||
|
if result.String() != "This is fine ASCII " {
|
||||||
|
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
|
||||||
|
fooToken := c.Seq(
|
||||||
|
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||||
|
tok.StrLiteral("COMBI", c.Seq(
|
||||||
|
tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
|
||||||
|
tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
|
||||||
|
)),
|
||||||
|
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||||
|
)
|
||||||
|
parser := parsekit.NewMatcher(fooToken, "something")
|
||||||
|
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
|
||||||
|
output := "This is fine ASCIIÅltho hère öt endĩt!"
|
||||||
|
result, err := parser.Execute(input)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unexpected error from parser: %s", err)
|
||||||
|
}
|
||||||
|
if result.String() != output {
|
||||||
|
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||||
|
}
|
||||||
|
if result.Token(0).Type != "COMBI" {
|
||||||
|
t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type)
|
||||||
|
}
|
||||||
|
if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" {
|
||||||
|
t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value)
|
||||||
|
}
|
||||||
|
if result.Token(1).Value != "This is fine ASCII" {
|
||||||
|
t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value)
|
||||||
|
}
|
||||||
|
if result.Token(2).Value != "Åltho hère öt endĩt!" {
|
||||||
|
t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
||||||
|
@ -39,21 +112,25 @@ func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return false
|
return false
|
||||||
}, "test")
|
}, "test")
|
||||||
RunPanicTest(t, PanicTest{
|
assert.Panic(t, assert.PanicT{
|
||||||
func() { parser.Execute("input string") },
|
Function: func() { parser.Execute("input string") },
|
||||||
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
|
Regexp: true,
|
||||||
`at .*/tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
|
Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` +
|
||||||
|
`calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenNextRuneNotCalled_CallToSkipPanics(t *testing.T) {
|
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
|
||||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||||
t.Skip()
|
t.NextRune()
|
||||||
|
t.NextRune()
|
||||||
return false
|
return false
|
||||||
}, "test")
|
}, "test")
|
||||||
RunPanicTest(t, PanicTest{
|
assert.Panic(t, assert.PanicT{
|
||||||
func() { parser.Execute("input string") },
|
Function: func() { parser.Execute("input string") },
|
||||||
`TokenHandler bug: Skip\(\) was called from .*CallToSkipPanics.* ` +
|
Regexp: true,
|
||||||
`at .*tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
|
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||||
|
`a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
||||||
|
@ -62,19 +139,19 @@ func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return false
|
return false
|
||||||
}, "test")
|
}, "test")
|
||||||
RunPanicTest(t, PanicTest{
|
assert.Panic(t, assert.PanicT{
|
||||||
func() { parser.Execute("\xcd") },
|
Function: func() { parser.Execute("") },
|
||||||
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
|
Regexp: true,
|
||||||
`at .*tokenhandler_test\.go:\d+, but prior call to NextRune\(\) did not ` +
|
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` +
|
||||||
`return OK \(EOF or invalid rune\)`})
|
`NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
|
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
|
||||||
RunPanicTest(t, PanicTest{
|
assert.Panic(t, assert.PanicT{
|
||||||
func() {
|
Function: func() {
|
||||||
a := parsekit.TokenAPI{}
|
a := parsekit.TokenAPI{}
|
||||||
a.Merge()
|
a.Merge()
|
||||||
},
|
},
|
||||||
`TokenHandler bug: Cannot call Merge a a non-forked MatchDialog`,
|
Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,9 @@ package parsekit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
)
|
)
|
||||||
|
@ -9,6 +12,11 @@ import (
|
||||||
// C provides convenient access to a range of parser/combinators that can be
|
// C provides convenient access to a range of parser/combinators that can be
|
||||||
// used to construct TokenHandler functions.
|
// used to construct TokenHandler functions.
|
||||||
//
|
//
|
||||||
|
// Parser/combinators are so-called higher-order functions that take in one
|
||||||
|
// or more other TokenHandlers and output a new TokenHandler. They can be
|
||||||
|
// used to combine TokenHandlers in useful ways to create new more complex
|
||||||
|
// TokenHandlers.
|
||||||
|
//
|
||||||
// When using C in your own parser, then it is advised to create a variable
|
// When using C in your own parser, then it is advised to create a variable
|
||||||
// to reference it:
|
// to reference it:
|
||||||
//
|
//
|
||||||
|
@ -16,11 +24,6 @@ import (
|
||||||
//
|
//
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
var C = struct {
|
var C = struct {
|
||||||
Rune func(rune) TokenHandler
|
|
||||||
Runes func(...rune) TokenHandler
|
|
||||||
RuneRange func(rune, rune) TokenHandler
|
|
||||||
Str func(string) TokenHandler
|
|
||||||
StrNoCase func(string) TokenHandler
|
|
||||||
Any func(...TokenHandler) TokenHandler
|
Any func(...TokenHandler) TokenHandler
|
||||||
Not func(TokenHandler) TokenHandler
|
Not func(TokenHandler) TokenHandler
|
||||||
Opt func(TokenHandler) TokenHandler
|
Opt func(TokenHandler) TokenHandler
|
||||||
|
@ -31,15 +34,9 @@ var C = struct {
|
||||||
ZeroOrMore func(TokenHandler) TokenHandler
|
ZeroOrMore func(TokenHandler) TokenHandler
|
||||||
OneOrMore func(TokenHandler) TokenHandler
|
OneOrMore func(TokenHandler) TokenHandler
|
||||||
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
MinMax func(min int, max int, handler TokenHandler) TokenHandler
|
||||||
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
|
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, us string?
|
||||||
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
Except func(except TokenHandler, handler TokenHandler) TokenHandler
|
||||||
Signed func(TokenHandler) TokenHandler
|
|
||||||
}{
|
}{
|
||||||
Rune: MatchRune,
|
|
||||||
Runes: MatchRunes,
|
|
||||||
RuneRange: MatchRuneRange,
|
|
||||||
Str: MatchStr,
|
|
||||||
StrNoCase: MatchStrNoCase,
|
|
||||||
Opt: MatchOpt,
|
Opt: MatchOpt,
|
||||||
Any: MatchAny,
|
Any: MatchAny,
|
||||||
Not: MatchNot,
|
Not: MatchNot,
|
||||||
|
@ -52,15 +49,217 @@ var C = struct {
|
||||||
MinMax: MatchMinMax,
|
MinMax: MatchMinMax,
|
||||||
Separated: MatchSeparated,
|
Separated: MatchSeparated,
|
||||||
Except: MatchExcept,
|
Except: MatchExcept,
|
||||||
|
}
|
||||||
|
|
||||||
|
// A provides convenient access to a range of atoms or functions to build atoms.
|
||||||
|
//
|
||||||
|
// When using A in your own parser, then it is advised to create a variable
|
||||||
|
// to reference it:
|
||||||
|
//
|
||||||
|
// var a = parsekit.A
|
||||||
|
//
|
||||||
|
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||||
|
var A = struct {
|
||||||
|
Rune func(rune) TokenHandler
|
||||||
|
Runes func(...rune) TokenHandler
|
||||||
|
RuneRange func(rune, rune) TokenHandler
|
||||||
|
Str func(string) TokenHandler
|
||||||
|
StrNoCase func(string) TokenHandler
|
||||||
|
EndOfFile TokenHandler
|
||||||
|
AnyRune TokenHandler
|
||||||
|
Space TokenHandler
|
||||||
|
Tab TokenHandler
|
||||||
|
CR TokenHandler
|
||||||
|
LF TokenHandler
|
||||||
|
CRLF TokenHandler
|
||||||
|
Excl TokenHandler
|
||||||
|
DoubleQuote TokenHandler
|
||||||
|
Hash TokenHandler
|
||||||
|
Dollar TokenHandler
|
||||||
|
Percent TokenHandler
|
||||||
|
Amp TokenHandler
|
||||||
|
SingleQuote TokenHandler
|
||||||
|
RoundOpen TokenHandler
|
||||||
|
LeftParen TokenHandler
|
||||||
|
RoundClose TokenHandler
|
||||||
|
RightParen TokenHandler
|
||||||
|
Asterisk TokenHandler
|
||||||
|
Multiply TokenHandler
|
||||||
|
Plus TokenHandler
|
||||||
|
Add TokenHandler
|
||||||
|
Comma TokenHandler
|
||||||
|
Minus TokenHandler
|
||||||
|
Subtract TokenHandler
|
||||||
|
Dot TokenHandler
|
||||||
|
Slash TokenHandler
|
||||||
|
Divide TokenHandler
|
||||||
|
Colon TokenHandler
|
||||||
|
Semicolon TokenHandler
|
||||||
|
AngleOpen TokenHandler
|
||||||
|
LessThan TokenHandler
|
||||||
|
Equal TokenHandler
|
||||||
|
AngleClose TokenHandler
|
||||||
|
GreaterThan TokenHandler
|
||||||
|
Question TokenHandler
|
||||||
|
At TokenHandler
|
||||||
|
SquareOpen TokenHandler
|
||||||
|
Backslash TokenHandler
|
||||||
|
SquareClose TokenHandler
|
||||||
|
Caret TokenHandler
|
||||||
|
Underscore TokenHandler
|
||||||
|
Backquote TokenHandler
|
||||||
|
CurlyOpen TokenHandler
|
||||||
|
Pipe TokenHandler
|
||||||
|
CurlyClose TokenHandler
|
||||||
|
Tilde TokenHandler
|
||||||
|
Newline TokenHandler
|
||||||
|
Whitespace TokenHandler
|
||||||
|
WhitespaceAndNewlines TokenHandler
|
||||||
|
EndOfLine TokenHandler
|
||||||
|
Digit TokenHandler
|
||||||
|
DigitNotZero TokenHandler
|
||||||
|
Digits TokenHandler
|
||||||
|
Float TokenHandler
|
||||||
|
Boolean TokenHandler
|
||||||
|
Integer TokenHandler
|
||||||
|
Signed func(TokenHandler) TokenHandler
|
||||||
|
IntegerBetween func(min int64, max int64) TokenHandler
|
||||||
|
ASCII TokenHandler
|
||||||
|
ASCIILower TokenHandler
|
||||||
|
ASCIIUpper TokenHandler
|
||||||
|
HexDigit TokenHandler
|
||||||
|
Octet TokenHandler
|
||||||
|
IPv4 TokenHandler
|
||||||
|
IPv4MaskBits TokenHandler
|
||||||
|
}{
|
||||||
|
Rune: MatchRune,
|
||||||
|
Runes: MatchRunes,
|
||||||
|
RuneRange: MatchRuneRange,
|
||||||
|
Str: MatchStr,
|
||||||
|
StrNoCase: MatchStrNoCase,
|
||||||
|
EndOfFile: MatchEndOfFile(),
|
||||||
|
AnyRune: MatchAnyRune(),
|
||||||
|
Space: MatchRune(' '),
|
||||||
|
Tab: MatchRune('\t'),
|
||||||
|
CR: MatchRune('\r'),
|
||||||
|
LF: MatchRune('\n'),
|
||||||
|
CRLF: MatchStr("\r\n"),
|
||||||
|
Excl: MatchRune('!'),
|
||||||
|
DoubleQuote: MatchRune('"'),
|
||||||
|
Hash: MatchRune('#'),
|
||||||
|
Dollar: MatchRune('$'),
|
||||||
|
Percent: MatchRune('%'),
|
||||||
|
Amp: MatchRune('&'),
|
||||||
|
SingleQuote: MatchRune('\''),
|
||||||
|
RoundOpen: MatchRune('('),
|
||||||
|
LeftParen: MatchRune('('),
|
||||||
|
RoundClose: MatchRune(')'),
|
||||||
|
RightParen: MatchRune(')'),
|
||||||
|
Asterisk: MatchRune('*'),
|
||||||
|
Multiply: MatchRune('*'),
|
||||||
|
Plus: MatchRune('+'),
|
||||||
|
Add: MatchRune('+'),
|
||||||
|
Comma: MatchRune(','),
|
||||||
|
Minus: MatchRune('-'),
|
||||||
|
Subtract: MatchRune('-'),
|
||||||
|
Dot: MatchRune('.'),
|
||||||
|
Slash: MatchRune('/'),
|
||||||
|
Divide: MatchRune('/'),
|
||||||
|
Colon: MatchRune(':'),
|
||||||
|
Semicolon: MatchRune(';'),
|
||||||
|
AngleOpen: MatchRune('<'),
|
||||||
|
LessThan: MatchRune('<'),
|
||||||
|
Equal: MatchRune('='),
|
||||||
|
AngleClose: MatchRune('>'),
|
||||||
|
GreaterThan: MatchRune('>'),
|
||||||
|
Question: MatchRune('?'),
|
||||||
|
At: MatchRune('@'),
|
||||||
|
SquareOpen: MatchRune('['),
|
||||||
|
Backslash: MatchRune('\\'),
|
||||||
|
SquareClose: MatchRune(']'),
|
||||||
|
Caret: MatchRune('^'),
|
||||||
|
Underscore: MatchRune('_'),
|
||||||
|
Backquote: MatchRune('`'),
|
||||||
|
CurlyOpen: MatchRune('{'),
|
||||||
|
Pipe: MatchRune('|'),
|
||||||
|
CurlyClose: MatchRune('}'),
|
||||||
|
Tilde: MatchRune('~'),
|
||||||
|
Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
|
||||||
|
WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
|
||||||
|
EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
|
||||||
|
Digit: MatchDigit(),
|
||||||
|
DigitNotZero: MatchDigitNotZero(),
|
||||||
|
Digits: MatchDigits(),
|
||||||
|
Integer: MatchInteger(),
|
||||||
Signed: MatchSigned,
|
Signed: MatchSigned,
|
||||||
|
IntegerBetween: MatchIntegerBetween,
|
||||||
|
Float: MatchFloat(),
|
||||||
|
Boolean: MatchBoolean(),
|
||||||
|
ASCII: MatchRuneRange('\x00', '\x7F'),
|
||||||
|
ASCIILower: MatchRuneRange('a', 'z'),
|
||||||
|
ASCIIUpper: MatchRuneRange('A', 'Z'),
|
||||||
|
HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
|
||||||
|
Octet: MatchOctet(false),
|
||||||
|
IPv4: MatchIPv4(),
|
||||||
|
IPv4MaskBits: MatchIntegerBetween(0, 32),
|
||||||
|
}
|
||||||
|
|
||||||
|
// T provides convenient access to a range of Token producers (which in their
// nature are parser/combinators) that can be used when creating TokenHandler
// functions.
//
// Every field except ByCallback takes a token type (an arbitrary value that
// ends up in the Type field of the produced Token) and the TokenHandler
// whose match is turned into a Token. Each of these fields is bound to the
// Make...Token function of the same name (e.g. Int8 is MakeInt8Token).
//
// ByCallback is bound to MakeTokenByCallback. It takes a TokenHandler and a
// callback function that builds the Token from the TokenAPI of the match.
//
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
//	var t = parsekit.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct {
	StrLiteral     func(interface{}, TokenHandler) TokenHandler
	StrInterpreted func(interface{}, TokenHandler) TokenHandler
	Byte           func(interface{}, TokenHandler) TokenHandler
	Rune           func(interface{}, TokenHandler) TokenHandler
	Int            func(interface{}, TokenHandler) TokenHandler
	Int8           func(interface{}, TokenHandler) TokenHandler
	Int16          func(interface{}, TokenHandler) TokenHandler
	Int32          func(interface{}, TokenHandler) TokenHandler
	Int64          func(interface{}, TokenHandler) TokenHandler
	Uint           func(interface{}, TokenHandler) TokenHandler
	Uint8          func(interface{}, TokenHandler) TokenHandler
	Uint16         func(interface{}, TokenHandler) TokenHandler
	Uint32         func(interface{}, TokenHandler) TokenHandler
	Uint64         func(interface{}, TokenHandler) TokenHandler
	Float32        func(interface{}, TokenHandler) TokenHandler
	Float64        func(interface{}, TokenHandler) TokenHandler
	Boolean        func(interface{}, TokenHandler) TokenHandler
	ByCallback     func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
}{
	StrLiteral:     MakeStrLiteralToken,
	StrInterpreted: MakeStrInterpretedToken,
	Byte:           MakeByteToken,
	Rune:           MakeRuneToken,
	Int:            MakeIntToken,
	Int8:           MakeInt8Token,
	Int16:          MakeInt16Token,
	Int32:          MakeInt32Token,
	Int64:          MakeInt64Token,
	Uint:           MakeUintToken,
	Uint8:          MakeUint8Token,
	Uint16:         MakeUint16Token,
	Uint32:         MakeUint32Token,
	Uint64:         MakeUint64Token,
	Float32:        MakeFloat32Token,
	Float64:        MakeFloat64Token,
	Boolean:        MakeBooleanToken,
	ByCallback:     MakeTokenByCallback,
}
|
||||||
|
|
||||||
// MatchRune creates a TokenHandler function that checks if the next rune from
|
// MatchRune creates a TokenHandler function that checks if the next rune from
|
||||||
// the input matches the provided rune.
|
// the input matches the provided rune.
|
||||||
func MatchRune(expected rune) TokenHandler {
|
func MatchRune(expected rune) TokenHandler {
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
input, ok := t.NextRune()
|
input, err := t.NextRune()
|
||||||
if ok && input == expected {
|
if err == nil && input == expected {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -73,8 +272,8 @@ func MatchRune(expected rune) TokenHandler {
|
||||||
func MatchRunes(expected ...rune) TokenHandler {
|
func MatchRunes(expected ...rune) TokenHandler {
|
||||||
s := string(expected)
|
s := string(expected)
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
input, ok := t.NextRune()
|
input, err := t.NextRune()
|
||||||
if ok {
|
if err == nil {
|
||||||
if strings.ContainsRune(s, input) {
|
if strings.ContainsRune(s, input) {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return true
|
return true
|
||||||
|
@ -97,8 +296,8 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
|
||||||
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
|
||||||
}
|
}
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
input, ok := t.NextRune()
|
input, err := t.NextRune()
|
||||||
if ok && input >= start && input <= end {
|
if err == nil && input >= start && input <= end {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -167,7 +366,8 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
|
||||||
for _, handler := range handlers {
|
for _, handler := range handlers {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
if handler(child) {
|
if handler(child) {
|
||||||
return child.Merge()
|
child.Merge()
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -183,8 +383,8 @@ func MatchNot(handler TokenHandler) TokenHandler {
|
||||||
if handler(probe) {
|
if handler(probe) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
_, ok := t.NextRune()
|
_, err := t.NextRune()
|
||||||
if ok {
|
if err == nil {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -311,138 +511,24 @@ func MatchSigned(handler TokenHandler) TokenHandler {
|
||||||
return MatchSeq(sign, handler)
|
return MatchSeq(sign, handler)
|
||||||
}
|
}
|
||||||
|
|
||||||
// A provides convenient access to a range of atoms that can be used to
|
// MatchIntegerBetween creates a TokenHandler that checks for an integer
|
||||||
// build TokenHandlers or parser rules.
|
// value between the provided min and max boundaries (inclusive).
|
||||||
//
|
// It uses an int64 for checking internally, so you can check values
|
||||||
// In parsekit, an atom is defined as a ready for use TokenHandler function.
|
// ranging from -9223372036854775808 to 9223372036854775807.
|
||||||
//
|
func MatchIntegerBetween(min int64, max int64) TokenHandler {
|
||||||
// When using A in your own parser, then it is advised to create a variable
|
digits := MatchSigned(MatchDigits())
|
||||||
// to reference it:
|
return func(t *TokenAPI) bool {
|
||||||
//
|
fork := t.Fork()
|
||||||
// var a = parsekit.A
|
if !digits(fork) {
|
||||||
//
|
return false
|
||||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
}
|
||||||
var A = struct {
|
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64)
|
||||||
EndOfFile TokenHandler
|
if value < min || value > max {
|
||||||
AnyRune TokenHandler
|
return false
|
||||||
Space TokenHandler
|
}
|
||||||
Tab TokenHandler
|
fork.Merge()
|
||||||
CR TokenHandler
|
return true
|
||||||
LF TokenHandler
|
}
|
||||||
CRLF TokenHandler
|
|
||||||
Excl TokenHandler
|
|
||||||
DoubleQuote TokenHandler
|
|
||||||
Hash TokenHandler
|
|
||||||
Dollar TokenHandler
|
|
||||||
Percent TokenHandler
|
|
||||||
Amp TokenHandler
|
|
||||||
SingleQuote TokenHandler
|
|
||||||
RoundOpen TokenHandler
|
|
||||||
LeftParen TokenHandler
|
|
||||||
RoundClose TokenHandler
|
|
||||||
RightParen TokenHandler
|
|
||||||
Asterisk TokenHandler
|
|
||||||
Multiply TokenHandler
|
|
||||||
Plus TokenHandler
|
|
||||||
Add TokenHandler
|
|
||||||
Comma TokenHandler
|
|
||||||
Minus TokenHandler
|
|
||||||
Subtract TokenHandler
|
|
||||||
Dot TokenHandler
|
|
||||||
Slash TokenHandler
|
|
||||||
Divide TokenHandler
|
|
||||||
Colon TokenHandler
|
|
||||||
Semicolon TokenHandler
|
|
||||||
AngleOpen TokenHandler
|
|
||||||
LessThan TokenHandler
|
|
||||||
Equal TokenHandler
|
|
||||||
AngleClose TokenHandler
|
|
||||||
GreaterThan TokenHandler
|
|
||||||
Question TokenHandler
|
|
||||||
At TokenHandler
|
|
||||||
SquareOpen TokenHandler
|
|
||||||
Backslash TokenHandler
|
|
||||||
SquareClose TokenHandler
|
|
||||||
Caret TokenHandler
|
|
||||||
Underscore TokenHandler
|
|
||||||
Backquote TokenHandler
|
|
||||||
CurlyOpen TokenHandler
|
|
||||||
Pipe TokenHandler
|
|
||||||
CurlyClose TokenHandler
|
|
||||||
Tilde TokenHandler
|
|
||||||
Newline TokenHandler
|
|
||||||
Whitespace TokenHandler
|
|
||||||
WhitespaceAndNewlines TokenHandler
|
|
||||||
EndOfLine TokenHandler
|
|
||||||
Digit TokenHandler
|
|
||||||
DigitNotZero TokenHandler
|
|
||||||
Digits TokenHandler
|
|
||||||
Float TokenHandler
|
|
||||||
Integer TokenHandler
|
|
||||||
ASCII TokenHandler
|
|
||||||
ASCIILower TokenHandler
|
|
||||||
ASCIIUpper TokenHandler
|
|
||||||
HexDigit TokenHandler
|
|
||||||
}{
|
|
||||||
EndOfFile: MatchEndOfFile(),
|
|
||||||
AnyRune: MatchAnyRune(),
|
|
||||||
Space: C.Rune(' '),
|
|
||||||
Tab: C.Rune('\t'),
|
|
||||||
CR: C.Rune('\r'),
|
|
||||||
LF: C.Rune('\n'),
|
|
||||||
CRLF: C.Str("\r\n"),
|
|
||||||
Excl: C.Rune('!'),
|
|
||||||
DoubleQuote: C.Rune('"'),
|
|
||||||
Hash: C.Rune('#'),
|
|
||||||
Dollar: C.Rune('$'),
|
|
||||||
Percent: C.Rune('%'),
|
|
||||||
Amp: C.Rune('&'),
|
|
||||||
SingleQuote: C.Rune('\''),
|
|
||||||
RoundOpen: C.Rune('('),
|
|
||||||
LeftParen: C.Rune('('),
|
|
||||||
RoundClose: C.Rune(')'),
|
|
||||||
RightParen: C.Rune(')'),
|
|
||||||
Asterisk: C.Rune('*'),
|
|
||||||
Multiply: C.Rune('*'),
|
|
||||||
Plus: C.Rune('+'),
|
|
||||||
Add: C.Rune('+'),
|
|
||||||
Comma: C.Rune(','),
|
|
||||||
Minus: C.Rune('-'),
|
|
||||||
Subtract: C.Rune('-'),
|
|
||||||
Dot: C.Rune('.'),
|
|
||||||
Slash: C.Rune('/'),
|
|
||||||
Divide: C.Rune('/'),
|
|
||||||
Colon: C.Rune(':'),
|
|
||||||
Semicolon: C.Rune(';'),
|
|
||||||
AngleOpen: C.Rune('<'),
|
|
||||||
LessThan: C.Rune('<'),
|
|
||||||
Equal: C.Rune('='),
|
|
||||||
AngleClose: C.Rune('>'),
|
|
||||||
GreaterThan: C.Rune('>'),
|
|
||||||
Question: C.Rune('?'),
|
|
||||||
At: C.Rune('@'),
|
|
||||||
SquareOpen: C.Rune('['),
|
|
||||||
Backslash: C.Rune('\\'),
|
|
||||||
SquareClose: C.Rune(']'),
|
|
||||||
Caret: C.Rune('^'),
|
|
||||||
Underscore: C.Rune('_'),
|
|
||||||
Backquote: C.Rune('`'),
|
|
||||||
CurlyOpen: C.Rune('{'),
|
|
||||||
Pipe: C.Rune('|'),
|
|
||||||
CurlyClose: C.Rune('}'),
|
|
||||||
Tilde: C.Rune('~'),
|
|
||||||
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
|
|
||||||
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
|
|
||||||
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
|
|
||||||
Digit: MatchDigit(),
|
|
||||||
DigitNotZero: MatchDigitNotZero(),
|
|
||||||
Digits: MatchDigits(),
|
|
||||||
Integer: MatchInteger(),
|
|
||||||
Float: MatchFloat(),
|
|
||||||
ASCII: C.RuneRange('\x00', '\x7F'),
|
|
||||||
ASCIILower: C.RuneRange('a', 'z'),
|
|
||||||
ASCIIUpper: C.RuneRange('A', 'Z'),
|
|
||||||
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
|
||||||
|
@ -451,8 +537,8 @@ var A = struct {
|
||||||
func MatchEndOfFile() TokenHandler {
|
func MatchEndOfFile() TokenHandler {
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
fork := t.Fork()
|
fork := t.Fork()
|
||||||
input, ok := fork.NextRune()
|
_, err := fork.NextRune()
|
||||||
return !ok && input == eofRune
|
return err == io.EOF
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,8 +547,8 @@ func MatchEndOfFile() TokenHandler {
|
||||||
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
|
||||||
func MatchAnyRune() TokenHandler {
|
func MatchAnyRune() TokenHandler {
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
_, ok := t.NextRune()
|
_, err := t.NextRune()
|
||||||
if ok {
|
if err == nil {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -494,7 +580,7 @@ func MatchDigitNotZero() TokenHandler {
|
||||||
// hexadecimal.
|
// hexadecimal.
|
||||||
func MatchInteger() TokenHandler {
|
func MatchInteger() TokenHandler {
|
||||||
justZero := MatchRune('0')
|
justZero := MatchRune('0')
|
||||||
integer := C.Seq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
|
||||||
return MatchAny(integer, justZero)
|
return MatchAny(integer, justZero)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -506,6 +592,56 @@ func MatchFloat() TokenHandler {
|
||||||
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MatchBoolean creates a TokenHandler function that checks if a valid boolean
|
||||||
|
// value can be read from the input. It supports the boolean values as understood
|
||||||
|
// by Go's strconv.ParseBool() function.
|
||||||
|
func MatchBoolean() TokenHandler {
|
||||||
|
trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T'))
|
||||||
|
falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F'))
|
||||||
|
return MatchAny(trues, falses)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchOctet creates a TokenHandler function that checks if a valid octet value
|
||||||
|
// can be read from the input (octet = byte value representation, with a value
|
||||||
|
// between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
|
||||||
|
// digits, not if there's a non-digit after it, meaning that "123255" would be
|
||||||
|
// a valid sequence of two octets.
|
||||||
|
//
|
||||||
|
// When the normalize parameter is set to true, then leading zeroes will be
|
||||||
|
// stripped from the octet.
|
||||||
|
func MatchOctet(normalize bool) TokenHandler {
|
||||||
|
digits := MatchMinMax(1, 3, MatchDigit())
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
fork := t.Fork()
|
||||||
|
if !digits(fork) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
|
||||||
|
if value <= 255 {
|
||||||
|
if normalize {
|
||||||
|
runes := fork.Result().Runes()
|
||||||
|
for len(runes) > 1 && runes[0] == '0' {
|
||||||
|
runes = runes[1:]
|
||||||
|
}
|
||||||
|
fork.Result().SetRunes(runes)
|
||||||
|
}
|
||||||
|
fork.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
|
||||||
|
// IP address value can be read from the input.
|
||||||
|
// It will normalize IP-addresses that look like "192.168.001.012" to
|
||||||
|
// "192.168.1.12".
|
||||||
|
func MatchIPv4() TokenHandler {
|
||||||
|
octet := MatchOctet(true)
|
||||||
|
dot := MatchRune('.')
|
||||||
|
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
|
||||||
|
}
|
||||||
|
|
||||||
// M provides convenient access to a range of modifiers (which in their nature are
|
// M provides convenient access to a range of modifiers (which in their nature are
|
||||||
// parser/combinators) that can be used when creating TokenHandler functions.
|
// parser/combinators) that can be used when creating TokenHandler functions.
|
||||||
//
|
//
|
||||||
|
@ -528,7 +664,7 @@ var M = struct {
|
||||||
ToLower func(TokenHandler) TokenHandler
|
ToLower func(TokenHandler) TokenHandler
|
||||||
ToUpper func(TokenHandler) TokenHandler
|
ToUpper func(TokenHandler) TokenHandler
|
||||||
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
|
||||||
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
|
ByCallback func(TokenHandler, func(string) string) TokenHandler
|
||||||
}{
|
}{
|
||||||
Drop: ModifyDrop,
|
Drop: ModifyDrop,
|
||||||
Trim: ModifyTrim,
|
Trim: ModifyTrim,
|
||||||
|
@ -538,7 +674,7 @@ var M = struct {
|
||||||
ToLower: ModifyToLower,
|
ToLower: ModifyToLower,
|
||||||
ToUpper: ModifyToUpper,
|
ToUpper: ModifyToUpper,
|
||||||
Replace: ModifyReplace,
|
Replace: ModifyReplace,
|
||||||
ModifyByCallback: ModifyByCallback,
|
ByCallback: ModifyByCallback,
|
||||||
}
|
}
|
||||||
|
|
||||||
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
|
||||||
|
@ -635,11 +771,222 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa
|
||||||
return func(t *TokenAPI) bool {
|
return func(t *TokenAPI) bool {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
if handler(child) {
|
if handler(child) {
|
||||||
s := modfunc(string(child.output))
|
s := modfunc(child.Result().String())
|
||||||
child.output = []rune(s)
|
child.Result().SetRunes(s)
|
||||||
child.Merge()
|
child.Merge()
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||||
|
literal := t.Result().String()
|
||||||
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||||
|
// TODO ERROR HANDLING
|
||||||
|
interpreted, _ := interpretString(t.Result().String())
|
||||||
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||||
|
// TODO ERROR HANDLING --- not a 1 rune input
|
||||||
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||||
|
// TODO ERROR HANDLING --- not a 1 byte input
|
||||||
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// interpretString resolves the escape sequences in str, using the rules of
// a Go double quoted string literal (without the surrounding quotes). So
// `\n` becomes a newline, `\u00e9` becomes "é", and so on. An unescaped
// double quote is a syntax error.
//
// Byte escapes (`\xNN` and octal `\NNN`) produce exactly one raw byte each,
// like they do in Go source code. This makes it possible to write UTF-8
// sequences byte-by-byte: `\xe2\x82\xac` yields "€".
//
// When an invalid escape sequence is encountered, the error from
// strconv.UnquoteChar is returned, together with the part of the string
// that was interpreted successfully up to that point.
func interpretString(str string) (string, error) {
	var sb strings.Builder
	for len(str) > 0 {
		r, multibyte, remainder, err := strconv.UnquoteChar(str, '"')
		if err != nil {
			return sb.String(), err
		}
		str = remainder
		if multibyte {
			sb.WriteRune(r)
		} else {
			// Not multibyte means that r is a single byte value
			// (at most 255), which must be written as-is. Writing it
			// as a rune would UTF-8 encode values from 0x80 and up.
			sb.WriteByte(byte(r))
		}
	}
	return sb.String(), nil
}
|
||||||
|
|
||||||
|
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
|
||||||
|
return strconv.Atoi(s)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO allow other Go types for oct and hex too.
|
||||||
|
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseInt(s, 10, 8)
|
||||||
|
if err == nil {
|
||||||
|
return int8(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseInt(s, 10, 16)
|
||||||
|
if err == nil {
|
||||||
|
return int16(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseInt(s, 10, 32)
|
||||||
|
if err == nil {
|
||||||
|
return int32(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseInt(s, 10, 64)
|
||||||
|
if err == nil {
|
||||||
|
return int64(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseUint(s, 10, 0)
|
||||||
|
if err == nil {
|
||||||
|
return uint(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO allow other Go types for oct and hex too.
|
||||||
|
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseUint(s, 10, 8)
|
||||||
|
if err == nil {
|
||||||
|
return uint8(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseUint(s, 10, 16)
|
||||||
|
if err == nil {
|
||||||
|
return uint16(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseUint(s, 10, 32)
|
||||||
|
if err == nil {
|
||||||
|
return uint32(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseUint(s, 10, 64)
|
||||||
|
if err == nil {
|
||||||
|
return uint64(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseFloat(s, 32)
|
||||||
|
if err == nil {
|
||||||
|
return float32(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseFloat(s, 64)
|
||||||
|
if err == nil {
|
||||||
|
return float64(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
|
||||||
|
return makeStrconvToken(toktype, handler,
|
||||||
|
func(s string) (interface{}, error) {
|
||||||
|
value, err := strconv.ParseBool(s)
|
||||||
|
if err == nil {
|
||||||
|
return bool(value), err
|
||||||
|
}
|
||||||
|
return value, err
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
|
||||||
|
pc, _, _, _ := runtime.Caller(1)
|
||||||
|
fullName := runtime.FuncForPC(pc).Name()
|
||||||
|
parts := strings.Split(fullName, ".")
|
||||||
|
name := parts[len(parts)-1]
|
||||||
|
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
|
||||||
|
value, err := convert(t.Result().String())
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Sprintf(
|
||||||
|
"TokenHandler error: %s cannot handle input %q: %s "+
|
||||||
|
"(only use a type conversion token maker, when the input has been "+
|
||||||
|
"validated on beforehand)", name, t.Result().String(), err))
|
||||||
|
}
|
||||||
|
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
|
||||||
|
return func(t *TokenAPI) bool {
|
||||||
|
fork := t.Fork()
|
||||||
|
if handler(fork) {
|
||||||
|
t.Result().AddToken(callback(fork))
|
||||||
|
fork.Merge()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -9,72 +9,57 @@ import (
|
||||||
|
|
||||||
func TestCombinators(t *testing.T) {
|
func TestCombinators(t *testing.T) {
|
||||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"xxx", c.Rune('x'), true, "x"},
|
{"abc", c.Not(a.Rune('b')), true, "a"},
|
||||||
{"x ", c.Rune(' '), false, ""},
|
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||||
{"aa", c.RuneRange('b', 'e'), false, ""},
|
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||||
{"bb", c.RuneRange('b', 'e'), true, "b"},
|
{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
|
||||||
{"cc", c.RuneRange('b', 'e'), true, "c"},
|
{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
|
||||||
{"dd", c.RuneRange('b', 'e'), true, "d"},
|
{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
|
||||||
{"ee", c.RuneRange('b', 'e'), true, "e"},
|
{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
|
||||||
{"ff", c.RuneRange('b', 'e'), false, ""},
|
{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
|
||||||
{"Hello, world!", c.Str("Hello"), true, "Hello"},
|
{"ababc", c.Rep(4, a.Runes('a', 'b')), true, "abab"},
|
||||||
{"HellÖ, world!", c.StrNoCase("hellö"), true, "HellÖ"},
|
{"ababc", c.Rep(5, a.Runes('a', 'b')), false, ""},
|
||||||
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
|
{"", c.Min(0, a.Rune('a')), true, ""},
|
||||||
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
|
{"a", c.Min(0, a.Rune('a')), true, "a"},
|
||||||
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
|
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
|
||||||
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
|
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
|
||||||
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
|
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
|
||||||
{"abc", c.Not(c.Rune('b')), true, "a"},
|
{"", c.Max(4, a.Rune('b')), true, ""},
|
||||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
{"X", c.Max(4, a.Rune('b')), true, ""},
|
||||||
{"bcd", c.Not(c.Rune('b')), false, ""},
|
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
|
||||||
{"1010", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), true, "1"},
|
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
|
||||||
{"2020", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), false, ""},
|
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
|
||||||
{"abc", c.Any(c.Rune('a'), c.Rune('b')), true, "a"},
|
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||||
{"bcd", c.Any(c.Rune('a'), c.Rune('b')), true, "b"},
|
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||||
{"cde", c.Any(c.Rune('a'), c.Rune('b')), false, ""},
|
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
|
||||||
{"ababc", c.Rep(4, c.Runes('a', 'b')), true, "abab"},
|
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||||
{"ababc", c.Rep(5, c.Runes('a', 'b')), false, ""},
|
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||||
{"", c.Min(0, c.Rune('a')), true, ""},
|
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||||
{"a", c.Min(0, c.Rune('a')), true, "a"},
|
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
|
||||||
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
|
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||||
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
|
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
|
||||||
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
|
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
|
||||||
{"", c.Max(4, c.Rune('b')), true, ""},
|
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||||
{"X", c.Max(4, c.Rune('b')), true, ""},
|
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||||
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
|
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
|
||||||
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
|
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
|
||||||
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
|
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
|
||||||
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"", c.OneOrMore(a.Rune('d')), false, ""},
|
||||||
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"X", c.OneOrMore(a.Rune('d')), false, ""},
|
||||||
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
|
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
|
||||||
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
|
||||||
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||||
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
|
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||||
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
|
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
|
||||||
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
|
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
|
||||||
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
|
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
|
||||||
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
|
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
|
||||||
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
{"", c.Opt(c.OneOrMore(a.Rune('f'))), true, ""},
|
||||||
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
|
{"ghijkl", c.Opt(a.Rune('h')), true, ""},
|
||||||
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
|
{"ghijkl", c.Opt(a.Rune('g')), true, "g"},
|
||||||
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
|
{"fffffX", c.Opt(c.OneOrMore(a.Rune('f'))), true, "fffff"},
|
||||||
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
|
|
||||||
{"", c.OneOrMore(c.Rune('d')), false, ""},
|
|
||||||
{"X", c.OneOrMore(c.Rune('d')), false, ""},
|
|
||||||
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
|
|
||||||
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
|
|
||||||
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
|
|
||||||
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
|
|
||||||
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
|
|
||||||
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
|
|
||||||
{"Hello, world!X", c.Seq(c.Str("Hello"), a.Comma, a.Space, c.Str("world"), a.Excl), true, "Hello, world!"},
|
|
||||||
{"101010123", c.OneOrMore(c.Seq(c.Rune('1'), c.Rune('0'))), true, "101010"},
|
|
||||||
{"", c.Opt(c.OneOrMore(c.Rune('f'))), true, ""},
|
|
||||||
{"ghijkl", c.Opt(c.Rune('h')), true, ""},
|
|
||||||
{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
|
|
||||||
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
|
|
||||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||||
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||||
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
|
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||||
|
@ -83,27 +68,42 @@ func TestCombinators(t *testing.T) {
|
||||||
|
|
||||||
func TestCombinatorPanics(t *testing.T) {
|
func TestCombinatorPanics(t *testing.T) {
|
||||||
RunPanicTests(t, []PanicTest{
|
RunPanicTests(t, []PanicTest{
|
||||||
{func() { parsekit.C.RuneRange('z', 'a') },
|
{func() { a.RuneRange('z', 'a') },
|
||||||
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
|
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
|
||||||
{func() { parsekit.C.MinMax(-1, 1, parsekit.A.Space) },
|
{func() { c.MinMax(-1, 1, parsekit.A.Space) },
|
||||||
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
|
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
|
||||||
{func() { parsekit.C.MinMax(1, -1, parsekit.A.Space) },
|
{func() { c.MinMax(1, -1, parsekit.A.Space) },
|
||||||
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
|
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
|
||||||
{func() { parsekit.C.MinMax(10, 5, parsekit.A.Space) },
|
{func() { c.MinMax(10, 5, parsekit.A.Space) },
|
||||||
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
|
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
|
||||||
{func() { parsekit.C.Min(-10, parsekit.A.Space) },
|
{func() { c.Min(-10, parsekit.A.Space) },
|
||||||
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
|
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
|
||||||
{func() { parsekit.C.Max(-42, parsekit.A.Space) },
|
{func() { c.Max(-42, parsekit.A.Space) },
|
||||||
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
|
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAtoms(t *testing.T) {
|
func TestAtoms(t *testing.T) {
|
||||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
|
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||||
|
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||||
|
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||||
|
{"Hello, world!", a.Str("Hello"), true, "Hello"},
|
||||||
|
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
|
||||||
|
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||||
|
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||||
|
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||||
|
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||||
|
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||||
|
{"xxx", a.Rune('x'), true, "x"},
|
||||||
|
{"x ", a.Rune(' '), false, ""},
|
||||||
|
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||||
|
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||||
|
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||||
{"", a.EndOfFile, true, ""},
|
{"", a.EndOfFile, true, ""},
|
||||||
{"⌘", a.AnyRune, true, "⌘"},
|
{"⌘", a.AnyRune, true, "⌘"},
|
||||||
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
|
{"\xbc", a.AnyRune, true, "<22>"}, // invalid UTF8 rune
|
||||||
{"", a.AnyRune, false, ""}, // end of file
|
{"", a.AnyRune, false, ""}, // false is for end of file
|
||||||
{" ", a.Space, true, " "},
|
{" ", a.Space, true, " "},
|
||||||
{"X", a.Space, false, ""},
|
{"X", a.Space, false, ""},
|
||||||
{"\t", a.Tab, true, "\t"},
|
{"\t", a.Tab, true, "\t"},
|
||||||
|
@ -187,32 +187,128 @@ func TestAtoms(t *testing.T) {
|
||||||
{"1", a.Integer, true, "1"},
|
{"1", a.Integer, true, "1"},
|
||||||
{"-10X", a.Integer, false, ""},
|
{"-10X", a.Integer, false, ""},
|
||||||
{"+10X", a.Integer, false, ""},
|
{"+10X", a.Integer, false, ""},
|
||||||
{"-10X", c.Signed(a.Integer), true, "-10"},
|
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||||
{"+10X", c.Signed(a.Integer), true, "+10"},
|
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||||
{"+10.1X", c.Signed(a.Integer), true, "+10"},
|
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||||
{"0X", a.Float, true, "0"},
|
{"0X", a.Float, true, "0"},
|
||||||
{"0X", a.Float, true, "0"},
|
{"0X", a.Float, true, "0"},
|
||||||
{"1X", a.Float, true, "1"},
|
{"1X", a.Float, true, "1"},
|
||||||
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||||
{"123.321X", a.Float, true, "123.321"},
|
{"123.321X", a.Float, true, "123.321"},
|
||||||
{"-3.14X", a.Float, false, ""},
|
{"-3.14X", a.Float, false, ""},
|
||||||
{"-3.14X", c.Signed(a.Float), true, "-3.14"},
|
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
||||||
{"-003.0014X", c.Signed(a.Float), true, "-003.0014"},
|
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
||||||
|
{"0X", a.Octet, true, "0"},
|
||||||
|
{"00X", a.Octet, true, "00"},
|
||||||
|
{"000X", a.Octet, true, "000"},
|
||||||
|
{"10X", a.Octet, true, "10"},
|
||||||
|
{"010X", a.Octet, true, "010"},
|
||||||
|
{"255123", a.Octet, true, "255"},
|
||||||
|
{"256123", a.Octet, false, ""},
|
||||||
|
{"300", a.Octet, false, ""},
|
||||||
|
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
|
||||||
|
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
|
||||||
|
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
|
||||||
|
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
|
||||||
|
{"256.255.255.255", a.IPv4, false, ""},
|
||||||
|
{"0", a.IPv4MaskBits, true, "0"},
|
||||||
|
{"32", a.IPv4MaskBits, true, "32"},
|
||||||
|
{"33", a.IPv4MaskBits, false, "0"},
|
||||||
|
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||||
|
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||||
|
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||||
|
{"10", a.IntegerBetween(-10, 10), true, "10"},
|
||||||
|
{"11", a.IntegerBetween(0, 10), false, ""},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestModifiers(t *testing.T) {
|
func TestModifiers(t *testing.T) {
|
||||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
|
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||||
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
|
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
|
||||||
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
|
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
|
||||||
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
|
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
|
||||||
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
|
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
|
||||||
{"abcdefghijk", m.ModifyByCallback(c.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
||||||
{"NoTaLlUpPeR", m.ToUpper(c.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
||||||
{"NoTaLlLoWeR", m.ToLower(c.StrNoCase("NOTALLlower")), true, "notalllower"},
|
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// When a TokenMaker encounters an error, this is considered a programmer error.
|
||||||
|
// A TokenMaker should not be called, unless the input is already validated to
|
||||||
|
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||||
|
// input cannot be processed successfully.
|
||||||
|
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||||
|
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||||
|
parser := parsekit.NewMatcher(invalid, "boolean")
|
||||||
|
RunPanicTest(t, PanicTest{
|
||||||
|
func() { parser.Execute("no") },
|
||||||
|
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
|
||||||
|
`invalid syntax \(only use a type conversion token maker, when the input has been validated on beforehand\)`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenMakers(t *testing.T) {
|
||||||
|
RunTokenMakerTests(t, []TokenMakerTest{
|
||||||
|
{`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)),
|
||||||
|
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
||||||
|
|
||||||
|
{`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)),
|
||||||
|
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
||||||
|
|
||||||
|
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
||||||
|
[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
|
||||||
|
|
||||||
|
{"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
|
||||||
|
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{
|
||||||
|
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
|
||||||
|
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
|
||||||
|
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
|
||||||
|
{Type: "bar", Runes: []rune("K"), Value: byte('K')},
|
||||||
|
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
|
||||||
|
}},
|
||||||
|
|
||||||
|
{"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
|
||||||
|
|
||||||
|
{`2147483647XYZ`, tok.Int("D", a.Integer), []parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
|
||||||
|
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
|
||||||
|
{`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
|
||||||
|
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
|
||||||
|
{`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
|
||||||
|
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
|
||||||
|
{`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
|
||||||
|
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
|
||||||
|
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
|
||||||
|
|
||||||
|
{`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
|
||||||
|
{`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
|
||||||
|
{`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
|
||||||
|
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
|
||||||
|
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
|
||||||
|
|
||||||
|
{`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
|
||||||
|
{`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
|
||||||
|
|
||||||
|
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||||
|
{Type: "P", Runes: []rune("1"), Value: true},
|
||||||
|
{Type: "P", Runes: []rune("t"), Value: true},
|
||||||
|
{Type: "P", Runes: []rune("T"), Value: true},
|
||||||
|
{Type: "P", Runes: []rune("true"), Value: true},
|
||||||
|
{Type: "P", Runes: []rune("TRUE"), Value: true},
|
||||||
|
{Type: "P", Runes: []rune("True"), Value: true},
|
||||||
|
}},
|
||||||
|
|
||||||
|
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||||
|
{Type: "P", Runes: []rune("0"), Value: false},
|
||||||
|
{Type: "P", Runes: []rune("f"), Value: false},
|
||||||
|
{Type: "P", Runes: []rune("F"), Value: false},
|
||||||
|
{Type: "P", Runes: []rune("false"), Value: false},
|
||||||
|
{Type: "P", Runes: []rune("FALSE"), Value: false},
|
||||||
|
{Type: "P", Runes: []rune("False"), Value: false},
|
||||||
|
}},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,7 +325,7 @@ func TestSequenceOfRunes(t *testing.T) {
|
||||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||||
p.Expects("Sequence of runes")
|
p.Expects("Sequence of runes")
|
||||||
if p.On(sequence).Accept() {
|
if p.On(sequence).Accept() {
|
||||||
output = p.BufLiteral()
|
output = p.Result().String()
|
||||||
p.Stop()
|
p.Stop()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -250,7 +346,7 @@ func TestCombination(t *testing.T) {
|
||||||
c.Seq(
|
c.Seq(
|
||||||
c.Opt(a.Whitespace),
|
c.Opt(a.Whitespace),
|
||||||
c.Rep(3, a.AngleClose),
|
c.Rep(3, a.AngleClose),
|
||||||
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
|
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
|
||||||
return fmt.Sprintf("%d", len(s))
|
return fmt.Sprintf("%d", len(s))
|
||||||
}),
|
}),
|
||||||
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
|
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),
|
||||||
|
|
Loading…
Reference in New Issue