Another big simplification pass: cleaned up code, added tests and examples, and made things unexported where possible to slim down the exported interface.
parent 4580962fb8
commit 75373e5ed5
|
@ -1,19 +0,0 @@
|
|||
package assert
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
|
||||
if expected != actual {
|
||||
t.Errorf(
|
||||
"Unexpected value for %s:\nexpected: %q\nactual: %q",
|
||||
forWhat, expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
|
||||
if notExpected == actual {
|
||||
t.Errorf("Unexpected value for %s: %q", forWhat, actual)
|
||||
}
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
package assert
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type PanicT struct {
|
||||
Function func()
|
||||
Expect string
|
||||
Regexp bool
|
||||
}
|
||||
|
||||
func Panic(t *testing.T, p PanicT) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
mismatch := false
|
||||
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
|
||||
mismatch = true
|
||||
}
|
||||
if !p.Regexp && p.Expect != r.(string) {
|
||||
mismatch = true
|
||||
}
|
||||
if mismatch {
|
||||
t.Errorf(
|
||||
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
|
||||
p.Expect, r)
|
||||
}
|
||||
} else {
|
||||
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
|
||||
}
|
||||
}()
|
||||
p.Function()
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package parsekit
|
||||
|
||||
// This file contains some tools that are used for writing parsekit tests.
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
|
||||
if expected != actual {
|
||||
t.Errorf(
|
||||
"Unexpected value for %s:\nexpected: %q\nactual: %q",
|
||||
forWhat, expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
|
||||
if notExpected == actual {
|
||||
t.Errorf("Unexpected value for %s: %q", forWhat, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTrue(t *testing.T, b bool, assertion string) {
|
||||
if !b {
|
||||
t.Errorf("Assertion %s is false", assertion)
|
||||
}
|
||||
}
|
||||
|
||||
type PanicT struct {
|
||||
Function func()
|
||||
Regexp bool
|
||||
Expect string
|
||||
}
|
||||
|
||||
func AssertPanics(t *testing.T, testSet []PanicT) {
|
||||
for _, test := range testSet {
|
||||
AssertPanic(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertPanic(t *testing.T, p PanicT) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
mismatch := false
|
||||
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
|
||||
mismatch = true
|
||||
}
|
||||
if !p.Regexp && p.Expect != r.(string) {
|
||||
mismatch = true
|
||||
}
|
||||
if mismatch {
|
||||
t.Errorf(
|
||||
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
|
||||
p.Expect, r)
|
||||
}
|
||||
} else {
|
||||
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
|
||||
}
|
||||
}()
|
||||
p.Function()
|
||||
}
|
||||
|
||||
type TokenHandlerT struct {
|
||||
Input string
|
||||
TokenHandler TokenHandler
|
||||
MustMatch bool
|
||||
Expected string
|
||||
}
|
||||
|
||||
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
|
||||
for _, test := range testSet {
|
||||
AssertTokenHandler(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
|
||||
result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if test.MustMatch {
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else if output := result.String(); output != test.Expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Errorf("Test %q failed: should not match, but it did", test.Input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type TokenMakerT struct {
|
||||
Input string
|
||||
TokenHandler TokenHandler
|
||||
Expected []Token
|
||||
}
|
||||
|
||||
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
|
||||
for _, test := range testSet {
|
||||
AssertTokenMaker(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
|
||||
result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else {
|
||||
if len(result.Tokens()) != len(test.Expected) {
|
||||
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
|
||||
}
|
||||
for i, expected := range test.Expected {
|
||||
actual := result.Token(i)
|
||||
if expected.Type != actual.Type {
|
||||
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
|
||||
}
|
||||
if string(expected.Runes) != string(actual.Runes) {
|
||||
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
|
||||
}
|
||||
if expected.Value != actual.Value {
|
||||
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
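The helpers above are plain testing.T-based assertion utilities, so they can be called from any regular Go test inside the parsekit package. A minimal sketch of how they might be used (the specific handlers, inputs and expectations here are illustrative assumptions, not part of this commit):

package parsekit

import "testing"

// Hypothetical in-package test exercising the helpers above. C and A are the
// package-level parsekit definitions referenced elsewhere in this commit.
func TestDigitsTokenHandler(t *testing.T) {
	AssertEqual(t, 42, 42, "a trivial comparison")

	AssertTokenHandlers(t, []TokenHandlerT{
		{Input: "123", TokenHandler: C.OneOrMore(A.Digit), MustMatch: true, Expected: "123"},
		{Input: "abc", TokenHandler: C.OneOrMore(A.Digit), MustMatch: false},
	})

	AssertPanics(t, []PanicT{
		{Function: func() { panic("boom") }, Regexp: false, Expect: "boom"},
	})
}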
24
cursor.go
|
@ -2,20 +2,31 @@ package parsekit
|
|||
|
||||
import "fmt"
|
||||
|
||||
// Cursor represents the position of the input cursor in various ways.
|
||||
// Cursor represents the position of a cursor in various ways.
|
||||
type Cursor struct {
|
||||
Byte int // The cursor offset in bytes
|
||||
Rune int // The cursor offset in UTF8 runes
|
||||
Byte int // The cursor offset in bytes, relative to start of file
|
||||
Rune int // The cursor offset in UTF8 runes, relative to start of file
|
||||
Column int // The column at which the cursor is (0-indexed)
|
||||
Line int // The line at which the cursor is (0-indexed)
|
||||
}
|
||||
|
||||
func (c *Cursor) String() string {
|
||||
// String produces a string representation of the cursor position.
|
||||
func (c Cursor) String() string {
|
||||
if c.Line == 0 && c.Column == 0 {
|
||||
return fmt.Sprintf("start of file")
|
||||
}
|
||||
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
|
||||
}
|
||||
|
||||
// move updates the position of the cursor, based on the provided input string.
|
||||
func (c *Cursor) move(input string) {
|
||||
// Move updates the position of the cursor, based on the provided input string.
|
||||
// The input string represents the runes that have been skipped over. This
|
||||
// method will take newlines into account to keep track of line numbers and
|
||||
// column positions automatically.
|
||||
//
|
||||
// Note: when you are writing a parser using parsekit, it's unlikely
|
||||
// that you will use this method directly. The parsekit package takes care
|
||||
// of calling it at the correct time.
|
||||
func (c *Cursor) Move(input string) *Cursor {
|
||||
c.Byte += len(input)
|
||||
for _, r := range input {
|
||||
c.Rune++
|
||||
|
@ -26,4 +37,5 @@ func (c *Cursor) move(input string) {
|
|||
c.Column++
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
|
|
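As an aside (not part of this commit), the Byte and Rune offsets maintained by Move() diverge as soon as the input contains multi-byte UTF8 characters, while Column keeps counting runes. A small hypothetical illustration:

package parsekit_test

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit"
)

// Illustrative only: "⌘" is a single rune encoded in 3 bytes.
func ExampleCursor_byteVersusRune() {
	c := parsekit.Cursor{}
	c.Move("a⌘")
	fmt.Println(c.Byte, c.Rune, c.Line, c.Column)
	// Output:
	// 4 2 0 2
}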
@ -1,9 +1,38 @@
|
|||
package parsekit
|
||||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleCursor_Move() {
|
||||
c := &parsekit.Cursor{}
|
||||
fmt.Printf("after initialization : %s\n", c)
|
||||
fmt.Printf("after 'some words' : %s\n", c.Move("some words"))
|
||||
fmt.Printf("after '\\n' : %s\n", c.Move("\n"))
|
||||
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.Move("\r\nskip\nlines"))
|
||||
|
||||
// Output:
|
||||
// after initialization : start of file
|
||||
// after 'some words' : line 1, column 11
|
||||
// after '\n' : line 2, column 1
|
||||
// after '\r\nskip\nlines' : line 4, column 6
|
||||
}
|
||||
|
||||
func ExampleCursor_String() {
|
||||
c := &parsekit.Cursor{}
|
||||
fmt.Println(c.String())
|
||||
|
||||
c.Move("\nfoobar")
|
||||
fmt.Println(c.String())
|
||||
|
||||
// Output:
|
||||
// start of file
|
||||
// line 2, column 7
|
||||
}
|
||||
|
||||
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
||||
for _, test := range []struct {
|
||||
name string
|
||||
|
@ -22,9 +51,9 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
|||
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
|
||||
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
|
||||
} {
|
||||
c := Cursor{}
|
||||
c := parsekit.Cursor{}
|
||||
for _, s := range test.input {
|
||||
c.move(s)
|
||||
c.Move(s)
|
||||
}
|
||||
if c.Byte != test.byte {
|
||||
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Error is used as the error type when parsing errors occur.
|
||||
// The error includes some context information to allow for useful
|
||||
// error messages to the user.
|
||||
type Error struct {
|
||||
Message string
|
||||
Cursor Cursor
|
||||
}
|
||||
|
||||
func (err *Error) Error() string {
|
||||
if err == nil {
|
||||
_, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf("parsekit.Error.Error(): method called with nil error at %s", linepos))
|
||||
}
|
||||
return err.Message
|
||||
}
|
||||
|
||||
// Full returns the current error message, including information about
|
||||
// the position in the input where the error occurred.
|
||||
func (err *Error) Full() string {
|
||||
if err == nil {
|
||||
_, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf("parsekit.Error.Full(): method called with nil error at %s", linepos))
|
||||
}
|
||||
return fmt.Sprintf("%s at %s", err, err.Cursor)
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleError() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Cursor: parsekit.Cursor{Line: 9, Column: 41},
|
||||
}
|
||||
|
||||
fmt.Println(err.Error())
|
||||
fmt.Printf("%s\n", err)
|
||||
fmt.Println(err.Full())
|
||||
// Output:
|
||||
// it broke down
|
||||
// it broke down
|
||||
// it broke down at line 10, column 42
|
||||
}
|
||||
|
||||
func ExampleError_Error() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Cursor: parsekit.Cursor{Line: 9, Column: 41},
|
||||
}
|
||||
|
||||
fmt.Println(err.Error())
|
||||
fmt.Printf("%s\n", err)
|
||||
// Output:
|
||||
// it broke down
|
||||
// it broke down
|
||||
}
|
||||
|
||||
func ExampleError_Full() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Cursor: parsekit.Cursor{Line: 9, Column: 41},
|
||||
}
|
||||
|
||||
fmt.Println(err.Full())
|
||||
// Output:
|
||||
// it broke down at line 10, column 42
|
||||
}
|
|
@ -5,7 +5,7 @@
|
|||
//
|
||||
// So positive numbers that can be either added or subtracted, and whitespace
|
||||
// is ignored.
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -83,15 +83,15 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
|
||||
var a = parsekit.A
|
||||
var A = parsekit.A
|
||||
switch {
|
||||
case p.On(a.Add).Skip():
|
||||
case p.On(A.Add).Skip():
|
||||
c.op = +1
|
||||
p.Handle(c.number)
|
||||
case p.On(a.Subtract).Skip():
|
||||
case p.On(A.Subtract).Skip():
|
||||
c.op = -1
|
||||
p.Handle(c.number)
|
||||
case !p.On(a.EndOfFile).Stay():
|
||||
case !p.On(A.EndOfFile).Stay():
|
||||
p.Expects("operator, '+' or '-'")
|
||||
p.UnexpectedInput()
|
||||
default:
|
|
@ -10,7 +10,7 @@
|
|||
// <term> = (<factor> | <factor> (MUL|DIV) <factor>)
|
||||
// <space> = (<space> (SPACE|TAB) | "")
|
||||
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -40,7 +40,7 @@ func Example_basicCalculator2() {
|
|||
output, err := Compute(c.input)
|
||||
output = math.Round(output*1000000) / 1000000 // to make the expectation comparisons usable
|
||||
if err != nil {
|
||||
fmt.Printf("Input: %q, got error: %s\n", c.input, err)
|
||||
fmt.Printf("Input: %q, got error: %s\n", c.input, err.Full())
|
||||
} else {
|
||||
fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", c.input, output, output == c.expected)
|
||||
}
|
||||
|
@ -53,11 +53,11 @@ func Example_basicCalculator2() {
|
|||
// Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true
|
||||
// Input: "8.10 + 999/233", got outcome: 12.387554, correct = true
|
||||
// Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", got outcome: -20.168000, correct = true
|
||||
// Input: "", got error: unexpected end of file
|
||||
// Input: "(", got error: unexpected end of file
|
||||
// Input: "10+20-", got error: unexpected end of file
|
||||
// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file)
|
||||
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')')
|
||||
// Input: "", got error: unexpected end of file at start of file
|
||||
// Input: "(", got error: unexpected end of file at line 1, column 2
|
||||
// Input: "10+20-", got error: unexpected end of file at line 1, column 7
|
||||
// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) at line 1, column 13
|
||||
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
@ -94,9 +94,9 @@ func (c *calculator) calculation(p *parsekit.ParseAPI) {
|
|||
func (c *calculator) expr(p *parsekit.ParseAPI) {
|
||||
c.interpreter.push()
|
||||
|
||||
var pc, a = parsekit.C, parsekit.A
|
||||
var C, A = parsekit.C, parsekit.A
|
||||
if p.Handle(c.term) {
|
||||
for p.On(pc.Any(a.Add, a.Subtract)).Accept() {
|
||||
for p.On(C.Any(A.Add, A.Subtract)).Accept() {
|
||||
op := p.Result().Rune(0)
|
||||
if !p.Handle(c.term) {
|
||||
return
|
||||
|
@ -112,9 +112,9 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
|
|||
func (c *calculator) term(p *parsekit.ParseAPI) {
|
||||
c.interpreter.push()
|
||||
|
||||
var pc, a = parsekit.C, parsekit.A
|
||||
var C, A = parsekit.C, parsekit.A
|
||||
if p.Handle(c.factor) {
|
||||
for p.On(pc.Any(a.Multiply, a.Divide)).Accept() {
|
||||
for p.On(C.Any(A.Multiply, A.Divide)).Accept() {
|
||||
op := p.Result().Rune(0)
|
||||
if !p.Handle(c.factor) {
|
||||
return
|
||||
|
@ -129,17 +129,17 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
|
|||
// <space> = (<space> (SPACE|TAB) | "")
|
||||
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
||||
func (c *calculator) factor(p *parsekit.ParseAPI) {
|
||||
var a, tok = parsekit.A, parsekit.T
|
||||
p.On(a.Whitespace).Skip()
|
||||
var A, T = parsekit.A, parsekit.T
|
||||
p.On(A.Whitespace).Skip()
|
||||
switch {
|
||||
case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept():
|
||||
case p.On(T.Float64(nil, A.Signed(A.Float))).Accept():
|
||||
value := p.Result().Value(0).(float64)
|
||||
c.interpreter.pushValue(value)
|
||||
case p.On(a.LeftParen).Skip():
|
||||
case p.On(A.LeftParen).Skip():
|
||||
if !p.Handle(c.expr) {
|
||||
return
|
||||
}
|
||||
if !p.On(a.RightParen).Skip() {
|
||||
if !p.On(A.RightParen).Skip() {
|
||||
p.Expects("')'")
|
||||
p.UnexpectedInput()
|
||||
return
|
||||
|
@ -148,7 +148,7 @@ func (c *calculator) factor(p *parsekit.ParseAPI) {
|
|||
p.UnexpectedInput()
|
||||
return
|
||||
}
|
||||
p.On(a.Whitespace).Skip()
|
||||
p.On(A.Whitespace).Skip()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
|
@ -2,7 +2,7 @@
|
|||
// The implementation uses only TokenHandler functions and does not implement a
|
||||
// full-fledged state-based Parser for it.
|
||||
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -10,8 +10,8 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func Example_dutchPostcodeUsingMatcher() {
|
||||
parser := createPostcodeMatcher()
|
||||
func Example_dutchPostcodeUsingTokenizer() {
|
||||
parser := createPostcodeTokenizer()
|
||||
|
||||
for i, input := range []string{
|
||||
"1234 AB",
|
||||
|
@ -24,18 +24,22 @@ func Example_dutchPostcodeUsingMatcher() {
|
|||
"",
|
||||
"\xcd2222AB",
|
||||
} {
|
||||
output, err := parser.Execute(input)
|
||||
result, err := parser.Execute(input)
|
||||
if err != nil {
|
||||
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full())
|
||||
} else {
|
||||
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output)
|
||||
fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result)
|
||||
for _, t := range result.Tokens() {
|
||||
fmt.Printf(" %s(%s)", t.Type, t.Value)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// [0] Input: "1234 AB" Output: 1234 AB
|
||||
// [1] Input: "2233Ab" Output: 2233 AB
|
||||
// [2] Input: "1001\t\tab" Output: 1001 AB
|
||||
// [3] Input: "1818ab" Output: 1818 AB
|
||||
// [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB)
|
||||
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
|
||||
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
|
||||
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
|
||||
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
|
||||
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
|
||||
|
@ -47,9 +51,9 @@ func Example_dutchPostcodeUsingMatcher() {
|
|||
// Implementation of the parser
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func createPostcodeMatcher() *parsekit.Matcher {
|
||||
func createPostcodeTokenizer() *parsekit.Tokenizer {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T
|
||||
|
||||
// TokenHandler functions are created and combined to satisfy these rules:
|
||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||
|
@ -57,14 +61,14 @@ func createPostcodeMatcher() *parsekit.Matcher {
|
|||
// - A space between letters and digits is optional.
|
||||
// - It is good form to write the letters in upper case.
|
||||
// - It is good form to use a single space between digits and letters.
|
||||
digitNotZero := c.Except(a.Rune('0'), a.Digit)
|
||||
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
|
||||
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
|
||||
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))
|
||||
space := m.Replace(c.Opt(a.Whitespace), " ")
|
||||
postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile)
|
||||
digitNotZero := C.Except(A.Rune('0'), A.Digit)
|
||||
pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
|
||||
pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
|
||||
pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
|
||||
space := M.Replace(C.Opt(A.Whitespace), " ")
|
||||
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
|
||||
|
||||
// Create a Matcher that wraps the 'postcode' TokenHandler and allows
|
||||
// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewMatcher(postcode, "a Dutch postcode")
|
||||
return parsekit.NewTokenizer(postcode, "a Dutch postcode")
|
||||
}
|
|
@ -13,7 +13,7 @@
|
|||
// like this, then also take a look at the helloSingleState example, which does
|
||||
// the same thing as this version, only more concise.
|
||||
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -131,6 +131,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
|
|||
// different route was taken to implement a more friendly 'end of greeting'
|
||||
// error message.
|
||||
func (h *helloparser1) end(p *parsekit.ParseAPI) {
|
||||
var a = parsekit.A
|
||||
if !p.On(a.EndOfFile).Stay() {
|
||||
p.Expects("end of greeting")
|
||||
p.UnexpectedInput()
|
|
@ -5,7 +5,7 @@
|
|||
// not implement a full-fledged state-based Parser for it. If you want to see the
|
||||
// same kind of functionality, implemented using a Parser, take a look at the
|
||||
// other hello examples.
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -13,8 +13,8 @@ import (
|
|||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func Example_helloWorldUsingMatcher() {
|
||||
parser := createHelloMatcher()
|
||||
func Example_helloWorldUsingTokenizer() {
|
||||
parser := createHelloTokenizer()
|
||||
|
||||
for i, input := range []string{
|
||||
"Hello, world!",
|
||||
|
@ -46,7 +46,7 @@ func Example_helloWorldUsingMatcher() {
|
|||
// Implementation of the parser
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func createHelloMatcher() *parsekit.Matcher {
|
||||
func createHelloTokenizer() *parsekit.Tokenizer {
|
||||
// Easy access to parsekit definition.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
|
||||
|
@ -59,7 +59,7 @@ func createHelloMatcher() *parsekit.Matcher {
|
|||
name := c.OneOrMore(c.Not(a.Excl))
|
||||
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)
|
||||
|
||||
// Create a Matcher that wraps the 'greeting' TokenHandler and allows
|
||||
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewMatcher(greeting, "a friendly greeting")
|
||||
return parsekit.NewTokenizer(greeting, "a friendly greeting")
|
||||
}
|
|
@ -11,7 +11,7 @@
|
|||
// is that this parser reports errors much more fine-grained. This might or
|
||||
// might not be useful for your specific use case.
|
||||
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
|
@ -0,0 +1,5 @@
|
|||
// Package examples contains various examples for the parsekit module.
|
||||
// These examples have been moved into their own package, because they
|
||||
// are quite numerous and quite big. Too big in my opinion to make them
|
||||
// all available from within the parsekit package godocs.
|
||||
package examples
|
|
@ -5,7 +5,7 @@
|
|||
// for []string. We add a ParseHandler method directly to that type
|
||||
// and let the parsing code fill the slice with strings during parsing.
|
||||
|
||||
package parsekit_test
|
||||
package examples
|
||||
|
||||
import (
|
||||
"fmt"
|
|
@ -1,94 +0,0 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleError() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Line: 10,
|
||||
Column: 42,
|
||||
}
|
||||
|
||||
fmt.Println(err.Error())
|
||||
fmt.Printf("%s\n", err)
|
||||
fmt.Println(err.Full())
|
||||
// Output:
|
||||
// it broke down
|
||||
// it broke down
|
||||
// it broke down at line 10, column 42
|
||||
}
|
||||
|
||||
func ExampleError_Error() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Line: 10,
|
||||
Column: 42,
|
||||
}
|
||||
|
||||
fmt.Println(err.Error())
|
||||
fmt.Printf("%s\n", err)
|
||||
// Output:
|
||||
// it broke down
|
||||
// it broke down
|
||||
}
|
||||
|
||||
func ExampleError_Full() {
|
||||
err := &parsekit.Error{
|
||||
Message: "it broke down",
|
||||
Line: 10,
|
||||
Column: 42,
|
||||
}
|
||||
|
||||
fmt.Println(err.Full())
|
||||
// Output:
|
||||
// it broke down at line 10, column 42
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune_usingAcceptedRunes() {
|
||||
// Easy access to the parsekit definitions.
|
||||
a := parsekit.A
|
||||
|
||||
matches := []string{}
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(a.AnyRune).Accept() {
|
||||
matches = append(matches, p.Result().String())
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
})
|
||||
err := parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
|
||||
// Output:
|
||||
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
|
||||
}
|
||||
|
||||
func ExampleMatchAnyRune_usingTokens() {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, tok := parsekit.C, parsekit.A, parsekit.T
|
||||
|
||||
var tokens []*parsekit.Token
|
||||
var accepted string
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
|
||||
tokens = p.Result().Tokens()
|
||||
accepted = p.Result().String()
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
})
|
||||
parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Runes accepted: %q\n", accepted)
|
||||
fmt.Printf("Token values: ")
|
||||
for _, t := range tokens {
|
||||
fmt.Printf("%c ", t.Value)
|
||||
}
|
||||
// Output:
|
||||
// Runes accepted: "¡Any will dö!"
|
||||
// Token values: ¡ A n y w i l l d ö !
|
||||
}
|
274
parseapi.go
|
@ -2,7 +2,7 @@ package parsekit
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -12,7 +12,7 @@ type ParseAPI struct {
|
|||
tokenAPI *TokenAPI // the input reader
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
expecting string // a description of what the current state expects to find (see Expects())
|
||||
result *Result // TokenHandler result, as received from On(...).Accept()
|
||||
result *TokenResult // Last TokenHandler result as retrieved by On(...).Accept()
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
}
|
||||
|
@ -29,17 +29,20 @@ func (p *ParseAPI) panicWhenStoppedOrInError() {
|
|||
return
|
||||
}
|
||||
|
||||
called, _ := p.getCaller(1)
|
||||
called, _ := getCaller(1)
|
||||
parts := strings.Split(called, ".")
|
||||
calledShort := parts[len(parts)-1]
|
||||
caller, filepos := p.getCaller(2)
|
||||
_, filepos := getCaller(2)
|
||||
|
||||
after := "Error()"
|
||||
if p.stopped {
|
||||
after = "Stop()"
|
||||
}
|
||||
|
||||
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.ParseAPI.%s(): Illegal call to %s() at %s: "+
|
||||
"no calls allowed after ParseAPI.%s",
|
||||
calledShort, calledShort, filepos, after))
|
||||
}
|
||||
|
||||
func (p *ParseAPI) isStoppedOrInError() bool {
|
||||
|
@ -51,18 +54,261 @@ func (p *ParseAPI) initLoopCheck() {
|
|||
}
|
||||
|
||||
func (p *ParseAPI) checkForLoops() {
|
||||
caller, filepos := p.getCaller(2)
|
||||
_, filepos := getCaller(2)
|
||||
if _, ok := p.loopCheck[filepos]; ok {
|
||||
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
|
||||
panic(fmt.Sprintf("parsekit.ParseAPI: Loop detected in parser at %s", filepos))
|
||||
}
|
||||
p.loopCheck[filepos] = true
|
||||
}
|
||||
|
||||
// TODO delete this one
|
||||
func (p *ParseAPI) getCaller(depth int) (string, string) {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
return caller.Name(), filepos
|
||||
// On checks if the input at the current cursor position matches the provided
|
||||
// TokenHandler. On must be chained with another method that tells the parser
|
||||
// what action to perform when a match was found:
|
||||
//
|
||||
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||
//
|
||||
// 2) On(...).Accept() - Move cursor forward, add runes to the parser's string buffer.
|
||||
//
|
||||
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
||||
//
|
||||
// So an example chain could look like this:
|
||||
//
|
||||
// p.On(parsekit.A.Whitespace).Skip()
|
||||
//
|
||||
// The chain as a whole returns a boolean that indicates whether or not a match
|
||||
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||
// will have no effect. Because of this, typical use of an On() chain is as
|
||||
// expression for a conditional statement (if, switch/case, for). E.g.:
|
||||
//
|
||||
// // Skip multiple exclamation marks.
|
||||
// for p.On(parsekit.A.Excl).Skip() { }
|
||||
//
|
||||
// // Fork a route based on the input.
|
||||
// switch {
|
||||
// case p.On(parsekit.A.Excl).Stay()
|
||||
// p.RouteTo(stateHandlerA)
|
||||
// case p.On(parsekit.A.Colon).Stay():
|
||||
// p.RouteTo(stateHandlerB)
|
||||
// default:
|
||||
// p.RouteTo(stateHandlerC)
|
||||
// }
|
||||
//
|
||||
// // When there's a "hi" on input, then say hello.
|
||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||
// fmt.Println("Hello!")
|
||||
// }
|
||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.checkForLoops()
|
||||
if tokenHandler == nil {
|
||||
_, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.ParseAPI.On(): On() called with nil "+
|
||||
"tokenHandler argument at %s", filepos))
|
||||
}
|
||||
|
||||
p.result = nil
|
||||
p.tokenAPI.result = newTokenResult()
|
||||
fork := p.tokenAPI.Fork()
|
||||
ok := tokenHandler(fork)
|
||||
|
||||
return &ParseAPIOnAction{
|
||||
parseAPI: p,
|
||||
tokenAPI: fork,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
|
||||
// The On() method will return an initialized struct of this type.
|
||||
type ParseAPIOnAction struct {
|
||||
parseAPI *ParseAPI
|
||||
tokenAPI *TokenAPI
|
||||
ok bool
|
||||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to make the TokenResult from the TokenAPI available in the ParseAPI
|
||||
// through the Result() method.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Accept() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.Merge()
|
||||
a.parseAPI.result = a.tokenAPI.root.result
|
||||
a.flushTokenAPI()
|
||||
a.flushReader()
|
||||
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without making the results available through the ParseAPI.
|
||||
//
|
||||
// Note that functionally, you could call Accept() just as well, simply
|
||||
// ignoring the results. However, the Skip() call is a bit more efficient
|
||||
// than the Accept() call and (more important if you ask me) the code
|
||||
// expresses more clearly that your intent is to skip the match.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Skip() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.root.cursor = a.tokenAPI.cursor
|
||||
a.parseAPI.result = nil
|
||||
a.flushTokenAPI()
|
||||
a.flushReader()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
//
|
||||
// A typical use of Stay() is to let one ParseHandler detect the start
|
||||
// of some kind of token, but without moving the read cursor forward.
|
||||
// When a match is found, it hands off control to another ParseHandler
|
||||
// to take care of the actual token parsing.
|
||||
//
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *ParseAPIOnAction) Stay() bool {
|
||||
if a.ok {
|
||||
a.parseAPI.result = nil
|
||||
a.flushTokenAPI()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
func (a *ParseAPIOnAction) flushTokenAPI() {
|
||||
a.tokenAPI.root.result = newTokenResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
}
|
||||
|
||||
func (a *ParseAPIOnAction) flushReader() {
|
||||
if a.tokenAPI.offset > 0 {
|
||||
a.tokenAPI.root.reader.flush(a.tokenAPI.offset)
|
||||
a.tokenAPI.root.offset = 0
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
}
|
||||
|
||||
// Result returns a TokenResult struct, containing results as produced by the
|
||||
// last ParseAPI.On().Accept() call.
|
||||
func (p *ParseAPI) Result() *TokenResult {
|
||||
result := p.result
|
||||
if p.result == nil {
|
||||
_, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.ParseAPI.TokenResult(): TokenResult() called at %s without "+
|
||||
"calling ParseAPI.Accept() on beforehand", filepos))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Handle is used to execute other ParseHandler functions from within your
|
||||
// ParseHandler function.
|
||||
//
|
||||
// The boolean return value is true when the parser can still continue.
|
||||
// It will be false when either an error was set (using ParseAPI.Error()),
|
||||
// or the parser was stopped (using ParseAPI.Stop()).
|
||||
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.panicWhenParseHandlerNil(parseHandler)
|
||||
parseHandler(p)
|
||||
return !p.isStoppedOrInError()
|
||||
}
|
||||
|
||||
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
|
||||
if parseHandler == nil {
|
||||
_, filepos := getCaller(2)
|
||||
panic(fmt.Sprintf("parsekit.ParseAPI.Handle(): Handle() called with nil input at %s", filepos))
|
||||
}
|
||||
}
|
||||
|
||||
// Expects is used to let a ParseHandler function describe what input it is
|
||||
// expecting. This expectation is used in error messages to provide some
|
||||
// context to them.
|
||||
//
|
||||
// When defining an expectation inside a ParseHandler, you do not need to
|
||||
// handle unexpected input yourself. When the parser run ends without
|
||||
// an explicit call to ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(),
|
||||
// an automatic error will be emitted using ParseAPI.UnexpectedInput().
|
||||
func (p *ParseAPI) Expects(description string) {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.expecting = description
|
||||
}
|
||||
|
||||
// Stop is used by the parser implementation to tell the ParseAPI that it has
|
||||
// completed the parsing process successfully.
|
||||
//
|
||||
// When the parser implementation returns without stopping first (and
|
||||
// without running into an error), the Parser.Execute() will call
|
||||
// ParseAPI.ExpectEndOfFile() to check if the end of the file was reached.
|
||||
// If not, an error will be reported through ParseAPI.UnexpectedInput().
|
||||
// Even though this fallback mechanism will work in a lot of cases, try to make
|
||||
// your parser explicit about things and call Stop() actively yourself.
|
||||
//
|
||||
// After stopping, no more calls to ParseAPI methods are allowed.
|
||||
// Calling a method in this state will result in a panic.
|
||||
func (p *ParseAPI) Stop() {
|
||||
p.stopped = true
|
||||
}
|
||||
|
||||
// Error sets the error message in the ParseAPI.
|
||||
//
|
||||
// After setting an error, no more calls to ParseAPI methods are allowed.
|
||||
// Calling a method in this state will result in a panic.
|
||||
func (p *ParseAPI) Error(format string, args ...interface{}) {
|
||||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||
// set a different error message when needed.
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.err = &Error{message, p.tokenAPI.Cursor()}
|
||||
}
|
||||
|
||||
// ExpectEndOfFile can be used to check if the input is at end of file.
|
||||
//
|
||||
// When it finds that the end of the file was indeed reached, then the
|
||||
// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected
|
||||
// input is reported through ParseAPI.UnexpectedInput() with "end of file"
|
||||
// as the expectation.
|
||||
func (p *ParseAPI) ExpectEndOfFile() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
if p.On(A.EndOfFile).Stay() {
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expects("end of file")
|
||||
p.UnexpectedInput()
|
||||
}
|
||||
}
|
||||
|
||||
// UnexpectedInput is used to set an error that tells the user that some
|
||||
// unexpected input was encountered.
|
||||
//
|
||||
// It can automatically produce an error message for a couple of situations:
|
||||
// 1) input simply didn't match the expectation
|
||||
// 2) the end of the input was reached
|
||||
// 3) there was an invalid UTF8 character on the input.
|
||||
//
|
||||
// The parser implementation can provide some feedback for this error by
|
||||
// calling ParseAPI.Expects() to set the expectation. When set, the
|
||||
// expectation is included in the error message.
|
||||
func (p *ParseAPI) UnexpectedInput() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
r, err := p.tokenAPI.NextRune()
|
||||
switch {
|
||||
case err == nil:
|
||||
p.Error("unexpected character %q%s", r, fmtExpects(p))
|
||||
case err == io.EOF:
|
||||
p.Error("unexpected end of file%s", fmtExpects(p))
|
||||
default:
|
||||
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
|
||||
}
|
||||
}
|
||||
|
||||
func fmtExpects(p *ParseAPI) string {
|
||||
if p.expecting == "" {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf(" (expected %s)", p.expecting)
|
||||
}
|
||||
|
|
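To make the ParseAPI surface documented above a bit more concrete, here is a compact illustrative parser (not part of this commit; it only uses calls shown elsewhere in this diff) that ties On().Accept(), Result(), Expects(), UnexpectedInput() and ExpectEndOfFile() together:

package parsekit_test

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit"
)

func ExampleParseAPI_sketch() {
	c, a := parsekit.C, parsekit.A
	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
		// Require one or more digits, then the end of the input.
		if !p.On(c.OneOrMore(a.Digit)).Accept() {
			p.Expects("one or more digits")
			p.UnexpectedInput()
			return
		}
		fmt.Printf("digits: %s\n", p.Result().String())
		p.ExpectEndOfFile()
	})

	if err := parser.Execute("12345"); err == nil {
		fmt.Println("no error")
	}
	fmt.Println(parser.Execute("x").Full())
	// Output:
	// digits: 12345
	// no error
	// unexpected character 'x' (expected one or more digits) at start of file
}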
|
@ -1,9 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
// ParseHandler defines the type of function that must be implemented to handle
|
||||
// a parsing state in a Parser state machine.
|
||||
//
|
||||
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
|
||||
// all the internal state for the parsing state machine and provides the
|
||||
// interface that the ParseHandler uses to interact with the parser.
|
||||
type ParseHandler func(*ParseAPI)
|
|
@ -1,37 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Error is used as the error type when parsing errors occur.
|
||||
// The error includes some context information to allow for useful
|
||||
// error messages to the user.
|
||||
type Error struct {
|
||||
Message string
|
||||
Line int
|
||||
Column int
|
||||
}
|
||||
|
||||
func (err *Error) Error() string {
|
||||
return err.Message
|
||||
}
|
||||
|
||||
// Full returns the current error message, including information about
|
||||
// the position in the input where the error occurred.
|
||||
func (err *Error) Full() string {
|
||||
if err.Line == 0 {
|
||||
return fmt.Sprintf("%s at start of file", err)
|
||||
} else {
|
||||
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
|
||||
}
|
||||
}
|
||||
|
||||
// Error sets the error message in the parser API. This error message
|
||||
// will eventually be returned by the Parser.Execute() method.
|
||||
func (p *ParseAPI) Error(format string, args ...interface{}) {
|
||||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||
// set a different error message when needed.
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column}
|
||||
}
|
|
@ -1,128 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import "fmt"
|
||||
|
||||
// On checks if the input at the current cursor position matches the provided
|
||||
// TokenHandler. On must be chained with another method that tells the parser
|
||||
// what action to perform when a match was found:
|
||||
//
|
||||
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
|
||||
//
|
||||
// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer.
|
||||
//
|
||||
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
|
||||
//
|
||||
// So an example chain could look like this:
|
||||
//
|
||||
// p.On(parsekit.A.Whitespace).Skip()
|
||||
//
|
||||
// The chain as a whole returns a boolean that indicates whether or not at match
|
||||
// was found. When no match was found, false is returned and Skip() and Accept()
|
||||
// will have no effect. Because of this, typical use of an On() chain is as
|
||||
// expression for a conditional statement (if, switch/case, for). E.g.:
|
||||
//
|
||||
// // Skip multiple exclamation marks.
|
||||
// for p.On(parsekit.A.Excl).Skip() { }
|
||||
//
|
||||
// // Fork a route based on the input.
|
||||
// switch {
|
||||
// case p.On(parsekit.A.Excl).Stay()
|
||||
// p.RouteTo(stateHandlerA)
|
||||
// case p.On(parsekit.A.Colon).Stay():
|
||||
// p.RouteTo(stateHandlerB)
|
||||
// default:
|
||||
// p.RouteTo(stateHandlerC)
|
||||
// }
|
||||
//
|
||||
// // When there's a "hi" on input, then say hello.
|
||||
// if p.On(parsekit.C.Str("hi")).Accept() {
|
||||
// fmt.Println("Hello!")
|
||||
// }
|
||||
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.checkForLoops()
|
||||
if tokenHandler == nil {
|
||||
panic("ParseHandler bug: tokenHandler argument for On() is nil")
|
||||
}
|
||||
|
||||
p.result = nil
|
||||
p.tokenAPI.result = NewResult()
|
||||
fork := p.tokenAPI.Fork()
|
||||
ok := tokenHandler(fork)
|
||||
|
||||
return &ParseAPIOnAction{
|
||||
parseAPI: p,
|
||||
tokenAPI: fork,
|
||||
ok: ok,
|
||||
}
|
||||
}
|
||||
|
||||
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
|
||||
// The On() method will return an initialized struct of this type.
|
||||
type ParseAPIOnAction struct {
|
||||
parseAPI *ParseAPI
|
||||
tokenAPI *TokenAPI
|
||||
ok bool
|
||||
}
|
||||
|
||||
// Accept tells the parser to move the cursor past a match that was found,
|
||||
// and to make the TokenHandler results available in the ParseAPI through
|
||||
// the Result() method.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Accept() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.Merge()
|
||||
a.parseAPI.result = a.tokenAPI.root.result
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
if a.tokenAPI.offset > 0 {
|
||||
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Skip tells the parser to move the cursor past a match that was found,
|
||||
// without making the results available through the ParseAPI.
|
||||
//
|
||||
// Returns true in case a match was found.
|
||||
// When no match was found, then no action is taken and false is returned.
|
||||
func (a *ParseAPIOnAction) Skip() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.root.cursor = a.tokenAPI.cursor
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
if a.tokenAPI.offset > 0 {
|
||||
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
|
||||
a.parseAPI.initLoopCheck()
|
||||
}
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Stay tells the parser to not move the cursor after finding a match.
|
||||
// Returns true in case a match was found, false otherwise.
|
||||
func (a *ParseAPIOnAction) Stay() bool {
|
||||
if a.ok {
|
||||
a.tokenAPI.root.result = NewResult()
|
||||
a.tokenAPI.root.detachChilds()
|
||||
}
|
||||
return a.ok
|
||||
}
|
||||
|
||||
// Result returns a Result struct, containing results as produced by the
|
||||
// last ParseAPI.On() call.
|
||||
func (p *ParseAPI) Result() *Result {
|
||||
result := p.result
|
||||
if p.result == nil {
|
||||
caller, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.ParseAPI.Result(): Result() called without calling "+
|
||||
"ParseAPI.Accept() on beforehand to make the result available "+
|
||||
"from %s at %s", caller, filepos))
|
||||
}
|
||||
return result
|
||||
}
|
|
@ -1,99 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Handle is used to execute other ParseHandler functions from within your
|
||||
// ParseHandler function.
|
||||
//
|
||||
// The boolean return value is true when the parser can still continue.
|
||||
// It will be false when either an error was set (using ParseAPI.Error()),
|
||||
// or the parser was stopped (using ParseAPI.Stop()).
|
||||
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.panicWhenParseHandlerNil(parseHandler)
|
||||
parseHandler(p)
|
||||
return !p.isStoppedOrInError()
|
||||
}
|
||||
|
||||
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
|
||||
if parseHandler == nil {
|
||||
caller, filepos := p.getCaller(2)
|
||||
panic(fmt.Sprintf("ParseAPI.Handle() called with nil input from %s at %s", caller, filepos))
|
||||
}
|
||||
}
|
||||
|
||||
// Expects is used to let a ParseHandler function describe what input it is
|
||||
// expecting. This expectation is used in error messages to provide some
|
||||
// context to them.
|
||||
//
|
||||
// When defining an expectation inside a ParseHandler, you do not need to
|
||||
// handle unexpected input yourself. When the end of the parser is reached
|
||||
// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(),
|
||||
// an automatic error will be emitted using ParseAPI.UnexpectedInput().
|
||||
func (p *ParseAPI) Expects(description string) {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.expecting = description
|
||||
}
|
||||
|
||||
// Stop is used by the parser impementation to tell the API that it has
|
||||
// completed the parsing process successfully.
|
||||
//
|
||||
// When the parser implementation returns without stopping first, the
|
||||
// Parser.Execute() will assume that something went wrong and calls
|
||||
// ParserAPI.UnexpectedInput() to report an error about this.
|
||||
//
|
||||
// The parser implementation can define what was being expected, by
|
||||
// providing a description to ParseAPI.Expecting().
|
||||
func (p *ParseAPI) Stop() {
|
||||
p.stopped = true
|
||||
}
|
||||
|
||||
// ExpectEndOfFile can be used to check if the input is at end of file.
|
||||
//
|
||||
// When it finds that the end of the file was indeed reached, then the
|
||||
// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected
|
||||
// input is reported through ParseAPI.UnexpectedInput() with "end of file"
|
||||
// as the expectation.
|
||||
func (p *ParseAPI) ExpectEndOfFile() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
if p.On(A.EndOfFile).Stay() {
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expects("end of file")
|
||||
p.UnexpectedInput()
|
||||
}
|
||||
}
|
||||
|
||||
// UnexpectedInput is used to set an error that tells the user that some
|
||||
// unexpected input was encountered.
|
||||
//
|
||||
// It can automatically produce an error message for a couple of situations:
|
||||
// 1) input simply didn't match the expectation
|
||||
// 2) the end of the input was reached
|
||||
// 3) there was an invalid UTF8 character on the input.
|
||||
//
|
||||
// The parser implementation can provide some feedback for this error by
|
||||
// calling ParseAPI.Expects() to set the expectation. When set, the
|
||||
// expectation is included in the error message.
|
||||
func (p *ParseAPI) UnexpectedInput() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
r, err := p.tokenAPI.NextRune()
|
||||
switch {
|
||||
case err == nil:
|
||||
p.Error("unexpected character %q%s", r, fmtExpects(p))
|
||||
case err == io.EOF:
|
||||
p.Error("unexpected end of file%s", fmtExpects(p))
|
||||
default:
|
||||
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
|
||||
}
|
||||
}
|
||||
|
||||
func fmtExpects(p *ParseAPI) string {
|
||||
if p.expecting == "" {
|
||||
return ""
|
||||
}
|
||||
return fmt.Sprintf(" (expected %s)", p.expecting)
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.On(nil)
|
||||
})
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { p.Execute("") },
|
||||
`ParseHandler bug: tokenHandler argument for On\(\) is nil`})
|
||||
}
|
||||
|
||||
func TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics(t *testing.T) {
|
||||
otherHandler := func(p *parsekit.ParseAPI) {
|
||||
panic("This is not the handler you're looking for")
|
||||
}
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Stop()
|
||||
p.Handle(otherHandler)
|
||||
})
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { p.Execute("") },
|
||||
`Illegal call to ParseAPI.Handle\(\) from .*ParsekitPanics.func.* at ` +
|
||||
`.*/parsehandler_test.go:\d+: no calls allowed after ParseAPI.Stop\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
|
||||
otherHandler := func(p *parsekit.ParseAPI) {
|
||||
panic("This is not the handler you're looking for")
|
||||
}
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Error("It ends here")
|
||||
p.Handle(otherHandler)
|
||||
})
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { p.Execute("") },
|
||||
`Illegal call to ParseAPI\.Handle\(\) from .*ParsekitPanics\.func2 at ` +
|
||||
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
|
||||
}
|
||||
|
||||
type parserWithLoop struct {
|
||||
loopCounter int
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.second)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.third)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
|
||||
if l.loopCounter++; l.loopCounter > 100 {
|
||||
p.Error("Loop not detected by parsekit")
|
||||
return
|
||||
}
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.first)
|
||||
}
|
||||
|
||||
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||
looper := &parserWithLoop{}
|
||||
parser := parsekit.NewParser(looper.first)
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("Het houdt niet op, niet vanzelf") },
|
||||
`Loop detected in parser in .*\(\*parserWithLoop\).second at .*/parsehandler_test\.go:\d+`})
|
||||
}
|
||||
|
||||
// This test incorporates an actual loop bug that I dropped on myself and
|
||||
// that I could not easily spot in my code. It sounded so logical:
|
||||
// I want to get chunks of 5 chars from the input, so I simply loop on:
|
||||
//
|
||||
// p.On(c.Max(5, a.AnyRune))
|
||||
//
|
||||
// The problem here is that Max(5, ...) will also match when there is
|
||||
// no more input, since Max(5, ---) is actually MinMax(0, 5, ...).
|
||||
// Therefore the loop will never stop. Solving the loop was simple:
|
||||
//
|
||||
// p.On(c.MinMax(1, 5, a.AnyRune))
|
||||
//
|
||||
// Now the loop stops when the parser finds no more matching input data.
|
||||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(c.Max(5, a.AnyRune)).Accept() {
|
||||
}
|
||||
p.Stop()
|
||||
})
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("This will end soon") },
|
||||
`Loop detected in parser in .*ParserPanics.* at .*/parsehandler_test.go:\d+`})
|
||||
}
|
||||
|
||||
func TestGivenNullHandler_HandlePanics(t *testing.T) {
|
||||
parser := parsekit.NewParser(nil)
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("") },
|
||||
`ParseAPI.Handle\(\) called with nil input from .*\(\*Parser\).Execute at .*/parsekit\.go:\d+`})
|
||||
}
|
78
parsekit.go
|
@ -1,78 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Parser is the top-level struct that holds the configuration for a parser.
|
||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||
type Parser struct {
|
||||
startHandler ParseHandler // the function that handles the very first state
|
||||
}
|
||||
|
||||
// NewParser instantiates a new Parser.
|
||||
//
|
||||
// The Parser is a state machine-style recursive descent parser, in which
|
||||
// ParseHandler functions are used to move the state machine forward during
|
||||
// parsing. This style of parser is typically used for parsing programming
|
||||
// languages and structured data formats (like json, xml, toml, etc.)
|
||||
//
|
||||
// To parse input data, use the method Parser.Execute().
|
||||
func NewParser(startHandler ParseHandler) *Parser {
|
||||
return &Parser{startHandler: startHandler}
|
||||
}
|
||||
|
||||
// Execute starts the parser for the provided input.
|
||||
// When an error occurs during parsing, then this error is returned. Nil otherwise.
|
||||
func (p *Parser) Execute(input string) *Error {
|
||||
api := &ParseAPI{
|
||||
tokenAPI: NewTokenAPI(strings.NewReader(input)),
|
||||
loopCheck: map[string]bool{},
|
||||
}
|
||||
api.Handle(p.startHandler)
|
||||
if !api.stopped && api.err == nil {
|
||||
api.UnexpectedInput()
|
||||
}
|
||||
return api.err
|
||||
}
|
||||
|
||||
// Matcher is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Matcher can be instantiated using the parsekit.NewMatcher()
|
||||
// method.
|
||||
// TODO Rename to Tokenizer
|
||||
type Matcher struct {
|
||||
parser *Parser
|
||||
result *Result
|
||||
}
|
||||
|
||||
// NewMatcher instantiates a new Matcher.
|
||||
//
|
||||
// This is a simple wrapper around a TokenHandler function. It can be used to
|
||||
// match an input string against that TokenHandler function and retrieve the
|
||||
// results in a straight forward way.
|
||||
//
|
||||
// The 'expects' parameter is used for creating an error message in case parsed
|
||||
// input does not match the TokenHandler.
|
||||
// TODO Rename to NewTokenizer, and make matcher Tokeninzer, also see if we can use a Reader straight away, no ParseAPI.
|
||||
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
|
||||
matcher := &Matcher{}
|
||||
matcher.parser = NewParser(func(p *ParseAPI) {
|
||||
if p.On(tokenHandler).Accept() {
|
||||
matcher.result = p.Result()
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expects(expects)
|
||||
p.UnexpectedInput()
|
||||
}
|
||||
})
|
||||
return matcher
|
||||
}
|
||||
|
||||
// Execute feeds the input to the wrapped TokenHandler function.
|
||||
// It returns the TokenHandler's results. When an error occurred during parsing,
|
||||
// the error will be set, nil otherwise.
|
||||
func (m *Matcher) Execute(input string) (*Result, *Error) {
|
||||
err := m.parser.Execute(input)
|
||||
return m.result, err
|
||||
}
|
101
parsekit_test.go
|
@ -1,101 +0,0 @@
|
|||
package parsekit_test
|
||||
|
||||
// This file only provides building blocks for writing tests.
|
||||
// No actual tests belong in this file.
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
// Easy access to the parsekit definitions.
|
||||
var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T
|
||||
|
||||
type TokenHandlerTest struct {
|
||||
Input string
|
||||
TokenHandler parsekit.TokenHandler
|
||||
MustMatch bool
|
||||
Expected string
|
||||
}
|
||||
|
||||
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
|
||||
for _, test := range testSet {
|
||||
RunTokenHandlerTest(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
|
||||
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if test.MustMatch {
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else if output := result.String(); output != test.Expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Errorf("Test %q failed: should not match, but it did", test.Input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type TokenMakerTest struct {
|
||||
Input string
|
||||
TokenHandler parsekit.TokenHandler
|
||||
Expected []parsekit.Token
|
||||
}
|
||||
|
||||
func RunTokenMakerTest(t *testing.T, test TokenMakerTest) {
|
||||
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else {
|
||||
if len(result.Tokens()) != len(test.Expected) {
|
||||
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
|
||||
}
|
||||
for i, expected := range test.Expected {
|
||||
actual := result.Token(i)
|
||||
if expected.Type != actual.Type {
|
||||
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
|
||||
}
|
||||
if string(expected.Runes) != string(actual.Runes) {
|
||||
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
|
||||
}
|
||||
if expected.Value != actual.Value {
|
||||
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) {
|
||||
for _, test := range testSet {
|
||||
RunTokenMakerTest(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
type PanicTest struct {
|
||||
function func()
|
||||
expected string
|
||||
}
|
||||
|
||||
func RunPanicTest(t *testing.T, p PanicTest) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if !regexp.MustCompile(p.expected).MatchString(r.(string)) {
|
||||
t.Errorf("Function did panic, but unexpected panic message received:\nexpected: %q\nactual: %q\n", p.expected, r)
|
||||
}
|
||||
} else {
|
||||
t.Errorf("Function did not panic (expected panic message: %s)", p.expected)
|
||||
}
|
||||
}()
|
||||
p.function()
|
||||
}
|
||||
|
||||
func RunPanicTests(t *testing.T, testSet []PanicTest) {
|
||||
for _, test := range testSet {
|
||||
RunPanicTest(t, test)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Parser is the top-level struct that holds the configuration for a parser.
|
||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||
type Parser struct {
|
||||
startHandler ParseHandler // the function that handles the very first state
|
||||
}
|
||||
|
||||
// ParseHandler defines the type of function that must be implemented to handle
|
||||
// a parsing state in a Parser state machine.
|
||||
//
|
||||
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
|
||||
// all the internal state for the parsing state machine and provides the
|
||||
// interface that the ParseHandler uses to interact with the parser.
|
||||
type ParseHandler func(*ParseAPI)
|
||||
|
||||
// NewParser instantiates a new Parser.
|
||||
//
|
||||
// The Parser is a state machine-style recursive descent parser, in which
|
||||
// ParseHandler functions are used to move the state machine forward during
|
||||
// parsing. This style of parser is typically used for parsing programming
|
||||
// languages and structured data formats (like json, xml, toml, etc.)
|
||||
//
|
||||
// To parse input data, use the method Parser.Execute().
|
||||
func NewParser(startHandler ParseHandler) *Parser {
|
||||
if startHandler == nil {
|
||||
_, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf("parsekit.NewParser(): NewParser() called with nil input at %s", filepos))
|
||||
}
|
||||
return &Parser{startHandler: startHandler}
|
||||
}
|
||||
|
||||
// Execute starts the parser for the provided input.
|
||||
// When an error occurs during parsing, then this error is returned. Nil otherwise.
|
||||
func (p *Parser) Execute(input string) *Error {
|
||||
api := &ParseAPI{
|
||||
tokenAPI: NewTokenAPI(strings.NewReader(input)),
|
||||
loopCheck: map[string]bool{},
|
||||
}
|
||||
if api.Handle(p.startHandler) {
|
||||
// Handle indicated that parsing could still continue, meaning that there
|
||||
// was no error and that the parsing has not actively been Stop()-ed.
|
||||
// However, at this point, the parsing really should have stopped.
|
||||
// We'll see what happens when we tell the parser that EOF was expected.
|
||||
// This might work if we're indeed at EOF. Otherwise, an error will be
|
||||
// generated.
|
||||
api.ExpectEndOfFile()
|
||||
}
|
||||
return api.err
|
||||
}
|
||||
|
||||
func getCaller(depth int) (string, string) {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
return caller.Name(), filepos
|
||||
}
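
// Illustrative sketch, not part of this commit: getCaller() is what produces
// the "at /path/to/file.go:123" positions in the panic messages above. A
// hypothetical guard built on top of it (mustNotBeNil is a made-up name,
// shown only for illustration) could look like:
//
//    func mustNotBeNil(value interface{}, what string) {
//        if value == nil {
//            _, filepos := getCaller(1) // position of the code that called mustNotBeNil()
//            panic(fmt.Sprintf("%s called with nil input at %s", what, filepos))
//        }
//    }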
|
|
@ -0,0 +1,327 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleParser_usingAcceptedRunes() {
|
||||
// Easy access to the parsekit definitions.
|
||||
a := parsekit.A
|
||||
|
||||
matches := []string{}
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(a.AnyRune).Accept() {
|
||||
matches = append(matches, p.Result().String())
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
})
|
||||
err := parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
|
||||
// Output:
|
||||
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
|
||||
}
|
||||
|
||||
func ExampleParser_usingTokens() {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, tok := parsekit.C, parsekit.A, parsekit.T
|
||||
|
||||
var tokens []*parsekit.Token
|
||||
var accepted string
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
|
||||
tokens = p.Result().Tokens()
|
||||
accepted = p.Result().String()
|
||||
}
|
||||
p.ExpectEndOfFile()
|
||||
})
|
||||
parser.Execute("¡Any will dö!")
|
||||
|
||||
fmt.Printf("Runes accepted: %q\n", accepted)
|
||||
fmt.Printf("Token values: ")
|
||||
for _, t := range tokens {
|
||||
fmt.Printf("%c ", t.Value)
|
||||
}
|
||||
// Output:
|
||||
// Runes accepted: "¡Any will dö!"
|
||||
// Token values: ¡ A n y w i l l d ö !
|
||||
}
|
||||
|
||||
func ExampleParseAPI_UnexpectedInput() {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Expects("a thing")
|
||||
p.UnexpectedInput()
|
||||
})
|
||||
err := parser.Execute("Whatever, this parser will never be happy...")
|
||||
fmt.Println(err.Full())
|
||||
|
||||
// Output:
|
||||
// unexpected character 'W' (expected a thing) at start of file
|
||||
}
|
||||
|
||||
func ExampleParseAPIOnAction_Accept() {
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
// When a case-insensitive match on "Yowza!" is found by the
|
||||
// tokenizer, then Accept() will make the result available
|
||||
// through ParseAPI.Result()
|
||||
if p.On(parsekit.A.StrNoCase("Yowza!")).Accept() {
|
||||
// Result.String() returns a string containing all
|
||||
// accepted runes that were matched against.
|
||||
fmt.Println(p.Result().String())
|
||||
}
|
||||
})
|
||||
parser.Execute("YOWZA!")
|
||||
|
||||
// Output:
|
||||
// YOWZA!
|
||||
}
|
||||
|
||||
func ExampleParseAPIOnAction_Skip() {
|
||||
var result string
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for loop := true; loop; {
|
||||
switch {
|
||||
case p.On(parsekit.A.Rune('X')).Skip():
|
||||
// NOOP, skip this rune
|
||||
case p.On(parsekit.A.AnyRune).Accept():
|
||||
result += p.Result().String()
|
||||
default:
|
||||
loop = false
|
||||
}
|
||||
}
|
||||
})
|
||||
parser.Execute("HXeXllXoXX, XXwoXrlXXXd!")
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// Hello, world!
|
||||
}
|
||||
|
||||
func ExampleParseAPI_Stop() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("First word: ")
|
||||
for p.On(C.Not(A.Space)).Accept() {
|
||||
fmt.Printf("%s", p.Result())
|
||||
}
|
||||
p.Stop()
|
||||
})
|
||||
parser.Execute("Input with spaces")
|
||||
|
||||
// Output:
|
||||
// First word: Input
|
||||
}
|
||||
|
||||
func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("Word: ")
|
||||
for p.On(C.Not(A.Space)).Accept() {
|
||||
fmt.Printf("%s", p.Result())
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
})
|
||||
err := parser.Execute("Troglodyte")
|
||||
fmt.Printf("Error is nil: %t\n", err == nil)
|
||||
|
||||
// Output:
|
||||
// Word: Troglodyte
|
||||
// Error is nil: true
|
||||
}
|
||||
|
||||
func ExampleParseAPI_Stop_notCalledButInputPending() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("First word: ")
|
||||
for p.On(C.Not(A.Space)).Accept() {
|
||||
fmt.Printf("%s", p.Result())
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
})
|
||||
err := parser.Execute("Input with spaces")
|
||||
fmt.Printf("Error: %s\n", err.Full())
|
||||
|
||||
// Output:
|
||||
// First word: Input
|
||||
// Error: unexpected character ' ' (expected end of file) at line 1, column 6
|
||||
}
|
||||
|
||||
func ExampleParseAPIOnAction_Stay() {
|
||||
// Definition of a fantasy serial number format.
|
||||
C, A := parsekit.C, parsekit.A
|
||||
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
|
||||
|
||||
// This handler is able to handle serial numbers.
|
||||
serialnrHandler := func(p *parsekit.ParseAPI) {
|
||||
if p.On(serialnr).Accept() {
|
||||
fmt.Println(p.Result().String())
|
||||
}
|
||||
}
|
||||
|
||||
// Start could function as a sort of dispatcher, handing over
|
||||
// control to the correct ParseHandler function, based on the input.
|
||||
start := func(p *parsekit.ParseAPI) {
|
||||
if p.On(parsekit.A.Asterisk).Stay() {
|
||||
p.Handle(serialnrHandler)
|
||||
return
|
||||
}
|
||||
// ... other cases could go here ...
|
||||
}
|
||||
|
||||
parser := parsekit.NewParser(start)
|
||||
parser.Execute("#XX1234")
|
||||
parser.Execute("*ay432566")
|
||||
parser.Execute("*ZD987112")
|
||||
|
||||
// Output:
|
||||
// *ZD987112
|
||||
}
|
||||
|
||||
func TestGivenNullHandler_NewParserPanics(t *testing.T) {
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parsekit.NewParser(nil) },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.NewParser\(\): NewParser\(\) called ` +
|
||||
`with nil input at /.*/parser_test\.go:\d+`})
|
||||
}
|
||||
|
||||
func TestGivenNullHandler_HandlePanics(t *testing.T) {
|
||||
brokenParseHandler := func(p *parsekit.ParseAPI) {
|
||||
p.Handle(nil)
|
||||
}
|
||||
parser := parsekit.NewParser(brokenParseHandler)
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI\.Handle\(\): Handle\(\) called with nil input ` +
|
||||
`at /.*/parser_test\.go:\d+`})
|
||||
}
|
||||
func TestGivenNilTokenHandler_OnPanics(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.On(nil)
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { p.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI\.On\(\): On\(\) called with nil ` +
|
||||
`tokenHandler argument at /.*/parser_test\.go:\d+`})
|
||||
}
|
||||
|
||||
func TestGivenStoppedParser_HandlePanics(t *testing.T) {
|
||||
otherHandler := func(p *parsekit.ParseAPI) {
|
||||
panic("This is not the handler you're looking for")
|
||||
}
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Stop()
|
||||
p.Handle(otherHandler)
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { p.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
|
||||
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Stop\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
|
||||
otherHandler := func(p *parsekit.ParseAPI) {
|
||||
panic("This is not the handler you're looking for")
|
||||
}
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Error("It ends here")
|
||||
p.Handle(otherHandler)
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { p.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
|
||||
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenParserWithoutCallToAccept_ResultPanics(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
p.Result()
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { p.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI\.TokenResult\(\): TokenResult\(\) called at ` +
|
||||
`/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`})
|
||||
}
|
||||
|
||||
func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
|
||||
err := p.Execute("")
|
||||
parsekit.AssertTrue(t, err == nil, "err")
|
||||
}
|
||||
|
||||
func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) {
|
||||
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
|
||||
err := p.Execute("x")
|
||||
parsekit.AssertEqual(t, "unexpected character 'x' (expected end of file) at start of file", err.Full(), "err")
|
||||
}
|
||||
|
||||
type parserWithLoop struct {
|
||||
loopCounter int
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.second)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.third)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
|
||||
if l.loopCounter++; l.loopCounter > 100 {
|
||||
p.Error("Loop not detected by parsekit")
|
||||
return
|
||||
}
|
||||
p.On(parsekit.A.ASCII).Accept()
|
||||
p.Handle(l.first)
|
||||
}
|
||||
|
||||
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
||||
looper := &parserWithLoop{}
|
||||
parser := parsekit.NewParser(looper.first)
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("Het houdt niet op, niet vanzelf") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI: Loop detected in parser at /.*/parser_test.go:\d+`})
|
||||
}
|
||||
|
||||
// This test incorporates an actual loop bug that I dropped on myself and
|
||||
// that I could not easily spot in my code. It sounded so logical:
|
||||
// I want to get chunks of 5 chars from the input, so I simply loop on:
|
||||
//
|
||||
// p.On(c.Max(5, a.AnyRune))
|
||||
//
|
||||
// The problem here is that Max(5, ...) will also match when there is
|
||||
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
|
||||
// Therefore the loop will never stop. Solving the loop was simple:
|
||||
//
|
||||
// p.On(c.MinMax(1, 5, a.AnyRune))
|
||||
//
|
||||
// Now the loop stops when the parser finds no more matching input data.
|
||||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.On(c.Max(5, a.AnyRune)).Accept() {
|
||||
}
|
||||
p.Stop()
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("This will end soon") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.ParseAPI: Loop detected in parser at .*/parser_test.go:\d+`})
|
||||
}
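
// Illustrative sketch, not part of this commit: the non-looping variant of
// the parser definition above. Using MinMax(1, 5, ...) instead of Max(5, ...)
// means the TokenHandler no longer matches on empty input, so the loop ends
// once the input runs out and no loop-detection panic is triggered.
//
//    parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
//        for p.On(parsekit.C.MinMax(1, 5, parsekit.A.AnyRune)).Accept() {
//        }
//        p.Stop()
//    })
//    err := parser.Execute("This will end soon") // err == nil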
|
46
reader.go
|
@ -7,29 +7,31 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Reader wraps around an io.Reader and provides buffering to allows us to read
|
||||
// reader wraps around an io.Reader and provides buffering that allows us to read
|
||||
// the same runes over and over again. This is useful for implementing a parser
|
||||
// that must be able to do lookahead on the input, returning to the original
|
||||
// input position after finishing that lookahead.
|
||||
//
|
||||
// To minimize memory use, it is also possible to flush the buffer when there is
|
||||
// no more need to go back to previously read runes.
|
||||
type Reader struct {
|
||||
//
|
||||
// The reader is used internally by parsekit.TokenAPI.
|
||||
type reader struct {
|
||||
bufio *bufio.Reader // Used for ReadRune()
|
||||
buffer []rune // Input buffer, holding runes that were read from input
|
||||
bufferOffset int // The offset of the buffer, relative to the start of the input
|
||||
bufferLen int // Input size, the number of runes in the buffer
|
||||
}
|
||||
|
||||
// NewReader initializes a new Reader struct, wrapped around the provided io.Reader.
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{
|
||||
// newReader initializes a new reader struct, wrapped around the provided io.Reader.
|
||||
func newReader(r io.Reader) *reader {
|
||||
return &reader{
|
||||
bufio: bufio.NewReader(r),
|
||||
buffer: []rune{},
|
||||
}
|
||||
}
|
||||
|
||||
// RuneAt reads the rune at the provided rune offset.
|
||||
// runeAt reads the rune at the provided rune offset.
|
||||
//
|
||||
// This offset is relative to the current starting position of the buffer in
|
||||
// the reader. When starting reading, offset 0 will point at the start of the
|
||||
|
@ -43,7 +45,7 @@ func NewReader(r io.Reader) *Reader {
|
|||
// When reading failed, the rune will be utf8.RuneError. One special read
|
||||
// fail is actually a normal situation: end of file reached. In that case,
|
||||
// the returned error will be io.EOF.
|
||||
func (r *Reader) RuneAt(offset int) (rune, error) {
|
||||
func (r *reader) runeAt(offset int) (rune, error) {
|
||||
// Rune at provided offset is not yet available in the input buffer.
|
||||
// Read runes until we have enough runes to satisfy the offset.
|
||||
for r.bufferLen <= offset {
|
||||
|
@ -66,37 +68,11 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
|
|||
return r.buffer[offset], nil
|
||||
}
|
||||
|
||||
// RunesAt reads a slice of runes of length 'len', starting from offset 'offset'.
|
||||
//
|
||||
// This offset is relative to the current starting position of the buffer in
|
||||
// the reader. When starting reading, offset 0 will point at the start of the
|
||||
// input. After flushing, offset 0 will point at the input up to where
|
||||
// the flush was done.
|
||||
//
|
||||
// When an error is encountered during reading (EOF or other error), then the
|
||||
// error return value will be set. In case of an error, any runes that could be
|
||||
// successfully read are returned along with the error.
|
||||
// TODO Do I actually use this interface?
|
||||
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
|
||||
if len == 0 {
|
||||
return r.buffer[0:0], nil
|
||||
}
|
||||
end := start + len
|
||||
_, err := r.RuneAt(end)
|
||||
if err != nil {
|
||||
if end > r.bufferLen {
|
||||
end = r.bufferLen
|
||||
}
|
||||
return r.buffer[start:end], err
|
||||
}
|
||||
return r.buffer[start:end], nil
|
||||
}
|
||||
|
||||
// Flush deletes the provided number of runes from the start of the
|
||||
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
|
||||
// reader buffer. After flushing the buffer, offset 0 as used by runeAt()
|
||||
// will point to the rune that comes after the flushed runes.
|
||||
// So what this basically does is turn the Reader into a sliding window.
|
||||
func (r *Reader) Flush(numberOfRunes int) {
|
||||
func (r *reader) flush(numberOfRunes int) {
|
||||
if numberOfRunes > r.bufferLen {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
||||
|
|
116
reader_test.go
|
@ -5,14 +5,12 @@ import (
|
|||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func ExampleNewReader() {
|
||||
func ExamplenewReader() {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
|
@ -21,114 +19,76 @@ func ExampleNewReader() {
|
|||
// H!
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt() {
|
||||
func TestReader_runeAt(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
fmt.Printf("%c", at(0))
|
||||
fmt.Printf("%c", at(12))
|
||||
fmt.Printf("%c", at(7))
|
||||
fmt.Printf("%c", at(0))
|
||||
|
||||
// Output:
|
||||
// H!wH
|
||||
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
|
||||
AssertEqual(t, "H!wH", result, "result")
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt_endOfFile() {
|
||||
func TestReader_runeAt_endOfFile(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
r := newReader(in)
|
||||
|
||||
rn, err := r.RuneAt(13)
|
||||
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||
rn, err := r.runeAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
||||
|
||||
rn, err = r.RuneAt(20)
|
||||
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
|
||||
|
||||
// Output:
|
||||
// '�' EOF true
|
||||
// '�' EOF true
|
||||
rn, err = r.runeAt(20)
|
||||
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
AssertEqual(t, "'<27>' EOF true", result, "result")
|
||||
}
|
||||
|
||||
func ExampleReader_RuneAt_invalidRune() {
|
||||
func TestReader_runeAt_invalidRune(t *testing.T) {
|
||||
in := strings.NewReader("Hello, \xcdworld!")
|
||||
r := NewReader(in)
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
rn, err := r.RuneAt(6)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(7)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(8)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
rn, err = r.RuneAt(9)
|
||||
fmt.Printf("%q %t\n", rn, err == nil)
|
||||
|
||||
// Output:
|
||||
// ' ' true
|
||||
// '�' true
|
||||
// 'w' true
|
||||
// 'o' true
|
||||
}
|
||||
|
||||
func ExampleReader_RunesAt() {
|
||||
in := strings.NewReader("Hello, \xcdworld!")
|
||||
r := NewReader(in)
|
||||
|
||||
rs, err := r.RunesAt(4, 6)
|
||||
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||
rs, err = r.RunesAt(4, 0)
|
||||
fmt.Printf("%q %t\n", string(rs), err == nil)
|
||||
rs, err = r.RunesAt(8, 100)
|
||||
fmt.Printf("%q %t\n", string(rs), err == io.EOF)
|
||||
|
||||
// Output:
|
||||
// "o, <20>wo" true
|
||||
// "" true
|
||||
// "world!" true
|
||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||
AssertEqual(t, " <20>wo", result, "result")
|
||||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
in := strings.NewReader("\uFEFFBommetje!")
|
||||
r := NewReader(in)
|
||||
b, _ := r.RuneAt(0)
|
||||
o, _ := r.RuneAt(1)
|
||||
m, _ := r.RuneAt(2)
|
||||
r := newReader(in)
|
||||
b, _ := r.runeAt(0)
|
||||
o, _ := r.runeAt(1)
|
||||
m, _ := r.runeAt(2)
|
||||
bom := fmt.Sprintf("%c%c%c", b, o, m)
|
||||
assert.Equal(t, "Bom", bom, "first three runes")
|
||||
AssertEqual(t, "Bom", bom, "first three runes")
|
||||
}
|
||||
|
||||
func ExampleReader_Flush() {
|
||||
func TestReader_Flush(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r }
|
||||
r := newReader(in)
|
||||
at := func(i int) rune { r, _ := r.runeAt(i); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
fmt.Printf("%c\n", at(7))
|
||||
result := fmt.Sprintf("%c", at(7))
|
||||
AssertEqual(t, "w", result, "first read")
|
||||
|
||||
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
|
||||
r.Flush(4)
|
||||
r.flush(4)
|
||||
|
||||
// Rune 0 is now pointing at what originally was rune offset 4.
|
||||
// We can continue reading from there.
|
||||
fmt.Printf("%s", string(rb(0, 8)))
|
||||
|
||||
// Output:
|
||||
// w
|
||||
// o, world
|
||||
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
|
||||
AssertEqual(t, "o, wor", result, "second read")
|
||||
}
|
||||
|
||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
in := strings.NewReader("Hello, world!")
|
||||
r := NewReader(in)
|
||||
r := newReader(in)
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.RuneAt(10)
|
||||
r.runeAt(10)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() { r.Flush(12) },
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() { r.flush(12) },
|
||||
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
|
||||
})
|
||||
}
|
||||
|
|
130
tokenapi.go
|
@ -5,7 +5,7 @@ import (
|
|||
"io"
|
||||
)
|
||||
|
||||
// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and
|
||||
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and
|
||||
// to report back results. For easy lookahead support, a forking strategy is
|
||||
// provided.
|
||||
//
|
||||
|
@ -14,54 +14,64 @@ import (
|
|||
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
|
||||
//
|
||||
// When the rune is to be accepted as input, call the method Accept(). The rune
|
||||
// is then added to the result buffer of the TokenAPI struct.
|
||||
// is then added to the results of the TokenAPI and the read cursor is moved
|
||||
// forward. Runes collected this way can later be retrieved using, for
|
||||
// example, the method Result().Runes().
|
||||
//
|
||||
// It is mandatory to call Accept() after retrieving a rune, before calling
|
||||
// NextRune() again. Failing to do so will result in a panic.
|
||||
//
|
||||
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
|
||||
// By invoking NextRune() + Accept() multiple times, the result can be extended
|
||||
// with as many runes as needed.
|
||||
//
|
||||
// Next to adding runes to the output, it is also possible to modify the
|
||||
// already collected runes or to produce lexical Tokens. For all things
|
||||
// concerning results, take a look at the Result struct, which can be
|
||||
// accessed though the method Result().
|
||||
//
|
||||
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
|
||||
//
|
||||
// Sometimes, we must be able to perform a lookahead, which might either
|
||||
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
|
||||
// brought back to the original state, so we can try a different route.
|
||||
// succeed or fail. In case of a failing lookahead, the state of the
|
||||
// TokenAPI must be brought back to the original state, so we can try
|
||||
// a different route.
|
||||
//
|
||||
// The way in which this is supported, is by forking a TokenAPI struct by calling
|
||||
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
|
||||
// but using the same input cursor position as the forked parent.
|
||||
// The way in which this is supported, is by forking a TokenAPI struct by
|
||||
// calling method Fork(). This will return a forked child TokenAPI, with
|
||||
// an empty result buffer, but using the same read cursor position as the
|
||||
// forked parent.
|
||||
//
|
||||
// After forking, the same interface as described for BASIC OPERATION can be
|
||||
// used to fill the result buffer. When the lookahead was successful, then
|
||||
// Merge() can be called on the forked child to append the child's result
|
||||
// buffer to the parent's result buffer, and to move the input cursor position
|
||||
// buffer to the parent's result buffer, and to move the read cursor position
|
||||
// to that of the child.
|
||||
//
|
||||
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
|
||||
// be discarded. The parent TokenAPI was never modified, so it can safely be used
|
||||
// as if the lookahead never happened.
|
||||
// When the lookahead was unsuccessful, then the forked child TokenAPI can
|
||||
// simply be discarded. The parent TokenAPI was never modified, so it can
|
||||
// safely be used as if the lookahead never happened.
|
||||
//
|
||||
// Note:
|
||||
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||
// peeks and by moving the input cursor position back and forth, or by putting
|
||||
// peeks and by moving the read cursor position back and forth, or by putting
|
||||
// read input back on the input stream. That often leads to code that is
|
||||
// efficient, however, in my opinion, not very untuitive to read.
|
||||
// efficient, however, in my opinion, not very intuitive to read.
|
||||
type TokenAPI struct {
|
||||
reader *Reader
|
||||
cursor *Cursor // current read cursor position, rel. to the input start
|
||||
offset int // current rune offset rel. to the Reader's sliding window
|
||||
result *Result // results as produced by a TokenHandler (runes, Tokens)
|
||||
root *TokenAPI // the root TokenAPI
|
||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||
reader *reader
|
||||
cursor *Cursor // current read cursor position, rel. to the input start
|
||||
offset int // current rune offset rel. to the Reader's sliding window
|
||||
result *TokenResult // results as produced by a TokenHandler (runes, Tokens)
|
||||
root *TokenAPI // the root TokenAPI
|
||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||
}
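
// Illustrative sketch, not part of this commit: the basic NextRune() +
// Accept() loop described above, reading runes until EOF and collecting them
// in the result (this assumes TokenResult keeps the String() accessor of the
// old Result type).
//
//    t := NewTokenAPI(strings.NewReader("abc"))
//    for {
//        if _, err := t.NextRune(); err != nil {
//            break // e.g. io.EOF
//        }
//        t.Accept() // add the rune to the result and move the read cursor forward
//    }
//    collected := t.Result().String() // "abc"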
|
||||
|
||||
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||
func NewTokenAPI(r io.Reader) *TokenAPI {
|
||||
input := &TokenAPI{
|
||||
reader: NewReader(r),
|
||||
reader: newReader(r),
|
||||
cursor: &Cursor{},
|
||||
result: NewResult(),
|
||||
result: newTokenResult(),
|
||||
}
|
||||
input.root = input
|
||||
return input
|
||||
|
@ -78,13 +88,14 @@ func NewTokenAPI(r io.Reader) *TokenAPI {
|
|||
// without explicitly accepting, this method will panic.
|
||||
func (i *TokenAPI) NextRune() (rune, error) {
|
||||
if i.result.lastRune != nil {
|
||||
caller, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+
|
||||
"to Accept() from %s at %s", caller, linepos))
|
||||
_, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+
|
||||
"prior call to Accept()", linepos))
|
||||
}
|
||||
i.detachChilds()
|
||||
|
||||
readRune, err := i.reader.RuneAt(i.offset)
|
||||
readRune, err := i.reader.runeAt(i.offset)
|
||||
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||
return readRune, err
|
||||
}
|
||||
|
@ -96,24 +107,38 @@ func (i *TokenAPI) NextRune() (rune, error) {
|
|||
// returned an error. Calling Accept() in such case will result in a panic.
|
||||
func (i *TokenAPI) Accept() {
|
||||
if i.result.lastRune == nil {
|
||||
caller, linepos := getCaller(1)
|
||||
_, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.Accept(): Accept() called without first "+
|
||||
"calling NextRune() from %s at %s", caller, linepos))
|
||||
"parsekit.TokenAPI.Accept(): Accept() called at %s without "+
|
||||
"first calling NextRune()", linepos))
|
||||
} else if i.result.lastRune.err != nil {
|
||||
caller, linepos := getCaller(1)
|
||||
_, linepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.Accept(): Accept() called while the previous "+
|
||||
"call to NextRune() failed from %s at %s", caller, linepos))
|
||||
"parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+
|
||||
"prior call to NextRune() failed", linepos))
|
||||
}
|
||||
i.result.runes = append(i.result.runes, i.result.lastRune.r)
|
||||
i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
|
||||
i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
|
||||
i.offset++
|
||||
i.result.lastRune = nil
|
||||
}
|
||||
|
||||
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
|
||||
// read cursor position, but for the rest this is a fresh TokenAPI.
|
||||
//
|
||||
// By forking a TokenAPI, you can freely work with the forked child, without
|
||||
// affecting the parent TokenAPI. This is for example useful when you must perform
|
||||
// some form of lookahead.
|
||||
//
|
||||
// When such lookahead turned out successful and you want to accept the results
|
||||
// into the parent TokenAPI, you can call TokenAPI.Merge() on the forked
|
||||
// child. This will add the runes in the result buffer to the result buffer of
|
||||
// the parent. It also updates the read cursor position of the parent to that
|
||||
// of the child.
|
||||
//
|
||||
// When the lookahead failed, or you don't need the results as produced by that
|
||||
// lookahead, the forked child can simply be discarded. You can continue to work
|
||||
// with the parent TokenAPI as if nothing ever happened.
|
||||
func (i *TokenAPI) Fork() *TokenAPI {
|
||||
i.detachChilds()
|
||||
|
||||
|
@ -125,18 +150,27 @@ func (i *TokenAPI) Fork() *TokenAPI {
|
|||
root: i.root,
|
||||
parent: i,
|
||||
}
|
||||
child.result = NewResult()
|
||||
child.result = newTokenResult()
|
||||
*child.cursor = *i.cursor
|
||||
i.child = child
|
||||
i.result.lastRune = nil
|
||||
return child
|
||||
}
|
||||
|
||||
// Merge adds the data of the forked child TokenAPI that Merge() is called on to the
|
||||
// data of its parent (results and read cursor position).
|
||||
// Merge appends the Result of a forked child TokenAPI to the Result of its
|
||||
// parent. The read cursor position of the parent is also updated to that of
|
||||
// the forked child.
|
||||
//
|
||||
// After the merge operation, the child is reset so it can immediately be
|
||||
// reused for performing another match. This means that all Result data are
|
||||
// cleared, but the read cursor position is kept at its current position.
|
||||
// This allows a child to feed results in chunks to its parent.
|
||||
func (i *TokenAPI) Merge() {
|
||||
if i.parent == nil {
|
||||
panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI")
|
||||
_, filepos := getCaller(1)
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.TokenAPI.Merge(): Merge() called at %s "+
|
||||
"on a non-forked TokenAPI", filepos))
|
||||
}
|
||||
|
||||
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
|
||||
|
@ -145,12 +179,12 @@ func (i *TokenAPI) Merge() {
|
|||
i.parent.cursor = i.cursor
|
||||
|
||||
i.detachChilds()
|
||||
i.result = NewResult()
|
||||
i.result = newTokenResult()
|
||||
}
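
// Illustrative sketch, not part of this commit: because Merge() resets the
// forked child but keeps its read cursor position, a TokenHandler can feed
// results to its parent in chunks (chunkwise is a made-up name, shown only
// for illustration):
//
//    chunkwise := func(t *TokenAPI) bool {
//        child := t.Fork()
//        for {
//            if _, err := child.NextRune(); err != nil {
//                break
//            }
//            child.Accept()
//            child.Merge() // feed this rune to the parent, then keep reading
//        }
//        return true
//    }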
|
||||
|
||||
// Result returns the result data for the TokenAPI. The returned struct
|
||||
// can be used to retrieve and modify the result data.
|
||||
func (i *TokenAPI) Result() *Result {
|
||||
// Result returns the TokenResult data for the TokenAPI. The returned struct
|
||||
// can be used to retrieve and to modify result data.
|
||||
func (i *TokenAPI) Result() *TokenResult {
|
||||
return i.result
|
||||
}
|
||||
|
||||
|
@ -160,18 +194,6 @@ func (i *TokenAPI) Cursor() Cursor {
|
|||
return *i.cursor
|
||||
}
|
||||
|
||||
// FlushReaderBuffer delegates to the Flush() method of the contained
|
||||
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
|
||||
// reader cache.
|
||||
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
|
||||
if i != i.root {
|
||||
panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
|
||||
}
|
||||
i.detachChilds()
|
||||
i.reader.Flush(numberOfRunes)
|
||||
i.offset = 0
|
||||
}
|
||||
|
||||
func (i *TokenAPI) detachChilds() {
|
||||
if i.child != nil {
|
||||
i.child.detachChildsRecurse()
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func ExampleTokenAPI_Fork() {
|
||||
// This custom TokenHandler checks for a sequence of runes: "abcd"
|
||||
// This is done in 4 steps and only after finishing all steps,
|
||||
// the TokenHandler will confirm a successful match.
|
||||
abcdSequence := func(t *parsekit.TokenAPI) bool {
|
||||
child := t.Fork() // fork, so we won't change parent t
|
||||
for _, checkRune := range "abcd" {
|
||||
readRune, err := child.NextRune()
|
||||
if err != nil || readRune != checkRune {
|
||||
return false // report mismatch, parent t is left untouched
|
||||
}
|
||||
child.Accept() // add rune to child output
|
||||
}
|
||||
child.Merge() // we have a match, add resulting output to parent
|
||||
return true // and report the successful match
|
||||
}
|
||||
|
||||
// Note: a custom TokenHandler is normally not what you need.
|
||||
// You can make use of the parser/combinator tooling to do things
|
||||
// a lot simpler. The handler from above can be replaced with:
|
||||
simpler := parsekit.A.Str("abcd")
|
||||
|
||||
result, err := parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcdefgh")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(simpler, "abcd").Execute("abcdefgh")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcx")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("xyz")
|
||||
fmt.Println(result, err)
|
||||
|
||||
// Output:
|
||||
// abcd <nil>
|
||||
// abcd <nil>
|
||||
// <nil> unexpected character 'a' (expected abcd)
|
||||
// <nil> unexpected character 'x' (expected abcd)
|
||||
}
|
||||
|
||||
func ExampleTokenAPI_Merge() {
|
||||
tokenHandler := func(t *parsekit.TokenAPI) bool {
|
||||
child1 := t.Fork()
|
||||
child1.NextRune() // reads 'H'
|
||||
child1.Accept()
|
||||
child1.NextRune() // reads 'i'
|
||||
child1.Accept()
|
||||
|
||||
child2 := child1.Fork()
|
||||
child2.NextRune() // reads ' '
|
||||
child2.Accept()
|
||||
child2.NextRune() // reads 'd'
|
||||
child2.Accept()
|
||||
|
||||
child1.Merge() // We merge child1, which has read 'H' and 'i' only.
|
||||
return true
|
||||
}
|
||||
|
||||
result, _ := parsekit.NewTokenizer(tokenHandler, "a match").Execute("Hi mister X!")
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// Hi
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Result holds results as produced by a TokenHandler.
|
||||
type Result struct {
|
||||
lastRune *runeInfo // Information about the last rune read using NextRune()
|
||||
runes []rune
|
||||
tokens []*Token
|
||||
}
|
||||
|
||||
type runeInfo struct {
|
||||
r rune
|
||||
err error
|
||||
}
|
||||
|
||||
// Token defines a lexical token as produced by TokenHandlers.
|
||||
type Token struct {
|
||||
Type interface{} // token type, can be any type that a parser author sees fit
|
||||
Runes []rune // the runes that make up the token
|
||||
Value interface{} // an optional value of any type
|
||||
}
|
||||
|
||||
// NewResult initializes an empty result struct.
|
||||
func NewResult() *Result {
|
||||
return &Result{
|
||||
runes: []rune{},
|
||||
tokens: []*Token{},
|
||||
}
|
||||
}
|
||||
|
||||
// ClearRunes clears the runes in the Result.
|
||||
func (r *Result) ClearRunes() {
|
||||
r.runes = []rune{}
|
||||
}
|
||||
|
||||
// SetRunes replaces the Runes from the Result with the provided input.
|
||||
func (r *Result) SetRunes(s interface{}) {
|
||||
r.ClearRunes()
|
||||
r.AddRunes(s)
|
||||
}
|
||||
|
||||
// AddRunes is used to add runes to the Result.
|
||||
func (r *Result) AddRunes(s interface{}) {
|
||||
switch s := s.(type) {
|
||||
case string:
|
||||
r.runes = append(r.runes, []rune(s)...)
|
||||
case []rune:
|
||||
r.runes = append(r.runes, s...)
|
||||
case rune:
|
||||
r.runes = append(r.runes, s)
|
||||
default:
|
||||
panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", s))
|
||||
}
|
||||
}
|
||||
|
||||
// Runes retrieves the Runes from the Result.
|
||||
func (r *Result) Runes() []rune {
|
||||
return r.runes
|
||||
}
|
||||
|
||||
// Rune retrieve a single rune from the Result at the specified index.
|
||||
func (r *Result) Rune(idx int) rune {
|
||||
return r.runes[idx]
|
||||
}
|
||||
|
||||
// String returns the Runes from the Result as a string.
|
||||
func (r *Result) String() string {
|
||||
return string(r.runes)
|
||||
}
|
||||
|
||||
// ClearTokens clears the tokens in the Result.
|
||||
func (r *Result) ClearTokens() {
|
||||
r.tokens = []*Token{}
|
||||
}
|
||||
|
||||
// AddToken is used to add a Token to the results.
|
||||
func (r *Result) AddToken(t *Token) {
|
||||
r.tokens = append(r.tokens, t)
|
||||
}
|
||||
|
||||
// Tokens retrieves the Tokens from the Result.
|
||||
func (r *Result) Tokens() []*Token {
|
||||
return r.tokens
|
||||
}
|
||||
|
||||
// Token retrieves a single Token from the Result at the specified index.
|
||||
func (r *Result) Token(idx int) *Token {
|
||||
return r.tokens[idx]
|
||||
}
|
||||
|
||||
// Values retrieves a slice containing only the Values for the Result Tokens.
|
||||
func (r *Result) Values() []interface{} {
|
||||
values := make([]interface{}, len(r.tokens))
|
||||
for i, tok := range r.tokens {
|
||||
values[i] = tok.Value
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
// Value retrieves a single Value from the Result Token at the specified index.
|
||||
func (r *Result) Value(idx int) interface{} {
|
||||
return r.tokens[idx].Value
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes("string")
|
||||
assert.Equal(t, "string", string(i.Result().String()), "i.Result() with string input")
|
||||
i.Result().SetRunes([]rune("rune slice"))
|
||||
assert.Equal(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
|
||||
i.Result().SetRunes('X')
|
||||
assert.Equal(t, "X", string(i.Result().String()), "i.Result() with rune input")
|
||||
}
|
||||
|
||||
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes(1234567)
|
||||
},
|
||||
Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used",
|
||||
})
|
||||
}
|
288
tokenapi_test.go
|
@ -1,288 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
r, _ := mkInput().NextRune()
|
||||
assert.Equal(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||
`a prior call to Accept\(\) from .*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: mkInput().Accept,
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.NextRune()
|
||||
f.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.Fork()
|
||||
f.Merge()
|
||||
},
|
||||
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
|
||||
})
|
||||
}
|
||||
|
||||
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||
i := mkInput()
|
||||
f1 := i.Fork()
|
||||
f2 := f1.Fork()
|
||||
f3 := f2.Fork()
|
||||
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||
f5 := f4.Fork()
|
||||
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||
assert.Equal(t, true, i.child == f1, "i.child == f1")
|
||||
assert.Equal(t, true, f1.parent == i, "f1.parent == i")
|
||||
assert.Equal(t, true, f1.child == f4, "f1.child == f4")
|
||||
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
assert.Equal(t, true, f4.parent == f1, "f4.parent == f1")
|
||||
assert.Equal(t, true, f4.child == f5, "f4.child == f5")
|
||||
assert.Equal(t, true, f5.parent == f4, "f5.parent == f4")
|
||||
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||
|
||||
i.NextRune()
|
||||
|
||||
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
|
||||
assert.Equal(t, true, i.child == nil, "i.child == nil")
|
||||
assert.Equal(t, true, f1.parent == nil, "f1.parent == nil")
|
||||
assert.Equal(t, true, f1.child == nil, "f1.child == nil")
|
||||
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
|
||||
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
|
||||
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
assert.Equal(t, true, f4.parent == nil, "f4.parent == nil")
|
||||
assert.Equal(t, true, f4.child == nil, "f4.child == nil")
|
||||
assert.Equal(t, true, f5.parent == nil, "f5.parent == nil")
|
||||
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
|
||||
`first calling NextRune\(\) from .* at /.*:\d+`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := mkInput()
|
||||
r, _ := i.NextRune()
|
||||
assert.Equal(t, 'T', r, "result from 1st call to NextRune()")
|
||||
// TODO still (*runeInfo) case needed?
|
||||
assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()")
|
||||
i.Accept()
|
||||
assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()")
|
||||
assert.Equal(t, 1, i.offset, "Input.offset")
|
||||
assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
|
||||
r, _ = i.NextRune()
|
||||
assert.Equal(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||
i := mkInput()
|
||||
for j := 0; j < 7; j++ {
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
||||
assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4")
|
||||
assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()")
|
||||
}
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
assert.Equal(t, "T", i.Result().String(), "accepted rune in input")
|
||||
// Fork
|
||||
f := i.Fork()
|
||||
assert.Equal(t, f, i.child, "Input.child (must be f)")
|
||||
assert.Equal(t, i, f.parent, "Input.parent (must be i)")
|
||||
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Accept two runes via fork.
|
||||
f.NextRune()
|
||||
f.Accept() // e
|
||||
f.NextRune()
|
||||
f.Accept() // s
|
||||
assert.Equal(t, "es", f.Result().String(), "result runes in fork")
|
||||
assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Merge fork back into parent
|
||||
f.Merge()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||
assert.Equal(t, 3, i.cursor.Byte, "i.child.cursor.Byte")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
f1.NextRune()
|
||||
f1.Accept()
|
||||
f2 := f1.Fork()
|
||||
f2.NextRune()
|
||||
f2.Accept()
|
||||
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 1, i.offset, "i.offset")
|
||||
assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 2, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
f2.Merge()
|
||||
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 1, i.offset, "i.offset")
|
||||
assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
f1.Merge()
|
||||
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
assert.Equal(t, 3, i.offset, "i.offset")
|
||||
assert.Equal(t, "", f1.Result().String(), "f1.Result().String()")
|
||||
assert.Equal(t, 3, f1.offset, "f1.offset")
|
||||
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
assert.Equal(t, 3, f2.offset, "f2.offset")
|
||||
}
|
||||
|
||||
func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
f.FlushReaderBuffer(1)
|
||||
},
|
||||
Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " +
|
||||
"can only be called on the root TokenAPI, not on a forked child",
|
||||
})
|
||||
}
|
||||
|
||||
func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.FlushReaderBuffer(2)
|
||||
assert.Equal(t, "Te", i.Result().String(), "i.Result()")
|
||||
assert.Equal(t, 0, i.offset, "i.offset")
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
assert.Equal(t, 2, i.offset, "i.offset")
|
||||
i.FlushReaderBuffer(2)
|
||||
assert.Equal(t, "Test", i.Result().String(), "i.Result()")
|
||||
assert.Equal(t, 0, i.offset, "i.offset")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
assert.Equal(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
f := i.Fork()
|
||||
f.NextRune()
|
||||
f.Accept()
|
||||
r, err := f.NextRune()
|
||||
assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
r, err = i.NextRune()
|
||||
assert.Equal(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
assert.Equal(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func mkInput() *TokenAPI {
|
||||
return NewTokenAPI(strings.NewReader("Testing"))
|
||||
}
|
139
tokenhandler.go
|
@ -1,139 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// TokenHandler is the function type that is involved in turning a low level
|
||||
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
||||
// data matches some kind of pattern and to report back the token(s).
|
||||
//
|
||||
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||
// for retrieving input data to match against and for reporting back results.
|
||||
type TokenHandler func(t *TokenAPI) bool
|
||||
|
||||
// NextRune retrieves the next rune from the input.
|
||||
//
|
||||
// It returns the rune and a boolean. The boolean will be false in case an
|
||||
// invalid UTF8 rune or the end of the file was encountered.
|
||||
//
|
||||
// After retrieving a rune, Accept() or Skip() can be called to respectively add
|
||||
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
|
||||
// a TokenHandler has full control over what runes are significant for the
|
||||
// resulting output of that TokenHandler.
|
||||
//
|
||||
// After using NextRune(), this method can not be reinvoked, until the last read
|
||||
// rune is explicitly accepted or skipped as described above.
|
||||
// func (t *TokenAPIold) NextRune() (rune, bool) {
|
||||
// if t.lastRune != nil {
|
||||
// caller, filepos := getCaller(1)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: NextRune() was called from %s at %s "+
|
||||
// "without accepting or skipping the previously read rune", caller, filepos))
|
||||
// }
|
||||
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
|
||||
// t.lastRune = &runeInfo{r, w, ok}
|
||||
// if ok {
|
||||
// t.result.Input = append(t.result.Input, r)
|
||||
// }
|
||||
// return r, ok
|
||||
// }
|
||||
|
||||
// Fork splits off a child TokenAPIold, containing the same input cursor position
|
||||
// as the parent TokenAPIold, but with all other data in a fresh state.
|
||||
//
|
||||
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
|
||||
// affecting the parent TokenAPIold. This is for example useful when the
|
||||
// TokenHandler function must perform some form of lookahead.
|
||||
//
|
||||
// When a successful match was found, the TokenHandler function can call
|
||||
// TokenAPIold.Merge() on the forked child to have the resulting output added
|
||||
// to the parent TokenAPIold.
|
||||
//
|
||||
// When no match was found, the forked child can simply be discarded.
|
||||
//
|
||||
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
|
||||
// This is done in 4 steps and only after finishing all steps, the TokenHandler
|
||||
// function can confirm a successful match. The TokenHandler function for this
|
||||
// case could look like this (yes, it's naive, but it shows the point):
|
||||
// TODO make proper tested example
|
||||
//
|
||||
// func MatchAbcd(t *TokenAPIold) bool {
|
||||
// child := t.Fork() // fork to keep m from input untouched
|
||||
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
|
||||
// if r, ok := t.NextRune(); !ok || r != letter {
|
||||
// return false // report mismatch, t is left untouched
|
||||
// }
|
||||
// child.Accept() // add rune to child output
|
||||
// }
|
||||
// child.Merge() // we have a match, add resulting output to parent
|
||||
// return true // and report the successful match
|
||||
// }
|
||||
|
||||
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
|
||||
// output of the TokenAPIold.
|
||||
// func (t *TokenAPIold) Accept() {
|
||||
// t.checkAllowedCall("Accept()")
|
||||
// t.buffer = append(t.buffer, t.lastRune.Rune)
|
||||
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
|
||||
// t.inputOffset += t.lastRune.ByteSize
|
||||
// t.lastRune = nil
|
||||
// }
|
||||
|
||||
// Skip will ignore the last rune as read by NextRune().
|
||||
// func (t *TokenAPIold) Skip() {
|
||||
// t.checkAllowedCall("Skip()")
|
||||
// t.inputOffset += t.lastRune.ByteSize
|
||||
// t.lastRune = nil
|
||||
// }
|
||||
|
||||
// func (t *TokenAPIold) checkAllowedCall(name string) {
|
||||
// if t.lastRune == nil {
|
||||
// caller, filepos := getCaller(2)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
|
||||
// name, caller, filepos))
|
||||
// }
|
||||
// if !t.lastRune.OK {
|
||||
// caller, filepos := getCaller(2)
|
||||
// panic(fmt.Sprintf(
|
||||
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
|
||||
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
|
||||
// }
|
||||
// }
|
||||
|
||||
// AddToken is used to add a token to the results of the TokenHandler.
|
||||
// func (t *TokenAPIold) AddToken(tok *Token) {
|
||||
// t.result.Tokens = append(t.result.Tokens, tok)
|
||||
// }
|
||||
|
||||
// Merge merges the resulting output from a forked child TokenAPIold back into
|
||||
// its parent: The runes that are accepted in the child are added to the parent
|
||||
// runes and the parent's input cursor position is advanced to the child's
|
||||
// cursor position.
|
||||
//
|
||||
// After the merge, the child TokenAPIold is reset so it can immediately be
|
||||
// reused for performing another match (all data are cleared, except for the
|
||||
// input offset which is kept at its current position).
|
||||
// func (t *TokenAPIold) Merge() bool {
|
||||
// if t.parent == nil {
|
||||
// panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
|
||||
// }
|
||||
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
|
||||
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
|
||||
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
|
||||
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
|
||||
// t.parent.inputOffset = t.inputOffset
|
||||
// t.result = &TokResult{}
|
||||
// return true
|
||||
// }
|
||||
|
||||
func getCaller(depth int) (string, string) {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, file, line, _ := runtime.Caller(depth + 1)
|
||||
filepos := fmt.Sprintf("%s:%d", file, line)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
return caller.Name(), filepos
|
||||
}
|
|
@@ -4,11 +4,10 @@ import (
|
|||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/assert"
|
||||
)
|
||||
|
||||
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
for i := 0; i < 20; i++ {
|
||||
t.NextRune()
|
||||
t.Accept()
|
||||
|
@@ -22,7 +21,7 @@ func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.Result().AddToken(&parsekit.Token{
|
||||
Type: "PI",
|
||||
Runes: []rune("π"),
|
||||
|
@@ -60,8 +59,9 @@ func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
|
||||
fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII))
|
||||
parser := parsekit.NewMatcher(fooToken, "something")
|
||||
var tok, c, a = parsekit.T, parsekit.C, parsekit.A
|
||||
fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII))
|
||||
parser := parsekit.NewTokenizer(fooToken, "something")
|
||||
input := "This is fine ASCII Åltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
||||
|
@@ -74,15 +74,16 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
|
||||
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
|
||||
fooToken := c.Seq(
|
||||
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||
tok.StrLiteral("COMBI", c.Seq(
|
||||
tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
|
||||
tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
|
||||
tok.Str("COMBI", c.Seq(
|
||||
tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
|
||||
tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
|
||||
)),
|
||||
m.Drop(c.ZeroOrMore(a.Asterisk)),
|
||||
)
|
||||
parser := parsekit.NewMatcher(fooToken, "something")
|
||||
parser := parsekit.NewTokenizer(fooToken, "something")
|
||||
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
|
||||
output := "This is fine ASCIIÅltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
@@ -108,50 +109,50 @@ func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.Accept()
|
||||
return false
|
||||
}, "test")
|
||||
assert.Panic(t, assert.PanicT{
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` +
|
||||
`calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`,
|
||||
})
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.NextRune()
|
||||
t.NextRune()
|
||||
return false
|
||||
}, "test")
|
||||
assert.Panic(t, assert.PanicT{
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
|
||||
`a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`})
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
||||
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.NextRune()
|
||||
t.Accept()
|
||||
return false
|
||||
}, "test")
|
||||
assert.Panic(t, assert.PanicT{
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` +
|
||||
`NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`})
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
|
||||
`/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`})
|
||||
}
|
||||
|
||||
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
|
||||
assert.Panic(t, assert.PanicT{
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() {
|
||||
a := parsekit.TokenAPI{}
|
||||
a.Merge()
|
||||
},
|
||||
Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`,
|
||||
})
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
|
|
@@ -215,7 +215,7 @@ var A = struct {
|
|||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var T = struct {
|
||||
StrLiteral func(interface{}, TokenHandler) TokenHandler
|
||||
Str func(interface{}, TokenHandler) TokenHandler
|
||||
StrInterpreted func(interface{}, TokenHandler) TokenHandler
|
||||
Byte func(interface{}, TokenHandler) TokenHandler
|
||||
Rune func(interface{}, TokenHandler) TokenHandler
|
||||
|
@@ -234,7 +234,7 @@ var T = struct {
|
|||
Boolean func(interface{}, TokenHandler) TokenHandler
|
||||
ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
|
||||
}{
|
||||
StrLiteral: MakeStrLiteralToken,
|
||||
Str: MakeStrLiteralToken,
|
||||
StrInterpreted: MakeStrInterpretedToken,
|
||||
Byte: MakeByteToken,
|
||||
Rune: MakeRuneToken,
|
||||
|
@@ -343,13 +343,13 @@ func MatchOpt(handler TokenHandler) TokenHandler {
|
|||
}
|
||||
|
||||
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
|
||||
// applied in their exact order. Only if all matcher apply, the sequence
|
||||
// applied in their exact order. Only if all TokenHandlers apply, the sequence
|
||||
// reports successful match.
|
||||
func MatchSeq(handlers ...TokenHandler) TokenHandler {
|
||||
return func(t *TokenAPI) bool {
|
||||
child := t.Fork()
|
||||
for _, matcher := range handlers {
|
||||
if !matcher(child) {
|
||||
for _, handler := range handlers {
|
||||
if !handler(child) {
|
||||
return false
|
||||
}
|
||||
}
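To make the sequencing behaviour concrete, here is a hedged usage sketch (not part of this commit) that builds a sequence through the c.Seq shortcut used in the test files; it assumes c.Seq maps to MatchSeq and that the usual "fmt" and go-parsekit imports are in place. A sequence only matches when every handler matches, in order, and a failed attempt leaves the caller's TokenAPI untouched because the matching happens on the fork shown above.

// Hedged sketch: matching the fixed sequence 'a', 'b', 'c' via the c.Seq shortcut.
func ExampleSeq() {
	var c, a = parsekit.C, parsekit.A
	abc := c.Seq(a.Rune('a'), a.Rune('b'), a.Rune('c'))
	tokenizer := parsekit.NewTokenizer(abc, "the sequence abc")
	result, err := tokenizer.Execute("abcd")
	if err == nil {
		fmt.Println(result.String()) // the accepted runes: "abc"
	}
}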
|
||||
|
|
|
@@ -8,7 +8,8 @@ import (
|
|||
)
|
||||
|
||||
func TestCombinators(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
{"abc", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
|
@@ -67,24 +68,26 @@ func TestCombinators(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
RunPanicTests(t, []PanicTest{
|
||||
{func() { a.RuneRange('z', 'a') },
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
parsekit.AssertPanics(t, []parsekit.PanicT{
|
||||
{func() { a.RuneRange('z', 'a') }, false,
|
||||
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
|
||||
{func() { c.MinMax(-1, 1, parsekit.A.Space) },
|
||||
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, false,
|
||||
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
|
||||
{func() { c.MinMax(1, -1, parsekit.A.Space) },
|
||||
{func() { c.MinMax(1, -1, parsekit.A.Space) }, false,
|
||||
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
|
||||
{func() { c.MinMax(10, 5, parsekit.A.Space) },
|
||||
{func() { c.MinMax(10, 5, parsekit.A.Space) }, false,
|
||||
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
|
||||
{func() { c.Min(-10, parsekit.A.Space) },
|
||||
{func() { c.Min(-10, parsekit.A.Space) }, false,
|
||||
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
|
||||
{func() { c.Max(-42, parsekit.A.Space) },
|
||||
{func() { c.Max(-42, parsekit.A.Space) }, false,
|
||||
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
var a = parsekit.A
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
|
@@ -223,7 +226,8 @@ func TestAtoms(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
|
@@ -242,21 +246,23 @@ func TestModifiers(t *testing.T) {
|
|||
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||
// input cannot be processed successfully.
|
||||
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||
var a, tok = parsekit.A, parsekit.T
|
||||
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||
parser := parsekit.NewMatcher(invalid, "boolean")
|
||||
RunPanicTest(t, PanicTest{
|
||||
func() { parser.Execute("no") },
|
||||
parser := parsekit.NewTokenizer(invalid, "boolean")
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
func() { parser.Execute("no") }, false,
|
||||
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
|
||||
`invalid syntax \(only use a type conversion token maker, when the input has been validated on beforehand\)`,
|
||||
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestTokenMakers(t *testing.T) {
|
||||
RunTokenMakerTests(t, []TokenMakerTest{
|
||||
{`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)),
|
||||
var c, a, tok = parsekit.C, parsekit.A, parsekit.T
|
||||
parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{
|
||||
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
|
||||
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
||||
|
||||
{`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)),
|
||||
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
|
||||
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
||||
|
||||
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
||||
|
@@ -313,6 +319,7 @@ func TestTokenMakers(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSequenceOfRunes(t *testing.T) {
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
sequence := c.Seq(
|
||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
|
||||
a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||
|
@@ -340,6 +347,7 @@ func TestSequenceOfRunes(t *testing.T) {
|
|||
|
||||
// I know, this is hell, but that's the whole point for this test :->
|
||||
func TestCombination(t *testing.T) {
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
demonic := c.Seq(
|
||||
c.Opt(a.SquareOpen),
|
||||
m.Trim(
|
||||
|
@@ -360,7 +368,7 @@ func TestCombination(t *testing.T) {
|
|||
c.Opt(a.SquareClose),
|
||||
)
|
||||
|
||||
RunTokenHandlerTests(t, []TokenHandlerTest{
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
||||
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
||||
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
||||
|
|
|
@@ -0,0 +1,49 @@
|
|||
package parsekit
|
||||
|
||||
// Tokenizer is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Tokenizer can be instantiated using the parsekit.NewTokenizer()
|
||||
// method.
|
||||
type Tokenizer struct {
|
||||
parser *Parser
|
||||
result *TokenResult
|
||||
}
|
||||
|
||||
// TokenHandler is the function type that is involved in turning a low level
|
||||
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
||||
// data matches some kind of pattern and to report back the results.
|
||||
//
|
||||
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
|
||||
// indicate whether or not it found a match on the input. The TokenAPI is used
|
||||
// for retrieving input data to match against and for reporting back results.
|
||||
type TokenHandler func(t *TokenAPI) bool
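For illustration only (not part of this commit), a hand-written TokenHandler that follows this contract could look like the sketch below; it relies on the Fork, NextRune, Accept and Merge calls that the tests in this commit exercise, so a failed match leaves the caller's TokenAPI untouched.

// matchAB is a hedged example TokenHandler that matches the two runes 'a', 'b'.
func matchAB(t *TokenAPI) bool {
	child := t.Fork() // work on a fork, so failure leaves t untouched
	for _, want := range []rune{'a', 'b'} {
		r, err := child.NextRune()
		if err != nil || r != want {
			return false // no match; the fork is simply discarded
		}
		child.Accept() // keep the matched rune in the fork's result
	}
	child.Merge() // success: push the accepted runes back into t
	return true
}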
|
||||
|
||||
// NewTokenizer instantiates a new Tokenizer.
|
||||
//
|
||||
// This is a simple wrapper around a TokenHandler function. It can be used to
|
||||
// match an input string against that TokenHandler function and retrieve the
|
||||
// results in a straightforward way.
|
||||
//
|
||||
// The 'expects' parameter is used for creating an error message in case parsed
|
||||
// input does not match the TokenHandler.
|
||||
func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer {
|
||||
tokenizer := &Tokenizer{}
|
||||
tokenizer.parser = NewParser(func(p *ParseAPI) {
|
||||
if p.On(tokenHandler).Accept() {
|
||||
tokenizer.result = p.Result()
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expects(expects)
|
||||
p.UnexpectedInput()
|
||||
}
|
||||
})
|
||||
return tokenizer
|
||||
}
|
||||
|
||||
// Execute feeds the input to the wrapped TokenHandler function.
|
||||
// It returns the TokenHandler's TokenResult. When an error occurs during
// parsing, the returned error is non-nil, otherwise it is nil.
|
||||
func (t *Tokenizer) Execute(input string) (*TokenResult, *Error) {
|
||||
err := t.parser.Execute(input)
|
||||
return t.result, err
|
||||
}
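To show how these pieces fit together, here is a hedged end-to-end sketch (not part of the commit) that wraps a combinator-built TokenHandler in a Tokenizer and runs it; the shortcut variables parsekit.C and parsekit.A and the result's String() method are taken from the test files elsewhere in this commit.

// Hedged usage sketch, assuming the API introduced in this commit.
package main

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit"
)

func main() {
	var c, a = parsekit.C, parsekit.A
	asciiText := c.OneOrMore(a.ASCII)

	// The second argument is used in error messages when the input does not match.
	tokenizer := parsekit.NewTokenizer(asciiText, "some ASCII text")

	result, err := tokenizer.Execute("Hello!")
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	fmt.Println("matched:", result.String()) // prints the accepted runes
}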
|
|
@@ -0,0 +1,257 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
r, _ := mkInput().NextRune()
|
||||
AssertEqual(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
|
||||
`/.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: mkInput().Accept,
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
|
||||
`at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
|
||||
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.NextRune()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
|
||||
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.Fork()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
|
||||
`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||
i := mkInput()
|
||||
f1 := i.Fork()
|
||||
f2 := f1.Fork()
|
||||
f3 := f2.Fork()
|
||||
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||
f5 := f4.Fork()
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == f1, "i.child == f1")
|
||||
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
|
||||
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
|
||||
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
|
||||
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
|
||||
i.NextRune()
|
||||
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == nil, "i.child == nil")
|
||||
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
|
||||
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
|
||||
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
|
||||
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
|
||||
`at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := mkInput()
|
||||
r, _ := i.NextRune()
|
||||
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
|
||||
AssertTrue(t, i.result.lastRune != nil, "Input.lastRune after NextRune() is not nil")
|
||||
i.Accept()
|
||||
AssertTrue(t, i.result.lastRune == nil, "Input.lastRune after Accept() is nil")
|
||||
AssertEqual(t, 1, i.offset, "Input.offset")
|
||||
AssertEqual(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
|
||||
r, _ = i.NextRune()
|
||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||
i := mkInput()
|
||||
for j := 0; j < 7; j++ {
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "Testing", string(i.reader.buffer), "reader input buffer")
|
||||
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", i.cursor.String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", i.cursor.String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
AssertEqual(t, "line 2, column 1", i.cursor.String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", i.cursor.String(), "cursor 4")
|
||||
AssertEqual(t, *i.cursor, i.Cursor(), "i.Cursor()")
|
||||
}
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
|
||||
// Fork
|
||||
f := i.Fork()
|
||||
AssertEqual(t, f, i.child, "Input.child (must be f)")
|
||||
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
|
||||
AssertEqual(t, 1, i.cursor.Byte, "i.cursor.Byte")
|
||||
AssertEqual(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Accept two runes via fork.
|
||||
f.NextRune()
|
||||
f.Accept() // e
|
||||
f.NextRune()
|
||||
f.Accept() // s
|
||||
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
|
||||
AssertEqual(t, 1, i.cursor.Byte, "i.cursor.Byte")
|
||||
AssertEqual(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Merge fork back into parent
|
||||
f.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||
AssertEqual(t, 3, i.cursor.Byte, "i.cursor.Byte")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
f1.NextRune()
|
||||
f1.Accept()
|
||||
f2 := f1.Fork()
|
||||
f2.NextRune()
|
||||
f2.Accept()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.offset, "i.offset")
|
||||
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 2, f1.offset, "f1.offset")
|
||||
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.offset, "f2.offset")
|
||||
f2.Merge()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.offset, "i.offset")
|
||||
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.offset, "f1.offset")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.offset, "f2.offset")
|
||||
f1.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 3, i.offset, "i.offset")
|
||||
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.offset, "f1.offset")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.offset, "f2.offset")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
f := i.Fork()
|
||||
f.NextRune()
|
||||
f.Accept()
|
||||
r, err := f.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
r, err = i.NextRune()
|
||||
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func mkInput() *TokenAPI {
|
||||
return NewTokenAPI(strings.NewReader("Testing"))
|
||||
}
|
|
@@ -0,0 +1,116 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// TokenResult holds the results as produced by a TokenHandler.
|
||||
type TokenResult struct {
|
||||
lastRune *runeInfo // Information about the last rune read using NextRune()
|
||||
runes []rune
|
||||
tokens []*Token
|
||||
}
|
||||
|
||||
type runeInfo struct {
|
||||
r rune
|
||||
err error
|
||||
}
|
||||
|
||||
// Token defines a lexical token as produced by TokenHandlers.
|
||||
type Token struct {
|
||||
Type interface{} // token type, can be any type that a parser author sees fit
|
||||
Runes []rune // the runes that make up the token
|
||||
Value interface{} // an optional value of any type
|
||||
}
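As a point of reference, the tests in this commit construct tokens such as a "PI" token; the literal below is a hedged illustration of the three fields only, with the Value field filled in by assumption.

// Hedged illustration of a Token value (mirrors the "PI" token used in the tests).
var piToken = &Token{
	Type:  "PI",        // any type a parser author sees fit
	Runes: []rune("π"), // the runes that make up the token
	Value: 3.1415,      // an optional value of any type (assumed here)
}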
|
||||
|
||||
// newTokenResult initializes an empty result struct.
|
||||
func newTokenResult() *TokenResult {
|
||||
return &TokenResult{
|
||||
runes: []rune{},
|
||||
tokens: []*Token{},
|
||||
}
|
||||
}
|
||||
|
||||
// ClearRunes clears the runes in the TokenResult.
|
||||
func (r *TokenResult) ClearRunes() {
|
||||
r.runes = []rune{}
|
||||
}
|
||||
|
||||
// SetRunes replaces the Runes from the TokenResult with the provided input.
|
||||
func (r *TokenResult) SetRunes(s interface{}) {
|
||||
r.ClearRunes()
|
||||
r.AddRunes(s)
|
||||
}
|
||||
|
||||
// AddRunes is used to add runes to the TokenResult.
|
||||
func (r *TokenResult) AddRunes(set ...interface{}) {
|
||||
for _, s := range set {
|
||||
switch s := s.(type) {
|
||||
case string:
|
||||
r.runes = append(r.runes, []rune(s)...)
|
||||
case []rune:
|
||||
r.runes = append(r.runes, s...)
|
||||
case rune:
|
||||
r.runes = append(r.runes, s)
|
||||
default:
|
||||
panic(fmt.Sprintf("parsekit.TokenResult.SetRunes(): unsupported type '%T' used", s))
|
||||
}
|
||||
}
|
||||
}
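Because AddRunes takes its input as interface{} values, it accepts several forms; the following package-internal sketch (hedged, not part of the commit) shows the three supported types from the switch above and notes the panic for anything else.

// Hedged sketch of the input types AddRunes handles.
r := newTokenResult()
r.AddRunes("ab")             // string
r.AddRunes([]rune{'c', 'd'}) // []rune
r.AddRunes('e')              // single rune
// r.String() is now "abcde"; any other type (e.g. an int) panics as shown above.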
|
||||
|
||||
// Runes retrieves the Runes from the TokenResult.
|
||||
func (r *TokenResult) Runes() []rune {
|
||||
return r.runes
|
||||
}
|
||||
|
||||
// Rune retrieves a single rune from the TokenResult at the specified index.
|
||||
func (r *TokenResult) Rune(idx int) rune {
|
||||
return r.runes[idx]
|
||||
}
|
||||
|
||||
// String returns the Runes from the TokenResult as a string.
|
||||
func (r *TokenResult) String() string {
|
||||
return string(r.runes)
|
||||
}
|
||||
|
||||
// ClearTokens clears the tokens in the TokenResult.
|
||||
func (r *TokenResult) ClearTokens() {
|
||||
r.tokens = []*Token{}
|
||||
}
|
||||
|
||||
// SetTokens replaces the Tokens from the TokenResult with the provided input.
|
||||
func (r *TokenResult) SetTokens(tokens []*Token) {
|
||||
r.ClearTokens()
|
||||
for _, t := range tokens {
|
||||
r.AddToken(t)
|
||||
}
|
||||
}
|
||||
|
||||
// AddToken is used to add a Token to the TokenResult.
|
||||
func (r *TokenResult) AddToken(t *Token) {
|
||||
r.tokens = append(r.tokens, t)
|
||||
}
|
||||
|
||||
// Tokens retrieves the Tokens from the TokenResult.
|
||||
func (r *TokenResult) Tokens() []*Token {
|
||||
return r.tokens
|
||||
}
|
||||
|
||||
// Token retrieves a single Token from the TokenResult at the specified index.
|
||||
func (r *TokenResult) Token(idx int) *Token {
|
||||
return r.tokens[idx]
|
||||
}
|
||||
|
||||
// Values retrieves a slice containing only the Values for the TokenResult Tokens.
|
||||
func (r *TokenResult) Values() []interface{} {
|
||||
values := make([]interface{}, len(r.tokens))
|
||||
for i, tok := range r.tokens {
|
||||
values[i] = tok.Value
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
// Value retrieves a single Value from the TokenResult Token at the specified index.
|
||||
func (r *TokenResult) Value(idx int) interface{} {
|
||||
return r.tokens[idx].Value
|
||||
}
|
|
@@ -0,0 +1,25 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes("string")
|
||||
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
|
||||
i.Result().SetRunes([]rune("rune slice"))
|
||||
AssertEqual(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
|
||||
i.Result().SetRunes('X')
|
||||
AssertEqual(t, "X", string(i.Result().String()), "i.Result() with rune input")
|
||||
}
|
||||
|
||||
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Result().SetRunes(1234567)
|
||||
},
|
||||
Expect: "parsekit.TokenResult.SetRunes(): unsupported type 'int' used",
|
||||
})
|
||||
}
|