Big simplification run once more, cleaned up code, added tests and examples, made stuff unexported where possible, to slim down the exported interface.

This commit is contained in:
Maurice Makaay 2019-06-04 23:15:02 +00:00
parent 4580962fb8
commit 75373e5ed5
41 changed files with 1662 additions and 1553 deletions

View File

@ -1,19 +0,0 @@
package assert
import (
"testing"
)
func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
if notExpected == actual {
t.Errorf("Unexpected value for %s: %q", forWhat, actual)
}
}

View File

@ -1,34 +0,0 @@
package assert
import (
"regexp"
"testing"
)
type PanicT struct {
Function func()
Expect string
Regexp bool
}
func Panic(t *testing.T, p PanicT) {
defer func() {
if r := recover(); r != nil {
mismatch := false
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
mismatch = true
}
if !p.Regexp && p.Expect != r.(string) {
mismatch = true
}
if mismatch {
t.Errorf(
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
p.Expect, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
}
}()
p.Function()
}

125
assertions_test.go Normal file
View File

@ -0,0 +1,125 @@
package parsekit
// This file contains some tools that are used for writing parsekit tests.
import (
"regexp"
"testing"
)
// AssertEqual fails the test when the actual value differs from the
// expected value.
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected == actual {
		return
	}
	t.Errorf(
		"Unexpected value for %s:\nexpected: %q\nactual: %q",
		forWhat, expected, actual)
}
// AssertNotEqual fails the test when the actual value is equal to the
// value that must not occur.
func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
	if notExpected != actual {
		return
	}
	t.Errorf("Unexpected value for %s: %q", forWhat, actual)
}
// AssertTrue fails the test when the provided boolean is false.
func AssertTrue(t *testing.T, b bool, assertion string) {
	if b {
		return
	}
	t.Errorf("Assertion %s is false", assertion)
}
// PanicT describes a single panic expectation for AssertPanic(s):
// the Function to invoke and the Expect-ed panic message. When Regexp
// is true, Expect is interpreted as a regular expression instead of a
// literal string.
type PanicT struct {
	Function func()
	Regexp   bool
	Expect   string
}

// AssertPanics runs AssertPanic for every test case in the provided set.
func AssertPanics(t *testing.T, testSet []PanicT) {
	for _, test := range testSet {
		AssertPanic(t, test)
	}
}

// AssertPanic checks that p.Function panics with the expected message.
// The test fails when the function does not panic, panics with a
// non-string value, or panics with an unexpected message.
func AssertPanic(t *testing.T, p PanicT) {
	defer func() {
		r := recover()
		if r == nil {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
			return
		}
		// Fix: do not assume the recovered value is a string. The previous
		// bare r.(string) assertion would itself panic on e.g. panic(err)
		// or panic(42), crashing the test binary instead of failing the test.
		msg, isString := r.(string)
		if !isString {
			t.Errorf("Function panicked with a non-string value: %v", r)
			return
		}
		mismatch := false
		if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(msg) {
			mismatch = true
		}
		if !p.Regexp && p.Expect != msg {
			mismatch = true
		}
		if mismatch {
			t.Errorf(
				"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
				p.Expect, msg)
		}
	}()
	p.Function()
}
// TokenHandlerT describes a single TokenHandler test case: the Input to
// feed to the tokenizer, the TokenHandler under test, whether the input
// MustMatch, and the Expected output string for a successful match.
type TokenHandlerT struct {
	Input        string       // the input string to tokenize
	TokenHandler TokenHandler // the handler under test
	MustMatch    bool         // true when the input is expected to match
	Expected     string       // expected output string when MustMatch is true
}
// AssertTokenHandlers runs AssertTokenHandler for each test case in the
// provided set.
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
	for i := 0; i < len(testSet); i++ {
		AssertTokenHandler(t, testSet[i])
	}
}
// AssertTokenHandler runs a single TokenHandler test case against a fresh
// Tokenizer and reports unexpected (non-)matches or unexpected output.
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
	result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input)
	if !test.MustMatch {
		if err == nil {
			t.Errorf("Test %q failed: should not match, but it did", test.Input)
		}
		return
	}
	if err != nil {
		t.Errorf("Test %q failed with error: %s", test.Input, err)
		return
	}
	if output := result.String(); output != test.Expected {
		t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q", test.Input, test.Expected, output)
	}
}
// TokenMakerT describes a token-producing TokenHandler test case: the
// Input to tokenize, the TokenHandler under test, and the Expected
// tokens that the handler must produce.
type TokenMakerT struct {
	Input        string       // the input string to tokenize
	TokenHandler TokenHandler // the handler under test
	Expected     []Token      // the tokens that must be produced
}
// AssertTokenMakers runs AssertTokenMaker for each test case in the
// provided set.
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
	for i := 0; i < len(testSet); i++ {
		AssertTokenMaker(t, testSet[i])
	}
}
// AssertTokenMaker runs a single token-producing TokenHandler test case
// and compares the produced tokens (Type, Runes, Value) against the
// expected tokens.
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
	result, err := NewTokenizer(test.TokenHandler, "a match").Execute(test.Input)
	if err != nil {
		t.Errorf("Test %q failed with error: %s", test.Input, err)
		return
	}
	if len(result.Tokens()) != len(test.Expected) {
		t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
		// Fix: stop here. The comparison loop below iterates test.Expected,
		// so with fewer actual tokens result.Token(i) would be called with
		// an out-of-range index.
		return
	}
	for i, expected := range test.Expected {
		actual := result.Token(i)
		if expected.Type != actual.Type {
			t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
		}
		if string(expected.Runes) != string(actual.Runes) {
			t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
		}
		if expected.Value != actual.Value {
			t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
		}
	}
}

View File

@ -2,20 +2,31 @@ package parsekit
import "fmt" import "fmt"
// Cursor represents the position of the input cursor in various ways. // Cursor represents the position of a cursor in various ways.
type Cursor struct { type Cursor struct {
Byte int // The cursor offset in bytes Byte int // The cursor offset in bytes, relative to start of file
Rune int // The cursor offset in UTF8 runes Rune int // The cursor offset in UTF8 runes, relative to start of file
Column int // The column at which the cursor is (0-indexed) Column int // The column at which the cursor is (0-indexed)
Line int // The line at which the cursor is (0-indexed) Line int // The line at which the cursor is (0-indexed)
} }
func (c *Cursor) String() string { // String produces a string representation of the cursor position.
func (c Cursor) String() string {
if c.Line == 0 && c.Column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1) return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
} }
// move updates the position of the cursor, based on the provided input string. // Move updates the position of the cursor, based on the provided input string.
func (c *Cursor) move(input string) { // The input string represents the runes that has been skipped over. This
// method will take newlines into account to keep track of line numbers and
// column positions automatically.
//
// Note: when you are writing a parser using parsekit, it's unlikely
// that you will use this method directly. The parsekit package takes care
// of calling it at the correct time.
func (c *Cursor) Move(input string) *Cursor {
c.Byte += len(input) c.Byte += len(input)
for _, r := range input { for _, r := range input {
c.Rune++ c.Rune++
@ -26,4 +37,5 @@ func (c *Cursor) move(input string) {
c.Column++ c.Column++
} }
} }
return c
} }

View File

@ -1,9 +1,38 @@
package parsekit package parsekit_test
import ( import (
"fmt"
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit"
) )
// ExampleCursor_Move demonstrates how Move() advances the cursor over
// input runes; newlines in the input advance the line counter and reset
// the column.
func ExampleCursor_Move() {
	c := &parsekit.Cursor{}
	fmt.Printf("after initialization : %s\n", c)
	fmt.Printf("after 'some words' : %s\n", c.Move("some words"))
	fmt.Printf("after '\\n' : %s\n", c.Move("\n"))
	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.Move("\r\nskip\nlines"))
	// Output:
	// after initialization : start of file
	// after 'some words' : line 1, column 11
	// after '\n' : line 2, column 1
	// after '\r\nskip\nlines' : line 4, column 6
}
// ExampleCursor_String demonstrates the String() output: "start of file"
// for the zero cursor, otherwise a 1-based line/column description.
func ExampleCursor_String() {
	c := &parsekit.Cursor{}
	fmt.Println(c.String())
	c.Move("\nfoobar")
	fmt.Println(c.String())
	// Output:
	// start of file
	// line 2, column 7
}
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) { func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
for _, test := range []struct { for _, test := range []struct {
name string name string
@ -22,9 +51,9 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} { } {
c := Cursor{} c := parsekit.Cursor{}
for _, s := range test.input { for _, s := range test.input {
c.move(s) c.Move(s)
} }
if c.Byte != test.byte { if c.Byte != test.byte {
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte) t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)

31
error.go Normal file
View File

@ -0,0 +1,31 @@
package parsekit
import (
"fmt"
)
// Error is used as the error type when parsing errors occur.
// The error includes some context information to allow for useful
// error messages to the user.
type Error struct {
	Message string // a human-readable description of what went wrong
	Cursor  Cursor // the position in the input where the error occurred
}
// Error returns the error message, implementing the standard error
// interface. Calling this method on a nil *Error is a programmer bug
// and panics, reporting the caller's source position.
func (err *Error) Error() string {
	if err != nil {
		return err.Message
	}
	_, linepos := getCaller(1)
	panic(fmt.Sprintf("parsekit.Error.Error(): method called with nil error at %s", linepos))
}
// Full returns the error message, extended with information about the
// position in the input where the error occurred. Calling this method
// on a nil *Error is a programmer bug and panics, reporting the
// caller's source position.
func (err *Error) Full() string {
	if err != nil {
		return fmt.Sprintf("%s at %s", err, err.Cursor)
	}
	_, linepos := getCaller(1)
	panic(fmt.Sprintf("parsekit.Error.Full(): method called with nil error at %s", linepos))
}

46
error_test.go Normal file
View File

@ -0,0 +1,46 @@
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// ExampleError shows that an Error renders its bare message via Error()
// and %s, while Full() appends the 1-based cursor position.
func ExampleError() {
	err := &parsekit.Error{
		Message: "it broke down",
		Cursor:  parsekit.Cursor{Line: 9, Column: 41},
	}
	fmt.Println(err.Error())
	fmt.Printf("%s\n", err)
	fmt.Println(err.Full())
	// Output:
	// it broke down
	// it broke down
	// it broke down at line 10, column 42
}
// ExampleError_Error shows that Error() and the %s verb both produce
// the bare error message, without position information.
func ExampleError_Error() {
	err := &parsekit.Error{
		Message: "it broke down",
		Cursor:  parsekit.Cursor{Line: 9, Column: 41},
	}
	fmt.Println(err.Error())
	fmt.Printf("%s\n", err)
	// Output:
	// it broke down
	// it broke down
}
// ExampleError_Full shows that Full() appends the cursor position
// (rendered 1-based) to the error message.
func ExampleError_Full() {
	err := &parsekit.Error{
		Message: "it broke down",
		Cursor:  parsekit.Cursor{Line: 9, Column: 41},
	}
	fmt.Println(err.Full())
	// Output:
	// it broke down at line 10, column 42
}

View File

@ -5,7 +5,7 @@
// //
// So positive numbers that can be either added or substracted, and whitespace // So positive numbers that can be either added or substracted, and whitespace
// is ignored. // is ignored.
package parsekit_test package examples
import ( import (
"fmt" "fmt"
@ -83,15 +83,15 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
} }
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
var a = parsekit.A var A = parsekit.A
switch { switch {
case p.On(a.Add).Skip(): case p.On(A.Add).Skip():
c.op = +1 c.op = +1
p.Handle(c.number) p.Handle(c.number)
case p.On(a.Subtract).Skip(): case p.On(A.Subtract).Skip():
c.op = -1 c.op = -1
p.Handle(c.number) p.Handle(c.number)
case !p.On(a.EndOfFile).Stay(): case !p.On(A.EndOfFile).Stay():
p.Expects("operator, '+' or '-'") p.Expects("operator, '+' or '-'")
p.UnexpectedInput() p.UnexpectedInput()
default: default:

View File

@ -10,7 +10,7 @@
// <term> = (<factor> | <factor> (MUL|DIV) <factor>) // <term> = (<factor> | <factor> (MUL|DIV) <factor>)
// <space> = (<space> (SPACE|TAB) | "") // <space> = (<space> (SPACE|TAB) | "")
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space> // <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
package parsekit_test package examples
import ( import (
"fmt" "fmt"
@ -40,7 +40,7 @@ func Example_basicCalculator2() {
output, err := Compute(c.input) output, err := Compute(c.input)
output = math.Round(output*1000000) / 1000000 // to make the expectation comparisons usable output = math.Round(output*1000000) / 1000000 // to make the expectation comparisons usable
if err != nil { if err != nil {
fmt.Printf("Input: %q, got error: %s\n", c.input, err) fmt.Printf("Input: %q, got error: %s\n", c.input, err.Full())
} else { } else {
fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", c.input, output, output == c.expected) fmt.Printf("Input: %q, got outcome: %f, correct = %t\n", c.input, output, output == c.expected)
} }
@ -53,11 +53,11 @@ func Example_basicCalculator2() {
// Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true // Input: "(3.05+2)*(4.3+5.12)", got outcome: 47.571000, correct = true
// Input: "8.10 + 999/233", got outcome: 12.387554, correct = true // Input: "8.10 + 999/233", got outcome: 12.387554, correct = true
// Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", got outcome: -20.168000, correct = true // Input: " -10 + (10.8+ (3 *-20-3*(8 +-4.12)) + 10)/5 ", got outcome: -20.168000, correct = true
// Input: "", got error: unexpected end of file // Input: "", got error: unexpected end of file at start of file
// Input: "(", got error: unexpected end of file // Input: "(", got error: unexpected end of file at line 1, column 2
// Input: "10+20-", got error: unexpected end of file // Input: "10+20-", got error: unexpected end of file at line 1, column 7
// Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) // Input: "10+20-(4*10))", got error: unexpected character ')' (expected end of file) at line 1, column 13
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') // Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -94,9 +94,9 @@ func (c *calculator) calculation(p *parsekit.ParseAPI) {
func (c *calculator) expr(p *parsekit.ParseAPI) { func (c *calculator) expr(p *parsekit.ParseAPI) {
c.interpreter.push() c.interpreter.push()
var pc, a = parsekit.C, parsekit.A var C, A = parsekit.C, parsekit.A
if p.Handle(c.term) { if p.Handle(c.term) {
for p.On(pc.Any(a.Add, a.Subtract)).Accept() { for p.On(C.Any(A.Add, A.Subtract)).Accept() {
op := p.Result().Rune(0) op := p.Result().Rune(0)
if !p.Handle(c.term) { if !p.Handle(c.term) {
return return
@ -112,9 +112,9 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
func (c *calculator) term(p *parsekit.ParseAPI) { func (c *calculator) term(p *parsekit.ParseAPI) {
c.interpreter.push() c.interpreter.push()
var pc, a = parsekit.C, parsekit.A var C, A = parsekit.C, parsekit.A
if p.Handle(c.factor) { if p.Handle(c.factor) {
for p.On(pc.Any(a.Multiply, a.Divide)).Accept() { for p.On(C.Any(A.Multiply, A.Divide)).Accept() {
op := p.Result().Rune(0) op := p.Result().Rune(0)
if !p.Handle(c.factor) { if !p.Handle(c.factor) {
return return
@ -129,17 +129,17 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
// <space> = (<space> (SPACE|TAB) | "") // <space> = (<space> (SPACE|TAB) | "")
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space> // <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
func (c *calculator) factor(p *parsekit.ParseAPI) { func (c *calculator) factor(p *parsekit.ParseAPI) {
var a, tok = parsekit.A, parsekit.T var A, T = parsekit.A, parsekit.T
p.On(a.Whitespace).Skip() p.On(A.Whitespace).Skip()
switch { switch {
case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept(): case p.On(T.Float64(nil, A.Signed(A.Float))).Accept():
value := p.Result().Value(0).(float64) value := p.Result().Value(0).(float64)
c.interpreter.pushValue(value) c.interpreter.pushValue(value)
case p.On(a.LeftParen).Skip(): case p.On(A.LeftParen).Skip():
if !p.Handle(c.expr) { if !p.Handle(c.expr) {
return return
} }
if !p.On(a.RightParen).Skip() { if !p.On(A.RightParen).Skip() {
p.Expects("')'") p.Expects("')'")
p.UnexpectedInput() p.UnexpectedInput()
return return
@ -148,7 +148,7 @@ func (c *calculator) factor(p *parsekit.ParseAPI) {
p.UnexpectedInput() p.UnexpectedInput()
return return
} }
p.On(a.Whitespace).Skip() p.On(A.Whitespace).Skip()
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -2,7 +2,7 @@
// The implementation uses only TokenHandler functions and does not implement a // The implementation uses only TokenHandler functions and does not implement a
// full-fledged state-based Parser for it. // full-fledged state-based Parser for it.
package parsekit_test package examples
import ( import (
"fmt" "fmt"
@ -10,8 +10,8 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
func Example_dutchPostcodeUsingMatcher() { func Example_dutchPostcodeUsingTokenizer() {
parser := createPostcodeMatcher() parser := createPostcodeTokenizer()
for i, input := range []string{ for i, input := range []string{
"1234 AB", "1234 AB",
@ -24,18 +24,22 @@ func Example_dutchPostcodeUsingMatcher() {
"", "",
"\xcd2222AB", "\xcd2222AB",
} { } {
output, err := parser.Execute(input) result, err := parser.Execute(input)
if err != nil { if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full()) fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err.Full())
} else { } else {
fmt.Printf("[%d] Input: %q Output: %s\n", i, input, output) fmt.Printf("[%d] Input: %q Output: %s Tokens:", i, input, result)
for _, t := range result.Tokens() {
fmt.Printf(" %s(%s)", t.Type, t.Value)
}
fmt.Printf("\n")
} }
} }
// Output: // Output:
// [0] Input: "1234 AB" Output: 1234 AB // [0] Input: "1234 AB" Output: 1234 AB Tokens: PCD(1234) PCL(AB)
// [1] Input: "2233Ab" Output: 2233 AB // [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
// [2] Input: "1001\t\tab" Output: 1001 AB // [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
// [3] Input: "1818ab" Output: 1818 AB // [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file // [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file // [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file // [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
@ -47,9 +51,9 @@ func Example_dutchPostcodeUsingMatcher() {
// Implementation of the parser // Implementation of the parser
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
func createPostcodeMatcher() *parsekit.Matcher { func createPostcodeTokenizer() *parsekit.Tokenizer {
// Easy access to the parsekit definitions. // Easy access to the parsekit definitions.
c, a, m := parsekit.C, parsekit.A, parsekit.M C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T
// TokenHandler functions are created and combined to satisfy these rules: // TokenHandler functions are created and combined to satisfy these rules:
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX). // - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
@ -57,14 +61,14 @@ func createPostcodeMatcher() *parsekit.Matcher {
// - A space between letters and digits is optional. // - A space between letters and digits is optional.
// - It is good form to write the letters in upper case. // - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters. // - It is good form to use a single space between digits and letters.
digitNotZero := c.Except(a.Rune('0'), a.Digit) digitNotZero := C.Except(A.Rune('0'), A.Digit)
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit)) pcDigits := C.Seq(digitNotZero, C.Rep(3, A.Digit))
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper) pcLetter := C.Any(A.ASCIILower, A.ASCIIUpper)
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter)) pcLetters := M.ToUpper(C.Seq(pcLetter, pcLetter))
space := m.Replace(c.Opt(a.Whitespace), " ") space := M.Replace(C.Opt(A.Whitespace), " ")
postcode := c.Seq(pcDigits, space, pcLetters, a.EndOfFile) postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
// Create a Matcher that wraps the 'postcode' TokenHandler and allows // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
// us to match some input against that handler. // us to match some input against that handler.
return parsekit.NewMatcher(postcode, "a Dutch postcode") return parsekit.NewTokenizer(postcode, "a Dutch postcode")
} }

View File

@ -13,7 +13,7 @@
// like this, then also take a look at the helloSingleState example, which does // like this, then also take a look at the helloSingleState example, which does
// the same thing as this version, only more concise. // the same thing as this version, only more concise.
package parsekit_test package examples
import ( import (
"fmt" "fmt"
@ -131,6 +131,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
// different route was taken to implement a more friendly 'end of greeting' // different route was taken to implement a more friendly 'end of greeting'
// error message. // error message.
func (h *helloparser1) end(p *parsekit.ParseAPI) { func (h *helloparser1) end(p *parsekit.ParseAPI) {
var a = parsekit.A
if !p.On(a.EndOfFile).Stay() { if !p.On(a.EndOfFile).Stay() {
p.Expects("end of greeting") p.Expects("end of greeting")
p.UnexpectedInput() p.UnexpectedInput()

View File

@ -5,7 +5,7 @@
// not implement a full-fledged state-based Parser for it. If you want to see the // not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implementated using a Parser, take a look at the // same kind of functionality, implementated using a Parser, take a look at the
// other hello examples. // other hello examples.
package parsekit_test package examples
import ( import (
"fmt" "fmt"
@ -13,8 +13,8 @@ import (
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
) )
func Example_helloWorldUsingMatcher() { func Example_helloWorldUsingTokenizer() {
parser := createHelloMatcher() parser := createHelloTokenizer()
for i, input := range []string{ for i, input := range []string{
"Hello, world!", "Hello, world!",
@ -46,7 +46,7 @@ func Example_helloWorldUsingMatcher() {
// Implementation of the parser // Implementation of the parser
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
func createHelloMatcher() *parsekit.Matcher { func createHelloTokenizer() *parsekit.Tokenizer {
// Easy access to parsekit definition. // Easy access to parsekit definition.
c, a, m := parsekit.C, parsekit.A, parsekit.M c, a, m := parsekit.C, parsekit.A, parsekit.M
@ -59,7 +59,7 @@ func createHelloMatcher() *parsekit.Matcher {
name := c.OneOrMore(c.Not(a.Excl)) name := c.OneOrMore(c.Not(a.Excl))
greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile) greeting := c.Seq(m.Drop(hello), m.Drop(separator), name, m.Drop(a.Excl), a.EndOfFile)
// Create a Matcher that wraps the 'greeting' TokenHandler and allows // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler. // us to match some input against that handler.
return parsekit.NewMatcher(greeting, "a friendly greeting") return parsekit.NewTokenizer(greeting, "a friendly greeting")
} }

View File

@ -11,7 +11,7 @@
// is that this parser reports errors much more fine-grained. This might or // is that this parser reports errors much more fine-grained. This might or
// might not be useful for your specific use case. // might not be useful for your specific use case.
package parsekit_test package examples
import ( import (
"fmt" "fmt"

5
examples/examples.go Normal file
View File

@ -0,0 +1,5 @@
// Package examples contains various examples for the parsekit module.
// These examples have been moved into their own package, because they
// are quite numerous and quite big. Too big in my opinion to make them
// all available from within the parsekit package godocs.
package examples

View File

@ -5,7 +5,7 @@
// for []string. We add a ParseHandler method directly to that type // for []string. We add a ParseHandler method directly to that type
// and let the parsing code fill the slice with strings during parsing. // and let the parsing code fill the slice with strings during parsing.
package parsekit_test package examples
import ( import (
"fmt" "fmt"

View File

@ -1,94 +0,0 @@
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
func ExampleError() {
err := &parsekit.Error{
Message: "it broke down",
Line: 10,
Column: 42,
}
fmt.Println(err.Error())
fmt.Printf("%s\n", err)
fmt.Println(err.Full())
// Output:
// it broke down
// it broke down
// it broke down at line 10, column 42
}
func ExampleError_Error() {
err := &parsekit.Error{
Message: "it broke down",
Line: 10,
Column: 42,
}
fmt.Println(err.Error())
fmt.Printf("%s\n", err)
// Output:
// it broke down
// it broke down
}
func ExampleError_Full() {
err := &parsekit.Error{
Message: "it broke down",
Line: 10,
Column: 42,
}
fmt.Println(err.Full())
// Output:
// it broke down at line 10, column 42
}
func ExampleMatchAnyRune_usingAcceptedRunes() {
// Easy access to the parsekit definitions.
a := parsekit.A
matches := []string{}
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(a.AnyRune).Accept() {
matches = append(matches, p.Result().String())
}
p.ExpectEndOfFile()
})
err := parser.Execute("¡Any will dö!")
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
// Output:
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}
func ExampleMatchAnyRune_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := parsekit.C, parsekit.A, parsekit.T
var tokens []*parsekit.Token
var accepted string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
tokens = p.Result().Tokens()
accepted = p.Result().String()
}
p.ExpectEndOfFile()
})
parser.Execute("¡Any will dö!")
fmt.Printf("Runes accepted: %q\n", accepted)
fmt.Printf("Token values: ")
for _, t := range tokens {
fmt.Printf("%c ", t.Value)
}
// Output:
// Runes accepted: "¡Any will dö!"
// Token values: ¡ A n y w i l l d ö !
}

View File

@ -2,7 +2,7 @@ package parsekit
import ( import (
"fmt" "fmt"
"runtime" "io"
"strings" "strings"
) )
@ -12,7 +12,7 @@ type ParseAPI struct {
tokenAPI *TokenAPI // the input reader tokenAPI *TokenAPI // the input reader
loopCheck map[string]bool // used for parser loop detection loopCheck map[string]bool // used for parser loop detection
expecting string // a description of what the current state expects to find (see Expects()) expecting string // a description of what the current state expects to find (see Expects())
result *Result // TokenHandler result, as received from On(...).Accept() result *TokenResult // Last TokenHandler result as retrieved by On(...).Accept()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
} }
@ -29,17 +29,20 @@ func (p *ParseAPI) panicWhenStoppedOrInError() {
return return
} }
called, _ := p.getCaller(1) called, _ := getCaller(1)
parts := strings.Split(called, ".") parts := strings.Split(called, ".")
calledShort := parts[len(parts)-1] calledShort := parts[len(parts)-1]
caller, filepos := p.getCaller(2) _, filepos := getCaller(2)
after := "Error()" after := "Error()"
if p.stopped { if p.stopped {
after = "Stop()" after = "Stop()"
} }
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after)) panic(fmt.Sprintf(
"parsekit.ParseAPI.%s(): Illegal call to %s() at %s: "+
"no calls allowed after ParseAPI.%s",
calledShort, calledShort, filepos, after))
} }
func (p *ParseAPI) isStoppedOrInError() bool { func (p *ParseAPI) isStoppedOrInError() bool {
@ -51,18 +54,261 @@ func (p *ParseAPI) initLoopCheck() {
} }
func (p *ParseAPI) checkForLoops() { func (p *ParseAPI) checkForLoops() {
caller, filepos := p.getCaller(2) _, filepos := getCaller(2)
if _, ok := p.loopCheck[filepos]; ok { if _, ok := p.loopCheck[filepos]; ok {
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos)) panic(fmt.Sprintf("parsekit.ParseAPI: Loop detected in parser at %s", filepos))
} }
p.loopCheck[filepos] = true p.loopCheck[filepos] = true
} }
// TODO delete this one // On checks if the input at the current cursor position matches the provided
func (p *ParseAPI) getCaller(depth int) (string, string) { // TokenHandler. On must be chained with another method that tells the parser
// No error handling, because we call this method ourselves with safe depth values. // what action to perform when a match was found:
pc, file, line, _ := runtime.Caller(depth + 1) //
filepos := fmt.Sprintf("%s:%d", file, line) // 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
caller := runtime.FuncForPC(pc) //
return caller.Name(), filepos // 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer.
//
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
//
// So an example chain could look like this:
//
// p.On(parsekit.A.Whitespace).Skip()
//
// The chain as a whole returns a boolean that indicates whether or not at match
// was found. When no match was found, false is returned and Skip() and Accept()
// will have no effect. Because of this, typical use of an On() chain is as
// expression for a conditional statement (if, switch/case, for). E.g.:
//
// // Skip multiple exclamation marks.
// for p.On(parsekit.A.Excl).Skip() { }
//
// // Fork a route based on the input.
// switch {
// case p.On(parsekit.A.Excl).Stay()
// p.RouteTo(stateHandlerA)
// case p.On(parsekit.A.Colon).Stay():
// p.RouteTo(stateHandlerB)
// default:
// p.RouteTo(stateHandlerC)
// }
//
// // When there's a "hi" on input, then say hello.
// if p.On(parsekit.C.Str("hi")).Accept() {
// fmt.Println("Hello!")
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
	p.panicWhenStoppedOrInError()
	p.checkForLoops()
	if tokenHandler == nil {
		_, filepos := getCaller(1)
		panic(fmt.Sprintf(
			"parsekit.ParseAPI.On(): On() called with nil "+
				"tokenHandler argument at %s", filepos))
	}
	// Clear any previous result and run the handler against a fork of the
	// TokenAPI, so that no input is actually consumed until a chained
	// action (Accept/Skip) decides to do so.
	p.result = nil
	p.tokenAPI.result = newTokenResult()
	fork := p.tokenAPI.Fork()
	ok := tokenHandler(fork)
	return &ParseAPIOnAction{
		parseAPI: p,
		tokenAPI: fork,
		ok:       ok,
	}
}
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
// The On() method will return an initialized struct of this type.
type ParseAPIOnAction struct {
	parseAPI *ParseAPI // the ParseAPI that started the chain via On()
	tokenAPI *TokenAPI // the forked TokenAPI that the TokenHandler ran against
	ok       bool      // whether the TokenHandler reported a match
}
// Accept tells the parser to move the cursor past a match that was found,
// and to make the TokenResult from the TokenAPI available in the ParseAPI
// through the Result() method.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Accept() bool {
	if a.ok {
		// Merge the forked TokenAPI state back into its root, expose the
		// merged result to the ParseAPI, then reset token and reader state
		// so the next On() call starts clean.
		a.tokenAPI.Merge()
		a.parseAPI.result = a.tokenAPI.root.result
		a.flushTokenAPI()
		a.flushReader()
	}
	return a.ok
}
// Skip tells the parser to move the cursor past a match that was found,
// without making the results available through the ParseAPI.
//
// Note that functionally, you could call Accept() just as well, simply
// ignoring the results. However, the Skip() call is a bit more efficient
// than the Accept() call and (more important if you ask me) the code
// expresses more clearly that your intent is to skip the match.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Skip() bool {
	if a.ok {
		// Only move the root cursor forward; the matched runes and any
		// tokens collected by the fork are discarded.
		a.tokenAPI.root.cursor = a.tokenAPI.cursor
		a.parseAPI.result = nil
		a.flushTokenAPI()
		a.flushReader()
	}
	return a.ok
}
// Stay tells the parser to not move the cursor after finding a match.
//
// A typical use of Stay() is to let one ParseHandler detect the start
// of some kind of token, but without moving the read cursor forward.
// When a match is found, it hands off control to another ParseHandler
// to take care of the actual token parsing.
//
// Returns true in case a match was found, false otherwise.
func (a *ParseAPIOnAction) Stay() bool {
	if a.ok {
		// Discard the fork's results; the root cursor stays where it was.
		a.parseAPI.result = nil
		a.flushTokenAPI()
	}
	return a.ok
}
// flushTokenAPI resets the root TokenAPI, making it ready for the next
// On() call: pending results are cleared and child forks are detached.
func (a *ParseAPIOnAction) flushTokenAPI() {
	a.tokenAPI.root.result = newTokenResult()
	a.tokenAPI.root.detachChilds()
}
// flushReader flushes input that was consumed so far from the reader buffer.
// Since flushing moves the input position forward, the parser's loop check
// administration is reset along with it.
func (a *ParseAPIOnAction) flushReader() {
	if a.tokenAPI.offset > 0 {
		a.tokenAPI.root.reader.flush(a.tokenAPI.offset)
		a.tokenAPI.root.offset = 0
		a.parseAPI.initLoopCheck()
	}
}
// Result returns a TokenResult struct, containing results as produced by the
// last ParseAPI.On().Accept() call.
//
// Calling this method without a prior successful Accept() is a bug in the
// ParseHandler implementation and results in a panic.
func (p *ParseAPI) Result() *TokenResult {
	if p.result == nil {
		_, filepos := getCaller(1)
		panic(fmt.Sprintf(
			"parsekit.ParseAPI.TokenResult(): TokenResult() called at %s without "+
				"calling ParseAPI.Accept() on beforehand", filepos))
	}
	return p.result
}
// Handle is used to execute other ParseHandler functions from within your
// ParseHandler function.
//
// The boolean return value is true when the parser can still continue.
// It will be false when either an error was set (using ParseAPI.Error()),
// or the parser was stopped (using ParseAPI.Stop()).
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
	p.panicWhenStoppedOrInError()
	p.panicWhenParseHandlerNil(parseHandler)
	parseHandler(p)
	// Report back whether the delegated handler left the parser runnable.
	return !p.isStoppedOrInError()
}
// panicWhenParseHandlerNil guards Handle() against a nil ParseHandler
// argument, which would indicate a bug in the calling parser code.
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
	if parseHandler != nil {
		return
	}
	_, filepos := getCaller(2)
	panic(fmt.Sprintf("parsekit.ParseAPI.Handle(): Handle() called with nil input at %s", filepos))
}
// Expects is used to let a ParseHandler function describe what input it is
// expecting. This expectation is used in error messages to provide some
// context to them.
//
// When defining an expectation inside a ParseHandler, you do not need to
// handle unexpected input yourself. When the end of the parser is reached
// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(),
// an automatic error will be emitted using ParseAPI.UnexpectedInput().
func (p *ParseAPI) Expects(description string) {
	p.panicWhenStoppedOrInError()
	p.expecting = description
}
// Stop is used by the parser implementation to tell the ParseAPI that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first (and
// without running into an error), the Parser.Execute() will call
// ParserAPI.ExpectEndOfFile() to check if the end of the file was reached.
// If not, then things will end in an UnexpectedError().
// Even though this fallback mechanism will work in a lot of cases, try to make
// your parser explicit about things and call Stop() actively yourself.
//
// After stopping, no more calls to ParseAPI methods are allowed.
// Calling a method in this state will result in a panic.
func (p *ParseAPI) Stop() {
	p.stopped = true
}
// Error sets the error message in the ParseAPI, using fmt.Sprintf-style
// formatting. The input cursor position is recorded along with the message.
//
// After setting an error, no more calls to ParseAPI methods are allowed.
// Calling a method in this state will result in a panic.
func (p *ParseAPI) Error(format string, args ...interface{}) {
	// No call to p.panicWhenStoppedOrInError(), to allow a parser to
	// set a different error message when needed.
	message := fmt.Sprintf(format, args...)
	p.err = &Error{message, p.tokenAPI.Cursor()}
}
// ExpectEndOfFile checks whether the end of the input was reached.
//
// If so, the parsing process is ended successfully through ParseAPI.Stop().
// If not, unexpected input is flagged through ParseAPI.UnexpectedInput(),
// using "end of file" as the expectation.
func (p *ParseAPI) ExpectEndOfFile() {
	p.panicWhenStoppedOrInError()
	if !p.On(A.EndOfFile).Stay() {
		p.Expects("end of file")
		p.UnexpectedInput()
		return
	}
	p.Stop()
}
// UnexpectedInput is used to set an error that tells the user that some
// unexpected input was encountered.
//
// It derives an appropriate error message for three distinct situations:
// 1) input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an invalid UTF8 character on the input.
//
// The parser implementation can provide some feedback for this error by
// calling ParseAPI.Expects() to set the expectation. When set, the
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() {
	p.panicWhenStoppedOrInError()
	r, err := p.tokenAPI.NextRune()
	if err == nil {
		p.Error("unexpected character %q%s", r, fmtExpects(p))
	} else if err == io.EOF {
		p.Error("unexpected end of file%s", fmtExpects(p))
	} else {
		p.Error("unexpected error '%s'%s", err, fmtExpects(p))
	}
}
// fmtExpects formats the parser's current expectation (as set through
// ParseAPI.Expects()) for inclusion in an error message. It returns an
// empty string when no expectation was set.
func fmtExpects(p *ParseAPI) string {
	if p.expecting == "" {
		return ""
	}
	return fmt.Sprintf(" (expected %s)", p.expecting)
} }

View File

@ -1,9 +0,0 @@
package parsekit
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the ParseHandler uses to interact with the parser.
type ParseHandler func(*ParseAPI)

View File

@ -1,37 +0,0 @@
package parsekit
import (
"fmt"
)
// Error is used as the error type when parsing errors occur.
// The error includes some context information to allow for useful
// error messages to the user.
type Error struct {
Message string
Line int
Column int
}
func (err *Error) Error() string {
return err.Message
}
// Full returns the current error message, including information about
// the position in the input where the error occurred.
func (err *Error) Full() string {
if err.Line == 0 {
return fmt.Sprintf("%s at start of file", err)
} else {
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
}
}
// Error sets the error message in the parser API. This error message
// will eventually be returned by the Parser.Execute() method.
func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column}
}

View File

@ -1,128 +0,0 @@
package parsekit
import "fmt"
// On checks if the input at the current cursor position matches the provided
// TokenHandler. On must be chained with another method that tells the parser
// what action to perform when a match was found:
//
// 1) On(...).Skip() - Only move cursor forward, ignore the matched runes.
//
// 2) On(...).Accept() - Move cursor forward, add runes to parsers's string buffer.
//
// 3) On(...).Stay() - Do nothing, the cursor stays at the same position.
//
// So an example chain could look like this:
//
// p.On(parsekit.A.Whitespace).Skip()
//
// The chain as a whole returns a boolean that indicates whether or not at match
// was found. When no match was found, false is returned and Skip() and Accept()
// will have no effect. Because of this, typical use of an On() chain is as
// expression for a conditional statement (if, switch/case, for). E.g.:
//
// // Skip multiple exclamation marks.
// for p.On(parsekit.A.Excl).Skip() { }
//
// // Fork a route based on the input.
// switch {
// case p.On(parsekit.A.Excl).Stay()
// p.RouteTo(stateHandlerA)
// case p.On(parsekit.A.Colon).Stay():
// p.RouteTo(stateHandlerB)
// default:
// p.RouteTo(stateHandlerC)
// }
//
// // When there's a "hi" on input, then say hello.
// if p.On(parsekit.C.Str("hi")).Accept() {
// fmt.Println("Hello!")
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
p.panicWhenStoppedOrInError()
p.checkForLoops()
if tokenHandler == nil {
panic("ParseHandler bug: tokenHandler argument for On() is nil")
}
p.result = nil
p.tokenAPI.result = NewResult()
fork := p.tokenAPI.Fork()
ok := tokenHandler(fork)
return &ParseAPIOnAction{
parseAPI: p,
tokenAPI: fork,
ok: ok,
}
}
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
// The On() method will return an initialized struct of this type.
type ParseAPIOnAction struct {
parseAPI *ParseAPI
tokenAPI *TokenAPI
ok bool
}
// Accept tells the parser to move the cursor past a match that was found,
// and to make the TokenHandler results available in the ParseAPI through
// the Result() method.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Accept() bool {
if a.ok {
a.tokenAPI.Merge()
a.parseAPI.result = a.tokenAPI.root.result
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
if a.tokenAPI.offset > 0 {
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
// Skip tells the parser to move the cursor past a match that was found,
// without making the results available through the ParseAPI.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Skip() bool {
if a.ok {
a.tokenAPI.root.cursor = a.tokenAPI.cursor
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
if a.tokenAPI.offset > 0 {
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
// Stay tells the parser to not move the cursor after finding a match.
// Returns true in case a match was found, false otherwise.
func (a *ParseAPIOnAction) Stay() bool {
if a.ok {
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
}
return a.ok
}
// Result returns a Result struct, containing results as produced by the
// last ParseAPI.On() call.
func (p *ParseAPI) Result() *Result {
result := p.result
if p.result == nil {
caller, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.ParseAPI.Result(): Result() called without calling "+
"ParseAPI.Accept() on beforehand to make the result available "+
"from %s at %s", caller, filepos))
}
return result
}

View File

@ -1,99 +0,0 @@
package parsekit
import (
"fmt"
"io"
)
// Handle is used to execute other ParseHandler functions from within your
// ParseHandler function.
//
// The boolean return value is true when the parser can still continue.
// It will be false when either an error was set (using ParseAPI.Error()),
// or the parser was stopped (using ParseAPI.Stop()).
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
p.panicWhenStoppedOrInError()
p.panicWhenParseHandlerNil(parseHandler)
parseHandler(p)
return !p.isStoppedOrInError()
}
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
if parseHandler == nil {
caller, filepos := p.getCaller(2)
panic(fmt.Sprintf("ParseAPI.Handle() called with nil input from %s at %s", caller, filepos))
}
}
// Expects is used to let a ParseHandler function describe what input it is
// expecting. This expectation is used in error messages to provide some
// context to them.
//
// When defining an expectation inside a ParseHandler, you do not need to
// handle unexpected input yourself. When the end of the parser is reached
// without stopping it using ParseAPI.Stop() or ParseAPI.ExpectEndOfFile(),
// an automatic error will be emitted using ParseAPI.UnexpectedInput().
func (p *ParseAPI) Expects(description string) {
p.panicWhenStoppedOrInError()
p.expecting = description
}
// Stop is used by the parser impementation to tell the API that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first, the
// Parser.Execute() will assume that something went wrong and calls
// ParserAPI.UnexpectedInput() to report an error about this.
//
// The parser implementation can define what was being expected, by
// providing a description to ParseAPI.Expecting().
func (p *ParseAPI) Stop() {
p.stopped = true
}
// ExpectEndOfFile can be used to check if the input is at end of file.
//
// When it finds that the end of the file was indeed reached, then the
// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected
// input is reported through ParseAPI.UnexpectedInput() with "end of file"
// as the expectation.
func (p *ParseAPI) ExpectEndOfFile() {
p.panicWhenStoppedOrInError()
if p.On(A.EndOfFile).Stay() {
p.Stop()
} else {
p.Expects("end of file")
p.UnexpectedInput()
}
}
// UnexpectedInput is used to set an error that tells the user that some
// unexpected input was encountered.
//
// It can automatically produce an error message for a couple of situations:
// 1) input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an invalid UTF8 character on the input.
//
// The parser implementation can provide some feedback for this error by
// calling ParseAPI.Expects() to set the expectation. When set, the
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() {
p.panicWhenStoppedOrInError()
r, err := p.tokenAPI.NextRune()
switch {
case err == nil:
p.Error("unexpected character %q%s", r, fmtExpects(p))
case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(p))
default:
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
}
}
func fmtExpects(p *ParseAPI) string {
if p.expecting == "" {
return ""
}
return fmt.Sprintf(" (expected %s)", p.expecting)
}

View File

@ -1,106 +0,0 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
func TestGivenNilTokenHandler_WhenCallingOn_ParsekitPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.On(nil)
})
RunPanicTest(t, PanicTest{
func() { p.Execute("") },
`ParseHandler bug: tokenHandler argument for On\(\) is nil`})
}
func TestGivenStoppedParser_WhenCallingHandle_ParsekitPanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Stop()
p.Handle(otherHandler)
})
RunPanicTest(t, PanicTest{
func() { p.Execute("") },
`Illegal call to ParseAPI.Handle\(\) from .*ParsekitPanics.func.* at ` +
`.*/parsehandler_test.go:\d+: no calls allowed after ParseAPI.Stop\(\)`})
}
func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Error("It ends here")
p.Handle(otherHandler)
})
RunPanicTest(t, PanicTest{
func() { p.Execute("") },
`Illegal call to ParseAPI\.Handle\(\) from .*ParsekitPanics\.func2 at ` +
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
}
type parserWithLoop struct {
loopCounter int
}
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.second)
}
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.third)
}
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
if l.loopCounter++; l.loopCounter > 100 {
p.Error("Loop not detected by parsekit")
return
}
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.first)
}
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
looper := &parserWithLoop{}
parser := parsekit.NewParser(looper.first)
RunPanicTest(t, PanicTest{
func() { parser.Execute("Het houdt niet op, niet vanzelf") },
`Loop detected in parser in .*\(\*parserWithLoop\).second at .*/parsehandler_test\.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
// that I could not easily spot in my code. It sounded so logical:
// I want to get chunks of 5 chars from the input, so I simply loop on:
//
// p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ---) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
// p.On(c.MinMax(1, 5, a.AnyRune))
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(c.Max(5, a.AnyRune)).Accept() {
}
p.Stop()
})
RunPanicTest(t, PanicTest{
func() { parser.Execute("This will end soon") },
`Loop detected in parser in .*ParserPanics.* at .*/parsehandler_test.go:\d+`})
}
func TestGivenNullHandler_HandlePanics(t *testing.T) {
parser := parsekit.NewParser(nil)
RunPanicTest(t, PanicTest{
func() { parser.Execute("") },
`ParseAPI.Handle\(\) called with nil input from .*\(\*Parser\).Execute at .*/parsekit\.go:\d+`})
}

View File

@ -1,78 +0,0 @@
package parsekit
import (
"strings"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
startHandler ParseHandler // the function that handles the very first state
}
// NewParser instantiates a new Parser.
//
// The Parser is a state machine-style recursive descent parser, in which
// ParseHandler functions are used to move the state machine forward during
// parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
return &Parser{startHandler: startHandler}
}
// Execute starts the parser for the provided input.
// When an error occurs during parsing, then this error is returned. Nil otherwise.
func (p *Parser) Execute(input string) *Error {
api := &ParseAPI{
tokenAPI: NewTokenAPI(strings.NewReader(input)),
loopCheck: map[string]bool{},
}
api.Handle(p.startHandler)
if !api.stopped && api.err == nil {
api.UnexpectedInput()
}
return api.err
}
// Matcher is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Matcher can be instantiated using the parsekit.NewMatcher()
// method.
// TODO Rename to Tokenizer
type Matcher struct {
parser *Parser
result *Result
}
// NewMatcher instantiates a new Matcher.
//
// This is a simple wrapper around a TokenHandler function. It can be used to
// match an input string against that TokenHandler function and retrieve the
// results in a straight forward way.
//
// The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler.
// TODO Rename to NewTokenizer, and make matcher Tokeninzer, also see if we can use a Reader straight away, no ParseAPI.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
matcher := &Matcher{}
matcher.parser = NewParser(func(p *ParseAPI) {
if p.On(tokenHandler).Accept() {
matcher.result = p.Result()
p.Stop()
} else {
p.Expects(expects)
p.UnexpectedInput()
}
})
return matcher
}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the TokenHandler's results. When an error occurred during parsing,
// the error will be set, nil otherwise.
func (m *Matcher) Execute(input string) (*Result, *Error) {
err := m.parser.Execute(input)
return m.result, err
}

View File

@ -1,101 +0,0 @@
package parsekit_test
// This file only provides building blocks for writing tests.
// No actual tests belong in this file.
import (
"regexp"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
// Easy access to the parsekit definitions.
var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T
type TokenHandlerTest struct {
Input string
TokenHandler parsekit.TokenHandler
MustMatch bool
Expected string
}
func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
for _, test := range testSet {
RunTokenHandlerTest(t, test)
}
}
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output := result.String(); output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.Input)
}
}
}
type TokenMakerTest struct {
Input string
TokenHandler parsekit.TokenHandler
Expected []parsekit.Token
}
func RunTokenMakerTest(t *testing.T, test TokenMakerTest) {
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {
if len(result.Tokens()) != len(test.Expected) {
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
}
for i, expected := range test.Expected {
actual := result.Token(i)
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if string(expected.Runes) != string(actual.Runes) {
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}
}
}
}
func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) {
for _, test := range testSet {
RunTokenMakerTest(t, test)
}
}
type PanicTest struct {
function func()
expected string
}
func RunPanicTest(t *testing.T, p PanicTest) {
defer func() {
if r := recover(); r != nil {
if !regexp.MustCompile(p.expected).MatchString(r.(string)) {
t.Errorf("Function did panic, but unexpected panic message received:\nexpected: %q\nactual: %q\n", p.expected, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", p.expected)
}
}()
p.function()
}
func RunPanicTests(t *testing.T, testSet []PanicTest) {
for _, test := range testSet {
RunPanicTest(t, test)
}
}

64
parser.go Normal file
View File

@ -0,0 +1,64 @@
package parsekit
import (
"fmt"
"runtime"
"strings"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
	startHandler ParseHandler // the function that handles the very first state
}
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the ParseHandler uses to interact with the parser.
type ParseHandler func(*ParseAPI)
// NewParser instantiates a new Parser.
//
// The Parser is a state machine-style recursive descent parser, in which
// ParseHandler functions are used to drive the state machine forward during
// parsing. Parsers of this style are a typical fit for programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// To parse input data, use the method Parser.Execute().
//
// Passing a nil startHandler is a bug in the calling code and will panic.
func NewParser(startHandler ParseHandler) *Parser {
	if startHandler != nil {
		return &Parser{startHandler: startHandler}
	}
	_, filepos := getCaller(1)
	panic(fmt.Sprintf("parsekit.NewParser(): NewParser() called with nil input at %s", filepos))
}
// Execute starts the parser for the provided input.
// When an error occurs during parsing, then this error is returned. Nil otherwise.
func (p *Parser) Execute(input string) *Error {
	api := &ParseAPI{
		tokenAPI:  NewTokenAPI(strings.NewReader(input)),
		loopCheck: map[string]bool{},
	}
	if api.Handle(p.startHandler) {
		// Handle indicated that parsing could still continue, meaning that there
		// was no error and that the parsing has not actively been Stop()-ed.
		// However, at this point, the parsing really should have stopped.
		// We'll see what happens when we tell the parser that EOF was expected.
		// This might work if we're indeed at EOF. Otherwise, an error will be
		// generated.
		api.ExpectEndOfFile()
	}
	return api.err
}
func getCaller(depth int) (string, string) {
// No error handling, because we call this method ourselves with safe depth values.
pc, file, line, _ := runtime.Caller(depth + 1)
filepos := fmt.Sprintf("%s:%d", file, line)
caller := runtime.FuncForPC(pc)
return caller.Name(), filepos
}

327
parser_test.go Normal file
View File

@ -0,0 +1,327 @@
package parsekit_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
func ExampleParser_usingAcceptedRunes() {
// Easy access to the parsekit definitions.
a := parsekit.A
matches := []string{}
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(a.AnyRune).Accept() {
matches = append(matches, p.Result().String())
}
p.ExpectEndOfFile()
})
err := parser.Execute("¡Any will dö!")
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
// Output:
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}
func ExampleParser_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := parsekit.C, parsekit.A, parsekit.T
var tokens []*parsekit.Token
var accepted string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
tokens = p.Result().Tokens()
accepted = p.Result().String()
}
p.ExpectEndOfFile()
})
parser.Execute("¡Any will dö!")
fmt.Printf("Runes accepted: %q\n", accepted)
fmt.Printf("Token values: ")
for _, t := range tokens {
fmt.Printf("%c ", t.Value)
}
// Output:
// Runes accepted: "¡Any will dö!"
// Token values: ¡ A n y w i l l d ö !
}
func ExampleParseAPI_UnexpectedInput() {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expects("a thing")
p.UnexpectedInput()
})
err := parser.Execute("Whatever, this parser will never be happy...")
fmt.Println(err.Full())
// Output:
// unexpected character 'W' (expected a thing) at start of file
}
func ExampleParseAPIOnAction_Accept() {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
// When a case-insensitive match on "Yowza!" is found by the
// tokenizer, then Accept() will make the result available
// through ParseAPI.Result()
if p.On(parsekit.A.StrNoCase("Yowza!")).Accept() {
// Result.String() returns a string containing all
// accepted runes that were matched against.
fmt.Println(p.Result().String())
}
})
parser.Execute("YOWZA!")
// Output:
// YOWZA!
}
func ExampleParseAPIOnAction_Skip() {
var result string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for loop := true; loop; {
switch {
case p.On(parsekit.A.Rune('X')).Skip():
// NOOP, skip this rune
case p.On(parsekit.A.AnyRune).Accept():
result += p.Result().String()
default:
loop = false
}
}
})
parser.Execute("HXeXllXoXX, XXwoXrlXXXd!")
fmt.Println(result)
// Output:
// Hello, world!
}
func ExampleParseAPI_Stop() {
C, A := parsekit.C, parsekit.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
for p.On(C.Not(A.Space)).Accept() {
fmt.Printf("%s", p.Result())
}
p.Stop()
})
parser.Execute("Input with spaces")
// Output:
// First word: Input
}
func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
C, A := parsekit.C, parsekit.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("Word: ")
for p.On(C.Not(A.Space)).Accept() {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser.Execute("Troglodyte")
fmt.Printf("Error is nil: %t\n", err == nil)
// Output:
// Word: Troglodyte
// Error is nil: true
}
func ExampleParseAPI_Stop_notCalledButInputPending() {
C, A := parsekit.C, parsekit.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
for p.On(C.Not(A.Space)).Accept() {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser.Execute("Input with spaces")
fmt.Printf("Error: %s\n", err.Full())
// Output:
// First word: Input
// Error: unexpected character ' ' (expected end of file) at line 1, column 6
}
func ExampleParseAPIOnAction_Stay() {
// Definition of a fantasy serial number format.
C, A := parsekit.C, parsekit.A
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
// This handler is able to handle serial numbers.
serialnrHandler := func(p *parsekit.ParseAPI) {
if p.On(serialnr).Accept() {
fmt.Println(p.Result().String())
}
}
// Start could function as a sort of dispatcher, handing over
// control to the correct ParseHandler function, based on the input.
start := func(p *parsekit.ParseAPI) {
if p.On(parsekit.A.Asterisk).Stay() {
p.Handle(serialnrHandler)
return
}
// ... other cases could go here ...
}
parser := parsekit.NewParser(start)
parser.Execute("#XX1234")
parser.Execute("*ay432566")
parser.Execute("*ZD987112")
// Output:
// *ZD987112
}
func TestGivenNullHandler_NewParserPanics(t *testing.T) {
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parsekit.NewParser(nil) },
Regexp: true,
Expect: `parsekit\.NewParser\(\): NewParser\(\) called ` +
`with nil input at /.*/parser_test\.go:\d+`})
}
func TestGivenNullHandler_HandlePanics(t *testing.T) {
brokenParseHandler := func(p *parsekit.ParseAPI) {
p.Handle(nil)
}
parser := parsekit.NewParser(brokenParseHandler)
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Handle\(\) called with nil input ` +
`at /.*/parser_test\.go:\d+`})
}
func TestGivenNilTokenHandler_OnPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.On(nil)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.On\(\): On\(\) called with nil ` +
`tokenHandler argument at /.*/parser_test\.go:\d+`})
}
func TestGivenStoppedParser_HandlePanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Stop()
p.Handle(otherHandler)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Stop\(\)`})
}
func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Error("It ends here")
p.Handle(otherHandler)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
}
func TestGivenParserWithoutCallToAccept_ResultPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Result()
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.TokenResult\(\): TokenResult\(\) called at ` +
`/.*/parser_test.go:\d+ without calling ParseAPI.Accept\(\) on beforehand`})
}
func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
err := p.Execute("")
parsekit.AssertTrue(t, err == nil, "err")
}
func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
err := p.Execute("x")
parsekit.AssertEqual(t, "unexpected character 'x' (expected end of file) at start of file", err.Full(), "err")
}
// parserWithLoop is a helper for the loop detection test below. Its three
// handler methods delegate to one another in a cycle (first -> second ->
// third -> first), each consuming at most one ASCII rune per step.
type parserWithLoop struct {
loopCounter int // number of passes through third(); acts as a safety valve
}
// first is step 1 of the handler cycle: accept one ASCII rune, then
// delegate to second().
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.second)
}
// second is step 2 of the handler cycle: accept one ASCII rune, then
// delegate to third().
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.third)
}
// third is step 3 of the handler cycle: it normally delegates back to
// first(), closing the loop. Should parsekit's own loop detection ever
// fail, the loopCounter bails out after 100 passes so the test fails
// cleanly instead of hanging.
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
if l.loopCounter++; l.loopCounter > 100 {
p.Error("Loop not detected by parsekit")
return
}
p.On(parsekit.A.ASCII).Accept()
p.Handle(l.first)
}
// TestGivenLoopingParserDefinition_ParserPanics verifies that parsekit's
// loop detection kicks in when handlers keep delegating to each other
// without making progress on the input.
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
looper := &parserWithLoop{}
parser := parsekit.NewParser(looper.first)
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("Het houdt niet op, niet vanzelf") },
Regexp: true,
Expect: `parsekit\.ParseAPI: Loop detected in parser at /.*/parser_test.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
// that I could not easily spot in my code. It sounded so logical:
// I want to get chunks of 5 chars from the input, so I simply loop on:
//
//   p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
//   p.On(c.MinMax(1, 5, a.AnyRune))
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
var c, a = parsekit.C, parsekit.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(c.Max(5, a.AnyRune)).Accept() {
}
p.Stop()
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("This will end soon") },
Regexp: true,
Expect: `parsekit\.ParseAPI: Loop detected in parser at .*/parser_test.go:\d+`})
}

View File

@ -7,29 +7,31 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// Reader wraps around an io.Reader and provides buffering to allows us to read // reader wraps around an io.Reader and provides buffering to allows us to read
// the same runes over and over again. This is useful for implementing a parser // the same runes over and over again. This is useful for implementing a parser
// that must be able to do lookahead on the input, returning to the original // that must be able to do lookahead on the input, returning to the original
// input position after finishing that lookahead). // input position after finishing that lookahead).
// //
// To minimze memory use, it is also possible to flush the buffer when there is // To minimze memory use, it is also possible to flush the buffer when there is
// no more need to go back to previously read runes. // no more need to go back to previously read runes.
type Reader struct { //
// The reader is used internally by parsekit.TokenAPI.
type reader struct {
bufio *bufio.Reader // Used for ReadRune() bufio *bufio.Reader // Used for ReadRune()
buffer []rune // Input buffer, holding runes that were read from input buffer []rune // Input buffer, holding runes that were read from input
bufferOffset int // The offset of the buffer, relative to the start of the input bufferOffset int // The offset of the buffer, relative to the start of the input
bufferLen int // Input size, the number of runes in the buffer bufferLen int // Input size, the number of runes in the buffer
} }
// NewReader initializes a new Reader struct, wrapped around the provided io.Reader. // newwReader initializes a new reader struct, wrapped around the provided io.Reader.
func NewReader(r io.Reader) *Reader { func newReader(r io.Reader) *reader {
return &Reader{ return &reader{
bufio: bufio.NewReader(r), bufio: bufio.NewReader(r),
buffer: []rune{}, buffer: []rune{},
} }
} }
// RuneAt reads the rune at the provided rune offset. // runeAt reads the rune at the provided rune offset.
// //
// This offset is relative to the current starting position of the buffer in // This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the // the reader. When starting reading, offset 0 will point at the start of the
@ -43,7 +45,7 @@ func NewReader(r io.Reader) *Reader {
// When reading failed, the rune will be utf8.RuneError. One special read // When reading failed, the rune will be utf8.RuneError. One special read
// fail is actually a normal situation: end of file reached. In that case, // fail is actually a normal situation: end of file reached. In that case,
// the returned error wille be io.EOF. // the returned error wille be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) { func (r *reader) runeAt(offset int) (rune, error) {
// Rune at provided offset is not yet available in the input buffer. // Rune at provided offset is not yet available in the input buffer.
// Read runes until we have enough runes to satisfy the offset. // Read runes until we have enough runes to satisfy the offset.
for r.bufferLen <= offset { for r.bufferLen <= offset {
@ -66,37 +68,11 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
return r.buffer[offset], nil return r.buffer[offset], nil
} }
// RunesAt reads a slice of runes of length 'len', starting from offset 'offset'.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
//
// When an error is encountered during reading (EOF or other error), then the
// error return value will be set. In case of an error, any runes that could be
// successfully read are returned along with the error.
// TODO Do I actually use this interface?
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
if len == 0 {
return r.buffer[0:0], nil
}
end := start + len
_, err := r.RuneAt(end)
if err != nil {
if end > r.bufferLen {
end = r.bufferLen
}
return r.buffer[start:end], err
}
return r.buffer[start:end], nil
}
// Flush deletes the provided number of runes from the start of the // Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt() // reader buffer. After flushing the buffer, offset 0 as used by runeAt()
// will point to the rune that comes after the flushed runes. // will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window. // So what this basically does is turn the Reader into a sliding window.
func (r *Reader) Flush(numberOfRunes int) { func (r *reader) flush(numberOfRunes int) {
if numberOfRunes > r.bufferLen { if numberOfRunes > r.bufferLen {
panic(fmt.Sprintf( panic(fmt.Sprintf(
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+ "parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+

View File

@ -5,14 +5,12 @@ import (
"io" "io"
"strings" "strings"
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit/assert"
) )
func ExampleNewReader() { func ExamplenewReader() {
in := strings.NewReader("Hello, world!") in := strings.NewReader("Hello, world!")
r := NewReader(in) r := newReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.runeAt(i); return r }
fmt.Printf("%c", at(0)) fmt.Printf("%c", at(0))
fmt.Printf("%c", at(12)) fmt.Printf("%c", at(12))
@ -21,114 +19,76 @@ func ExampleNewReader() {
// H! // H!
} }
func ExampleReader_RuneAt() { func TestReader_runeAt(t *testing.T) {
in := strings.NewReader("Hello, world!") in := strings.NewReader("Hello, world!")
r := NewReader(in) r := newReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.runeAt(i); return r }
// It is possible to go back and forth while reading the input. // It is possible to go back and forth while reading the input.
fmt.Printf("%c", at(0)) result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
fmt.Printf("%c", at(12)) AssertEqual(t, "H!wH", result, "result")
fmt.Printf("%c", at(7))
fmt.Printf("%c", at(0))
// Output:
// H!wH
} }
func ExampleReader_RuneAt_endOfFile() { func TestReader_runeAt_endOfFile(t *testing.T) {
in := strings.NewReader("Hello, world!") in := strings.NewReader("Hello, world!")
r := NewReader(in) r := newReader(in)
rn, err := r.RuneAt(13) rn, err := r.runeAt(13)
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result, "result")
rn, err = r.RuneAt(20) rn, err = r.runeAt(20)
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
AssertEqual(t, "'<27>' EOF true", result, "result")
// Output:
// '<27>' EOF true
// '<27>' EOF true
} }
func ExampleReader_RuneAt_invalidRune() { func TestReader_runeAt_invalidRune(t *testing.T) {
in := strings.NewReader("Hello, \xcdworld!") in := strings.NewReader("Hello, \xcdworld!")
r := NewReader(in) r := newReader(in)
at := func(i int) rune { r, _ := r.runeAt(i); return r }
rn, err := r.RuneAt(6) result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
fmt.Printf("%q %t\n", rn, err == nil) AssertEqual(t, " <20>wo", result, "result")
rn, err = r.RuneAt(7)
fmt.Printf("%q %t\n", rn, err == nil)
rn, err = r.RuneAt(8)
fmt.Printf("%q %t\n", rn, err == nil)
rn, err = r.RuneAt(9)
fmt.Printf("%q %t\n", rn, err == nil)
// Output:
// ' ' true
// '<27>' true
// 'w' true
// 'o' true
}
func ExampleReader_RunesAt() {
in := strings.NewReader("Hello, \xcdworld!")
r := NewReader(in)
rs, err := r.RunesAt(4, 6)
fmt.Printf("%q %t\n", string(rs), err == nil)
rs, err = r.RunesAt(4, 0)
fmt.Printf("%q %t\n", string(rs), err == nil)
rs, err = r.RunesAt(8, 100)
fmt.Printf("%q %t\n", string(rs), err == io.EOF)
// Output:
// "o, <20>wo" true
// "" true
// "world!" true
} }
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
in := strings.NewReader("\uFEFFBommetje!") in := strings.NewReader("\uFEFFBommetje!")
r := NewReader(in) r := newReader(in)
b, _ := r.RuneAt(0) b, _ := r.runeAt(0)
o, _ := r.RuneAt(1) o, _ := r.runeAt(1)
m, _ := r.RuneAt(2) m, _ := r.runeAt(2)
bom := fmt.Sprintf("%c%c%c", b, o, m) bom := fmt.Sprintf("%c%c%c", b, o, m)
assert.Equal(t, "Bom", bom, "first three runes") AssertEqual(t, "Bom", bom, "first three runes")
} }
func ExampleReader_Flush() { func TestReader_Flush(t *testing.T) {
in := strings.NewReader("Hello, world!") in := strings.NewReader("Hello, world!")
r := NewReader(in) r := newReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r } at := func(i int) rune { r, _ := r.runeAt(i); return r }
rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w" // Fills the buffer with the first 8 runes on the input: "Hello, w"
fmt.Printf("%c\n", at(7)) result := fmt.Sprintf("%c", at(7))
AssertEqual(t, "w", result, "first read")
// Now flush the first 4 runes from the buffer (dropping "Hell" from it) // Now flush the first 4 runes from the buffer (dropping "Hell" from it)
r.Flush(4) r.flush(4)
// Rune 0 is now pointing at what originally was rune offset 4. // Rune 0 is now pointing at what originally was rune offset 4.
// We can continue reading from there. // We can continue reading from there.
fmt.Printf("%s", string(rb(0, 8))) result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
AssertEqual(t, "o, wor", result, "second read")
// Output:
// w
// o, world
} }
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
in := strings.NewReader("Hello, world!") in := strings.NewReader("Hello, world!")
r := NewReader(in) r := newReader(in)
// Fill buffer with "Hello, worl", the first 11 runes. // Fill buffer with "Hello, worl", the first 11 runes.
r.RuneAt(10) r.runeAt(10)
// However, we flush 12 runes, which exceeds the buffer size. // However, we flush 12 runes, which exceeds the buffer size.
assert.Panic(t, assert.PanicT{ AssertPanic(t, PanicT{
Function: func() { r.Flush(12) }, Function: func() { r.flush(12) },
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)", Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
}) })
} }

View File

@ -5,7 +5,7 @@ import (
"io" "io"
) )
// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and // TokenAPI wraps a parsekit.reader and its purpose is to retrieve input data and
// to report back results. For easy lookahead support, a forking strategy is // to report back results. For easy lookahead support, a forking strategy is
// provided. // provided.
// //
@ -14,54 +14,64 @@ import (
// To retrieve the next rune from the TokenAPI, call the NextRune() method. // To retrieve the next rune from the TokenAPI, call the NextRune() method.
// //
// When the rune is to be accepted as input, call the method Accept(). The rune // When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result buffer of the TokenAPI struct. // is then added to the results of the TokenAPI and the read cursor is moved
// forward. Runes collected this way can later on be retrieved using for
// example the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling // It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic. // NextRune() again. Failing to do so will result in a panic.
// //
// By invoking NextRune() + Accept() multiple times, the result buffer is extended // By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. // with as many runes as needed.
// //
// Next to adding runes to the output, it is also possible to modify the
// already collected runes or to produce lexical Tokens. For all things
// concerning results, take a look at the Result struct, which can be
// accessed though the method Result().
//
// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT: // FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT:
// //
// Sometimes, we must be able to perform a lookahead, which might either // Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be // succeed or fail. In case of a failing lookahead, the state of the
// brought back to the original state, so we can try a different route. // TokenAPI must be brought back to the original state, so we can try
// a different route.
// //
// The way in which this is supported, is by forking a TokenAPI struct by calling // The way in which this is supported, is by forking a TokenAPI struct by
// Fork(). This will return a forked child TokenAPI, with an empty result buffer, // calling method Fork(). This will return a forked child TokenAPI, with
// but using the same input cursor position as the forked parent. // an empty result buffer, but using the same read cursor position as the
// forked parent.
// //
// After forking, the same interface as described for BASIC OPERATION can be // After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then // used to fill the result buffer. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result // Merge() can be called on the forked child to append the child's result
// buffer to the parent's result buffer, and to move the input cursor position // buffer to the parent's result buffer, and to move the read cursor position
// to that of the child. // to that of the child.
// //
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply // When the lookahead was unsuccessful, then the forked child TokenAPI can
// be discarded. The parent TokenAPI was never modified, so it can safely be used // simply be discarded. The parent TokenAPI was never modified, so it can
// as if the lookahead never happened. // safely be used as if the lookahead never happened.
// //
// Note: // Note:
// Many tokenizers/parsers take a different approach on lookaheads by using // Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the input cursor position back and forth, or by putting // peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is // read input back on the input stream. That often leads to code that is
// efficient, however, in my opinion, not very untuitive to read. // efficient, however, in my opinion, not very intuitive to read.
type TokenAPI struct { type TokenAPI struct {
reader *Reader reader *reader
cursor *Cursor // current read cursor position, rel. to the input start cursor *Cursor // current read cursor position, rel. to the input start
offset int // current rune offset rel. to the Reader's sliding window offset int // current rune offset rel. to the Reader's sliding window
result *Result // results as produced by a TokenHandler (runes, Tokens) result *TokenResult // results as produced by a TokenHandler (runes, Tokens)
root *TokenAPI // the root TokenAPI root *TokenAPI // the root TokenAPI
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
} }
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. // NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(r io.Reader) *TokenAPI { func NewTokenAPI(r io.Reader) *TokenAPI {
input := &TokenAPI{ input := &TokenAPI{
reader: NewReader(r), reader: newReader(r),
cursor: &Cursor{}, cursor: &Cursor{},
result: NewResult(), result: newTokenResult(),
} }
input.root = input input.root = input
return input return input
@ -78,13 +88,14 @@ func NewTokenAPI(r io.Reader) *TokenAPI {
// without explicitly accepting, this method will panic. // without explicitly accepting, this method will panic.
func (i *TokenAPI) NextRune() (rune, error) { func (i *TokenAPI) NextRune() (rune, error) {
if i.result.lastRune != nil { if i.result.lastRune != nil {
caller, linepos := getCaller(1) _, linepos := getCaller(1)
panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+ panic(fmt.Sprintf(
"to Accept() from %s at %s", caller, linepos)) "parsekit.TokenAPI.NextRune(): NextRune() called at %s without a "+
"prior call to Accept()", linepos))
} }
i.detachChilds() i.detachChilds()
readRune, err := i.reader.RuneAt(i.offset) readRune, err := i.reader.runeAt(i.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err} i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err return readRune, err
} }
@ -96,24 +107,38 @@ func (i *TokenAPI) NextRune() (rune, error) {
// returned an error. Calling Accept() in such case will result in a panic. // returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() { func (i *TokenAPI) Accept() {
if i.result.lastRune == nil { if i.result.lastRune == nil {
caller, linepos := getCaller(1) _, linepos := getCaller(1)
panic(fmt.Sprintf( panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called without first "+ "parsekit.TokenAPI.Accept(): Accept() called at %s without "+
"calling NextRune() from %s at %s", caller, linepos)) "first calling NextRune()", linepos))
} else if i.result.lastRune.err != nil { } else if i.result.lastRune.err != nil {
caller, linepos := getCaller(1) _, linepos := getCaller(1)
panic(fmt.Sprintf( panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called while the previous "+ "parsekit.TokenAPI.Accept(): Accept() called at %s, but the "+
"call to NextRune() failed from %s at %s", caller, linepos)) "prior call to NextRune() failed", linepos))
} }
i.result.runes = append(i.result.runes, i.result.lastRune.r) i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r)) i.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
i.offset++ i.offset++
i.result.lastRune = nil i.result.lastRune = nil
} }
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and // Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
// read cursor position, but for the rest this is a fresh TokenAPI. // read cursor position, but for the rest this is a fresh TokenAPI.
//
// By forking a TokenAPI, you can freely work with the forked child, without
// affecting the parent TokenAPI. This is for example useful when you must perform
// some form of lookahead.
//
// When such lookahead turned out successful and you want to accept the results
// into the parent TokenAPI, you can call TokenAPIold.Merge() on the forked
// child. This will add the runes in the result buffer to the result buffer of
// the parent. It also updates the read cursor position of the parent to that
// of the child.
//
// When the lookahead failed, or you don't the results as produced by that
// lookahead, the forked child can simply be discarded. You can continue to work
// with the parent TokenAPI as if nothing ever happened.
func (i *TokenAPI) Fork() *TokenAPI { func (i *TokenAPI) Fork() *TokenAPI {
i.detachChilds() i.detachChilds()
@ -125,18 +150,27 @@ func (i *TokenAPI) Fork() *TokenAPI {
root: i.root, root: i.root,
parent: i, parent: i,
} }
child.result = NewResult() child.result = newTokenResult()
*child.cursor = *i.cursor *child.cursor = *i.cursor
i.child = child i.child = child
i.result.lastRune = nil i.result.lastRune = nil
return child return child
} }
// Merge adds the data of the forked child TokenAPI that Merge() is called on to the // Merge appends the Result of a forked child TokenAPI to the Result of its
// data of its parent (results and read cursor position). // parent. The read cursor position of the parent is also updated to that of
// the forked child.
//
// After the merge operation, the child is reset so it can immediately be
// reused for performing another match. This means that all Result data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() { func (i *TokenAPI) Merge() {
if i.parent == nil { if i.parent == nil {
panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI") _, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Merge(): Merge() called at %s "+
"on a non-forked TokenAPI", filepos))
} }
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
@ -145,12 +179,12 @@ func (i *TokenAPI) Merge() {
i.parent.cursor = i.cursor i.parent.cursor = i.cursor
i.detachChilds() i.detachChilds()
i.result = NewResult() i.result = newTokenResult()
} }
// Result returns the result data for the TokenAPI. The returned struct // Result returns the TokenResult data for the TokenAPI. The returned struct
// can be used to retrieve and modify the result data. // can be used to retrieve and to modify result data.
func (i *TokenAPI) Result() *Result { func (i *TokenAPI) Result() *TokenResult {
return i.result return i.result
} }
@ -160,18 +194,6 @@ func (i *TokenAPI) Cursor() Cursor {
return *i.cursor return *i.cursor
} }
// FlushReaderBuffer delegates to the Flush() method of the contained
// parsekit.TokenAPI.Reader. It flushes the provided number of runes from the
// reader cache.
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
if i != i.root {
panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
}
i.detachChilds()
i.reader.Flush(numberOfRunes)
i.offset = 0
}
func (i *TokenAPI) detachChilds() { func (i *TokenAPI) detachChilds() {
if i.child != nil { if i.child != nil {
i.child.detachChildsRecurse() i.child.detachChildsRecurse()

70
tokenapi_example_test.go Normal file
View File

@ -0,0 +1,70 @@
package parsekit_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
)
// ExampleTokenAPI_Fork shows how a custom TokenHandler can use Fork() to
// perform a lookahead without touching the parent TokenAPI: the child's
// collected runes are only merged back into the parent on a full match.
func ExampleTokenAPI_Fork() {
// This custom TokenHandler checks for a sequence of runes: "abcd"
// This is done in 4 steps and only after finishing all steps,
// the TokenHandler will confirm a successful match.
abcdSequence := func(t *parsekit.TokenAPI) bool {
child := t.Fork() // fork, so we won't change parent t
for _, checkRune := range "abcd" {
readRune, err := child.NextRune()
if err != nil || readRune != checkRune {
return false // report mismatch, parent t is left untouched
}
child.Accept() // add rune to child output
}
child.Merge() // we have a match, add resulting output to parent
return true // and report the successful match
}
// Note: a custom TokenHandler is normally not what you need.
// You can make use of the parser/combinator tooling to do things
// a lot simpler. The handler from above can be replaced with:
simpler := parsekit.A.Str("abcd")
result, err := parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcdefgh")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(simpler, "abcd").Execute("abcdefgh")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("abcx")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcdSequence, "abcd").Execute("xyz")
fmt.Println(result, err)
// Output:
// abcd <nil>
// abcd <nil>
// <nil> unexpected character 'a' (expected abcd)
// <nil> unexpected character 'x' (expected abcd)
}
// ExampleTokenAPI_Merge shows that Merge() only transfers the output of the
// forked child it is called on: child2's runes (' ' and 'd') are discarded,
// so only child1's "Hi" ends up in the final result.
func ExampleTokenAPI_Merge() {
tokenHandler := func(t *parsekit.TokenAPI) bool {
child1 := t.Fork()
child1.NextRune() // reads 'H'
child1.Accept()
child1.NextRune() // reads 'i'
child1.Accept()
child2 := child1.Fork()
child2.NextRune() // reads ' '
child2.Accept()
child2.NextRune() // reads 'd'
child2.Accept()
child1.Merge() // We merge child1, which has read 'H' and 'i' only.
return true
}
result, _ := parsekit.NewTokenizer(tokenHandler, "a match").Execute("Hi mister X!")
fmt.Println(result)
// Output:
// Hi
}

View File

@ -1,106 +0,0 @@
package parsekit
import (
"fmt"
)
// Result holds results as produced by a TokenHandler.
type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
}
// runeInfo bundles a rune that was read with the read error (if any), so a
// later Accept() can decide whether the rune may actually be used.
type runeInfo struct {
r rune // the rune that was read
err error // non-nil when reading the rune failed (e.g. io.EOF)
}
// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
Type interface{} // token type, can be any type that a parser author sees fit
Runes []rune // the runes that make up the token
Value interface{} // an optional value of any type
}
// NewResult initializes an empty result struct.
func NewResult() *Result {
return &Result{
runes: []rune{},
tokens: []*Token{},
}
}
// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s interface{}) {
r.ClearRunes()
r.AddRunes(s)
}
// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(s interface{}) {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", s))
}
}
// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
return r.runes
}
// Rune retrieve a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the Result as a string.
func (r *Result) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
r.tokens = []*Token{}
}
// AddToken is used to add a Token to the results.
func (r *Result) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token {
return r.tokens
}
// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
return r.tokens[idx].Value
}

View File

@ -1,27 +0,0 @@
package parsekit
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
// TestSetResult_AcceptsVariousTypesAsInput verifies that SetRunes() accepts
// a string, a []rune and a single rune as input.
// Fix: String() already returns a string, so the redundant string(...)
// conversions around it were removed.
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := mkInput()
i.Result().SetRunes("string")
assert.Equal(t, "string", i.Result().String(), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
assert.Equal(t, "rune slice", i.Result().String(), "i.Result() with rune slice input")
i.Result().SetRunes('X')
assert.Equal(t, "X", i.Result().String(), "i.Result() with rune input")
}
// TestSetResult_PanicsOnUnhandledInput verifies that SetRunes() panics with
// a descriptive message when given a type it does not support (here: int).
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.Result().SetRunes(1234567)
},
Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used",
})
}

View File

@ -1,288 +0,0 @@
package parsekit
import (
"io"
"strings"
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
// TestCallingNextRune_ReturnsNextRune verifies that NextRune() returns the
// first rune of the test input (which starts with "Tes...").
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
assert.Equal(t, 'T', r, "first rune")
}
// TestInputCanAcceptRunesFromReader verifies that repeated NextRune() +
// Accept() pairs collect consecutive input runes into the result.
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
}
// TestCallingNextRuneTwice_Panics verifies that a second NextRune() call
// without an intermediate Accept() panics with a caller-pinpointing message.
func TestCallingNextRuneTwice_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.NextRune()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
`a prior call to Accept\(\) from .*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`,
})
}
// TestCallingAcceptWithoutCallingNextRune_Panics verifies that Accept()
// panics when there is no pending rune from a prior NextRune() call.
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
`first calling NextRune\(\) from .* at /.*:\d+`,
})
}
// TestCallingMergeOnNonForkedChild_Panics verifies that Merge() panics when
// called on a root TokenAPI that was never forked off a parent.
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
// TestCallingNextRuneOnForkedParent_DetachesForkedChild verifies that using
// NextRune() on a parent detaches its forked child: the orphaned child then
// panics on Merge() as if it had never been forked.
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.NextRune()
f.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
// TestCallingForkOnForkedParent_DetachesForkedChild verifies that forking a
// parent a second time detaches the earlier child: the orphaned child then
// panics on Merge() as if it had never been forked.
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.Fork()
f.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
// TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached
// builds a multi-level tree of forks, verifies the resulting parent/child
// links, then reads from the root and verifies that every fork has been
// fully detached (all parent/child pointers nil-ed out).
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
	i := mkInput()
	f1 := i.Fork()
	f2 := f1.Fork()
	f3 := f2.Fork()
	f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
	f5 := f4.Fork()
	// Verify the fork tree layout before touching the root input.
	assert.Equal(t, true, i.parent == nil, "i.parent == nil")
	assert.Equal(t, true, i.child == f1, "i.child == f1")
	assert.Equal(t, true, f1.parent == i, "f1.parent == i")
	assert.Equal(t, true, f1.child == f4, "f1.child == f4")
	assert.Equal(t, true, f2.child == nil, "f2.child == nil")
	assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
	assert.Equal(t, true, f3.child == nil, "f3.child == nil")
	assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
	assert.Equal(t, true, f4.parent == f1, "f4.parent == f1")
	assert.Equal(t, true, f4.child == f5, "f4.child == f5")
	assert.Equal(t, true, f5.parent == f4, "f5.parent == f4")
	assert.Equal(t, true, f5.child == nil, "f5.child == nil")
	i.NextRune()
	// Reading from the root must have detached the whole fork tree.
	assert.Equal(t, true, i.parent == nil, "i.parent == nil")
	assert.Equal(t, true, i.child == nil, "i.child == nil")
	assert.Equal(t, true, f1.parent == nil, "f1.parent == nil")
	assert.Equal(t, true, f1.child == nil, "f1.child == nil")
	assert.Equal(t, true, f2.child == nil, "f2.child == nil")
	assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
	assert.Equal(t, true, f3.child == nil, "f3.child == nil")
	assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
	assert.Equal(t, true, f4.parent == nil, "f4.parent == nil")
	assert.Equal(t, true, f4.child == nil, "f4.child == nil")
	assert.Equal(t, true, f5.parent == nil, "f5.parent == nil")
	assert.Equal(t, true, f5.child == nil, "f5.child == nil")
}
// TestForkingInput_ClearsLastRune checks that Fork() discards a pending
// (read but not yet accepted) rune, so a subsequent Accept() panics.
func TestForkingInput_ClearsLastRune(t *testing.T) {
	assert.Panic(t, assert.PanicT{
		Function: func() {
			i := mkInput()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
			`first calling NextRune\(\) from .* at /.*:\d+`,
	})
}
// TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward checks
// that Accept() clears the pending rune, advances the read offset, stores
// the rune in the reader buffer, and allows a next NextRune() call.
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := mkInput()
	r, _ := i.NextRune()
	assert.Equal(t, 'T', r, "result from 1st call to NextRune()")
	// TODO still (*runeInfo) case needed?
	assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()")
	i.Accept()
	assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()")
	assert.Equal(t, 1, i.offset, "Input.offset")
	assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
	r, _ = i.NextRune()
	assert.Equal(t, 'e', r, "result from 2nd call to NextRune()")
}
// TestCallingMultipleAccepts_FillsInputWithData checks that repeated
// NextRune()/Accept() cycles fill both the reader buffer and the result.
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
	i := mkInput()
	for j := 0; j < 7; j++ {
		i.NextRune()
		i.Accept()
	}
	assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer")
	assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()")
}
// TestAccept_UpdatesCursor checks that accepting runes advances the cursor
// column by column, and that a "\r\n" sequence moves the cursor to the
// start of the next line.
func TestAccept_UpdatesCursor(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
		i.NextRune()
		i.Accept()
	}
	assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2")
	i.NextRune() // read "\n", cursor ends up at start of new line
	i.Accept()
	assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
		i.NextRune()
		i.Accept()
	}
	assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4")
	assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()")
}
// TestFork_CreatesForkOfInputAtSameCursorPosition checks that a fork starts
// at the parent's cursor position, accumulates its own results while the
// parent stays put, and that Merge() moves results and cursor back into
// the parent.
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := mkInput()
	i.NextRune()
	i.Accept() // T
	assert.Equal(t, "T", i.Result().String(), "accepted rune in input")
	// Fork
	f := i.Fork()
	assert.Equal(t, f, i.child, "Input.child (must be f)")
	assert.Equal(t, i, f.parent, "Input.parent (must be i)")
	assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
	assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
	// Accept two runes via fork.
	f.NextRune()
	f.Accept() // e
	f.NextRune()
	f.Accept() // s
	assert.Equal(t, "es", f.Result().String(), "result runes in fork")
	assert.Equal(t, 1, i.cursor.Byte, "i.child.cursor.Byte")
	assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
	// Merge fork back into parent
	f.Merge()
	assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
	assert.Equal(t, 3, i.cursor.Byte, "i.child.cursor.Byte")
}
// TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
// checks two levels of forks: each fork accepts one rune; merging f2 into f1
// and then f1 into the root moves the accepted runes upward, clearing each
// fork's own result while keeping its offset.
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := mkInput()
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	f1.NextRune()
	f1.Accept()
	f2 := f1.Fork()
	f2.NextRune()
	f2.Accept()
	// Before any merge: each level holds only its own accepted rune.
	assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
	assert.Equal(t, 1, i.offset, "i.offset")
	assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()")
	assert.Equal(t, 2, f1.offset, "f1.offset")
	assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()")
	assert.Equal(t, 3, f2.offset, "f2.offset")
	f2.Merge()
	// f2's rune moved into f1; f2's result is cleared, offset retained.
	assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
	assert.Equal(t, 1, i.offset, "i.offset")
	assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()")
	assert.Equal(t, 3, f1.offset, "f1.offset")
	assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
	assert.Equal(t, 3, f2.offset, "f2.offset")
	f1.Merge()
	// f1's runes moved into the root input.
	assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
	assert.Equal(t, 3, i.offset, "i.offset")
	assert.Equal(t, "", f1.Result().String(), "f1.Result().String()")
	assert.Equal(t, 3, f1.offset, "f1.offset")
	assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
	assert.Equal(t, 3, f2.offset, "f2.offset")
}
// TestGivenForkedChild_FlushReaderBuffer_Panics checks that
// FlushReaderBuffer() is only allowed on the root TokenAPI, never on a fork.
func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) {
	assert.Panic(t, assert.PanicT{
		Function: func() {
			i := mkInput()
			f := i.Fork()
			f.FlushReaderBuffer(1)
		},
		Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " +
			"can only be called on the root TokenAPI, not on a forked child",
	})
}
// TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer checks
// that flushing resets the read offset to 0 while the already accepted
// result is kept, and that reading can continue afterwards.
func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) {
	i := mkInput()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	i.FlushReaderBuffer(2)
	assert.Equal(t, "Te", i.Result().String(), "i.Result()")
	assert.Equal(t, 0, i.offset, "i.offset")
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	assert.Equal(t, 2, i.offset, "i.offset")
	i.FlushReaderBuffer(2)
	assert.Equal(t, "Test", i.Result().String(), "i.Result()")
	assert.Equal(t, 0, i.offset, "i.offset")
}
// TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned checks that reading past
// the end of the input yields utf8.RuneError together with io.EOF.
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("X"))
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()")
	assert.Equal(t, true, err == io.EOF, "returned error from NextRune()")
}
// TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed checks that
// after a fork has consumed the input up to EOF, the parent can still read
// the buffered rune without an error.
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("X"))
	f := i.Fork()
	f.NextRune()
	f.Accept()
	r, err := f.NextRune()
	assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	r, err = i.NextRune()
	assert.Equal(t, 'X', r, "returned rune from 2nd NextRune()")
	assert.Equal(t, true, err == nil, "returned error from 2nd NextRune()")
}
// mkInput creates a fresh TokenAPI that reads the fixed string "Testing",
// used as the standard input for the tests in this file.
func mkInput() *TokenAPI {
	reader := strings.NewReader("Testing")
	return NewTokenAPI(reader)
}

View File

@ -1,139 +0,0 @@
package parsekit
import (
"fmt"
"runtime"
)
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the token(s).
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
// The return value is true on a successful match, false otherwise.
type TokenHandler func(t *TokenAPI) bool
// NextRune retrieves the next rune from the input.
//
// It returns the rune and a boolean. The boolean will be false in case an
// invalid UTF8 rune or the end of the file was encountered.
//
// After retrieving a rune, Accept() or Skip() can be called to respectively add
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
// a TokenHandler has full control over what runes are significant for the
// resulting output of that TokenHandler.
//
// After using NextRune(), this method can not be reinvoked, until the last read
// rune is explicitly accepted or skipped as described above.
// func (t *TokenAPIold) NextRune() (rune, bool) {
// if t.lastRune != nil {
// caller, filepos := getCaller(1)
// panic(fmt.Sprintf(
// "TokenHandler bug: NextRune() was called from %s at %s "+
// "without accepting or skipping the previously read rune", caller, filepos))
// }
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
// t.lastRune = &runeInfo{r, w, ok}
// if ok {
// t.result.Input = append(t.result.Input, r)
// }
// return r, ok
// }
// Fork splits off a child TokenAPIold, containing the same input cursor position
// as the parent TokenAPIold, but with all other data in a fresh state.
//
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
// affecting the parent TokenAPIold. This is for example useful when the
// TokenHandler function must perform some form of lookahead.
//
// When a successful match was found, the TokenHandler function can call
// TokenAPIold.Merge() on the forked child to have the resulting output added
// to the parent TokenAPIold.
//
// When no match was found, the forked child can simply be discarded.
//
// Example case: A TokenHandler checks for a sequence of runes: 'a', 'b', 'c', 'd'.
// This is done in 4 steps and only after finishing all steps, the TokenHandler
// function can confirm a successful match. The TokenHandler function for this
// case could look like this (yes, it's naive, but it shows the point):
// TODO make proper tested example
//
// func MatchAbcd(t *TokenAPIold) bool {
// child := t.Fork() // fork to keep the parent input untouched
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
// if r, ok := t.NextRune(); !ok || r != letter {
// return false // report mismatch, t is left untouched
// }
// child.Accept() // add rune to child output
// }
// child.Merge() // we have a match, add resulting output to parent
// return true // and report the successful match
// }
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
// output of the TokenAPIold.
// func (t *TokenAPIold) Accept() {
// t.checkAllowedCall("Accept()")
// t.buffer = append(t.buffer, t.lastRune.Rune)
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
// t.inputOffset += t.lastRune.ByteSize
// t.lastRune = nil
// }
// Skip will ignore the last rune as read by NextRune().
// func (t *TokenAPIold) Skip() {
// t.checkAllowedCall("Skip()")
// t.inputOffset += t.lastRune.ByteSize
// t.lastRune = nil
// }
// func (t *TokenAPIold) checkAllowedCall(name string) {
// if t.lastRune == nil {
// caller, filepos := getCaller(2)
// panic(fmt.Sprintf(
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
// name, caller, filepos))
// }
// if !t.lastRune.OK {
// caller, filepos := getCaller(2)
// panic(fmt.Sprintf(
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
// }
// }
// AddToken is used to add a token to the results of the TokenHandler.
// func (t *TokenAPIold) AddToken(tok *Token) {
// t.result.Tokens = append(t.result.Tokens, tok)
// }
// Merge merges the resulting output from a forked child TokenAPIold back into
// its parent: The runes that are accepted in the child are added to the parent
// runes and the parent's input cursor position is advanced to the child's
// cursor position.
//
// After the merge, the child TokenAPIold is reset so it can immediately be
// reused for performing another match (all data are cleared, except for the
// input offset which is kept at its current position).
// func (t *TokenAPIold) Merge() bool {
// if t.parent == nil {
// panic("TokenHandler bug: Cannot call Merge on a non-forked MatchDialog")
// }
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
// t.parent.inputOffset = t.inputOffset
// t.result = &TokResult{}
// return true
// }
// getCaller returns the fully qualified function name and the "file:line"
// position of the stack frame that lives `depth` frames above our caller.
func getCaller(depth int) (string, string) {
	// Error handling is omitted on purpose: this helper is only invoked
	// internally, with depth values that are known to be safe.
	pc, file, line, _ := runtime.Caller(depth + 1)
	fn := runtime.FuncForPC(pc)
	position := fmt.Sprintf("%s:%d", file, line)
	return fn.Name(), position
}

View File

@ -4,11 +4,10 @@ import (
"testing" "testing"
"git.makaay.nl/mauricem/go-parsekit" "git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/assert"
) )
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) { func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
for i := 0; i < 20; i++ { for i := 0; i < 20; i++ {
t.NextRune() t.NextRune()
t.Accept() t.Accept()
@ -22,7 +21,7 @@ func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
} }
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) { func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.Result().AddToken(&parsekit.Token{ t.Result().AddToken(&parsekit.Token{
Type: "PI", Type: "PI",
Runes: []rune("π"), Runes: []rune("π"),
@ -60,8 +59,9 @@ func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
} }
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) { func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII)) var tok, c, a = parsekit.T, parsekit.C, parsekit.A
parser := parsekit.NewMatcher(fooToken, "something") fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII))
parser := parsekit.NewTokenizer(fooToken, "something")
input := "This is fine ASCII Åltho hère öt endĩt!" input := "This is fine ASCII Åltho hère öt endĩt!"
result, err := parser.Execute(input) result, err := parser.Execute(input)
@ -74,15 +74,16 @@ func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
} }
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) { func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
fooToken := c.Seq( fooToken := c.Seq(
m.Drop(c.ZeroOrMore(a.Asterisk)), m.Drop(c.ZeroOrMore(a.Asterisk)),
tok.StrLiteral("COMBI", c.Seq( tok.Str("COMBI", c.Seq(
tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))), tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))), tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
)), )),
m.Drop(c.ZeroOrMore(a.Asterisk)), m.Drop(c.ZeroOrMore(a.Asterisk)),
) )
parser := parsekit.NewMatcher(fooToken, "something") parser := parsekit.NewTokenizer(fooToken, "something")
input := "*** This is fine ASCII Åltho hère öt endĩt! ***" input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
output := "This is fine ASCIIÅltho hère öt endĩt!" output := "This is fine ASCIIÅltho hère öt endĩt!"
result, err := parser.Execute(input) result, err := parser.Execute(input)
@ -108,50 +109,50 @@ func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
} }
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) { func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.Accept() t.Accept()
return false return false
}, "test") }, "test")
assert.Panic(t, assert.PanicT{ parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("input string") }, Function: func() { parser.Execute("input string") },
Regexp: true, Regexp: true,
Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
`calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`, `/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`})
})
} }
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) { func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.NextRune() t.NextRune()
t.NextRune() t.NextRune()
return false return false
}, "test") }, "test")
assert.Panic(t, assert.PanicT{ parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("input string") }, Function: func() { parser.Execute("input string") },
Regexp: true, Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` + Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
`a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`}) `/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`})
} }
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) { func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool { parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.NextRune() t.NextRune()
t.Accept() t.Accept()
return false return false
}, "test") }, "test")
assert.Panic(t, assert.PanicT{ parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("") }, Function: func() { parser.Execute("") },
Regexp: true, Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` + Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
`NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`}) `/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`})
} }
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) { func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
assert.Panic(t, assert.PanicT{ parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { Function: func() {
a := parsekit.TokenAPI{} a := parsekit.TokenAPI{}
a.Merge() a.Merge()
}, },
Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`, Regexp: true,
}) Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
`/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`})
} }

View File

@ -215,7 +215,7 @@ var A = struct {
// //
// Doing so saves you a lot of typing, and it makes your code a lot cleaner. // Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct { var T = struct {
StrLiteral func(interface{}, TokenHandler) TokenHandler Str func(interface{}, TokenHandler) TokenHandler
StrInterpreted func(interface{}, TokenHandler) TokenHandler StrInterpreted func(interface{}, TokenHandler) TokenHandler
Byte func(interface{}, TokenHandler) TokenHandler Byte func(interface{}, TokenHandler) TokenHandler
Rune func(interface{}, TokenHandler) TokenHandler Rune func(interface{}, TokenHandler) TokenHandler
@ -234,7 +234,7 @@ var T = struct {
Boolean func(interface{}, TokenHandler) TokenHandler Boolean func(interface{}, TokenHandler) TokenHandler
ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
}{ }{
StrLiteral: MakeStrLiteralToken, Str: MakeStrLiteralToken,
StrInterpreted: MakeStrInterpretedToken, StrInterpreted: MakeStrInterpretedToken,
Byte: MakeByteToken, Byte: MakeByteToken,
Rune: MakeRuneToken, Rune: MakeRuneToken,
@ -343,13 +343,13 @@ func MatchOpt(handler TokenHandler) TokenHandler {
} }
// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be // MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
// applied in their exact order. Only if all matcher apply, the sequence // applied in their exact order. Only if all TokenHandlers apply, the sequence
// reports successful match. // reports successful match.
func MatchSeq(handlers ...TokenHandler) TokenHandler { func MatchSeq(handlers ...TokenHandler) TokenHandler {
return func(t *TokenAPI) bool { return func(t *TokenAPI) bool {
child := t.Fork() child := t.Fork()
for _, matcher := range handlers { for _, handler := range handlers {
if !matcher(child) { if !handler(child) {
return false return false
} }
} }

View File

@ -8,7 +8,8 @@ import (
) )
func TestCombinators(t *testing.T) { func TestCombinators(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{ var c, a, m = parsekit.C, parsekit.A, parsekit.M
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
{"abc", c.Not(a.Rune('b')), true, "a"}, {"abc", c.Not(a.Rune('b')), true, "a"},
{"bcd", c.Not(a.Rune('b')), false, ""}, {"bcd", c.Not(a.Rune('b')), false, ""},
{"bcd", c.Not(a.Rune('b')), false, ""}, {"bcd", c.Not(a.Rune('b')), false, ""},
@ -67,24 +68,26 @@ func TestCombinators(t *testing.T) {
} }
func TestCombinatorPanics(t *testing.T) { func TestCombinatorPanics(t *testing.T) {
RunPanicTests(t, []PanicTest{ var c, a = parsekit.C, parsekit.A
{func() { a.RuneRange('z', 'a') }, parsekit.AssertPanics(t, []parsekit.PanicT{
{func() { a.RuneRange('z', 'a') }, false,
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"}, "TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, {func() { c.MinMax(-1, 1, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"}, "TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
{func() { c.MinMax(1, -1, parsekit.A.Space) }, {func() { c.MinMax(1, -1, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"}, "TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
{func() { c.MinMax(10, 5, parsekit.A.Space) }, {func() { c.MinMax(10, 5, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"}, "TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
{func() { c.Min(-10, parsekit.A.Space) }, {func() { c.Min(-10, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMin definition error: min must be >= 0"}, "TokenHandler bug: MatchMin definition error: min must be >= 0"},
{func() { c.Max(-42, parsekit.A.Space) }, {func() { c.Max(-42, parsekit.A.Space) }, false,
"TokenHandler bug: MatchMax definition error: max must be >= 0"}, "TokenHandler bug: MatchMax definition error: max must be >= 0"},
}) })
} }
func TestAtoms(t *testing.T) { func TestAtoms(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{ var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
{"dd", a.RuneRange('b', 'e'), true, "d"}, {"dd", a.RuneRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"}, {"ee", a.RuneRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""}, {"ff", a.RuneRange('b', 'e'), false, ""},
@ -223,7 +226,8 @@ func TestAtoms(t *testing.T) {
} }
func TestModifiers(t *testing.T) { func TestModifiers(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{ var c, a, m = parsekit.C, parsekit.A, parsekit.M
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@ -242,21 +246,23 @@ func TestModifiers(t *testing.T) {
// follow the correct pattern. Therefore, tokenmakers will panic when the // follow the correct pattern. Therefore, tokenmakers will panic when the
// input cannot be processed successfully. // input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) { func TestTokenMakerErrorHandling(t *testing.T) {
var a, tok = parsekit.A, parsekit.T
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
parser := parsekit.NewMatcher(invalid, "boolean") parser := parsekit.NewTokenizer(invalid, "boolean")
RunPanicTest(t, PanicTest{ parsekit.AssertPanic(t, parsekit.PanicT{
func() { parser.Execute("no") }, func() { parser.Execute("no") }, false,
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
`invalid syntax \(only use a type conversion token maker, when the input has been validated on beforehand\)`, `invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
}) })
} }
func TestTokenMakers(t *testing.T) { func TestTokenMakers(t *testing.T) {
RunTokenMakerTests(t, []TokenMakerTest{ var c, a, tok = parsekit.C, parsekit.A, parsekit.T
{`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)), parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}}, []parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
{`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)), {`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}}, []parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)), {`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
@ -313,6 +319,7 @@ func TestTokenMakers(t *testing.T) {
} }
func TestSequenceOfRunes(t *testing.T) { func TestSequenceOfRunes(t *testing.T) {
var c, a = parsekit.C, parsekit.A
sequence := c.Seq( sequence := c.Seq(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen, a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash, a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
@ -340,6 +347,7 @@ func TestSequenceOfRunes(t *testing.T) {
// I know, this is hell, but that's the whole point for this test :-> // I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) { func TestCombination(t *testing.T) {
var c, a, m = parsekit.C, parsekit.A, parsekit.M
demonic := c.Seq( demonic := c.Seq(
c.Opt(a.SquareOpen), c.Opt(a.SquareOpen),
m.Trim( m.Trim(
@ -360,7 +368,7 @@ func TestCombination(t *testing.T) {
c.Opt(a.SquareClose), c.Opt(a.SquareClose),
) )
RunTokenHandlerTests(t, []TokenHandlerTest{ parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"}, {"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"}, {"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"}, {">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},

49
tokenizer.go Normal file
View File

@ -0,0 +1,49 @@
package parsekit
// Tokenizer is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Tokenizer can be instantiated using the parsekit.NewTokenizer()
// method.
type Tokenizer struct {
	parser *Parser      // the Parser that drives the wrapped TokenHandler
	result *TokenResult // result of the last successful Execute() run
}
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
// The return value is true on a successful match, false otherwise.
type TokenHandler func(t *TokenAPI) bool
// NewTokenizer instantiates a new Tokenizer.
//
// This is a simple wrapper around a TokenHandler function. It can be used to
// match an input string against that TokenHandler function and retrieve the
// results in a straight forward way.
//
// The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler.
func NewTokenizer(tokenHandler TokenHandler, expects string) *Tokenizer {
	tokenizer := &Tokenizer{}
	// On a match, capture the result and stop; otherwise report an
	// "unexpected input" error using the provided expectation text.
	handleInput := func(p *ParseAPI) {
		if !p.On(tokenHandler).Accept() {
			p.Expects(expects)
			p.UnexpectedInput()
			return
		}
		tokenizer.result = p.Result()
		p.Stop()
	}
	tokenizer.parser = NewParser(handleInput)
	return tokenizer
}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the TokenHandler's TokenResult. When an error occurred
// during parsing, the error will be set, nil otherwise.
func (t *Tokenizer) Execute(input string) (*TokenResult, *Error) {
	parseErr := t.parser.Execute(input)
	result := t.result
	return result, parseErr
}

257
tokenizer_test.go Normal file
View File

@ -0,0 +1,257 @@
package parsekit
import (
"io"
"strings"
"testing"
"unicode/utf8"
)
// TestCallingNextRune_ReturnsNextRune checks that NextRune() yields the
// first rune ('T') of the "Testing" input provided by mkInput().
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	r, _ := mkInput().NextRune()
	AssertEqual(t, 'T', r, "first rune")
}
// TestInputCanAcceptRunesFromReader checks that three NextRune()/Accept()
// cycles accumulate the first three runes "Tes" in the result buffer.
func TestInputCanAcceptRunesFromReader(t *testing.T) {
	i := mkInput()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
}
// TestCallingNextRuneTwice_Panics checks that calling NextRune() twice
// without an intermediate Accept() triggers a descriptive panic that
// reports the offending call site in this file.
func TestCallingNextRuneTwice_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.NextRune()
			i.NextRune()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
			`/.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
	})
}
// TestCallingAcceptWithoutCallingNextRune_Panics checks that Accept()
// panics when no rune was read first through NextRune(). The call site
// reported in the panic is inside assertions_test.go, because the method
// value is invoked from within AssertPanic().
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: mkInput().Accept,
		Regexp:   true,
		// Escape the parentheses of the trailing NextRune\(\): unescaped,
		// "()" is an empty regexp group rather than the literal characters,
		// unlike every sibling expectation in this file.
		Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
			`at /.*/assertions_test\.go:\d+ without first calling NextRune\(\)`,
	})
}
// TestCallingMergeOnNonForkedChild_Panics checks that Merge() may only be
// called on a TokenAPI that was created through Fork().
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
			`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
// TestCallingNextRuneOnForkedParent_DetachesForkedChild checks that reading
// from a parent invalidates its fork: the child's Merge() then panics as if
// the child were never forked.
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			f := i.Fork()
			i.NextRune()
			f.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
			`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
// TestCallingForkOnForkedParent_DetachesForkedChild checks that forking the
// parent a second time detaches the earlier fork, so its Merge() panics.
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			f := i.Fork()
			i.Fork()
			f.Merge()
		},
		Regexp: true,
		Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
			`/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
// Builds a chain of forks, then verifies the parent/child links twice:
// once after re-forking halfway down the chain (which must detach f2/f3),
// and once after touching the root input (which must detach everything).
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
	i := mkInput()
	f1 := i.Fork()
	f2 := f1.Fork()
	f3 := f2.Fork()
	f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
	f5 := f4.Fork()
	// Expected tree at this point: i -> f1 -> f4 -> f5, with f2/f3 orphaned.
	AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	AssertEqual(t, true, i.child == f1, "i.child == f1")
	AssertEqual(t, true, f1.parent == i, "f1.parent == i")
	AssertEqual(t, true, f1.child == f4, "f1.child == f4")
	AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
	AssertEqual(t, true, f4.child == f5, "f4.child == f5")
	AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
	AssertEqual(t, true, f5.child == nil, "f5.child == nil")
	// Touching the root input must cut every parent/child link in the tree.
	i.NextRune()
	AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	AssertEqual(t, true, i.child == nil, "i.child == nil")
	AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
	AssertEqual(t, true, f1.child == nil, "f1.child == nil")
	AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
	AssertEqual(t, true, f4.child == nil, "f4.child == nil")
	AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
	AssertEqual(t, true, f5.child == nil, "f5.child == nil")
}
// After forking, the parent must forget its pending (not yet accepted)
// rune, so a subsequent Accept() on the parent panics.
func TestForkingInput_ClearsLastRune(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := mkInput()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		// Fix: match any path prefix with `/.*` (as the other expectations
		// in this file do) instead of the machine-specific `/hom.*`, so the
		// test also passes for checkouts outside a /home-based directory.
		Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called ` +
			`at /.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
	})
}
// Accept() must clear the pending rune bookkeeping, advance the read
// offset and store the rune in the reader buffer, so the next NextRune()
// yields the following input rune.
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := mkInput()
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	AssertTrue(t, i.result.lastRune != nil, "Input.lastRune after NextRune() is not nil")
	i.Accept()
	AssertTrue(t, i.result.lastRune == nil, "Input.lastRune after Accept() is nil")
	AssertEqual(t, 1, i.offset, "Input.offset")
	AssertEqual(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
// Accepting seven runes drains the complete test input ("Testing") into
// both the reader buffer and the result.
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
	i := mkInput()
	for remaining := 7; remaining > 0; remaining-- {
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "Testing", string(i.reader.buffer), "reader input buffer")
	AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
}
// Accepting runes must keep the human-readable cursor position in sync,
// including treating "\r\n" line endings as a single line break.
func TestAccept_UpdatesCursor(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", i.cursor.String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 1, column 7", i.cursor.String(), "cursor 2")
	i.NextRune() // read "\n", cursor ends up at start of new line
	i.Accept()
	AssertEqual(t, "line 2, column 1", i.cursor.String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 3, column 5", i.cursor.String(), "cursor 4")
	AssertEqual(t, *i.cursor, i.Cursor(), "i.Cursor()")
}
// Forking must leave parent and child at the same cursor position; the
// child then advances independently until Merge() folds its progress back
// into the parent.
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := mkInput()
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
	// Fork
	f := i.Fork()
	AssertEqual(t, f, i.child, "Input.child (must be f)")
	AssertEqual(t, i, f.parent, "Input.parent (must be i)")
	// Fix: the assertions on i.cursor.Byte previously carried the wrong
	// description "i.child.cursor.Byte"; their labels now name the field
	// that is actually being checked.
	AssertEqual(t, 1, i.cursor.Byte, "i.cursor.Byte")
	AssertEqual(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
	// Accept two runes via fork.
	f.NextRune()
	f.Accept() // e
	f.NextRune()
	f.Accept() // s
	AssertEqual(t, "es", f.Result().String(), "result runes in fork")
	AssertEqual(t, 1, i.cursor.Byte, "i.cursor.Byte")
	AssertEqual(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
	// Merge fork back into parent
	f.Merge()
	AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.cursor.Byte, "i.cursor.Byte")
}
// Two levels of forks each accept one rune; merging bottom-up must move
// the accepted runes and the read offset one level up per Merge(), while
// emptying the merged fork's own result.
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := mkInput()
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	f1.NextRune()
	f1.Accept()
	f2 := f1.Fork()
	f2.NextRune()
	f2.Accept()
	// Before merging: every level holds exactly its own accepted rune.
	AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
	AssertEqual(t, 1, i.offset, "i.offset")
	AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
	AssertEqual(t, 2, f1.offset, "f1.offset")
	AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
	AssertEqual(t, 3, f2.offset, "f2.offset")
	f2.Merge()
	// f2's rune and offset moved into f1; f2 is left empty.
	AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
	AssertEqual(t, 1, i.offset, "i.offset")
	AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
	AssertEqual(t, 3, f1.offset, "f1.offset")
	AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
	AssertEqual(t, 3, f2.offset, "f2.offset")
	f1.Merge()
	// f1's runes and offset moved into the root input; f1 is left empty.
	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
	AssertEqual(t, 3, i.offset, "i.offset")
	AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
	AssertEqual(t, 3, f1.offset, "f1.offset")
	AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
	AssertEqual(t, 3, f2.offset, "f2.offset")
}
// Reading past the final rune of the input must yield utf8.RuneError
// together with io.EOF.
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("X"))
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
}
// A fork that has read up to EOF must not consume the input for its
// parent: the parent can still read the rune the fork already accepted.
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := NewTokenAPI(strings.NewReader("X"))
	f := i.Fork()
	f.NextRune()
	f.Accept()
	r, err := f.NextRune()
	// Fix: the EOF read's error was never asserted, and both rune checks
	// shared the same "2nd NextRune()" label; the messages now say which
	// TokenAPI (fork vs. parent) each assertion is about.
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune() on fork")
	AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune() on fork")
	r, err = i.NextRune()
	AssertEqual(t, 'X', r, "returned rune from NextRune() on parent")
	AssertEqual(t, true, err == nil, "returned error from NextRune() on parent")
}
// mkInput builds a fresh TokenAPI around the fixed test input "Testing".
func mkInput() *TokenAPI {
	reader := strings.NewReader("Testing")
	return NewTokenAPI(reader)
}

116
tokenresult.go Normal file
View File

@ -0,0 +1,116 @@
package parsekit
import (
"fmt"
)
// TokenResult holds the results (runes and tokens) as produced by a
// TokenHandler.
type TokenResult struct {
	lastRune *runeInfo // Information about the last rune read using NextRune()
	runes    []rune    // the runes collected for this result
	tokens   []*Token  // the tokens collected for this result
}
// runeInfo bundles a rune that was read with the read error (if any,
// e.g. io.EOF) that accompanied it.
type runeInfo struct {
	r   rune  // the rune that was read
	err error // the error that was returned while reading it, if any
}
// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
	Type  interface{} // token type, can be any type that a parser author sees fit
	Runes []rune      // the runes that make up the token
	Value interface{} // an optional value of any type
}
// newTokenResult initializes an empty result struct, with non-nil
// (but empty) rune and token slices.
func newTokenResult() *TokenResult {
	result := &TokenResult{}
	result.runes = []rune{}
	result.tokens = []*Token{}
	return result
}
// ClearRunes clears the runes in the TokenResult, leaving an empty
// (non-nil) slice behind.
func (r *TokenResult) ClearRunes() {
	r.runes = make([]rune, 0)
}
// SetRunes replaces the Runes from the TokenResult with the provided
// input (a string, []rune or rune; see AddRunes for the accepted types).
func (r *TokenResult) SetRunes(s interface{}) {
	r.runes = []rune{}
	r.AddRunes(s)
}
// AddRunes is used to add runes to the TokenResult. Every input may be a
// string, a []rune or a single rune; any other type makes this method panic.
func (r *TokenResult) AddRunes(set ...interface{}) {
	for _, s := range set {
		switch s := s.(type) {
		case string:
			r.runes = append(r.runes, []rune(s)...)
		case []rune:
			r.runes = append(r.runes, s...)
		case rune:
			r.runes = append(r.runes, s)
		default:
			// NOTE(review): the message names SetRunes() rather than
			// AddRunes(); the test suite asserts this exact text, so it
			// is deliberately left unchanged here.
			panic(fmt.Sprintf("parsekit.TokenResult.SetRunes(): unsupported type '%T' used", s))
		}
	}
}
// Runes retrieves the Runes from the TokenResult.
// Note: the returned slice is the internal one, not a copy.
func (r *TokenResult) Runes() []rune {
	return r.runes
}
// Rune retrieves a single rune from the TokenResult at the specified index.
// An out-of-range index makes this method panic.
func (r *TokenResult) Rune(idx int) rune {
	return r.runes[idx]
}
// String returns the Runes from the TokenResult as a string.
func (r *TokenResult) String() string {
	return string(r.runes)
}
// ClearTokens clears the tokens in the TokenResult, leaving an empty
// (non-nil) slice behind.
func (r *TokenResult) ClearTokens() {
	r.tokens = make([]*Token, 0)
}
// SetTokens replaces the Tokens from the TokenResult with the provided input.
func (r *TokenResult) SetTokens(tokens []*Token) {
	r.ClearTokens()
	r.tokens = append(r.tokens, tokens...)
}
// AddToken is used to add a Token to the TokenResult.
func (r *TokenResult) AddToken(t *Token) {
	r.tokens = append(r.tokens, t)
}
// Tokens retrieves the Tokens from the TokenResult.
// Note: the returned slice is the internal one, not a copy.
func (r *TokenResult) Tokens() []*Token {
	return r.tokens
}
// Token retrieves a single Token from the TokenResult at the specified index.
// An out-of-range index makes this method panic.
func (r *TokenResult) Token(idx int) *Token {
	return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the TokenResult
// Tokens, in token order.
func (r *TokenResult) Values() []interface{} {
	values := make([]interface{}, 0, len(r.tokens))
	for _, tok := range r.tokens {
		values = append(values, tok.Value)
	}
	return values
}
// Value retrieves a single Value from the TokenResult Token at the
// specified index. An out-of-range index makes this method panic.
func (r *TokenResult) Value(idx int) interface{} {
	return r.tokens[idx].Value
}

25
tokenresult_test.go Normal file
View File

@ -0,0 +1,25 @@
package parsekit
import (
"testing"
)
// SetRunes() must accept a string, a rune slice and a single rune alike.
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
	i := mkInput()
	// Fix: Result().String() already returns a string, so the redundant
	// string(...) conversions wrapping it have been dropped.
	i.Result().SetRunes("string")
	AssertEqual(t, "string", i.Result().String(), "i.Result() with string input")
	i.Result().SetRunes([]rune("rune slice"))
	AssertEqual(t, "rune slice", i.Result().String(), "i.Result() with rune slice input")
	i.Result().SetRunes('X')
	AssertEqual(t, "X", i.Result().String(), "i.Result() with rune input")
}
// SetRunes() with an unsupported input type (here: an int) must panic
// with a message naming the offending type.
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			mkInput().Result().SetRunes(1234567)
		},
		Expect: "parsekit.TokenResult.SetRunes(): unsupported type 'int' used",
	})
}