Splitting up functionality in packages, intermediate step.
This commit is contained in:
parent
0f7b4e0d26
commit
1f0e0fcc17
|
@ -5,6 +5,8 @@ package parsekit
|
|||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
|
||||
|
@ -63,7 +65,7 @@ func AssertPanic(t *testing.T, p PanicT) {
|
|||
|
||||
type TokenHandlerT struct {
|
||||
Input string
|
||||
TokenHandler TokenHandler
|
||||
TokenHandler tokenize.TokenHandler
|
||||
MustMatch bool
|
||||
Expected string
|
||||
}
|
||||
|
@ -75,7 +77,7 @@ func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
|
|||
}
|
||||
|
||||
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
|
||||
result, err := NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
if test.MustMatch {
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
|
@ -91,8 +93,8 @@ func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
|
|||
|
||||
type TokenMakerT struct {
|
||||
Input string
|
||||
TokenHandler TokenHandler
|
||||
Expected []Token
|
||||
TokenHandler tokenize.TokenHandler
|
||||
Expected []tokenize.Token
|
||||
}
|
||||
|
||||
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
|
||||
|
@ -102,7 +104,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
|
|||
}
|
||||
|
||||
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
|
||||
result, err := NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package parsekit
|
||||
package common
|
||||
|
||||
import "fmt"
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
package parsekit_test
|
||||
package common_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
)
|
||||
|
||||
func ExampleCursor_Move() {
|
||||
c := &parsekit.Cursor{}
|
||||
c := &common.Cursor{}
|
||||
fmt.Printf("after initialization : %s\n", c)
|
||||
fmt.Printf("after 'some words' : %s\n", c.Move("some words"))
|
||||
fmt.Printf("after '\\n' : %s\n", c.Move("\n"))
|
||||
|
@ -22,7 +22,7 @@ func ExampleCursor_Move() {
|
|||
}
|
||||
|
||||
func ExampleCursor_String() {
|
||||
c := &parsekit.Cursor{}
|
||||
c := &common.Cursor{}
|
||||
fmt.Println(c.String())
|
||||
|
||||
c.Move("\nfoobar")
|
||||
|
@ -51,7 +51,7 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
|
|||
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
|
||||
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
|
||||
} {
|
||||
c := parsekit.Cursor{}
|
||||
c := common.Cursor{}
|
||||
for _, s := range test.input {
|
||||
c.Move(s)
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package parsekit
|
||||
package common
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -16,12 +16,12 @@ type Error struct {
|
|||
|
||||
func (err *Error) Error() string {
|
||||
if err == nil {
|
||||
callerPanic(1, "parsekit.Error.Error(): method called with nil error at {caller}")
|
||||
CallerPanic(1, "common.Error.Error(): method called with nil error at {caller}")
|
||||
}
|
||||
return fmt.Sprintf("%s at %s", err.Message, err.Cursor)
|
||||
}
|
||||
|
||||
func callerFunc(depth int) string {
|
||||
func CallerFunc(depth int) string {
|
||||
// No error handling, because we call this method ourselves with safe depth values.
|
||||
pc, _, _, _ := runtime.Caller(depth + 1)
|
||||
caller := runtime.FuncForPC(pc)
|
||||
|
@ -36,7 +36,7 @@ func callerFilepos(depth int) string {
|
|||
return fmt.Sprintf("%s:%d", file, line)
|
||||
}
|
||||
|
||||
func callerPanic(depth int, f string, args ...interface{}) {
|
||||
func CallerPanic(depth int, f string, args ...interface{}) {
|
||||
filepos := callerFilepos(depth + 1)
|
||||
m := fmt.Sprintf(f, args...)
|
||||
m = strings.Replace(m, "{caller}", filepos, 1)
|
|
@ -1,15 +1,15 @@
|
|||
package parsekit_test
|
||||
package common_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
)
|
||||
|
||||
func ExampleError() {
|
||||
err := &parsekit.Error{
|
||||
err := &common.Error{
|
||||
Message: "it broke down",
|
||||
Cursor: parsekit.Cursor{Line: 9, Column: 41},
|
||||
Cursor: common.Cursor{Line: 9, Column: 41},
|
||||
}
|
||||
|
||||
fmt.Println(err.Error())
|
|
@ -11,6 +11,8 @@ import (
|
|||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_basicCalculator1() {
|
||||
|
@ -54,7 +56,7 @@ func Example_basicCalculator1() {
|
|||
// ComputeSimple interprets a simple calculation, consisting of only integers
|
||||
// and add or subtract operators. It returns the result of the calculation.
|
||||
// An error is returned in case the calculation failed.
|
||||
func ComputeSimple(calculation string) (int64, *parsekit.Error) {
|
||||
func ComputeSimple(calculation string) (int64, *common.Error) {
|
||||
calculator := &simpleCalculator{op: +1}
|
||||
parser := parsekit.NewParser(calculator.number)
|
||||
err := parser.Execute(calculation)
|
||||
|
@ -70,9 +72,9 @@ type simpleCalculator struct {
|
|||
}
|
||||
|
||||
// A definition of an int64, which conveniently drops surrounding blanks.
|
||||
var dropBlank = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Blanks))
|
||||
var bareInteger = parsekit.C.Seq(dropBlank, parsekit.A.Integer, dropBlank)
|
||||
var int64Token = parsekit.T.Int64(nil, bareInteger)
|
||||
var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks))
|
||||
var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank)
|
||||
var int64Token = tokenize.T.Int64(nil, bareInteger)
|
||||
|
||||
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
||||
if p.Accept(int64Token) {
|
||||
|
@ -84,7 +86,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
|
||||
var A = parsekit.A
|
||||
var A = tokenize.A
|
||||
switch {
|
||||
case p.Accept(A.Add):
|
||||
c.op = +1
|
||||
|
|
|
@ -17,6 +17,8 @@ import (
|
|||
"math"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_basicCalculator2() {
|
||||
|
@ -75,7 +77,7 @@ type calculator struct {
|
|||
// Compute takes a calculation string as input and returns the interpreted result
|
||||
// value for the calculation. An error can be returned as well, in case the
|
||||
// calculation fails for some reason.
|
||||
func Compute(input string) (float64, *parsekit.Error) {
|
||||
func Compute(input string) (float64, *common.Error) {
|
||||
calc := &calculator{}
|
||||
parser := parsekit.NewParser(calc.calculation)
|
||||
err := parser.Execute(input)
|
||||
|
@ -94,7 +96,7 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
|
|||
func (calc *calculator) expr(p *parsekit.ParseAPI) {
|
||||
calc.interpreter.push()
|
||||
|
||||
var A = parsekit.A
|
||||
var A = tokenize.A
|
||||
if p.Handle(calc.term) {
|
||||
for p.Accept(A.Add.Or(A.Subtract)) {
|
||||
op := p.Result().Rune(0)
|
||||
|
@ -112,7 +114,7 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
|
|||
func (calc *calculator) term(p *parsekit.ParseAPI) {
|
||||
calc.interpreter.push()
|
||||
|
||||
var A = parsekit.A
|
||||
var A = tokenize.A
|
||||
if p.Handle(calc.factor) {
|
||||
for p.Accept(A.Multiply.Or(A.Divide)) {
|
||||
op := p.Result().Rune(0)
|
||||
|
@ -129,7 +131,7 @@ func (calc *calculator) term(p *parsekit.ParseAPI) {
|
|||
// <space> = (<space> (SPACE|TAB) | "")
|
||||
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
|
||||
func (calc *calculator) factor(p *parsekit.ParseAPI) {
|
||||
var A, T = parsekit.A, parsekit.T
|
||||
var A, T = tokenize.A, tokenize.T
|
||||
p.Accept(A.Blanks)
|
||||
switch {
|
||||
case p.Accept(T.Float64(nil, A.Signed(A.Float))):
|
||||
|
|
|
@ -7,7 +7,7 @@ package examples
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_dutchPostcodeUsingTokenizer() {
|
||||
|
@ -40,20 +40,20 @@ func Example_dutchPostcodeUsingTokenizer() {
|
|||
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
|
||||
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
|
||||
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
|
||||
// [4] Input: "1212abc" Error: unexpected input at start of file
|
||||
// [5] Input: "1234" Error: unexpected input at start of file
|
||||
// [6] Input: "huh" Error: unexpected input at start of file
|
||||
// [7] Input: "" Error: unexpected end of file at start of file
|
||||
// [8] Input: "\xcd2222AB" Error: unexpected input at start of file
|
||||
// [4] Input: "1212abc" Error: mismatch at start of file
|
||||
// [5] Input: "1234" Error: mismatch at start of file
|
||||
// [6] Input: "huh" Error: mismatch at start of file
|
||||
// [7] Input: "" Error: mismatch at start of file
|
||||
// [8] Input: "\xcd2222AB" Error: mismatch at start of file
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Implementation of the parser
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func createPostcodeTokenizer() *parsekit.Tokenizer {
|
||||
func createPostcodeTokenizer() *tokenize.Tokenizer {
|
||||
// Easy access to the parsekit definitions.
|
||||
C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T
|
||||
C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T
|
||||
|
||||
// TokenHandler functions are created and combined to satisfy these rules:
|
||||
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
|
||||
|
@ -61,14 +61,13 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
|
|||
// - A space between letters and digits is optional.
|
||||
// - It is good form to write the letters in upper case.
|
||||
// - It is good form to use a single space between digits and letters.
|
||||
digitNotZero := C.Except(A.Rune('0'), A.Digit)
|
||||
pcDigits := C.Seq(digitNotZero, A.Digit.Times(3))
|
||||
pcDigits := A.DigitNotZero.Then(A.Digit.Times(3))
|
||||
pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
|
||||
pcLetters := M.ToUpper(pcLetter.Times(2))
|
||||
space := M.Replace(C.Opt(A.Blanks), " ")
|
||||
space := M.Replace(A.Blanks.Optional(), " ")
|
||||
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
|
||||
|
||||
// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewTokenizer(postcode)
|
||||
return tokenize.NewTokenizer(postcode)
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ import (
|
|||
"strings"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_helloWorldUsingParser1() {
|
||||
|
@ -74,14 +76,14 @@ type helloparser1 struct {
|
|||
greetee string
|
||||
}
|
||||
|
||||
func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
|
||||
func (h *helloparser1) Parse(input string) (string, *common.Error) {
|
||||
parser := parsekit.NewParser(h.start)
|
||||
err := parser.Execute(input)
|
||||
return h.greetee, err
|
||||
}
|
||||
|
||||
func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
if p.Accept(a.StrNoCase("hello")) {
|
||||
p.Handle(h.comma)
|
||||
} else {
|
||||
|
@ -90,7 +92,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
switch {
|
||||
case p.Accept(a.Blanks):
|
||||
p.Handle(h.comma)
|
||||
|
@ -102,7 +104,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
p.Accept(a.Blanks)
|
||||
if p.Peek(a.AnyRune) {
|
||||
p.Handle(h.name)
|
||||
|
@ -112,7 +114,7 @@ func (h *helloparser1) startName(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (h *helloparser1) name(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
switch {
|
||||
case p.Peek(a.Excl):
|
||||
p.Handle(h.exclamation)
|
||||
|
@ -125,7 +127,7 @@ func (h *helloparser1) name(p *parsekit.ParseAPI) {
|
|||
}
|
||||
|
||||
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
if p.Accept(a.Excl) {
|
||||
p.Handle(h.end)
|
||||
} else {
|
||||
|
@ -137,7 +139,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
|
|||
// different route was taken to implement a more friendly 'end of greeting'
|
||||
// error message.
|
||||
func (h *helloparser1) end(p *parsekit.ParseAPI) {
|
||||
var a = parsekit.A
|
||||
var a = tokenize.A
|
||||
if !p.Accept(a.EndOfFile) {
|
||||
p.Expected("end of greeting")
|
||||
return
|
||||
|
|
|
@ -10,7 +10,7 @@ package examples
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_helloWorldUsingTokenizer() {
|
||||
|
@ -37,18 +37,18 @@ func Example_helloWorldUsingTokenizer() {
|
|||
// [1] Input: "HELLO ,Johnny!" Output: Johnny
|
||||
// [2] Input: "hello , Bob123!" Output: Bob123
|
||||
// [3] Input: "hello Pizza!" Output: Pizza
|
||||
// [4] Input: "Oh no!" Error: unexpected input at start of file
|
||||
// [5] Input: "Hello, world" Error: unexpected input at start of file
|
||||
// [6] Input: "Hello,!" Error: unexpected input at start of file
|
||||
// [4] Input: "Oh no!" Error: mismatch at start of file
|
||||
// [5] Input: "Hello, world" Error: mismatch at start of file
|
||||
// [6] Input: "Hello,!" Error: mismatch at start of file
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Implementation of the parser
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func createHelloTokenizer() *parsekit.Tokenizer {
|
||||
func createHelloTokenizer() *tokenize.Tokenizer {
|
||||
// Easy access to parsekit definition.
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
c, a, m := tokenize.C, tokenize.A, tokenize.M
|
||||
|
||||
// Using the parser/combinator support of parsekit, we create a TokenHandler function
|
||||
// that does all the work. The 'greeting' TokenHandler matches the whole input and
|
||||
|
@ -65,5 +65,5 @@ func createHelloTokenizer() *parsekit.Tokenizer {
|
|||
|
||||
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
|
||||
// us to match some input against that handler.
|
||||
return parsekit.NewTokenizer(greeting)
|
||||
return tokenize.NewTokenizer(greeting)
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ import (
|
|||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func Example_helloWorldUsingParser2() {
|
||||
|
@ -72,14 +74,14 @@ type helloparser2 struct {
|
|||
greetee string
|
||||
}
|
||||
|
||||
func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
|
||||
func (h *helloparser2) Parse(input string) (string, *common.Error) {
|
||||
parser := parsekit.NewParser(h.start)
|
||||
err := parser.Execute(input)
|
||||
return h.greetee, err
|
||||
}
|
||||
|
||||
func (h *helloparser2) start(p *parsekit.ParseAPI) {
|
||||
c, a, m := parsekit.C, parsekit.A, parsekit.M
|
||||
c, a, m := tokenize.C, tokenize.A, tokenize.M
|
||||
if !p.Accept(a.StrNoCase("hello")) {
|
||||
p.Error("the greeting is not being friendly")
|
||||
return
|
||||
|
|
|
@ -11,12 +11,14 @@ import (
|
|||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
type Chunks []string
|
||||
|
||||
func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
|
||||
c, a := parsekit.C, parsekit.A
|
||||
func (l *Chunks) AddChopped(s string, chunkSize int) *common.Error {
|
||||
c, a := tokenize.C, tokenize.A
|
||||
chunkOfRunes := c.MinMax(1, chunkSize, a.AnyRune)
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
|
|
49
parseapi.go
49
parseapi.go
|
@ -3,16 +3,19 @@ package parsekit
|
|||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
// ParseAPI holds the internal state of a parse run and provides an API that
|
||||
// ParseHandler methods can use to communicate with the parser.
|
||||
type ParseAPI struct {
|
||||
tokenAPI *TokenAPI // the TokenAPI, used for communicating with TokenHandler functions
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
|
||||
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
tokenAPI *tokenize.TokenAPI // the TokenAPI, used for communicating with TokenHandler functions
|
||||
loopCheck map[string]bool // used for parser loop detection
|
||||
result *tokenize.TokenHandlerResult // Last TokenHandler result as produced by Peek() or Accept()
|
||||
err *common.Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
|
||||
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
|
||||
}
|
||||
|
||||
// Peek checks if the upcoming input data matches the provided TokenHandler.
|
||||
|
@ -22,13 +25,13 @@ type ParseAPI struct {
|
|||
//
|
||||
// After calling this method, you can retrieve the produced TokenHandlerResult
|
||||
// using the ParseAPI.Result() method.
|
||||
func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool {
|
||||
func (p *ParseAPI) Peek(tokenHandler tokenize.TokenHandler) bool {
|
||||
p.result = nil
|
||||
forkedTokenAPI, ok := p.invokeTokenHandler("Peek", tokenHandler)
|
||||
if ok {
|
||||
p.result = forkedTokenAPI.Result()
|
||||
p.tokenAPI.clearResults()
|
||||
p.tokenAPI.detachChilds()
|
||||
p.tokenAPI.ClearResults()
|
||||
p.tokenAPI.DetachChilds()
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
@ -39,29 +42,29 @@ func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool {
|
|||
//
|
||||
// After calling this method, you can retrieve the produced TokenHandlerResult
|
||||
// using the ParseAPI.Result() method.
|
||||
func (p *ParseAPI) Accept(tokenHandler TokenHandler) bool {
|
||||
func (p *ParseAPI) Accept(tokenHandler tokenize.TokenHandler) bool {
|
||||
p.result = nil
|
||||
forkedTokenAPI, ok := p.invokeTokenHandler("Accept", tokenHandler)
|
||||
if ok {
|
||||
forkedTokenAPI.Merge()
|
||||
p.result = p.tokenAPI.Result()
|
||||
p.tokenAPI.detachChilds()
|
||||
if p.tokenAPI.flushReader() {
|
||||
p.tokenAPI.DetachChilds()
|
||||
if p.tokenAPI.FlushReader() {
|
||||
p.initLoopCheck()
|
||||
}
|
||||
}
|
||||
return ok
|
||||
}
|
||||
|
||||
func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler TokenHandler) (*TokenAPI, bool) {
|
||||
func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler tokenize.TokenHandler) (*tokenize.TokenAPI, bool) {
|
||||
p.panicWhenStoppedOrInError()
|
||||
p.checkForLoops()
|
||||
if tokenHandler == nil {
|
||||
callerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
|
||||
common.CallerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
|
||||
}
|
||||
|
||||
p.result = nil
|
||||
p.tokenAPI.clearResults()
|
||||
p.tokenAPI.ClearResults()
|
||||
child := p.tokenAPI.Fork()
|
||||
ok := tokenHandler(child)
|
||||
|
||||
|
@ -80,14 +83,14 @@ func (p *ParseAPI) panicWhenStoppedOrInError() {
|
|||
return
|
||||
}
|
||||
|
||||
called := callerFunc(1)
|
||||
called := common.CallerFunc(1)
|
||||
|
||||
after := "Error()"
|
||||
if p.stopped {
|
||||
after = "Stop()"
|
||||
}
|
||||
|
||||
callerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
|
||||
common.CallerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
|
||||
"no calls allowed after ParseAPI.%s", called, called, after)
|
||||
}
|
||||
|
||||
|
@ -100,9 +103,9 @@ func (p *ParseAPI) initLoopCheck() {
|
|||
}
|
||||
|
||||
func (p *ParseAPI) checkForLoops() {
|
||||
filepos := callerFilepos(3)
|
||||
filepos := common.CallerFilePos(3)
|
||||
if _, ok := p.loopCheck[filepos]; ok {
|
||||
callerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}")
|
||||
common.CallerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}")
|
||||
}
|
||||
p.loopCheck[filepos] = true
|
||||
}
|
||||
|
@ -112,10 +115,10 @@ func (p *ParseAPI) checkForLoops() {
|
|||
//
|
||||
// When Result() is called without first doing a Peek() or Accept(), then no
|
||||
// result will be available and the method will panic.
|
||||
func (p *ParseAPI) Result() *TokenHandlerResult {
|
||||
func (p *ParseAPI) Result() *tokenize.TokenHandlerResult {
|
||||
result := p.result
|
||||
if p.result == nil {
|
||||
callerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
|
||||
common.CallerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
|
||||
"at {caller} without calling ParseAPI.Peek() or ParseAPI.Accept() on beforehand")
|
||||
}
|
||||
return result
|
||||
|
@ -136,7 +139,7 @@ func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
|
|||
|
||||
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
|
||||
if parseHandler == nil {
|
||||
callerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
|
||||
common.CallerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,7 +167,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
|
|||
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
|
||||
// set a different error message when needed.
|
||||
message := fmt.Sprintf(format, args...)
|
||||
p.err = &Error{message, *p.tokenAPI.result.cursor}
|
||||
p.err = &common.Error{message, *p.tokenAPI.Result().Cursor()}
|
||||
}
|
||||
|
||||
// ExpectEndOfFile can be used to check if the input is at end of file.
|
||||
|
@ -175,7 +178,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
|
|||
// as the expectation.
|
||||
func (p *ParseAPI) ExpectEndOfFile() {
|
||||
p.panicWhenStoppedOrInError()
|
||||
if p.Peek(A.EndOfFile) {
|
||||
if p.Peek(tokenize.A.EndOfFile) {
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expected("end of file")
|
||||
|
|
11
parser.go
11
parser.go
|
@ -1,5 +1,10 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
// Parser is the top-level struct that holds the configuration for a parser.
|
||||
// The Parser can be instantiated using the parsekit.NewParser() method.
|
||||
type Parser struct {
|
||||
|
@ -27,7 +32,7 @@ type ParseHandler func(*ParseAPI)
|
|||
// To parse input data, use the method Parser.Execute().
|
||||
func NewParser(startHandler ParseHandler) *Parser {
|
||||
if startHandler == nil {
|
||||
callerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
|
||||
common.CallerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
|
||||
}
|
||||
return &Parser{startHandler: startHandler}
|
||||
}
|
||||
|
@ -36,9 +41,9 @@ func NewParser(startHandler ParseHandler) *Parser {
|
|||
// For an overview of allowed inputs, take a look at the documentation for parsekit.read.New().
|
||||
//
|
||||
// When an error occurs during parsing, then this error is returned, nil otherwise.
|
||||
func (p *Parser) Execute(input interface{}) *Error {
|
||||
func (p *Parser) Execute(input interface{}) *common.Error {
|
||||
api := &ParseAPI{
|
||||
tokenAPI: NewTokenAPI(input),
|
||||
tokenAPI: tokenize.NewTokenAPI(input),
|
||||
loopCheck: map[string]bool{},
|
||||
}
|
||||
if api.Handle(p.startHandler) {
|
||||
|
|
|
@ -5,11 +5,12 @@ import (
|
|||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func ExampleParser_usingAcceptedRunes() {
|
||||
// Easy access to the parsekit definitions.
|
||||
a := parsekit.A
|
||||
a := tokenize.A
|
||||
|
||||
matches := []string{}
|
||||
|
||||
|
@ -28,7 +29,7 @@ func ExampleParser_usingAcceptedRunes() {
|
|||
|
||||
func ExampleParser_usingTokens() {
|
||||
// Easy access to the parsekit definitions.
|
||||
c, a, tok := parsekit.C, parsekit.A, parsekit.T
|
||||
c, a, tok := tokenize.C, tokenize.A, tokenize.T
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
|
||||
|
@ -60,7 +61,7 @@ func ExampleParseAPI_Accept_inIfStatement() {
|
|||
// When a case-insensitive match on "Yowza!" is found by the
|
||||
// tokenizer, then Accept() will make the result available
|
||||
// through ParseAPI.Result()
|
||||
if p.Accept(parsekit.A.StrNoCase("Yowza!")) {
|
||||
if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
|
||||
// Result.String() returns a string containing all
|
||||
// accepted runes that were matched against.
|
||||
fmt.Println(p.Result().String())
|
||||
|
@ -77,9 +78,9 @@ func ExampleParseAPI_Accept_inSwitchStatement() {
|
|||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for loop := true; loop; {
|
||||
switch {
|
||||
case p.Accept(parsekit.A.Rune('X')):
|
||||
case p.Accept(tokenize.A.Rune('X')):
|
||||
// NOOP, skip this rune
|
||||
case p.Accept(parsekit.A.AnyRune):
|
||||
case p.Accept(tokenize.A.AnyRune):
|
||||
result += p.Result().String()
|
||||
default:
|
||||
loop = false
|
||||
|
@ -94,7 +95,7 @@ func ExampleParseAPI_Accept_inSwitchStatement() {
|
|||
}
|
||||
|
||||
func ExampleParseAPI_Stop() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
C, A := tokenize.C, tokenize.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("First word: ")
|
||||
|
@ -110,7 +111,7 @@ func ExampleParseAPI_Stop() {
|
|||
}
|
||||
|
||||
func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
C, A := tokenize.C, tokenize.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("Word: ")
|
||||
|
@ -128,7 +129,7 @@ func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
|
|||
}
|
||||
|
||||
func ExampleParseAPI_Stop_notCalledButInputPending() {
|
||||
C, A := parsekit.C, parsekit.A
|
||||
C, A := tokenize.C, tokenize.A
|
||||
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
fmt.Printf("First word: ")
|
||||
|
@ -147,7 +148,7 @@ func ExampleParseAPI_Stop_notCalledButInputPending() {
|
|||
|
||||
func ExampleParseAPI_Peek() {
|
||||
// Definition of a fantasy serial number format.
|
||||
C, A := parsekit.C, parsekit.A
|
||||
C, A := tokenize.C, tokenize.A
|
||||
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
|
||||
|
||||
// This handler is able to handle serial numbers.
|
||||
|
@ -160,7 +161,7 @@ func ExampleParseAPI_Peek() {
|
|||
// Start could function as a sort of dispatcher, handing over
|
||||
// control to the correct ParseHandler function, based on the input.
|
||||
start := func(p *parsekit.ParseAPI) {
|
||||
if p.Peek(parsekit.A.Asterisk) {
|
||||
if p.Peek(tokenize.A.Asterisk) {
|
||||
p.Handle(serialnrHandler)
|
||||
return
|
||||
}
|
||||
|
@ -275,12 +276,12 @@ type parserWithLoop struct {
|
|||
}
|
||||
|
||||
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
|
||||
p.Accept(parsekit.A.ASCII)
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.second)
|
||||
}
|
||||
|
||||
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
|
||||
p.Accept(parsekit.A.ASCII)
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.third)
|
||||
}
|
||||
|
||||
|
@ -289,7 +290,7 @@ func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
|
|||
p.Error("Loop not detected by parsekit")
|
||||
return
|
||||
}
|
||||
p.Accept(parsekit.A.ASCII)
|
||||
p.Accept(tokenize.A.ASCII)
|
||||
p.Handle(l.first)
|
||||
}
|
||||
|
||||
|
@ -316,7 +317,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
|
|||
//
|
||||
// Now the loop stops when the parser finds no more matching input data.
|
||||
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
for p.Accept(c.Max(5, a.AnyRune)) {
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Package reader provides a buffered Reader that wraps around an io.Reader.
|
||||
// Package read provides a buffered Reader that wraps around an io.Reader.
|
||||
//
|
||||
// Functionally, it provides an input buffer in the form of a sliding window.
|
||||
// Let's say we've got the following input coming up in the io.Reader that is
|
||||
|
@ -37,7 +37,7 @@
|
|||
//
|
||||
// So after a flush, the first upcoming rune after the flushed runes
|
||||
// will always be at offset 0.
|
||||
package reader
|
||||
package read
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
|
@ -57,7 +57,7 @@ import (
|
|||
// To minimize memory use, it is also possible to flush the read buffer when there is
|
||||
// no more need to go back to previously read runes.
|
||||
//
|
||||
// The parserkit.reader.Reader is used internally by parsekit.TokenAPI.
|
||||
// The parsekit.read.Reader is used internally by tokenize.TokenAPI.
|
||||
type Reader struct {
|
||||
bufio *bufio.Reader // Used for ReadRune()
|
||||
buffer []rune // Input buffer, holding runes that were read from input
|
||||
|
@ -89,7 +89,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
|
|||
case string:
|
||||
return bufio.NewReader(strings.NewReader(input))
|
||||
default:
|
||||
panic(fmt.Sprintf("parsekit.reader.New(): no support for input of type %T", input))
|
||||
panic(fmt.Sprintf("parsekit.read.New(): no support for input of type %T", input))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,7 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
|
|||
const smallBufferSize = 64
|
||||
|
||||
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
|
||||
var ErrTooLarge = errors.New("parsekit.reader: too large")
|
||||
var ErrTooLarge = errors.New("parsekit.read: too large")
|
||||
|
||||
// grow grows the buffer to guarantee space for n more bytes.
|
||||
// It returns the index where bytes should be written.
|
||||
|
@ -200,7 +200,7 @@ func makeSlice(n int) []rune {
|
|||
func (r *Reader) Flush(numberOfRunes int) {
|
||||
if numberOfRunes > len(r.buffer) {
|
||||
panic(fmt.Sprintf(
|
||||
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
|
||||
"parsekit.read.Reader.Flush(): number of runes to flush (%d) "+
|
||||
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
|
||||
}
|
||||
r.buffer = r.buffer[numberOfRunes:]
|
|
@ -1,4 +1,4 @@
|
|||
package reader_test
|
||||
package read_test
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
|
@ -8,12 +8,12 @@ import (
|
|||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func ExampleNew() {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
fmt.Printf("%c", at(0))
|
||||
|
@ -33,7 +33,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
|||
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
|
||||
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
|
||||
} {
|
||||
r := reader.New(test.input)
|
||||
r := read.New(test.input)
|
||||
firstRune, _ := r.RuneAt(0)
|
||||
if firstRune != 'H' {
|
||||
t.Errorf("[%s] first rune not 'H'", test.name)
|
||||
|
@ -47,12 +47,12 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
|
|||
|
||||
func TestNew_UnhandledInputType_Panics(t *testing.T) {
|
||||
assert.PanicsWithValue(t,
|
||||
"parsekit.reader.New(): no support for input of type int",
|
||||
func() { reader.New(12345) })
|
||||
"parsekit.read.New(): no support for input of type int",
|
||||
func() { read.New(12345) })
|
||||
}
|
||||
|
||||
func TestReader_RuneAt(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
// It is possible to go back and forth while reading the input.
|
||||
|
@ -61,7 +61,7 @@ func TestReader_RuneAt(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestReader_RuneAt_endOfFile(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
rn, err := r.RuneAt(13)
|
||||
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
|
||||
|
@ -73,7 +73,7 @@ func TestReader_RuneAt_endOfFile(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestReader_RuneAt_invalidRune(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, \xcdworld!"))
|
||||
r := read.New(strings.NewReader("Hello, \xcdworld!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
|
||||
|
@ -81,7 +81,7 @@ func TestReader_RuneAt_invalidRune(t *testing.T) {
|
|||
}
|
||||
|
||||
func ExampleReader_RuneAt() {
|
||||
reader := reader.New(strings.NewReader("Hello, world!"))
|
||||
reader := read.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
fmt.Printf("Runes: ")
|
||||
for i := 0; ; i++ {
|
||||
|
@ -99,7 +99,7 @@ func ExampleReader_RuneAt() {
|
|||
}
|
||||
|
||||
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("\uFEFFBommetje!"))
|
||||
r := read.New(strings.NewReader("\uFEFFBommetje!"))
|
||||
b, _ := r.RuneAt(0)
|
||||
o, _ := r.RuneAt(1)
|
||||
m, _ := r.RuneAt(2)
|
||||
|
@ -108,7 +108,7 @@ func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestReader_Flush(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
|
||||
|
||||
// Fills the buffer with the first 8 runes on the input: "Hello, w"
|
||||
|
@ -125,7 +125,7 @@ func TestReader_Flush(t *testing.T) {
|
|||
}
|
||||
|
||||
func ExampleReader_Flush() {
|
||||
r := reader.New(strings.NewReader("dog eat dog!"))
|
||||
r := read.New(strings.NewReader("dog eat dog!"))
|
||||
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
|
||||
|
||||
// Read from the first 4 runes of the input.
|
||||
|
@ -148,20 +148,20 @@ func ExampleReader_Flush() {
|
|||
}
|
||||
|
||||
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
|
||||
// Fill buffer with "Hello, worl", the first 11 runes.
|
||||
r.RuneAt(10)
|
||||
|
||||
// However, we flush 12 runes, which exceeds the buffer size.
|
||||
assert.PanicsWithValue(t,
|
||||
"parsekit.Input.Reader.Flush(): number of runes to flush "+
|
||||
"parsekit.read.Reader.Flush(): number of runes to flush "+
|
||||
"(12) exceeds size of the buffer (11)",
|
||||
func() { r.Flush(12) })
|
||||
}
|
||||
|
||||
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
|
||||
r := reader.New(strings.NewReader("Hello, world!"))
|
||||
r := read.New(strings.NewReader("Hello, world!"))
|
||||
_, err := r.RuneAt(13)
|
||||
assert.Equal(t, err.Error(), "EOF")
|
||||
_, err = r.RuneAt(13)
|
||||
|
@ -188,7 +188,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
|
|||
io.ErrUnexpectedEOF, // This error must never popup in the tests below.
|
||||
},
|
||||
}
|
||||
r := reader.New(input)
|
||||
r := read.New(input)
|
||||
|
||||
// Read the last available rune.
|
||||
readRune, _ := r.RuneAt(3)
|
||||
|
@ -233,7 +233,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
|
|||
|
||||
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
||||
input, size := makeLargeStubReader()
|
||||
r := reader.New(input)
|
||||
r := read.New(input)
|
||||
|
||||
readRune, err := r.RuneAt(0)
|
||||
assert.Equal(t, 'X', readRune)
|
||||
|
@ -247,7 +247,7 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
|
|||
|
||||
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
|
||||
input, size := makeLargeStubReader()
|
||||
r := reader.New(input)
|
||||
r := read.New(input)
|
||||
|
||||
readRune, _ := r.RuneAt(size - 200)
|
||||
assert.Equal(t, 'X', readRune)
|
||||
|
@ -257,7 +257,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
|
|||
|
||||
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
|
||||
input, size := makeLargeStubReader()
|
||||
r := reader.New(input)
|
||||
r := read.New(input)
|
||||
|
||||
readRune, _ := r.RuneAt(size - 1)
|
||||
assert.Equal(t, 'Y', readRune)
|
|
@ -1,155 +0,0 @@
|
|||
package parsekit_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
)
|
||||
|
||||
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
for i := 0; i < 20; i++ {
|
||||
t.NextRune()
|
||||
t.Accept()
|
||||
}
|
||||
return true
|
||||
})
|
||||
result, _ := parser.Execute("This is some random data to parse")
|
||||
if result.String() != "This is some random " {
|
||||
t.Fatalf("Got unexpected output from TokenHandler: %s", result.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.Result().AddToken(&parsekit.Token{
|
||||
Type: "PI",
|
||||
Runes: []rune("π"),
|
||||
Value: 3.1415,
|
||||
})
|
||||
t.Result().AddToken(&parsekit.Token{
|
||||
Type: nil,
|
||||
Runes: []rune("yes"),
|
||||
Value: true,
|
||||
})
|
||||
return true
|
||||
})
|
||||
result, _ := parser.Execute("doesn't matter")
|
||||
if len(result.Tokens()) != 2 {
|
||||
t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens()))
|
||||
}
|
||||
if result.Token(0).Value != 3.1415 {
|
||||
t.Fatal("Token 0 value not 3.1415")
|
||||
}
|
||||
if string(result.Token(0).Runes) != "π" {
|
||||
t.Fatal("Token 0 runes not \"π\"")
|
||||
}
|
||||
if result.Token(0).Type != "PI" {
|
||||
t.Fatal("Token 0 type not \"PI\"")
|
||||
}
|
||||
if result.Token(1).Value != true {
|
||||
t.Fatal("Token 1 value not true")
|
||||
}
|
||||
if string(result.Token(1).Runes) != "yes" {
|
||||
t.Fatal("Token 1 runes not \"yes\"")
|
||||
}
|
||||
if result.Token(1).Type != nil {
|
||||
t.Fatal("Token 1 type not nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
|
||||
var tok, c, a = parsekit.T, parsekit.C, parsekit.A
|
||||
fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII))
|
||||
parser := parsekit.NewTokenizer(fooToken)
|
||||
input := "This is fine ASCII Åltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error from parser: %s", err)
|
||||
}
|
||||
if result.String() != "This is fine ASCII " {
|
||||
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
|
||||
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
|
||||
ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII)))
|
||||
utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune))))
|
||||
stars := m.Drop(c.ZeroOrMore(a.Asterisk))
|
||||
fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars)
|
||||
parser := parsekit.NewTokenizer(fooToken)
|
||||
|
||||
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
|
||||
output := "This is fine ASCIIÅltho hère öt endĩt!"
|
||||
result, err := parser.Execute(input)
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error from parser: %s", err)
|
||||
}
|
||||
if result.String() != output {
|
||||
t.Fatalf("result.String() contains unexpected data: %s", result.String())
|
||||
}
|
||||
if result.Token(0).Type != "COMBI" {
|
||||
t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type)
|
||||
}
|
||||
if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" {
|
||||
t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
if result.Token(1).Value != "This is fine ASCII" {
|
||||
t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
if result.Token(2).Value != "Åltho hère öt endĩt!" {
|
||||
t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.Accept()
|
||||
return false
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.NextRune()
|
||||
t.NextRune()
|
||||
return false
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("input string") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`})
|
||||
}
|
||||
|
||||
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
|
||||
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
|
||||
t.NextRune()
|
||||
t.Accept()
|
||||
return false
|
||||
})
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() { parser.Execute("") },
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
|
||||
`/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`})
|
||||
}
|
||||
|
||||
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
Function: func() {
|
||||
a := parsekit.TokenAPI{}
|
||||
a.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
|
||||
`/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
package tokenize_test
|
||||
|
||||
// This file contains some tools that are used for writing parsekit tests.
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
// AssertEqual reports a test error through t when expected and actual are
// not equal. The values are compared using the != operator. The forWhat
// argument names the thing under test and is included in the error message.
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected == actual {
		return
	}
	const format = "Unexpected value for %s:\nexpected: %q\nactual: %q"
	t.Errorf(format, forWhat, expected, actual)
}
|
||||
|
||||
// func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
|
||||
// if notExpected == actual {
|
||||
// t.Errorf("Unexpected value for %s: %q", forWhat, actual)
|
||||
// }
|
||||
// }
|
||||
|
||||
// AssertTrue reports a test error through t when b is false. The assertion
// argument describes the checked condition and is included in the error
// message.
func AssertTrue(t *testing.T, b bool, assertion string) {
	if b {
		return
	}
	t.Errorf("Assertion %s is false", assertion)
}
|
||||
|
||||
// PanicT describes a single panic expectation for AssertPanic.
type PanicT struct {
	Function func() // the code that is expected to panic
	Regexp   bool   // when true, Expect is interpreted as a regular expression
	Expect   string // the expected panic message (literal, or a pattern when Regexp is true)
}

// AssertPanics runs AssertPanic for every test case in testSet.
func AssertPanics(t *testing.T, testSet []PanicT) {
	for _, test := range testSet {
		AssertPanic(t, test)
	}
}

// AssertPanic calls p.Function and reports a test error through t when the
// function does not panic, or when the panic message does not match
// p.Expect. With p.Regexp set, p.Expect is compiled as a regular expression
// and matched against the message; otherwise the message must be equal to
// p.Expect.
//
// Panic values of type string and error are supported. A panic with a value
// of any other type is reported as a test error.
func AssertPanic(t *testing.T, p PanicT) {
	defer func() {
		r := recover()
		if r == nil {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
			return
		}

		// Extract the panic message without a blind r.(string) assertion:
		// such an assertion would itself panic inside this deferred function
		// for non-string panic values (e.g. runtime errors).
		var message string
		switch v := r.(type) {
		case string:
			message = v
		case error:
			message = v.Error()
		default:
			t.Errorf("Code did panic, but with a panic value of unsupported type %T: %v", r, r)
			return
		}

		matched := message == p.Expect
		if p.Regexp {
			matched = regexp.MustCompile(p.Expect).MatchString(message)
		}
		if !matched {
			t.Errorf(
				"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
				p.Expect, message)
		}
	}()
	p.Function()
}
|
||||
|
||||
type TokenHandlerT struct {
|
||||
Input string
|
||||
TokenHandler tokenize.TokenHandler
|
||||
MustMatch bool
|
||||
Expected string
|
||||
}
|
||||
|
||||
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
|
||||
for _, test := range testSet {
|
||||
AssertTokenHandler(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
|
||||
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
if test.MustMatch {
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else if output := result.String(); output != test.Expected {
|
||||
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Errorf("Test %q failed: should not match, but it did", test.Input)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type TokenMakerT struct {
|
||||
Input string
|
||||
TokenHandler tokenize.TokenHandler
|
||||
Expected []tokenize.Token
|
||||
}
|
||||
|
||||
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
|
||||
for _, test := range testSet {
|
||||
AssertTokenMaker(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
|
||||
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
|
||||
if err != nil {
|
||||
t.Errorf("Test %q failed with error: %s", test.Input, err)
|
||||
} else {
|
||||
if len(result.Tokens()) != len(test.Expected) {
|
||||
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
|
||||
}
|
||||
for i, expected := range test.Expected {
|
||||
actual := result.Token(i)
|
||||
if expected.Type != actual.Type {
|
||||
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
|
||||
}
|
||||
if string(expected.Runes) != string(actual.Runes) {
|
||||
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
|
||||
}
|
||||
if expected.Value != actual.Value {
|
||||
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,13 +1,14 @@
|
|||
package parsekit
|
||||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/reader"
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
||||
)
|
||||
|
||||
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
|
||||
// a parsekit.reader.Reader and to report back tokenizing results. For easy
|
||||
// a parsekit.read.Reader and to report back tokenizing results. For easy
|
||||
// lookahead support, a forking strategy is provided.
|
||||
//
|
||||
// BASIC OPERATION:
|
||||
|
@ -61,7 +62,7 @@ import (
|
|||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type TokenAPI struct {
|
||||
reader *reader.Reader
|
||||
reader *read.Reader
|
||||
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
|
||||
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
|
||||
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
|
||||
|
@ -70,7 +71,7 @@ type TokenAPI struct {
|
|||
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
|
||||
func NewTokenAPI(input interface{}) *TokenAPI {
|
||||
return &TokenAPI{
|
||||
reader: reader.New(input),
|
||||
reader: read.New(input),
|
||||
result: newTokenHandlerResult(),
|
||||
}
|
||||
}
|
||||
|
@ -86,10 +87,10 @@ func NewTokenAPI(input interface{}) *TokenAPI {
|
|||
// without explicitly accepting, this method will panic.
|
||||
func (i *TokenAPI) NextRune() (rune, error) {
|
||||
if i.result.lastRune != nil {
|
||||
callerPanic(1, "parsekit.TokenAPI.NextRune(): NextRune() called at {caller} "+
|
||||
common.CallerPanic(1, "tokenize.TokenAPI.NextRune(): NextRune() called at {caller} "+
|
||||
"without a prior call to Accept()")
|
||||
}
|
||||
i.detachChilds()
|
||||
i.DetachChilds()
|
||||
|
||||
readRune, err := i.reader.RuneAt(i.result.offset)
|
||||
i.result.lastRune = &runeInfo{r: readRune, err: err}
|
||||
|
@ -103,9 +104,9 @@ func (i *TokenAPI) NextRune() (rune, error) {
|
|||
// returned an error. Calling Accept() in such case will result in a panic.
|
||||
func (i *TokenAPI) Accept() {
|
||||
if i.result.lastRune == nil {
|
||||
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
|
||||
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
|
||||
} else if i.result.lastRune.err != nil {
|
||||
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
|
||||
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
|
||||
}
|
||||
i.result.runes = append(i.result.runes, i.result.lastRune.r)
|
||||
i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
|
||||
|
@ -131,7 +132,7 @@ func (i *TokenAPI) Accept() {
|
|||
// with the parent TokenAPI as if nothing ever happened.
|
||||
func (i *TokenAPI) Fork() *TokenAPI {
|
||||
// Cleanup current forking / reading state.
|
||||
i.detachChilds()
|
||||
i.DetachChilds()
|
||||
i.result.lastRune = nil
|
||||
|
||||
// Create the new fork.
|
||||
|
@ -155,12 +156,12 @@ func (i *TokenAPI) Fork() *TokenAPI {
|
|||
// This allows a child to feed results in chunks to its parent.
|
||||
func (i *TokenAPI) Merge() {
|
||||
if i.parent == nil {
|
||||
callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
|
||||
common.CallerPanic(1, "tokenize.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
|
||||
}
|
||||
i.addResultsToParent()
|
||||
i.syncCursorTo(i.parent)
|
||||
i.clearResults()
|
||||
i.detachChilds()
|
||||
i.ClearResults()
|
||||
i.DetachChilds()
|
||||
}
|
||||
|
||||
func (i *TokenAPI) addResultsToParent() {
|
||||
|
@ -173,29 +174,29 @@ func (i *TokenAPI) syncCursorTo(to *TokenAPI) {
|
|||
*to.result.cursor = *i.result.cursor
|
||||
}
|
||||
|
||||
func (i *TokenAPI) clearResults() {
|
||||
func (i *TokenAPI) ClearResults() {
|
||||
i.result.lastRune = nil
|
||||
i.result.runes = []rune{}
|
||||
i.result.tokens = []*Token{}
|
||||
i.result.err = nil
|
||||
}
|
||||
|
||||
func (i *TokenAPI) detachChilds() {
|
||||
func (i *TokenAPI) DetachChilds() {
|
||||
if i.child != nil {
|
||||
i.child.detachChildsRecurse()
|
||||
i.child.DetachChildsRecurse()
|
||||
i.child = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (i *TokenAPI) detachChildsRecurse() {
|
||||
func (i *TokenAPI) DetachChildsRecurse() {
|
||||
if i.child != nil {
|
||||
i.child.detachChildsRecurse()
|
||||
i.child.DetachChildsRecurse()
|
||||
}
|
||||
i.child = nil
|
||||
i.parent = nil
|
||||
}
|
||||
|
||||
func (i *TokenAPI) flushReader() bool {
|
||||
func (i *TokenAPI) FlushReader() bool {
|
||||
if i.result.offset > 0 {
|
||||
i.reader.Flush(i.result.offset)
|
||||
i.result.offset = 0
|
|
@ -1,15 +1,15 @@
|
|||
package parsekit_test
|
||||
package tokenize_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func ExampleTokenAPI_Fork() {
|
||||
// This custom TokenHandler checks for input 'a', 'b' or 'c'.
|
||||
abcHandler := func(t *parsekit.TokenAPI) bool {
|
||||
a := parsekit.A
|
||||
abcHandler := func(t *tokenize.TokenAPI) bool {
|
||||
a := tokenize.A
|
||||
for _, r := range []rune{'a', 'b', 'c'} {
|
||||
child := t.Fork() // fork, so we won't change parent t
|
||||
if a.Rune(r)(child) {
|
||||
|
@ -26,26 +26,26 @@ func ExampleTokenAPI_Fork() {
|
|||
// You can make use of the parser/combinator tooling to do things
|
||||
// a lot simpler and take care of forking at the appropriate places.
|
||||
// The handler from above can be replaced with:
|
||||
simpler := parsekit.A.RuneRange('a', 'c')
|
||||
simpler := tokenize.A.RuneRange('a', 'c')
|
||||
|
||||
result, err := parsekit.NewTokenizer(abcHandler).Execute("another test")
|
||||
result, err := tokenize.NewTokenizer(abcHandler).Execute("another test")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(simpler).Execute("curious")
|
||||
result, err = tokenize.NewTokenizer(simpler).Execute("curious")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!")
|
||||
result, err = tokenize.NewTokenizer(abcHandler).Execute("bang on!")
|
||||
fmt.Println(result, err)
|
||||
result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match")
|
||||
result, err = tokenize.NewTokenizer(abcHandler).Execute("not a match")
|
||||
fmt.Println(result, err)
|
||||
|
||||
// Output:
|
||||
// a <nil>
|
||||
// c <nil>
|
||||
// b <nil>
|
||||
// <nil> unexpected input at start of file
|
||||
// <nil> mismatch at start of file
|
||||
}
|
||||
|
||||
func ExampleTokenAPI_Merge() {
|
||||
tokenHandler := func(t *parsekit.TokenAPI) bool {
|
||||
tokenHandler := func(t *tokenize.TokenAPI) bool {
|
||||
child1 := t.Fork()
|
||||
child1.NextRune() // reads 'H'
|
||||
child1.Accept()
|
||||
|
@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() {
|
|||
return true
|
||||
}
|
||||
|
||||
result, _ := parsekit.NewTokenizer(tokenHandler).Execute("Hi mister X!")
|
||||
result, _ := tokenize.NewTokenizer(tokenHandler).Execute("Hi mister X!")
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
|
@ -1,13 +1,4 @@
|
|||
package parsekit
|
||||
|
||||
// Tokenizer is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Tokenizer can be instantiated using the parsekit.NewTokenizer()
|
||||
// method.
|
||||
type Tokenizer struct {
|
||||
parser *Parser
|
||||
result *TokenHandlerResult
|
||||
}
|
||||
package tokenize
|
||||
|
||||
// TokenHandler is the function type that is involved in turning a low level
|
||||
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
|
||||
|
@ -47,31 +38,3 @@ func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHand
|
|||
func (handler TokenHandler) Optional() TokenHandler {
|
||||
return MatchOpt(handler)
|
||||
}
|
||||
|
||||
// NewTokenizer instantiates a new Tokenizer.
|
||||
//
|
||||
// This is a simple wrapper around a TokenHandler function. It can be used to
|
||||
// match an input string against that TokenHandler function and retrieve the
|
||||
// results in a straight forward way.
|
||||
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
|
||||
tokenizer := &Tokenizer{}
|
||||
tokenizer.parser = NewParser(func(p *ParseAPI) {
|
||||
if p.Accept(tokenHandler) {
|
||||
tokenizer.result = p.Result()
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expected("")
|
||||
}
|
||||
})
|
||||
return tokenizer
|
||||
}
|
||||
|
||||
// Execute feeds the input to the wrapped TokenHandler function.
|
||||
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
|
||||
//
|
||||
// It returns the TokenHandler's TokenHandlerResult. When an error occurred
|
||||
// during parsing, the error will be set, nil otherwise.
|
||||
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *Error) {
|
||||
err := t.parser.Execute(input)
|
||||
return t.result, err
|
||||
}
|
|
@ -1,8 +1,10 @@
|
|||
package parsekit
|
||||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
)
|
||||
|
||||
// TokenHandlerResult is a struct that is used for holding tokenizing results
|
||||
|
@ -12,9 +14,9 @@ type TokenHandlerResult struct {
|
|||
lastRune *runeInfo // Information about the last rune read using NextRune()
|
||||
runes []rune
|
||||
tokens []*Token
|
||||
cursor *Cursor // current read cursor position, relative to the start of the file
|
||||
offset int // current rune offset relative to the Reader's sliding window
|
||||
err *Error // can be used by a TokenHandler to report a specific issue with the input
|
||||
cursor *common.Cursor // current read cursor position, relative to the start of the file
|
||||
offset int // current rune offset relative to the Reader's sliding window
|
||||
err *common.Error // can be used by a TokenHandler to report a specific issue with the input
|
||||
}
|
||||
|
||||
type runeInfo struct {
|
||||
|
@ -59,7 +61,7 @@ func newTokenHandlerResult() *TokenHandlerResult {
|
|||
return &TokenHandlerResult{
|
||||
runes: []rune{},
|
||||
tokens: []*Token{},
|
||||
cursor: &Cursor{},
|
||||
cursor: &common.Cursor{},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -90,7 +92,7 @@ func (r *TokenHandlerResult) addRunes(set ...interface{}) {
|
|||
case rune:
|
||||
r.runes = append(r.runes, s)
|
||||
default:
|
||||
callerPanic(2, "parsekit.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
|
||||
common.CallerPanic(2, "tokenize.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -167,6 +169,6 @@ func (r *TokenHandlerResult) Value(idx int) interface{} {
|
|||
|
||||
// Cursor retrieves the read cursor from the TokenHandlerResult. This is the
|
||||
// first cursor position after the runes that were read by the TokenHandler.
|
||||
func (r *TokenHandlerResult) Cursor() *Cursor {
|
||||
func (r *TokenHandlerResult) Cursor() *common.Cursor {
|
||||
return r.cursor
|
||||
}
|
|
@ -1,17 +1,19 @@
|
|||
package parsekit
|
||||
package tokenize_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func ExampleToken() {
|
||||
t0 := Token{
|
||||
t0 := tokenize.Token{
|
||||
Runes: []rune("10.1.2.3"),
|
||||
}
|
||||
|
||||
t1 := Token{
|
||||
t1 := tokenize.Token{
|
||||
Runes: []rune("two hundred and twenty four"),
|
||||
Type: "Number",
|
||||
Value: 224,
|
||||
|
@ -19,12 +21,12 @@ func ExampleToken() {
|
|||
|
||||
const TName = 1
|
||||
|
||||
t2 := Token{
|
||||
t2 := tokenize.Token{
|
||||
Runes: []rune("John"),
|
||||
Type: TName,
|
||||
}
|
||||
|
||||
t3 := Token{
|
||||
t3 := tokenize.Token{
|
||||
Runes: []rune("The answer"),
|
||||
Value: 42,
|
||||
}
|
||||
|
@ -39,7 +41,7 @@ func ExampleToken() {
|
|||
}
|
||||
|
||||
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("Testing"))
|
||||
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
|
||||
i.Result().SetRunes("string")
|
||||
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
|
||||
i.Result().SetRunes([]rune("rune slice"))
|
||||
|
@ -51,10 +53,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
|
|||
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := NewTokenAPI(strings.NewReader("Testing"))
|
||||
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
|
||||
i.Result().SetRunes(1234567)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
|
||||
Expect: `tokenize\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenhandlerresult_test.go:\d+`,
|
||||
})
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package parsekit
|
||||
package tokenize
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
@ -9,6 +9,8 @@ import (
|
|||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
)
|
||||
|
||||
// C provides convenient access to a range of parser/combinators that can be
|
||||
|
@ -22,7 +24,7 @@ import (
|
|||
// When using C in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var c = parsekit.C
|
||||
// var c = tokenize.C
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var C = struct {
|
||||
|
@ -58,7 +60,7 @@ var C = struct {
|
|||
// When using A in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var a = parsekit.A
|
||||
// var a = tokenize.A
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var A = struct {
|
||||
|
@ -236,7 +238,7 @@ var A = struct {
|
|||
// When using M in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var m = parsekit.M
|
||||
// var m = tokenize.M
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var M = struct {
|
||||
|
@ -268,7 +270,7 @@ var M = struct {
|
|||
// When using T in your own parser, then it is advised to create a variable
|
||||
// to reference it:
|
||||
//
|
||||
// var t = parsekit.T
|
||||
// var t = tokenize.T
|
||||
//
|
||||
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
|
||||
var T = struct {
|
||||
|
@ -332,7 +334,7 @@ func MatchRunes(expected ...rune) TokenHandler {
|
|||
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
|
||||
func MatchRuneRange(start rune, end rune) TokenHandler {
|
||||
if end < start {
|
||||
callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
common.CallerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
|
||||
}
|
||||
return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end })
|
||||
}
|
||||
|
@ -485,7 +487,7 @@ func MatchRep(times int, handler TokenHandler) TokenHandler {
|
|||
// When more matches are possible, these will be included in the output.
|
||||
func MatchMin(min int, handler TokenHandler) TokenHandler {
|
||||
if min < 0 {
|
||||
callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
|
||||
common.CallerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
|
||||
}
|
||||
return matchMinMax(min, -1, handler, "MatchMin")
|
||||
}
|
||||
|
@ -496,7 +498,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
|
|||
// Zero matches are considered a successful match.
|
||||
func MatchMax(max int, handler TokenHandler) TokenHandler {
|
||||
if max < 0 {
|
||||
callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
|
||||
common.CallerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
|
||||
}
|
||||
return matchMinMax(0, max, handler, "MatchMax")
|
||||
}
|
||||
|
@ -519,17 +521,17 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler {
|
|||
// inclusive. All matches will be included in the output.
|
||||
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
|
||||
if max < 0 {
|
||||
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
|
||||
common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
|
||||
}
|
||||
if min < 0 {
|
||||
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
|
||||
common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
|
||||
}
|
||||
return matchMinMax(min, max, handler, "MatchMinMax")
|
||||
}
|
||||
|
||||
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
|
||||
if max >= 0 && min > max {
|
||||
callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
|
||||
common.CallerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
|
||||
}
|
||||
return func(t *TokenAPI) bool {
|
||||
total := 0
|
||||
|
@ -594,7 +596,7 @@ func MatchSigned(handler TokenHandler) TokenHandler {
|
|||
// ranging from -9223372036854775808 to 9223372036854775807.
|
||||
func MatchIntegerBetween(min int64, max int64) TokenHandler {
|
||||
if max < min {
|
||||
callerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
|
||||
common.CallerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
|
||||
}
|
||||
digits := MatchSigned(MatchDigits())
|
||||
return func(t *TokenAPI) bool {
|
|
@ -1,15 +1,15 @@
|
|||
package parsekit_test
|
||||
package tokenize_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit"
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
func TestCombinators(t *testing.T) {
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"abc", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
{"bcd", c.Not(a.Rune('b')), false, ""},
|
||||
|
@ -68,19 +68,19 @@ func TestCombinators(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
parsekit.AssertPanics(t, []parsekit.PanicT{
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
AssertPanics(t, []PanicT{
|
||||
{func() { a.RuneRange('z', 'a') }, true,
|
||||
`TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
|
||||
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, true,
|
||||
{func() { c.MinMax(-1, 1, a.Space) }, true,
|
||||
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.MinMax(1, -1, parsekit.A.Space) }, true,
|
||||
{func() { c.MinMax(1, -1, a.Space) }, true,
|
||||
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { c.MinMax(10, 5, parsekit.A.Space) }, true,
|
||||
{func() { c.MinMax(10, 5, a.Space) }, true,
|
||||
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
|
||||
{func() { c.Min(-10, parsekit.A.Space) }, true,
|
||||
{func() { c.Min(-10, a.Space) }, true,
|
||||
`TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.Max(-42, parsekit.A.Space) }, true,
|
||||
{func() { c.Max(-42, a.Space) }, true,
|
||||
`TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { a.IntegerBetween(10, -10) }, true,
|
||||
`TokenHandler: MatchIntegerBetween definition error at /.*/tokenhandlers_builtin_test.go:\d+: max -10 must not be < min 10`},
|
||||
|
@ -88,8 +88,8 @@ func TestCombinatorPanics(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
var a = parsekit.A
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var a = tokenize.A
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
|
@ -225,8 +225,8 @@ func TestAtoms(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestIPv4Atoms(t *testing.T) {
|
||||
var a = parsekit.A
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var a = tokenize.A
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"0X", a.Octet, true, "0"},
|
||||
{"00X", a.Octet, true, "00"},
|
||||
{"000X", a.Octet, true, "000"},
|
||||
|
@ -257,8 +257,8 @@ func TestIPv4Atoms(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestIPv6Atoms(t *testing.T) {
|
||||
var a = parsekit.A
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var a = tokenize.A
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"", a.IPv6, false, ""},
|
||||
{"::", a.IPv6, true, "::"},
|
||||
{"1::", a.IPv6, true, "1::"},
|
||||
|
@ -286,8 +286,8 @@ func TestIPv6Atoms(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
|
@ -306,10 +306,10 @@ func TestModifiers(t *testing.T) {
|
|||
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||
// input cannot be processed successfully.
|
||||
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||
var a, tok = parsekit.A, parsekit.T
|
||||
var a, tok = tokenize.A, tokenize.T
|
||||
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||
parser := parsekit.NewTokenizer(invalid)
|
||||
parsekit.AssertPanic(t, parsekit.PanicT{
|
||||
parser := tokenize.NewTokenizer(invalid)
|
||||
AssertPanic(t, PanicT{
|
||||
func() { parser.Execute("no") }, false,
|
||||
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
|
||||
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
|
||||
|
@ -317,19 +317,19 @@ func TestTokenMakerErrorHandling(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTokenMakers(t *testing.T) {
|
||||
var c, a, tok = parsekit.C, parsekit.A, parsekit.T
|
||||
parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{
|
||||
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||
AssertTokenMakers(t, []TokenMakerT{
|
||||
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
|
||||
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
||||
[]tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
||||
|
||||
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
|
||||
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
||||
[]tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
||||
|
||||
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
||||
[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
|
||||
[]tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
|
||||
|
||||
{"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
|
||||
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{
|
||||
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
|
||||
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
|
||||
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
|
||||
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
|
||||
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
|
||||
|
@ -337,28 +337,28 @@ func TestTokenMakers(t *testing.T) {
|
|||
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
|
||||
}},
|
||||
|
||||
{"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
|
||||
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
|
||||
|
||||
{`2147483647XYZ`, tok.Int("D", a.Integer), []parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
|
||||
{`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
|
||||
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
|
||||
{`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
|
||||
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
|
||||
{`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
|
||||
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
|
||||
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
|
||||
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
|
||||
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
|
||||
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
|
||||
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
|
||||
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
|
||||
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
|
||||
|
||||
{`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
|
||||
{`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
|
||||
{`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
|
||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
|
||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
|
||||
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
|
||||
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
|
||||
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
|
||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
|
||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
|
||||
|
||||
{`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
|
||||
{`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
|
||||
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
|
||||
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
|
||||
|
||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||
{Type: "P", Runes: []rune("1"), Value: true},
|
||||
{Type: "P", Runes: []rune("t"), Value: true},
|
||||
{Type: "P", Runes: []rune("T"), Value: true},
|
||||
|
@ -367,7 +367,7 @@ func TestTokenMakers(t *testing.T) {
|
|||
{Type: "P", Runes: []rune("True"), Value: true},
|
||||
}},
|
||||
|
||||
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
|
||||
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||
{Type: "P", Runes: []rune("0"), Value: false},
|
||||
{Type: "P", Runes: []rune("f"), Value: false},
|
||||
{Type: "P", Runes: []rune("F"), Value: false},
|
||||
|
@ -379,8 +379,8 @@ func TestTokenMakers(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSyntacticSugar(t *testing.T) {
|
||||
var a = parsekit.A
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
var a = tokenize.A
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
|
||||
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
|
||||
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
|
||||
|
@ -391,37 +391,9 @@ func TestSyntacticSugar(t *testing.T) {
|
|||
})
|
||||
}
|
||||
|
||||
func TestSequenceOfRunes(t *testing.T) {
|
||||
var c, a = parsekit.C, parsekit.A
|
||||
sequence := c.Seq(
|
||||
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
|
||||
a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
|
||||
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
|
||||
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
|
||||
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
|
||||
)
|
||||
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
|
||||
output := ""
|
||||
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
|
||||
if p.Accept(sequence) {
|
||||
output = p.Result().String()
|
||||
p.Stop()
|
||||
} else {
|
||||
p.Expected("sequence of runes")
|
||||
}
|
||||
})
|
||||
err := parser.Execute(input)
|
||||
if err != nil {
|
||||
t.Fatalf("Parsing failed: %s", err)
|
||||
}
|
||||
if output != input {
|
||||
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output)
|
||||
}
|
||||
}
|
||||
|
||||
// I know, this is hell, but that's the whole point for this test :->
|
||||
func TestCombination(t *testing.T) {
|
||||
var c, a, m = parsekit.C, parsekit.A, parsekit.M
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
demonic := c.Seq(
|
||||
c.Opt(a.SquareOpen),
|
||||
m.Trim(
|
||||
|
@ -442,7 +414,7 @@ func TestCombination(t *testing.T) {
|
|||
c.Opt(a.SquareClose),
|
||||
)
|
||||
|
||||
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
|
||||
AssertTokenHandlers(t, []TokenHandlerT{
|
||||
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
||||
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
||||
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
|
@ -0,0 +1,34 @@
|
|||
package tokenize
|
||||
|
||||
import (
|
||||
"git.makaay.nl/mauricem/go-parsekit/common"
|
||||
)
|
||||
|
||||
// Tokenizer is the top-level struct that holds the configuration for
|
||||
// a parser that is based solely on a TokenHandler function.
|
||||
// The Tokenizer can be instantiated using the tokenize.NewTokenizer()
|
||||
// function.
|
||||
type Tokenizer struct {
|
||||
// handler is the TokenHandler that Execute() runs against the input.
handler TokenHandler
|
||||
}
|
||||
|
||||
// NewTokenizer instantiates a new Tokenizer, based on the provided TokenHandler.
|
||||
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
|
||||
return &Tokenizer{tokenHandler}
|
||||
}
|
||||
|
||||
// Execute feeds the input to the wrapped TokenHandler function.
|
||||
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
|
||||
//
|
||||
// It returns the TokenHandler's TokenHandlerResult. When an error occurred
|
||||
// during parsing, the error will be set, nil otherwise.
|
||||
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *common.Error) {
|
||||
api := NewTokenAPI(input)
|
||||
ok := t.handler(api)
|
||||
|
||||
if !ok {
|
||||
err := &common.Error{Message: "mismatch", Cursor: common.Cursor{}}
|
||||
return nil, err
|
||||
}
|
||||
return api.Result(), nil
|
||||
}
|
|
@ -0,0 +1,179 @@
|
|||
package tokenize_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"git.makaay.nl/mauricem/go-parsekit/tokenize"
|
||||
)
|
||||
|
||||
// TODO For error handling, it would be really cool if for example the
|
||||
// 10.0.300.1/24 case would return an actual error stating that
|
||||
// 300 is not a valid octet for an IPv4 address.
|
||||
// Biggest thing to take care of here, is that errors should not stop
|
||||
// a Parser flow (since we might be trying to match different cases in
|
||||
// sequence), but a Parser flow should optionally be able to make use
|
||||
// of the actual error.
|
||||
// The same goes for a Tokenizer, since those can also make use of
|
||||
// optional matching using tokenize.C.Any(...) for example. If matching
|
||||
// for Any(IPv4, Digits), the example case should simply end up with 10
|
||||
// after the IPv4 mismatch.
|
||||
func ExampleTokenizer_Execute() {
|
||||
// Build the tokenizer for ip/mask.
|
||||
var c, a, t = tokenize.C, tokenize.A, tokenize.T
|
||||
ip := t.Str("ip", a.IPv4)
|
||||
mask := t.Int8("mask", a.IPv4CIDRMask)
|
||||
cidr := c.Seq(ip, a.Slash, mask)
|
||||
tokenizer := tokenize.NewTokenizer(cidr)
|
||||
|
||||
for _, input := range []string{
|
||||
"000.000.000.000/000",
|
||||
"192.168.0.1/24",
|
||||
"255.255.255.255/32",
|
||||
"10.0.300.1/24",
|
||||
"not an IPv4 CIDR",
|
||||
} {
|
||||
// Execute returns a TokenHandlerResult and an error, which is nil on success.
|
||||
result, err := tokenizer.Execute(input)
|
||||
|
||||
if err == nil {
|
||||
fmt.Printf("Result: %s\n", result.Tokens())
|
||||
} else {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
|
||||
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
|
||||
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
|
||||
// Error: mismatch at start of file
|
||||
// Error: mismatch at start of file
|
||||
}
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
r, _ := mkInput().NextRune()
|
||||
AssertEqual(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: mkInput().Accept,
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.NextRune()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.Fork()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := tokenize.NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := tokenize.NewTokenAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := tokenize.NewTokenAPI(strings.NewReader("X"))
|
||||
f := i.Fork()
|
||||
f.NextRune()
|
||||
f.Accept()
|
||||
r, err := f.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
r, err = i.NextRune()
|
||||
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func mkInput() *tokenize.TokenAPI {
|
||||
return tokenize.NewTokenAPI("Testing")
|
||||
}
|
|
@ -0,0 +1,125 @@
|
|||
package tokenize
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := NewTokenAPI("Testing")
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
|
||||
// Fork
|
||||
f := i.Fork()
|
||||
AssertEqual(t, f, i.child, "Input.child (must be f)")
|
||||
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
|
||||
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Accept two runes via fork.
|
||||
f.NextRune()
|
||||
f.Accept() // e
|
||||
f.NextRune()
|
||||
f.Accept() // s
|
||||
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
|
||||
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Merge fork back into parent
|
||||
f.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||
AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := NewTokenAPI("Testing")
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
f1.NextRune()
|
||||
f1.Accept()
|
||||
f2 := f1.Fork()
|
||||
f2.NextRune()
|
||||
f2.Accept()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.result.offset, "i.offset A")
|
||||
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
|
||||
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
|
||||
f2.Merge()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.result.offset, "i.offset B")
|
||||
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
|
||||
f1.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 3, i.result.offset, "i.offset C")
|
||||
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
|
||||
}
|
||||
|
||||
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||
i := NewTokenAPI("Testing")
|
||||
f1 := i.Fork()
|
||||
f2 := f1.Fork()
|
||||
f3 := f2.Fork()
|
||||
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||
f5 := f4.Fork()
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == f1, "i.child == f1")
|
||||
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
|
||||
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
|
||||
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
|
||||
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
|
||||
i.NextRune()
|
||||
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == nil, "i.child == nil")
|
||||
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
|
||||
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
|
||||
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
|
||||
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := NewTokenAPI("Testing")
|
||||
r, _ := i.NextRune()
|
||||
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
|
||||
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
|
||||
i.Accept()
|
||||
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
|
||||
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
|
||||
r, _ = i.NextRune()
|
||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
// AssertEqual reports a test error on t when expected and actual are not
// equal according to the != operator on interface values. The forWhat
// argument names the checked value in the error message.
//
// Strings and runes are reported quoted. All other values are reported with
// %v, so that for example ints and bools show up as 1 and true, instead of
// as '\x01' and %!q(bool=true).
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	t.Helper() // attribute a failure to the calling test line
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: "+assertVerb(expected)+"\nactual: "+assertVerb(actual),
			forWhat, expected, actual)
	}
}

// assertVerb returns the fmt verb that AssertEqual uses to report v.
func assertVerb(v interface{}) string {
	switch v.(type) {
	case string, rune:
		return "%q"
	default:
		return "%v"
	}
}
|
||||
|
||||
// AssertTrue reports a test error on t when b is false. The assertion
// argument describes the checked condition in the error message.
func AssertTrue(t *testing.T, b bool, assertion string) {
	if b {
		return
	}
	t.Errorf("Assertion %s is false", assertion)
}
|
|
@ -1,291 +0,0 @@
|
|||
package parsekit
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO For error handling, it would be really cool if for example the
|
||||
// 10.0.300.1/24 case would return an actual error stating that
|
||||
// 300 is not a valid octet for an IPv4 address.
|
||||
// Biggest thing to take care of here, is that errors should not stop
|
||||
// a Parser flow (since we might be trying to match different cases in
|
||||
// sequence), but a Parser flow should optionally be able to make use
|
||||
// of the actual error.
|
||||
// The same goes for a Tokenizer, since those can also make use of
|
||||
// optional matching using parsekit.C.Any(...) for example. If matching
|
||||
// for Any(IPv4, Digits), the example case should simply end up with 10
|
||||
// after the IPv4 mismatch.
|
||||
func ExampleTokenizer_Execute() {
|
||||
// Build the tokenizer for ip/mask.
|
||||
ip := T.Str("ip", A.IPv4)
|
||||
mask := T.Int8("mask", A.IPv4CIDRMask)
|
||||
cidr := C.Seq(ip, A.Slash, mask)
|
||||
tokenizer := NewTokenizer(cidr)
|
||||
|
||||
for _, input := range []string{
|
||||
"000.000.000.000/000",
|
||||
"192.168.0.1/24",
|
||||
"255.255.255.255/32",
|
||||
"10.0.300.1/24",
|
||||
"not an IPv4 CIDR",
|
||||
} {
|
||||
// Execute returns a TokenHandlerResult and an error, which is nil on success.
|
||||
result, err := tokenizer.Execute(input)
|
||||
|
||||
if err == nil {
|
||||
fmt.Printf("Result: %s\n", result.Tokens())
|
||||
} else {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
|
||||
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
|
||||
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
|
||||
// Error: unexpected input at start of file
|
||||
// Error: unexpected input at start of file
|
||||
}
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
r, _ := mkInput().NextRune()
|
||||
AssertEqual(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: mkInput().Accept,
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.NextRune()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
f := i.Fork()
|
||||
i.Fork()
|
||||
f.Merge()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
|
||||
}
|
||||
|
||||
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
|
||||
i := mkInput()
|
||||
f1 := i.Fork()
|
||||
f2 := f1.Fork()
|
||||
f3 := f2.Fork()
|
||||
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
|
||||
f5 := f4.Fork()
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == f1, "i.child == f1")
|
||||
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
|
||||
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
|
||||
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
|
||||
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
|
||||
i.NextRune()
|
||||
|
||||
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
|
||||
AssertEqual(t, true, i.child == nil, "i.child == nil")
|
||||
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
|
||||
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
|
||||
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
|
||||
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
|
||||
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
|
||||
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
|
||||
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
|
||||
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
|
||||
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
|
||||
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := mkInput()
|
||||
r, _ := i.NextRune()
|
||||
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
|
||||
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
|
||||
i.Accept()
|
||||
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
|
||||
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
|
||||
r, _ = i.NextRune()
|
||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
|
||||
i := mkInput()
|
||||
for j := 0; j < 7; j++ {
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", i.result.cursor.String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", i.result.cursor.String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
AssertEqual(t, "line 2, column 1", i.result.cursor.String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", i.result.cursor.String(), "cursor 4")
|
||||
}
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
|
||||
// Fork
|
||||
f := i.Fork()
|
||||
AssertEqual(t, f, i.child, "Input.child (must be f)")
|
||||
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
|
||||
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Accept two runes via fork.
|
||||
f.NextRune()
|
||||
f.Accept() // e
|
||||
f.NextRune()
|
||||
f.Accept() // s
|
||||
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
|
||||
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
// Merge fork back into parent
|
||||
f.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
|
||||
AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := mkInput()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
f1.NextRune()
|
||||
f1.Accept()
|
||||
f2 := f1.Fork()
|
||||
f2.NextRune()
|
||||
f2.Accept()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.result.offset, "i.offset A")
|
||||
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
|
||||
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
|
||||
f2.Merge()
|
||||
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 1, i.result.offset, "i.offset B")
|
||||
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
|
||||
f1.Merge()
|
||||
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
|
||||
AssertEqual(t, 3, i.result.offset, "i.offset C")
|
||||
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
|
||||
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
|
||||
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
|
||||
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := NewTokenAPI(strings.NewReader("X"))
|
||||
f := i.Fork()
|
||||
f.NextRune()
|
||||
f.Accept()
|
||||
r, err := f.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
r, err = i.NextRune()
|
||||
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func mkInput() *TokenAPI {
|
||||
return NewTokenAPI(strings.NewReader("Testing"))
|
||||
}
|
Loading…
Reference in New Issue