Splitting up functionality into packages, intermediate step.

Maurice Makaay 2019-06-11 22:23:30 +00:00
parent 0f7b4e0d26
commit 1f0e0fcc17
30 changed files with 727 additions and 746 deletions
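For code that uses parsekit, the visible effect of this intermediate step is that the single parsekit package is being split up: the root package keeps the parser layer (Parser, ParseAPI), tokenizing moves to tokenize, the buffered reader to read, and shared types (Cursor, Error, the caller helpers) to common. A minimal sketch of what that means for calling code, using only names that appear in the diffs below:

package main

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit"
	"git.makaay.nl/mauricem/go-parsekit/common"
	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func main() {
	// Tokenizing building blocks now come from tokenize, not parsekit.
	a := tokenize.A

	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
		if p.Accept(a.Integer) {
			fmt.Println("matched:", p.Result().String())
		}
		p.ExpectEndOfFile()
	})

	// Errors are now the shared common.Error type.
	var err *common.Error = parser.Execute("42")
	fmt.Println("error:", err)
}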

View File

@ -5,6 +5,8 @@ package parsekit
import (
"regexp"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@ -63,7 +65,7 @@ func AssertPanic(t *testing.T, p PanicT) {
type TokenHandlerT struct {
Input string
TokenHandler TokenHandler
TokenHandler tokenize.TokenHandler
MustMatch bool
Expected string
}
@ -75,7 +77,7 @@ func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
}
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
result, err := NewTokenizer(test.TokenHandler).Execute(test.Input)
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
@ -91,8 +93,8 @@ func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
type TokenMakerT struct {
Input string
TokenHandler TokenHandler
Expected []Token
TokenHandler tokenize.TokenHandler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
@ -102,7 +104,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
result, err := NewTokenizer(test.TokenHandler).Execute(test.Input)
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {

View File

@ -1,4 +1,4 @@
package parsekit
package common
import "fmt"

View File

@ -1,14 +1,14 @@
package parsekit_test
package common_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
)
func ExampleCursor_Move() {
c := &parsekit.Cursor{}
c := &common.Cursor{}
fmt.Printf("after initialization : %s\n", c)
fmt.Printf("after 'some words' : %s\n", c.Move("some words"))
fmt.Printf("after '\\n' : %s\n", c.Move("\n"))
@ -22,7 +22,7 @@ func ExampleCursor_Move() {
}
func ExampleCursor_String() {
c := &parsekit.Cursor{}
c := &common.Cursor{}
fmt.Println(c.String())
c.Move("\nfoobar")
@ -51,7 +51,7 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
c := parsekit.Cursor{}
c := common.Cursor{}
for _, s := range test.input {
c.Move(s)
}

View File

@ -1,4 +1,4 @@
package parsekit
package common
import (
"fmt"
@ -16,12 +16,12 @@ type Error struct {
func (err *Error) Error() string {
if err == nil {
callerPanic(1, "parsekit.Error.Error(): method called with nil error at {caller}")
CallerPanic(1, "common.Error.Error(): method called with nil error at {caller}")
}
return fmt.Sprintf("%s at %s", err.Message, err.Cursor)
}
func callerFunc(depth int) string {
func CallerFunc(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
pc, _, _, _ := runtime.Caller(depth + 1)
caller := runtime.FuncForPC(pc)
@ -36,7 +36,7 @@ func callerFilepos(depth int) string {
return fmt.Sprintf("%s:%d", file, line)
}
func callerPanic(depth int, f string, args ...interface{}) {
func CallerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
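Exporting callerFunc() and callerPanic() as CallerFunc() and CallerPanic() is what lets the new subpackages keep producing the {caller} style panic messages. A minimal usage sketch from a depending package (mustBePositive is a hypothetical guard, not part of this commit):

import "git.makaay.nl/mauricem/go-parsekit/common"

// mustBePositive shows the calling convention: depth 1 makes {caller}
// resolve to the file:line of whoever called mustBePositive itself.
func mustBePositive(n int) {
	if n < 0 {
		common.CallerPanic(1, "mustBePositive(): called with %d at {caller}", n)
	}
}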

View File

@ -1,15 +1,15 @@
package parsekit_test
package common_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
)
func ExampleError() {
err := &parsekit.Error{
err := &common.Error{
Message: "it broke down",
Cursor: parsekit.Cursor{Line: 9, Column: 41},
Cursor: common.Cursor{Line: 9, Column: 41},
}
fmt.Println(err.Error())

View File

@ -11,6 +11,8 @@ import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_basicCalculator1() {
@ -54,7 +56,7 @@ func Example_basicCalculator1() {
// ComputeSimple interprets a simple calculation, consisting of only integers
// and add or subtract operators. It returns the result of the calculation.
// An error is returned in case the calculation failed.
func ComputeSimple(calculation string) (int64, *parsekit.Error) {
func ComputeSimple(calculation string) (int64, *common.Error) {
calculator := &simpleCalculator{op: +1}
parser := parsekit.NewParser(calculator.number)
err := parser.Execute(calculation)
@ -70,9 +72,9 @@ type simpleCalculator struct {
}
// A definition of an int64, which conveniently drops surrounding blanks.
var dropBlank = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Blanks))
var bareInteger = parsekit.C.Seq(dropBlank, parsekit.A.Integer, dropBlank)
var int64Token = parsekit.T.Int64(nil, bareInteger)
var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks))
var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank)
var int64Token = tokenize.T.Int64(nil, bareInteger)
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
if p.Accept(int64Token) {
@ -84,7 +86,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
}
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
var A = parsekit.A
var A = tokenize.A
switch {
case p.Accept(A.Add):
c.op = +1

View File

@ -17,6 +17,8 @@ import (
"math"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_basicCalculator2() {
@ -75,7 +77,7 @@ type calculator struct {
// Compute takes a calculation string as input and returns the interpreted result
// value for the calculation. An error can be returned as well, in case the
// calculation fails for some reason.
func Compute(input string) (float64, *parsekit.Error) {
func Compute(input string) (float64, *common.Error) {
calc := &calculator{}
parser := parsekit.NewParser(calc.calculation)
err := parser.Execute(input)
@ -94,7 +96,7 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
func (calc *calculator) expr(p *parsekit.ParseAPI) {
calc.interpreter.push()
var A = parsekit.A
var A = tokenize.A
if p.Handle(calc.term) {
for p.Accept(A.Add.Or(A.Subtract)) {
op := p.Result().Rune(0)
@ -112,7 +114,7 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
func (calc *calculator) term(p *parsekit.ParseAPI) {
calc.interpreter.push()
var A = parsekit.A
var A = tokenize.A
if p.Handle(calc.factor) {
for p.Accept(A.Multiply.Or(A.Divide)) {
op := p.Result().Rune(0)
@ -129,7 +131,7 @@ func (calc *calculator) term(p *parsekit.ParseAPI) {
// <space> = (<space> (SPACE|TAB) | "")
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
func (calc *calculator) factor(p *parsekit.ParseAPI) {
var A, T = parsekit.A, parsekit.T
var A, T = tokenize.A, tokenize.T
p.Accept(A.Blanks)
switch {
case p.Accept(T.Float64(nil, A.Signed(A.Float))):

View File

@ -7,7 +7,7 @@ package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_dutchPostcodeUsingTokenizer() {
@ -40,20 +40,20 @@ func Example_dutchPostcodeUsingTokenizer() {
// [1] Input: "2233Ab" Output: 2233 AB Tokens: PCD(2233) PCL(AB)
// [2] Input: "1001\t\tab" Output: 1001 AB Tokens: PCD(1001) PCL(AB)
// [3] Input: "1818ab" Output: 1818 AB Tokens: PCD(1818) PCL(AB)
// [4] Input: "1212abc" Error: unexpected input at start of file
// [5] Input: "1234" Error: unexpected input at start of file
// [6] Input: "huh" Error: unexpected input at start of file
// [7] Input: "" Error: unexpected end of file at start of file
// [8] Input: "\xcd2222AB" Error: unexpected input at start of file
// [4] Input: "1212abc" Error: mismatch at start of file
// [5] Input: "1234" Error: mismatch at start of file
// [6] Input: "huh" Error: mismatch at start of file
// [7] Input: "" Error: mismatch at start of file
// [8] Input: "\xcd2222AB" Error: mismatch at start of file
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
func createPostcodeTokenizer() *parsekit.Tokenizer {
func createPostcodeTokenizer() *tokenize.Tokenizer {
// Easy access to the parsekit definitions.
C, A, M, T := parsekit.C, parsekit.A, parsekit.M, parsekit.T
C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T
// TokenHandler functions are created and combined to satisfy these rules:
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
@ -61,14 +61,13 @@ func createPostcodeTokenizer() *parsekit.Tokenizer {
// - A space between letters and digits is optional.
// - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters.
digitNotZero := C.Except(A.Rune('0'), A.Digit)
pcDigits := C.Seq(digitNotZero, A.Digit.Times(3))
pcDigits := A.DigitNotZero.Then(A.Digit.Times(3))
pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
pcLetters := M.ToUpper(pcLetter.Times(2))
space := M.Replace(C.Opt(A.Blanks), " ")
space := M.Replace(A.Blanks.Optional(), " ")
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewTokenizer(postcode)
return tokenize.NewTokenizer(postcode)
}

View File

@ -20,6 +20,8 @@ import (
"strings"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_helloWorldUsingParser1() {
@ -74,14 +76,14 @@ type helloparser1 struct {
greetee string
}
func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
func (h *helloparser1) Parse(input string) (string, *common.Error) {
parser := parsekit.NewParser(h.start)
err := parser.Execute(input)
return h.greetee, err
}
func (h *helloparser1) start(p *parsekit.ParseAPI) {
a := parsekit.A
a := tokenize.A
if p.Accept(a.StrNoCase("hello")) {
p.Handle(h.comma)
} else {
@ -90,7 +92,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
}
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
a := parsekit.A
a := tokenize.A
switch {
case p.Accept(a.Blanks):
p.Handle(h.comma)
@ -102,7 +104,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) {
}
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
a := parsekit.A
a := tokenize.A
p.Accept(a.Blanks)
if p.Peek(a.AnyRune) {
p.Handle(h.name)
@ -112,7 +114,7 @@ func (h *helloparser1) startName(p *parsekit.ParseAPI) {
}
func (h *helloparser1) name(p *parsekit.ParseAPI) {
a := parsekit.A
a := tokenize.A
switch {
case p.Peek(a.Excl):
p.Handle(h.exclamation)
@ -125,7 +127,7 @@ func (h *helloparser1) name(p *parsekit.ParseAPI) {
}
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
a := parsekit.A
a := tokenize.A
if p.Accept(a.Excl) {
p.Handle(h.end)
} else {
@ -137,7 +139,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
// different route was taken to implement a more friendly 'end of greeting'
// error message.
func (h *helloparser1) end(p *parsekit.ParseAPI) {
var a = parsekit.A
var a = tokenize.A
if !p.Accept(a.EndOfFile) {
p.Expected("end of greeting")
return

View File

@ -10,7 +10,7 @@ package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_helloWorldUsingTokenizer() {
@ -37,18 +37,18 @@ func Example_helloWorldUsingTokenizer() {
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: unexpected input at start of file
// [5] Input: "Hello, world" Error: unexpected input at start of file
// [6] Input: "Hello,!" Error: unexpected input at start of file
// [4] Input: "Oh no!" Error: mismatch at start of file
// [5] Input: "Hello, world" Error: mismatch at start of file
// [6] Input: "Hello,!" Error: mismatch at start of file
}
// ---------------------------------------------------------------------------
// Implementation of the parser
// ---------------------------------------------------------------------------
func createHelloTokenizer() *parsekit.Tokenizer {
func createHelloTokenizer() *tokenize.Tokenizer {
// Easy access to the parsekit definitions.
c, a, m := parsekit.C, parsekit.A, parsekit.M
c, a, m := tokenize.C, tokenize.A, tokenize.M
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
@ -65,5 +65,5 @@ func createHelloTokenizer() *parsekit.Tokenizer {
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
// us to match some input against that handler.
return parsekit.NewTokenizer(greeting)
return tokenize.NewTokenizer(greeting)
}

View File

@ -17,6 +17,8 @@ import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func Example_helloWorldUsingParser2() {
@ -72,14 +74,14 @@ type helloparser2 struct {
greetee string
}
func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
func (h *helloparser2) Parse(input string) (string, *common.Error) {
parser := parsekit.NewParser(h.start)
err := parser.Execute(input)
return h.greetee, err
}
func (h *helloparser2) start(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
c, a, m := tokenize.C, tokenize.A, tokenize.M
if !p.Accept(a.StrNoCase("hello")) {
p.Error("the greeting is not being friendly")
return

View File

@ -11,12 +11,14 @@ import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
type Chunks []string
func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
c, a := parsekit.C, parsekit.A
func (l *Chunks) AddChopped(s string, chunkSize int) *common.Error {
c, a := tokenize.C, tokenize.A
chunkOfRunes := c.MinMax(1, chunkSize, a.AnyRune)
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {

View File

@ -3,16 +3,19 @@ package parsekit
import (
"fmt"
"io"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// ParseAPI holds the internal state of a parse run and provides an API that
// ParseHandler methods can use to communicate with the parser.
type ParseAPI struct {
tokenAPI *TokenAPI // the TokenAPI, used for communicating with TokenHandler functions
loopCheck map[string]bool // used for parser loop detection
result *TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
tokenAPI *tokenize.TokenAPI // the TokenAPI, used for communicating with TokenHandler functions
loopCheck map[string]bool // used for parser loop detection
result *tokenize.TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
err *common.Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
}
// Peek checks if the upcoming input data matches the provided TokenHandler.
@ -22,13 +25,13 @@ type ParseAPI struct {
//
// After calling this method, you can retrieve the produced TokenHandlerResult
// using the ParseAPI.Result() method.
func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool {
func (p *ParseAPI) Peek(tokenHandler tokenize.TokenHandler) bool {
p.result = nil
forkedTokenAPI, ok := p.invokeTokenHandler("Peek", tokenHandler)
if ok {
p.result = forkedTokenAPI.Result()
p.tokenAPI.clearResults()
p.tokenAPI.detachChilds()
p.tokenAPI.ClearResults()
p.tokenAPI.DetachChilds()
}
return ok
}
@ -39,29 +42,29 @@ func (p *ParseAPI) Peek(tokenHandler TokenHandler) bool {
//
// After calling this method, you can retrieve the produced TokenHandlerResult
// using the ParseAPI.Result() method.
func (p *ParseAPI) Accept(tokenHandler TokenHandler) bool {
func (p *ParseAPI) Accept(tokenHandler tokenize.TokenHandler) bool {
p.result = nil
forkedTokenAPI, ok := p.invokeTokenHandler("Accept", tokenHandler)
if ok {
forkedTokenAPI.Merge()
p.result = p.tokenAPI.Result()
p.tokenAPI.detachChilds()
if p.tokenAPI.flushReader() {
p.tokenAPI.DetachChilds()
if p.tokenAPI.FlushReader() {
p.initLoopCheck()
}
}
return ok
}
func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler TokenHandler) (*TokenAPI, bool) {
func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler tokenize.TokenHandler) (*tokenize.TokenAPI, bool) {
p.panicWhenStoppedOrInError()
p.checkForLoops()
if tokenHandler == nil {
callerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
common.CallerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
}
p.result = nil
p.tokenAPI.clearResults()
p.tokenAPI.ClearResults()
child := p.tokenAPI.Fork()
ok := tokenHandler(child)
@ -80,14 +83,14 @@ func (p *ParseAPI) panicWhenStoppedOrInError() {
return
}
called := callerFunc(1)
called := common.CallerFunc(1)
after := "Error()"
if p.stopped {
after = "Stop()"
}
callerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
common.CallerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
"no calls allowed after ParseAPI.%s", called, called, after)
}
@ -100,9 +103,9 @@ func (p *ParseAPI) initLoopCheck() {
}
func (p *ParseAPI) checkForLoops() {
filepos := callerFilepos(3)
filepos := common.CallerFilePos(3)
if _, ok := p.loopCheck[filepos]; ok {
callerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}")
common.CallerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
@ -112,10 +115,10 @@ func (p *ParseAPI) checkForLoops() {
//
// When Result() is called without first doing a Peek() or Accept(), then no
// result will be available and the method will panic.
func (p *ParseAPI) Result() *TokenHandlerResult {
func (p *ParseAPI) Result() *tokenize.TokenHandlerResult {
result := p.result
if p.result == nil {
callerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
common.CallerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
"at {caller} without calling ParseAPI.Peek() or ParseAPI.Accept() on beforehand")
}
return result
@ -136,7 +139,7 @@ func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
if parseHandler == nil {
callerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
common.CallerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
}
}
@ -164,7 +167,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = &Error{message, *p.tokenAPI.result.cursor}
p.err = &common.Error{message, *p.tokenAPI.Result().Cursor()}
}
// ExpectEndOfFile can be used to check if the input is at end of file.
@ -175,7 +178,7 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
// as the expectation.
func (p *ParseAPI) ExpectEndOfFile() {
p.panicWhenStoppedOrInError()
if p.Peek(A.EndOfFile) {
if p.Peek(tokenize.A.EndOfFile) {
p.Stop()
} else {
p.Expected("end of file")
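One rule from this file that is easy to trip over: once a ParseHandler has called Stop() or reported an Error(), panicWhenStoppedOrInError() turns any further Peek()/Accept()/Handle() call into a panic. A minimal sketch of the illegal pattern (the handler body is purely illustrative):

parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
	p.Stop()
	p.Accept(tokenize.A.AnyRune) // panics: no calls allowed after ParseAPI.Stop()
})
parser.Execute("any input")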

View File

@ -1,5 +1,10 @@
package parsekit
import (
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
@ -27,7 +32,7 @@ type ParseHandler func(*ParseAPI)
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
if startHandler == nil {
callerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
common.CallerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
}
return &Parser{startHandler: startHandler}
}
@ -36,9 +41,9 @@ func NewParser(startHandler ParseHandler) *Parser {
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
//
// When an error occurs during parsing, then this error is returned, nil otherwise.
func (p *Parser) Execute(input interface{}) *Error {
func (p *Parser) Execute(input interface{}) *common.Error {
api := &ParseAPI{
tokenAPI: NewTokenAPI(input),
tokenAPI: tokenize.NewTokenAPI(input),
loopCheck: map[string]bool{},
}
if api.Handle(p.startHandler) {

View File

@ -5,11 +5,12 @@ import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleParser_usingAcceptedRunes() {
// Easy access to the parsekit definitions.
a := parsekit.A
a := tokenize.A
matches := []string{}
@ -28,7 +29,7 @@ func ExampleParser_usingAcceptedRunes() {
func ExampleParser_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := parsekit.C, parsekit.A, parsekit.T
c, a, tok := tokenize.C, tokenize.A, tokenize.T
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
@ -60,7 +61,7 @@ func ExampleParseAPI_Accept_inIfStatement() {
// When a case-insensitive match on "Yowza!" is found by the
// tokenizer, then Accept() will make the result available
// through ParseAPI.Result()
if p.Accept(parsekit.A.StrNoCase("Yowza!")) {
if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
// Result.String() returns a string containing all
// accepted runes that were matched against.
fmt.Println(p.Result().String())
@ -77,9 +78,9 @@ func ExampleParseAPI_Accept_inSwitchStatement() {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for loop := true; loop; {
switch {
case p.Accept(parsekit.A.Rune('X')):
case p.Accept(tokenize.A.Rune('X')):
// NOOP, skip this rune
case p.Accept(parsekit.A.AnyRune):
case p.Accept(tokenize.A.AnyRune):
result += p.Result().String()
default:
loop = false
@ -94,7 +95,7 @@ func ExampleParseAPI_Accept_inSwitchStatement() {
}
func ExampleParseAPI_Stop() {
C, A := parsekit.C, parsekit.A
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
@ -110,7 +111,7 @@ func ExampleParseAPI_Stop() {
}
func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
C, A := parsekit.C, parsekit.A
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("Word: ")
@ -128,7 +129,7 @@ func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
}
func ExampleParseAPI_Stop_notCalledButInputPending() {
C, A := parsekit.C, parsekit.A
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
@ -147,7 +148,7 @@ func ExampleParseAPI_Stop_notCalledButInputPending() {
func ExampleParseAPI_Peek() {
// Definition of a fantasy serial number format.
C, A := parsekit.C, parsekit.A
C, A := tokenize.C, tokenize.A
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
// This handler is able to handle serial numbers.
@ -160,7 +161,7 @@ func ExampleParseAPI_Peek() {
// Start could function as a sort of dispatcher, handing over
// control to the correct ParseHandler function, based on the input.
start := func(p *parsekit.ParseAPI) {
if p.Peek(parsekit.A.Asterisk) {
if p.Peek(tokenize.A.Asterisk) {
p.Handle(serialnrHandler)
return
}
@ -275,12 +276,12 @@ type parserWithLoop struct {
}
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
p.Accept(parsekit.A.ASCII)
p.Accept(tokenize.A.ASCII)
p.Handle(l.second)
}
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
p.Accept(parsekit.A.ASCII)
p.Accept(tokenize.A.ASCII)
p.Handle(l.third)
}
@ -289,7 +290,7 @@ func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
p.Error("Loop not detected by parsekit")
return
}
p.Accept(parsekit.A.ASCII)
p.Accept(tokenize.A.ASCII)
p.Handle(l.first)
}
@ -316,7 +317,7 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
var c, a = parsekit.C, parsekit.A
var c, a = tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.Accept(c.Max(5, a.AnyRune)) {
}

View File

@ -1,4 +1,4 @@
// Package reader provides a buffered Reader that wraps around an io.Reader.
// Package read provides a buffered Reader that wraps around an io.Reader.
//
// Functionally, it provides an input buffer in the form of a sliding window.
// Let's say we've got the following input coming up in the io.Reader that is
@ -37,7 +37,7 @@
//
// So after a flush, the first upcoming rune after the flushed runes
// will always be at offset 0.
package reader
package read
import (
"bufio"
@ -57,7 +57,7 @@ import (
// To minimize memory use, it is also possible to flush the read buffer when there is
// no more need to go back to previously read runes.
//
// The parserkit.reader.Reader is used internally by parsekit.TokenAPI.
// The parsekit.read.Reader is used internally by tokenize.TokenAPI.
type Reader struct {
bufio *bufio.Reader // Used for ReadRune()
buffer []rune // Input buffer, holding runes that were read from input
@ -89,7 +89,7 @@ func makeBufioReader(input interface{}) *bufio.Reader {
case string:
return bufio.NewReader(strings.NewReader(input))
default:
panic(fmt.Sprintf("parsekit.reader.New(): no support for input of type %T", input))
panic(fmt.Sprintf("parsekit.read.New(): no support for input of type %T", input))
}
}
@ -153,7 +153,7 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
const smallBufferSize = 64
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
var ErrTooLarge = errors.New("parsekit.reader: too large")
var ErrTooLarge = errors.New("parsekit.read: too large")
// grow grows the buffer to guarantee space for n more bytes.
// It returns the index where bytes should be written.
@ -200,7 +200,7 @@ func makeSlice(n int) []rune {
func (r *Reader) Flush(numberOfRunes int) {
if numberOfRunes > len(r.buffer) {
panic(fmt.Sprintf(
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
"parsekit.read.Reader.Flush(): number of runes to flush (%d) "+
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
}
r.buffer = r.buffer[numberOfRunes:]
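The flush rule from the package documentation (after a flush, the first upcoming rune is back at offset 0) in a compact sketch; it mirrors the dog-eat-dog example in the tests further down:

r := read.New(strings.NewReader("dog eat dog!"))
c, _ := r.RuneAt(3) // ' ': the buffer now holds "dog "
r.Flush(4)          // discard those four runes
c, _ = r.RuneAt(0)  // 'e': offset 0 now points right after the flushed runes
fmt.Printf("%c\n", c) // e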

View File

@ -1,4 +1,4 @@
package reader_test
package read_test
import (
"bufio"
@ -8,12 +8,12 @@ import (
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/reader"
"git.makaay.nl/mauricem/go-parsekit/read"
"github.com/stretchr/testify/assert"
)
func ExampleNew() {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
fmt.Printf("%c", at(0))
@ -33,7 +33,7 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
{"*bufio.Reader", bufio.NewReader(strings.NewReader("Hello, world!"))},
{"bufio.Reader", *(bufio.NewReader(strings.NewReader("Hello, world!")))},
} {
r := reader.New(test.input)
r := read.New(test.input)
firstRune, _ := r.RuneAt(0)
if firstRune != 'H' {
t.Errorf("[%s] first rune not 'H'", test.name)
@ -47,12 +47,12 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
func TestNew_UnhandledInputType_Panics(t *testing.T) {
assert.PanicsWithValue(t,
"parsekit.reader.New(): no support for input of type int",
func() { reader.New(12345) })
"parsekit.read.New(): no support for input of type int",
func() { read.New(12345) })
}
func TestReader_RuneAt(t *testing.T) {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// It is possible to go back and forth while reading the input.
@ -61,7 +61,7 @@ func TestReader_RuneAt(t *testing.T) {
}
func TestReader_RuneAt_endOfFile(t *testing.T) {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
rn, err := r.RuneAt(13)
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
@ -73,7 +73,7 @@ func TestReader_RuneAt_endOfFile(t *testing.T) {
}
func TestReader_RuneAt_invalidRune(t *testing.T) {
r := reader.New(strings.NewReader("Hello, \xcdworld!"))
r := read.New(strings.NewReader("Hello, \xcdworld!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
@ -81,7 +81,7 @@ func TestReader_RuneAt_invalidRune(t *testing.T) {
}
func ExampleReader_RuneAt() {
reader := reader.New(strings.NewReader("Hello, world!"))
reader := read.New(strings.NewReader("Hello, world!"))
fmt.Printf("Runes: ")
for i := 0; ; i++ {
@ -99,7 +99,7 @@ func ExampleReader_RuneAt() {
}
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
r := reader.New(strings.NewReader("\uFEFFBommetje!"))
r := read.New(strings.NewReader("\uFEFFBommetje!"))
b, _ := r.RuneAt(0)
o, _ := r.RuneAt(1)
m, _ := r.RuneAt(2)
@ -108,7 +108,7 @@ func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
}
func TestReader_Flush(t *testing.T) {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w"
@ -125,7 +125,7 @@ func TestReader_Flush(t *testing.T) {
}
func ExampleReader_Flush() {
r := reader.New(strings.NewReader("dog eat dog!"))
r := read.New(strings.NewReader("dog eat dog!"))
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
// Read from the first 4 runes of the input.
@ -148,20 +148,20 @@ func ExampleReader_Flush() {
}
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
// Fill buffer with "Hello, worl", the first 11 runes.
r.RuneAt(10)
// However, we flush 12 runes, which exceeds the buffer size.
assert.PanicsWithValue(t,
"parsekit.Input.Reader.Flush(): number of runes to flush "+
"parsekit.read.Reader.Flush(): number of runes to flush "+
"(12) exceeds size of the buffer (11)",
func() { r.Flush(12) })
}
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
r := reader.New(strings.NewReader("Hello, world!"))
r := read.New(strings.NewReader("Hello, world!"))
_, err := r.RuneAt(13)
assert.Equal(t, err.Error(), "EOF")
_, err = r.RuneAt(13)
@ -188,7 +188,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
io.ErrUnexpectedEOF, // This error must never pop up in the tests below.
},
}
r := reader.New(input)
r := read.New(input)
// Read the last available rune.
readRune, _ := r.RuneAt(3)
@ -233,7 +233,7 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
input, size := makeLargeStubReader()
r := reader.New(input)
r := read.New(input)
readRune, err := r.RuneAt(0)
assert.Equal(t, 'X', readRune)
@ -247,7 +247,7 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
input, size := makeLargeStubReader()
r := reader.New(input)
r := read.New(input)
readRune, _ := r.RuneAt(size - 200)
assert.Equal(t, 'X', readRune)
@ -257,7 +257,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
input, size := makeLargeStubReader()
r := reader.New(input)
r := read.New(input)
readRune, _ := r.RuneAt(size - 1)
assert.Equal(t, 'Y', readRune)

View File

@ -1,155 +0,0 @@
package parsekit_test
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
)
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
for i := 0; i < 20; i++ {
t.NextRune()
t.Accept()
}
return true
})
result, _ := parser.Execute("This is some random data to parse")
if result.String() != "This is some random " {
t.Fatalf("Got unexpected output from TokenHandler: %s", result.String())
}
}
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.Result().AddToken(&parsekit.Token{
Type: "PI",
Runes: []rune("π"),
Value: 3.1415,
})
t.Result().AddToken(&parsekit.Token{
Type: nil,
Runes: []rune("yes"),
Value: true,
})
return true
})
result, _ := parser.Execute("doesn't matter")
if len(result.Tokens()) != 2 {
t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens()))
}
if result.Token(0).Value != 3.1415 {
t.Fatal("Token 0 value not 3.1415")
}
if string(result.Token(0).Runes) != "π" {
t.Fatal("Token 0 runes not \"π\"")
}
if result.Token(0).Type != "PI" {
t.Fatal("Token 0 type not \"PI\"")
}
if result.Token(1).Value != true {
t.Fatal("Token 1 value not true")
}
if string(result.Token(1).Runes) != "yes" {
t.Fatal("Token 1 runes not \"yes\"")
}
if result.Token(1).Type != nil {
t.Fatal("Token 1 type not nil")
}
}
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
var tok, c, a = parsekit.T, parsekit.C, parsekit.A
fooToken := tok.Str("ASCII", c.OneOrMore(a.ASCII))
parser := parsekit.NewTokenizer(fooToken)
input := "This is fine ASCII Åltho hère öt endĩt!"
result, err := parser.Execute(input)
if err != nil {
t.Fatalf("Unexpected error from parser: %s", err)
}
if result.String() != "This is fine ASCII " {
t.Fatalf("result.String() contains unexpected data: %s", result.String())
}
}
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
var c, m, tok, a = parsekit.C, parsekit.M, parsekit.T, parsekit.A
ascii := tok.Str("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII)))
utf8 := tok.Str("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune))))
stars := m.Drop(c.ZeroOrMore(a.Asterisk))
fooToken := c.Seq(stars, tok.Str("COMBI", ascii.Then(utf8)), stars)
parser := parsekit.NewTokenizer(fooToken)
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
output := "This is fine ASCIIÅltho hère öt endĩt!"
result, err := parser.Execute(input)
if err != nil {
t.Fatalf("Unexpected error from parser: %s", err)
}
if result.String() != output {
t.Fatalf("result.String() contains unexpected data: %s", result.String())
}
if result.Token(0).Type != "COMBI" {
t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type)
}
if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" {
t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value)
}
if result.Token(1).Value != "This is fine ASCII" {
t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value)
}
if result.Token(2).Value != "Åltho hère öt endĩt!" {
t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value)
}
}
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.Accept()
return false
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("input string") },
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
`/.*/tokenhandler_test\.go:\d+ without first calling NextRune\(\)`})
}
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.NextRune()
t.NextRune()
return false
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("input string") },
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at ` +
`/.*/tokenhandler_test\.go:\d+ without a prior call to Accept\(\)`})
}
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
parser := parsekit.NewTokenizer(func(t *parsekit.TokenAPI) bool {
t.NextRune()
t.Accept()
return false
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("") },
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at ` +
`/.*/tokenhandler_test.go:\d+, but the prior call to NextRune\(\) failed`})
}
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() {
a := parsekit.TokenAPI{}
a.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at ` +
`/.*/tokenhandler_test\.go:\d+ on a non-forked TokenAPI`})
}

127 tokenize/assertions_test.go Normal file
View File

@ -0,0 +1,127 @@
package tokenize_test
// This file contains some tools that are used for writing parsekit tests.
import (
"regexp"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
// func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
// if notExpected == actual {
// t.Errorf("Unexpected value for %s: %q", forWhat, actual)
// }
// }
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
}
}
type PanicT struct {
Function func()
Regexp bool
Expect string
}
func AssertPanics(t *testing.T, testSet []PanicT) {
for _, test := range testSet {
AssertPanic(t, test)
}
}
func AssertPanic(t *testing.T, p PanicT) {
defer func() {
if r := recover(); r != nil {
mismatch := false
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
mismatch = true
}
if !p.Regexp && p.Expect != r.(string) {
mismatch = true
}
if mismatch {
t.Errorf(
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
p.Expect, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
}
}()
p.Function()
}
type TokenHandlerT struct {
Input string
TokenHandler tokenize.TokenHandler
MustMatch bool
Expected string
}
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
for _, test := range testSet {
AssertTokenHandler(t, test)
}
}
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output := result.String(); output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.Input)
}
}
}
type TokenMakerT struct {
Input string
TokenHandler tokenize.TokenHandler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
for _, test := range testSet {
AssertTokenMaker(t, test)
}
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {
if len(result.Tokens()) != len(test.Expected) {
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
}
for i, expected := range test.Expected {
actual := result.Token(i)
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if string(expected.Runes) != string(actual.Runes) {
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}
}
}
}

View File

@ -1,13 +1,14 @@
package parsekit
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/reader"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/read"
)
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
// a parsekit.reader.Reader and to report back tokenizing results. For easy
// a parsekit.read.Reader and to report back tokenizing results. For easy
// lookahead support, a forking strategy is provided.
//
// BASIC OPERATION:
@ -61,7 +62,7 @@ import (
// can lead to hard-to-track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type TokenAPI struct {
reader *reader.Reader
reader *read.Reader
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
@ -70,7 +71,7 @@ type TokenAPI struct {
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(input interface{}) *TokenAPI {
return &TokenAPI{
reader: reader.New(input),
reader: read.New(input),
result: newTokenHandlerResult(),
}
}
@ -86,10 +87,10 @@ func NewTokenAPI(input interface{}) *TokenAPI {
// without explicitly accepting, this method will panic.
func (i *TokenAPI) NextRune() (rune, error) {
if i.result.lastRune != nil {
callerPanic(1, "parsekit.TokenAPI.NextRune(): NextRune() called at {caller} "+
common.CallerPanic(1, "tokenize.TokenAPI.NextRune(): NextRune() called at {caller} "+
"without a prior call to Accept()")
}
i.detachChilds()
i.DetachChilds()
readRune, err := i.reader.RuneAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
@ -103,9 +104,9 @@ func (i *TokenAPI) NextRune() (rune, error) {
// returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() {
if i.result.lastRune == nil {
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
callerPanic(1, "parsekit.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
@ -131,7 +132,7 @@ func (i *TokenAPI) Accept() {
// with the parent TokenAPI as if nothing ever happened.
func (i *TokenAPI) Fork() *TokenAPI {
// Cleanup current forking / reading state.
i.detachChilds()
i.DetachChilds()
i.result.lastRune = nil
// Create the new fork.
@ -155,12 +156,12 @@ func (i *TokenAPI) Fork() *TokenAPI {
// This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() {
if i.parent == nil {
callerPanic(1, "parsekit.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
common.CallerPanic(1, "tokenize.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
}
i.addResultsToParent()
i.syncCursorTo(i.parent)
i.clearResults()
i.detachChilds()
i.ClearResults()
i.DetachChilds()
}
func (i *TokenAPI) addResultsToParent() {
@ -173,29 +174,29 @@ func (i *TokenAPI) syncCursorTo(to *TokenAPI) {
*to.result.cursor = *i.result.cursor
}
func (i *TokenAPI) clearResults() {
func (i *TokenAPI) ClearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []*Token{}
i.result.err = nil
}
func (i *TokenAPI) detachChilds() {
func (i *TokenAPI) DetachChilds() {
if i.child != nil {
i.child.detachChildsRecurse()
i.child.DetachChildsRecurse()
i.child = nil
}
}
func (i *TokenAPI) detachChildsRecurse() {
func (i *TokenAPI) DetachChildsRecurse() {
if i.child != nil {
i.child.detachChildsRecurse()
i.child.DetachChildsRecurse()
}
i.child = nil
i.parent = nil
}
func (i *TokenAPI) flushReader() bool {
func (i *TokenAPI) FlushReader() bool {
if i.result.offset > 0 {
i.reader.Flush(i.result.offset)
i.result.offset = 0

View File

@ -1,15 +1,15 @@
package parsekit_test
package tokenize_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleTokenAPI_Fork() {
// This custom TokenHandler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *parsekit.TokenAPI) bool {
a := parsekit.A
abcHandler := func(t *tokenize.TokenAPI) bool {
a := tokenize.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
if a.Rune(r)(child) {
@ -26,26 +26,26 @@ func ExampleTokenAPI_Fork() {
// You can make use of the parser/combinator tooling to do things
// a lot simpler and take care of forking at the appropriate places.
// The handler from above can be replaced with:
simpler := parsekit.A.RuneRange('a', 'c')
simpler := tokenize.A.RuneRange('a', 'c')
result, err := parsekit.NewTokenizer(abcHandler).Execute("another test")
result, err := tokenize.NewTokenizer(abcHandler).Execute("another test")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(simpler).Execute("curious")
result, err = tokenize.NewTokenizer(simpler).Execute("curious")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcHandler).Execute("bang on!")
result, err = tokenize.NewTokenizer(abcHandler).Execute("bang on!")
fmt.Println(result, err)
result, err = parsekit.NewTokenizer(abcHandler).Execute("not a match")
result, err = tokenize.NewTokenizer(abcHandler).Execute("not a match")
fmt.Println(result, err)
// Output:
// a <nil>
// c <nil>
// b <nil>
// <nil> unexpected input at start of file
// <nil> mismatch at start of file
}
func ExampleTokenAPI_Merge() {
tokenHandler := func(t *parsekit.TokenAPI) bool {
tokenHandler := func(t *tokenize.TokenAPI) bool {
child1 := t.Fork()
child1.NextRune() // reads 'H'
child1.Accept()
@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() {
return true
}
result, _ := parsekit.NewTokenizer(tokenHandler).Execute("Hi mister X!")
result, _ := tokenize.NewTokenizer(tokenHandler).Execute("Hi mister X!")
fmt.Println(result)
// Output:

View File

@ -1,13 +1,4 @@
package parsekit
// Tokenizer is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Tokenizer can be instantiated using the parsekit.NewTokenizer()
// method.
type Tokenizer struct {
parser *Parser
result *TokenHandlerResult
}
package tokenize
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
@ -47,31 +38,3 @@ func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHand
func (handler TokenHandler) Optional() TokenHandler {
return MatchOpt(handler)
}
// NewTokenizer instantiates a new Tokenizer.
//
// This is a simple wrapper around a TokenHandler function. It can be used to
// match an input string against that TokenHandler function and retrieve the
// results in a straightforward way.
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
tokenizer := &Tokenizer{}
tokenizer.parser = NewParser(func(p *ParseAPI) {
if p.Accept(tokenHandler) {
tokenizer.result = p.Result()
p.Stop()
} else {
p.Expected("")
}
})
return tokenizer
}
// Execute feeds the input to the wrapped TokenHandler function.
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
//
// It returns the TokenHandler's TokenHandlerResult. When an error occurred
// during parsing, the error will be set, nil otherwise.
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *Error) {
err := t.parser.Execute(input)
return t.result, err
}
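Besides dropping the Tokenizer (which moves elsewhere in this commit), this file carries the fluent methods on TokenHandler; Optional() and SeparatedBy() are visible in the hunks above, while Then(), Or() and Times() appear in other files of this diff. A small sketch of how they chain (the expected output is an assumption, since the separator handling itself is not shown here):

a := tokenize.A

// A comma separated list of integers, allowing optional blanks after
// each comma, e.g. "1, 2,3".
separator := a.Rune(',').Then(a.Blanks.Optional())
intList := a.Integer.SeparatedBy(separator)

result, err := tokenize.NewTokenizer(intList).Execute("1, 2,3")
fmt.Println(result, err) // expected: 1, 2,3 <nil>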

View File

@ -1,8 +1,10 @@
package parsekit
package tokenize
import (
"fmt"
"strings"
"git.makaay.nl/mauricem/go-parsekit/common"
)
// TokenHandlerResult is a struct that is used for holding tokenizing results
@ -12,9 +14,9 @@ type TokenHandlerResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
cursor *Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err *Error // can be used by a TokenHandler to report a specific issue with the input
cursor *common.Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err *common.Error // can be used by a TokenHandler to report a specific issue with the input
}
type runeInfo struct {
@ -59,7 +61,7 @@ func newTokenHandlerResult() *TokenHandlerResult {
return &TokenHandlerResult{
runes: []rune{},
tokens: []*Token{},
cursor: &Cursor{},
cursor: &common.Cursor{},
}
}
@ -90,7 +92,7 @@ func (r *TokenHandlerResult) addRunes(set ...interface{}) {
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(2, "parsekit.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
common.CallerPanic(2, "tokenize.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
@ -167,6 +169,6 @@ func (r *TokenHandlerResult) Value(idx int) interface{} {
// Cursor retrieves the read cursor from the TokenHandlerResult. This is the
// first cursor position after the runes that were read by the TokenHandler.
func (r *TokenHandlerResult) Cursor() *Cursor {
func (r *TokenHandlerResult) Cursor() *common.Cursor {
return r.cursor
}

View File

@ -1,17 +1,19 @@
package parsekit
package tokenize_test
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleToken() {
t0 := Token{
t0 := tokenize.Token{
Runes: []rune("10.1.2.3"),
}
t1 := Token{
t1 := tokenize.Token{
Runes: []rune("two hundred and twenty four"),
Type: "Number",
Value: 224,
@ -19,12 +21,12 @@ func ExampleToken() {
const TName = 1
t2 := Token{
t2 := tokenize.Token{
Runes: []rune("John"),
Type: TName,
}
t3 := Token{
t3 := tokenize.Token{
Runes: []rune("The answer"),
Value: 42,
}
@ -39,7 +41,7 @@ func ExampleToken() {
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := NewTokenAPI(strings.NewReader("Testing"))
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string")
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
@ -51,10 +53,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := NewTokenAPI(strings.NewReader("Testing"))
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
i.Result().SetRunes(1234567)
},
Regexp: true,
Expect: `parsekit\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenresult_test.go:\d+`,
Expect: `tokenize\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenhandlerresult_test.go:\d+`,
})
}

View File

@ -1,4 +1,4 @@
package parsekit
package tokenize
import (
"fmt"
@ -9,6 +9,8 @@ import (
"strings"
"unicode"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/common"
)
// C provides convenient access to a range of parser/combinators that can be
@ -22,7 +24,7 @@ import (
// When using C in your own parser, then it is advised to create a variable
// to reference it:
//
// var c = parsekit.C
// var c = tokenize.C
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
@ -58,7 +60,7 @@ var C = struct {
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
// var a = parsekit.A
// var a = tokenize.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
@ -236,7 +238,7 @@ var A = struct {
// When using M in your own parser, then it is advised to create a variable
// to reference it:
//
// var m = parsekit.M
// var m = tokenize.M
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
@ -268,7 +270,7 @@ var M = struct {
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
// var t = parsekit.T
// var t = tokenize.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct {
@ -332,7 +334,7 @@ func MatchRunes(expected ...rune) TokenHandler {
// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'.
func MatchRuneRange(start rune, end rune) TokenHandler {
if end < start {
callerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
common.CallerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
}
return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end })
}
@ -485,7 +487,7 @@ func MatchRep(times int, handler TokenHandler) TokenHandler {
// When more matches are possible, these will be included in the output.
func MatchMin(min int, handler TokenHandler) TokenHandler {
if min < 0 {
callerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
common.CallerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, -1, handler, "MatchMin")
}
@ -496,7 +498,7 @@ func MatchMin(min int, handler TokenHandler) TokenHandler {
// Zero matches are considered a successful match.
func MatchMax(max int, handler TokenHandler) TokenHandler {
if max < 0 {
callerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
common.CallerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
}
return matchMinMax(0, max, handler, "MatchMax")
}
@ -519,17 +521,17 @@ func MatchOneOrMore(handler TokenHandler) TokenHandler {
// inclusive. All matches will be included in the output.
func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
if max < 0 {
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
}
if min < 0 {
callerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
}
return matchMinMax(min, max, handler, "MatchMinMax")
}
func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
if max >= 0 && min > max {
callerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
common.CallerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
}
return func(t *TokenAPI) bool {
total := 0
@ -594,7 +596,7 @@ func MatchSigned(handler TokenHandler) TokenHandler {
// ranging from -9223372036854775808 to 9223372036854775807.
func MatchIntegerBetween(min int64, max int64) TokenHandler {
if max < min {
callerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
common.CallerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min)
}
digits := MatchSigned(MatchDigits())
return func(t *TokenAPI) bool {

View File

@ -1,15 +1,15 @@
package parsekit_test
package tokenize_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func TestCombinators(t *testing.T) {
var c, a, m = parsekit.C, parsekit.A, parsekit.M
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertTokenHandlers(t, []TokenHandlerT{
{"abc", c.Not(a.Rune('b')), true, "a"},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"bcd", c.Not(a.Rune('b')), false, ""},
@ -68,19 +68,19 @@ func TestCombinators(t *testing.T) {
}
func TestCombinatorPanics(t *testing.T) {
var c, a = parsekit.C, parsekit.A
parsekit.AssertPanics(t, []parsekit.PanicT{
var c, a = tokenize.C, tokenize.A
AssertPanics(t, []PanicT{
{func() { a.RuneRange('z', 'a') }, true,
`TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
{func() { c.MinMax(-1, 1, parsekit.A.Space) }, true,
{func() { c.MinMax(-1, 1, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.MinMax(1, -1, parsekit.A.Space) }, true,
{func() { c.MinMax(1, -1, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { c.MinMax(10, 5, parsekit.A.Space) }, true,
{func() { c.MinMax(10, 5, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
{func() { c.Min(-10, parsekit.A.Space) }, true,
{func() { c.Min(-10, a.Space) }, true,
`TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.Max(-42, parsekit.A.Space) }, true,
{func() { c.Max(-42, a.Space) }, true,
`TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { a.IntegerBetween(10, -10) }, true,
`TokenHandler: MatchIntegerBetween definition error at /.*/tokenhandlers_builtin_test.go:\d+: max -10 must not be < min 10`},
@@ -88,8 +88,8 @@ func TestCombinatorPanics(t *testing.T) {
}
func TestAtoms(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
{"dd", a.RuneRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""},
@@ -225,8 +225,8 @@ func TestAtoms(t *testing.T) {
}
func TestIPv4Atoms(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "00"},
{"000X", a.Octet, true, "000"},
@@ -257,8 +257,8 @@ func TestIPv4Atoms(t *testing.T) {
}
func TestIPv6Atoms(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
{"", a.IPv6, false, ""},
{"::", a.IPv6, true, "::"},
{"1::", a.IPv6, true, "1::"},
@@ -286,8 +286,8 @@ func TestIPv6Atoms(t *testing.T) {
}
func TestModifiers(t *testing.T) {
var c, a, m = parsekit.C, parsekit.A, parsekit.M
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertTokenHandlers(t, []TokenHandlerT{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@@ -306,10 +306,10 @@ func TestModifiers(t *testing.T) {
// follow the correct pattern. Therefore, tokenmakers will panic when the
// input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
var a, tok = parsekit.A, parsekit.T
var a, tok = tokenize.A, tokenize.T
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
parser := parsekit.NewTokenizer(invalid)
parsekit.AssertPanic(t, parsekit.PanicT{
parser := tokenize.NewTokenizer(invalid)
AssertPanic(t, PanicT{
func() { parser.Execute("no") }, false,
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
@@ -317,19 +317,19 @@ func TestTokenMakerErrorHandling(t *testing.T) {
}
func TestTokenMakers(t *testing.T) {
var c, a, tok = parsekit.C, parsekit.A, parsekit.T
parsekit.AssertTokenMakers(t, []parsekit.TokenMakerT{
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
AssertTokenMakers(t, []TokenMakerT{
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
[]tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
[]tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
[]tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
{"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
@@ -337,28 +337,28 @@ func TestTokenMakers(t *testing.T) {
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
}},
{"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
{`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Runes: []rune("1"), Value: true},
{Type: "P", Runes: []rune("t"), Value: true},
{Type: "P", Runes: []rune("T"), Value: true},
@@ -367,7 +367,7 @@ func TestTokenMakers(t *testing.T) {
{Type: "P", Runes: []rune("True"), Value: true},
}},
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Runes: []rune("0"), Value: false},
{Type: "P", Runes: []rune("f"), Value: false},
{Type: "P", Runes: []rune("F"), Value: false},
@@ -379,8 +379,8 @@ func TestTokenMakers(t *testing.T) {
}
func TestSyntacticSugar(t *testing.T) {
var a = parsekit.A
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
@@ -391,37 +391,9 @@ func TestSyntacticSugar(t *testing.T) {
})
}
func TestSequenceOfRunes(t *testing.T) {
var c, a = parsekit.C, parsekit.A
sequence := c.Seq(
a.Hash, a.Dollar, a.Percent, a.Amp, a.SingleQuote, a.LeftParen,
a.RightParen, a.Asterisk, a.Plus, a.Comma, a.Minus, a.Dot, a.Slash,
a.Colon, a.Semicolon, a.AngleOpen, a.Equal, a.AngleClose, a.Question,
a.At, a.SquareOpen, a.Backslash, a.SquareClose, a.Caret, a.Underscore,
a.Backquote, a.CurlyOpen, a.Pipe, a.CurlyClose, a.Tilde,
)
input := "#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
output := ""
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.Accept(sequence) {
output = p.Result().String()
p.Stop()
} else {
p.Expected("sequence of runes")
}
})
err := parser.Execute(input)
if err != nil {
t.Fatalf("Parsing failed: %s", err)
}
if output != input {
t.Fatalf("Unexpected output from parser:\nexpected: %s\nactual: %s\n", input, output)
}
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
var c, a, m = parsekit.C, parsekit.A, parsekit.M
var c, a, m = tokenize.C, tokenize.A, tokenize.M
demonic := c.Seq(
c.Opt(a.SquareOpen),
m.Trim(
@@ -442,7 +414,7 @@ func TestCombination(t *testing.T) {
c.Opt(a.SquareClose),
)
parsekit.AssertTokenHandlers(t, []parsekit.TokenHandlerT{
AssertTokenHandlers(t, []TokenHandlerT{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},

34
tokenize/tokenizer.go Normal file
View File

@@ -0,0 +1,34 @@
package tokenize
import (
"git.makaay.nl/mauricem/go-parsekit/common"
)
// Tokenizer is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Tokenizer can be instantiated using the tokenize.NewTokenizer()
// function.
type Tokenizer struct {
handler TokenHandler
}
// NewTokenizer instantiates a new Tokenizer, based on the provided TokenHandler.
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
return &Tokenizer{tokenHandler}
}
// Execute feeds the input to the wrapped TokenHandler function.
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
//
// It returns the TokenHandler's TokenHandlerResult. When an error occurs
// during parsing, the returned error is set; otherwise it is nil.
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *common.Error) {
api := NewTokenAPI(input)
ok := t.handler(api)
if !ok {
err := &common.Error{Message: "mismatch", Cursor: common.Cursor{}}
return nil, err
}
return api.Result(), nil
}
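The input parameter of Execute is an empty interface. The tests in this commit feed it both plain strings and io.Reader values, so a reader-based call like the following sketch should work; the program is illustrative only, assuming the reader path behaves as the TokenAPI tests suggest:

package main

import (
	"fmt"
	"strings"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func main() {
	var c, a = tokenize.C, tokenize.A
	handler := c.OneOrMore(a.AnyRune)
	// Execute accepts a plain string as well as an io.Reader.
	if _, err := tokenize.NewTokenizer(handler).Execute(strings.NewReader("hello")); err != nil {
		fmt.Println("Error:", err)
		return
	}
	fmt.Println("match succeeded")
}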

179
tokenize/tokenizer_test.go Normal file
View File

@@ -0,0 +1,179 @@
package tokenize_test
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// TODO For error handling, it would be really cool if for example the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleTokenizer_Execute() {
// Build the tokenizer for ip/mask.
var c, a, t = tokenize.C, tokenize.A, tokenize.T
ip := t.Str("ip", a.IPv4)
mask := t.Int8("mask", a.IPv4CIDRMask)
cidr := c.Seq(ip, a.Slash, mask)
tokenizer := tokenize.NewTokenizer(cidr)
for _, input := range []string{
"000.000.000.000/000",
"192.168.0.1/24",
"255.255.255.255/32",
"10.0.300.1/24",
"not an IPv4 CIDR",
} {
// Execute returns a TokenHandlerResult and an error, which is nil on success.
result, err := tokenizer.Execute(input)
if err == nil {
fmt.Printf("Result: %s\n", result.Tokens())
} else {
fmt.Printf("Error: %s\n", err)
}
}
// Output:
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
// Error: mismatch at start of file
// Error: mismatch at start of file
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
AssertEqual(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
}
func TestCallingNextRuneTwice_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.NextRune()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
})
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.NextRune()
f.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.Fork()
f.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.Fork()
i.Accept()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
}
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("X"))
i.NextRune()
i.Accept()
r, err := i.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("X"))
f := i.Fork()
f.NextRune()
f.Accept()
r, err := f.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from fork's 2nd NextRune()")
r, err = i.NextRune()
AssertEqual(t, 'X', r, "returned rune from parent's NextRune()")
AssertEqual(t, true, err == nil, "returned error from parent's NextRune()")
}
func mkInput() *tokenize.TokenAPI {
return tokenize.NewTokenAPI("Testing")
}

View File

@@ -0,0 +1,125 @@
package tokenize
import (
"testing"
)
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := NewTokenAPI("Testing")
i.NextRune()
i.Accept() // T
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
// Fork
f := i.Fork()
AssertEqual(t, f, i.child, "Input.child (must be f)")
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
AssertEqual(t, 1, i.result.cursor.Byte, "i.result.cursor.Byte")
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.result.cursor.Byte")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.result.cursor.Byte, "i.result.cursor.Byte")
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.result.cursor.Byte")
// Merge fork back into parent
f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.result.cursor.Byte, "i.result.cursor.Byte")
}
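These fork semantics are what a custom TokenHandler builds on: speculative reads happen on a fork, and only a successful match is merged back into the parent. A sketch of that pattern follows; matchAB is a hypothetical handler, not part of the package, and the Fork/Accept/Merge behavior is taken from the tests above:

package main

import (
	"fmt"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// matchAB is a hypothetical TokenHandler that matches the literal "ab".
// It reads on a fork, so a failed match leaves the parent TokenAPI untouched.
func matchAB(t *tokenize.TokenAPI) bool {
	f := t.Fork()
	for _, want := range "ab" {
		r, err := f.NextRune()
		if err != nil || r != want {
			return false // abandon the fork; the parent keeps its position
		}
		f.Accept()
	}
	f.Merge() // move the accepted runes and cursor into the parent
	return true
}

func main() {
	_, err := tokenize.NewTokenizer(matchAB).Execute("abc")
	fmt.Println(err == nil) // true: "ab" was matched
}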
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewTokenAPI("Testing")
i.NextRune()
i.Accept()
f1 := i.Fork()
f1.NextRune()
f1.Accept()
f2 := f1.Fork()
f2.NextRune()
f2.Accept()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset A")
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
f2.Merge()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset B")
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
f1.Merge()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, 3, i.result.offset, "i.offset C")
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := NewTokenAPI("Testing")
f1 := i.Fork()
f2 := f1.Fork()
f3 := f2.Fork()
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
f5 := f4.Fork()
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == f1, "i.child == f1")
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
i.NextRune()
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == nil, "i.child == nil")
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
i := NewTokenAPI("Testing")
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
i.Accept()
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
}
}

View File

@@ -1,291 +0,0 @@
package parsekit
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
)
// TODO For error handling, it would be really cool if for example the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// Biggest thing to take care of here, is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using parsekit.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleTokenizer_Execute() {
// Build the tokenizer for ip/mask.
ip := T.Str("ip", A.IPv4)
mask := T.Int8("mask", A.IPv4CIDRMask)
cidr := C.Seq(ip, A.Slash, mask)
tokenizer := NewTokenizer(cidr)
for _, input := range []string{
"000.000.000.000/000",
"192.168.0.1/24",
"255.255.255.255/32",
"10.0.300.1/24",
"not an IPv4 CIDR",
} {
// Execute returns a TokenHandlerResult and an error, which is nil on success.
result, err := tokenizer.Execute(input)
if err == nil {
fmt.Printf("Result: %s\n", result.Tokens())
} else {
fmt.Printf("Error: %s\n", err)
}
}
// Output:
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
// Error: unexpected input at start of file
// Error: unexpected input at start of file
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
AssertEqual(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
}
func TestCallingNextRuneTwice_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.NextRune()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
})
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
})
}
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.NextRune()
f.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.Fork()
f.Merge()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := mkInput()
f1 := i.Fork()
f2 := f1.Fork()
f3 := f2.Fork()
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
f5 := f4.Fork()
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == f1, "i.child == f1")
AssertEqual(t, true, f1.parent == i, "f1.parent == i")
AssertEqual(t, true, f1.child == f4, "f1.child == f4")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == f1, "f4.parent == f1")
AssertEqual(t, true, f4.child == f5, "f4.child == f5")
AssertEqual(t, true, f5.parent == f4, "f5.parent == f4")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
i.NextRune()
AssertEqual(t, true, i.parent == nil, "i.parent == nil")
AssertEqual(t, true, i.child == nil, "i.child == nil")
AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
AssertEqual(t, true, f1.child == nil, "f1.child == nil")
AssertEqual(t, true, f2.child == nil, "f2.child == nil")
AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
AssertEqual(t, true, f3.child == nil, "f3.child == nil")
AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
AssertEqual(t, true, f4.child == nil, "f4.child == nil")
AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
AssertEqual(t, true, f5.child == nil, "f5.child == nil")
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.Fork()
i.Accept()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
i := mkInput()
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
i.Accept()
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
i := mkInput()
for j := 0; j < 7; j++ {
i.NextRune()
i.Accept()
}
AssertEqual(t, "Testing", i.Result().String(), "i.Result().String()")
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.result.cursor.String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.result.cursor.String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.result.cursor.String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.result.cursor.String(), "cursor 4")
}
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := mkInput()
i.NextRune()
i.Accept() // T
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
// Fork
f := i.Fork()
AssertEqual(t, f, i.child, "Input.child (must be f)")
AssertEqual(t, i, f.parent, "Input.parent (must be i)")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 1, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.result.cursor.Byte, "i.child.cursor.Byte")
AssertEqual(t, 3, i.child.result.cursor.Byte, "i.child.cursor.Byte")
// Merge fork back into parent
f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.result.cursor.Byte, "i.child.cursor.Byte")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
f1 := i.Fork()
f1.NextRune()
f1.Accept()
f2 := f1.Fork()
f2.NextRune()
f2.Accept()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset A")
AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 2, f1.result.offset, "f1.offset A")
AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset A")
f2.Merge()
AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
AssertEqual(t, 1, i.result.offset, "i.offset B")
AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset B")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset B")
f1.Merge()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, 3, i.result.offset, "i.offset C")
AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
AssertEqual(t, 3, f1.result.offset, "f1.offset C")
AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := NewTokenAPI(strings.NewReader("X"))
i.NextRune()
i.Accept()
r, err := i.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := NewTokenAPI(strings.NewReader("X"))
f := i.Fork()
f.NextRune()
f.Accept()
r, err := f.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
r, err = i.NextRune()
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func mkInput() *TokenAPI {
return NewTokenAPI(strings.NewReader("Testing"))
}