Big overhaul on separating packages for code containment.

Maurice Makaay 2019-06-12 14:30:46 +00:00
parent 1f0e0fcc17
commit 27c97ae902
39 changed files with 1845 additions and 1763 deletions

View File

@ -1,44 +0,0 @@
package common
import (
"fmt"
"runtime"
"strings"
)
// Error is used as the error type when parsing errors occur.
// The error includes some context information to allow for useful
// error messages to the user.
type Error struct {
Message string
Cursor Cursor
}
func (err *Error) Error() string {
if err == nil {
CallerPanic(1, "common.Error.Error(): method called with nil error at {caller}")
}
return fmt.Sprintf("%s at %s", err.Message, err.Cursor)
}
func CallerFunc(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
pc, _, _, _ := runtime.Caller(depth + 1)
caller := runtime.FuncForPC(pc)
parts := strings.Split(caller.Name(), ".")
funcName := parts[len(parts)-1]
return funcName
}
func callerFilepos(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line)
}
func CallerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
panic(m)
}

View File

@ -1,20 +0,0 @@
package common_test
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/common"
)
func ExampleError() {
err := &common.Error{
Message: "it broke down",
Cursor: common.Cursor{Line: 9, Column: 41},
}
fmt.Println(err.Error())
fmt.Printf("%s\n", err)
// Output:
// it broke down at line 10, column 42
// it broke down at line 10, column 42
}

View File

@ -10,8 +10,7 @@ package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -49,17 +48,17 @@ func Example_basicCalculator1() {
// Input: "42+ ", got error: unexpected input (expected integer number) at line 1, column 4
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// ComputeSimple interprets a simple calculation, consisting of only integers
// and add or subtract operators. It returns the result of the calculation.
// An error is returned in case the calculation failed.
func ComputeSimple(calculation string) (int64, *common.Error) {
func ComputeSimple(calculation string) (int64, error) {
calculator := &simpleCalculator{op: +1}
parser := parsekit.NewParser(calculator.number)
err := parser.Execute(calculation)
parser := parse.New(calculator.number)
err := parser(calculation)
return calculator.Result, err
}
@ -76,7 +75,7 @@ var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks))
var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank)
var int64Token = tokenize.T.Int64(nil, bareInteger)
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
func (c *simpleCalculator) number(p *parse.API) {
if p.Accept(int64Token) {
c.Result += c.op * p.Result().Value(0).(int64)
p.Handle(c.operatorOrEndOfFile)
@ -85,7 +84,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
}
}
func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) {
func (c *simpleCalculator) operatorOrEndOfFile(p *parse.API) {
var A = tokenize.A
switch {
case p.Accept(A.Add):

View File

@ -16,8 +16,7 @@ import (
"fmt"
"math"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -62,9 +61,9 @@ func Example_basicCalculator2() {
// Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// calculator implements a recursive descent parser that is responsible for parsing
// the input calculation string according to the grammar.
@ -77,15 +76,15 @@ type calculator struct {
// Compute takes a calculation string as input and returns the interpreted result
// value for the calculation. An error can be returned as well, in case the
// calculation fails for some reason.
func Compute(input string) (float64, *common.Error) {
func Compute(input string) (float64, error) {
calc := &calculator{}
parser := parsekit.NewParser(calc.calculation)
err := parser.Execute(input)
parser := parse.New(calc.calculation)
err := parser(input)
return calc.result, err
}
// <calculation> = <expr> <EOF>
func (calc *calculator) calculation(p *parsekit.ParseAPI) {
func (calc *calculator) calculation(p *parse.API) {
if p.Handle(calc.expr) {
p.ExpectEndOfFile()
calc.result = calc.interpreter.result
@ -93,7 +92,7 @@ func (calc *calculator) calculation(p *parsekit.ParseAPI) {
}
// <expr> = (<term> | <term> (ADD|SUB) <term>)
func (calc *calculator) expr(p *parsekit.ParseAPI) {
func (calc *calculator) expr(p *parse.API) {
calc.interpreter.push()
var A = tokenize.A
@ -111,7 +110,7 @@ func (calc *calculator) expr(p *parsekit.ParseAPI) {
}
// <term> = (<factor> | <factor> (MUL|DIV) <factor>)
func (calc *calculator) term(p *parsekit.ParseAPI) {
func (calc *calculator) term(p *parse.API) {
calc.interpreter.push()
var A = tokenize.A
@ -130,7 +129,7 @@ func (calc *calculator) term(p *parsekit.ParseAPI) {
// <space> = (<space> (SPACE|TAB) | "")
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
func (calc *calculator) factor(p *parsekit.ParseAPI) {
func (calc *calculator) factor(p *parse.API) {
var A, T = tokenize.A, tokenize.T
p.Accept(A.Blanks)
switch {
@ -152,9 +151,9 @@ func (calc *calculator) factor(p *parsekit.ParseAPI) {
p.Accept(A.Blanks)
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// The computational interpreter, used by the calculator.
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
type stackFrame struct {
a float64

View File

@ -1,5 +1,5 @@
// In this example, a Parser is created that can parse and normalize Dutch postcodes.
// The implementation uses only TokenHandler functions and does not implement a
// The implementation uses only Handler functions and does not implement a
// full-fledged state-based Parser for it.
package examples
@ -11,7 +11,7 @@ import (
)
func Example_dutchPostcodeUsingTokenizer() {
parser := createPostcodeTokenizer()
tokenizer := createPostcodeTokenizer()
for i, input := range []string{
"1234 AB",
@ -24,7 +24,7 @@ func Example_dutchPostcodeUsingTokenizer() {
"",
"\xcd2222AB",
} {
result, err := parser.Execute(input)
result, err := tokenizer(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
@ -47,27 +47,27 @@ func Example_dutchPostcodeUsingTokenizer() {
// [8] Input: "\xcd2222AB" Error: mismatch at start of file
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
func createPostcodeTokenizer() *tokenize.Tokenizer {
// Easy access to the parsekit definitions.
func createPostcodeTokenizer() tokenize.Func {
// Easy access to the tokenize definitions.
C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T
// TokenHandler functions are created and combined to satisfy these rules:
// - A Dutch postcode consists of 4 digits and 2 letters (1234XX).
// - The first digit is never a zero.
// - A space between letters and digits is optional.
// - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters.
// Handler functions are created and combined to satisfy these rules:
// A Dutch postcode consists of 4 digits and 2 letters (1234XX).
// The first digit is never a zero.
// A space between letters and digits is optional.
// It is good form to write the letters in upper case.
// It is good form to use a single space between digits and letters.
pcDigits := A.DigitNotZero.Then(A.Digit.Times(3))
pcLetter := A.ASCIILower.Or(A.ASCIIUpper)
pcLetters := M.ToUpper(pcLetter.Times(2))
space := M.Replace(A.Blanks.Optional(), " ")
postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile)
// Create a Tokenizer that wraps the 'postcode' TokenHandler and allows
// Create a Tokenizer that wraps the 'postcode' Handler and allows
// us to match some input against that handler.
return tokenize.NewTokenizer(postcode)
return tokenize.New(postcode)
}

View File

@ -2,8 +2,8 @@
// like "Hello, <name>!", and that extracts the name from it.
//
// This implementation uses a state-based Parser for it, and it does not
// implement any custom parser/combinator TokenHandler functions. Note that
// things are much easier to implement using custom TokenHandlers (see the
// implement any custom parser/combinator Handler functions. Note that
// things are much easier to implement using custom Handlers (see the
// helloParserCombinator example for this). Doing this fully parser-based
// implementation is mainly for your learning pleasure.
//
@ -19,8 +19,7 @@ import (
"fmt"
"strings"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -68,21 +67,21 @@ func Example_helloWorldUsingParser1() {
// [15] Input: "hello, \t!" Error: The name cannot be empty
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
type helloparser1 struct {
greetee string
}
func (h *helloparser1) Parse(input string) (string, *common.Error) {
parser := parsekit.NewParser(h.start)
err := parser.Execute(input)
func (h *helloparser1) Parse(input string) (string, error) {
parser := parse.New(h.start)
err := parser(input)
return h.greetee, err
}
func (h *helloparser1) start(p *parsekit.ParseAPI) {
func (h *helloparser1) start(p *parse.API) {
a := tokenize.A
if p.Accept(a.StrNoCase("hello")) {
p.Handle(h.comma)
@ -91,7 +90,7 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
}
}
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
func (h *helloparser1) comma(p *parse.API) {
a := tokenize.A
switch {
case p.Accept(a.Blanks):
@ -103,7 +102,7 @@ func (h *helloparser1) comma(p *parsekit.ParseAPI) {
}
}
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
func (h *helloparser1) startName(p *parse.API) {
a := tokenize.A
p.Accept(a.Blanks)
if p.Peek(a.AnyRune) {
@ -113,7 +112,7 @@ func (h *helloparser1) startName(p *parsekit.ParseAPI) {
}
}
func (h *helloparser1) name(p *parsekit.ParseAPI) {
func (h *helloparser1) name(p *parse.API) {
a := tokenize.A
switch {
case p.Peek(a.Excl):
@ -126,7 +125,7 @@ func (h *helloparser1) name(p *parsekit.ParseAPI) {
}
}
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
func (h *helloparser1) exclamation(p *parse.API) {
a := tokenize.A
if p.Accept(a.Excl) {
p.Handle(h.end)
@ -138,7 +137,7 @@ func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
// Here we could have used p.ExpectEndOfFile() as well, but a slightly
// different route was taken to implement a more friendly 'end of greeting'
// error message.
func (h *helloparser1) end(p *parsekit.ParseAPI) {
func (h *helloparser1) end(p *parse.API) {
var a = tokenize.A
if !p.Accept(a.EndOfFile) {
p.Expected("end of greeting")

View File

@ -1,7 +1,7 @@
// In this example, a parser is created that is able to parse input that looks
// like "Hello, <name>!", and that extracts the name from it.
//
// The implementation uses only parser/combinator TokenHandler functions and does
// The implementation uses only parser/combinator Handler functions and does
// not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implemented using a Parser, take a look at the
// other hello examples.
@ -14,7 +14,7 @@ import (
)
func Example_helloWorldUsingTokenizer() {
parser := createHelloTokenizer()
tokenizer := createHelloTokenizer()
for i, input := range []string{
"Hello, world!",
@ -25,7 +25,7 @@ func Example_helloWorldUsingTokenizer() {
"Hello, world",
"Hello,!",
} {
output, err := parser.Execute(input)
output, err := tokenizer(input)
if err != nil {
fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err)
} else {
@ -42,16 +42,16 @@ func Example_helloWorldUsingTokenizer() {
// [6] Input: "Hello,!" Error: mismatch at start of file
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
func createHelloTokenizer() *tokenize.Tokenizer {
func createHelloTokenizer() tokenize.Func {
// Easy access to the parsekit definitions.
c, a, m := tokenize.C, tokenize.A, tokenize.M
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// Using the parser/combinator support of parsekit, we create a Handler function
// that does all the work. The 'greeting' Handler matches the whole input and
// drops all but the name from it.
hello := a.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank))
@ -63,7 +63,7 @@ func createHelloTokenizer() *tokenize.Tokenizer {
Then(m.Drop(a.Excl)).
Then(a.EndOfFile)
// Create a Tokenizer that wraps the 'greeting' TokenHandler and allows
// Create a Tokenizer that wraps the 'greeting' Handler and allows
// us to match some input against that handler.
return tokenize.NewTokenizer(greeting)
return tokenize.New(greeting)
}

View File

@ -1,9 +1,9 @@
// This is the same as the other hello examples, except that in this
// implementation the state machine is implemented using a combination of some
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
// Handlers and only a single state, in which multiple API.On() calls
// are combined to do all the work in one go.
//
// Note that things are much easier to implement using custom TokenHandlers (see
// Note that things are much easier to implement using custom Handlers (see
// the other helloParserCombinator example for this). Doing this implementation
// is mainly for your learning pleasure.
//
@ -16,8 +16,7 @@ package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
@ -66,21 +65,21 @@ func Example_helloWorldUsingParser2() {
// [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!' at line 1, column 15
}
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
// Implementation of the parser
// ---------------------------------------------------------------------------
// ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
type helloparser2 struct {
greetee string
}
func (h *helloparser2) Parse(input string) (string, *common.Error) {
parser := parsekit.NewParser(h.start)
err := parser.Execute(input)
func (h *helloparser2) Parse(input string) (string, error) {
parser := parse.New(h.start)
err := parser(input)
return h.greetee, err
}
func (h *helloparser2) start(p *parsekit.ParseAPI) {
func (h *helloparser2) start(p *parse.API) {
c, a, m := tokenize.C, tokenize.A, tokenize.M
if !p.Accept(a.StrNoCase("hello")) {
p.Error("the greeting is not being friendly")

View File

@ -1,8 +1,8 @@
// In this example, we show that any type can be extended into a parser,
// filling that type with data from the ParseHandler methods.
// filling that type with data from the Handler methods.
//
// Here, we create a custom type 'Chunks', which is an alias
// for []string. We add a ParseHandler method directly to that type
// for []string. We add a Handler method directly to that type
// and let the parsing code fill the slice with strings during parsing.
package examples
@ -10,23 +10,22 @@ package examples
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
type Chunks []string
func (l *Chunks) AddChopped(s string, chunkSize int) *common.Error {
func (l *Chunks) AddChopped(s string, chunkSize int) error {
c, a := tokenize.C, tokenize.A
chunkOfRunes := c.MinMax(1, chunkSize, a.AnyRune)
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
parser := parse.New(func(p *parse.API) {
for p.Accept(chunkOfRunes) {
*l = append(*l, p.Result().String())
}
})
return parser.Execute(s)
return parser(s)
}
func Example_usingSliceAsParserState() {

235
parse/api.go Normal file
View File

@ -0,0 +1,235 @@
package parse
import (
"fmt"
"io"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// API holds the internal state of a parse run and provides an API that
// parse.Handler functions can use to:
//
// • communicate with tokenize.Handler functions (Peek, Accept, ExpectEndOfFile, Result)
//
// • update the parser status (Error, Expected, Stop)
//
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
type API struct {
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
loopCheck map[string]bool // used for parser loop detection
err error // parse error, as set by Error(); calls to API methods are denied when set
stopped bool // set to true by Stop(); calls to API methods are denied when true
}
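To make the three roles above concrete, here is a minimal, hypothetical sketch of a parse.Handler (illustration only; the API calls are the ones defined below, and Handle() would be used to chain into a next handler):

func sketchNumber(p *parse.API) {
	if p.Accept(tokenize.A.Digits) { // communicate with a tokenize.Handler
		fmt.Println("number:", p.Result().String()) // inspect the match
		p.Stop() // update the parser status: done
		return
	}
	p.Expected("a number") // update the parser status: error
}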
// Peek checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned, false otherwise. The read cursor
// will be kept at the same position, so the next call to Peek() or Accept()
// will start from the same cursor position.
//
// After calling this method, you can retrieve the produced tokenize.Result
// struct using the Result() method.
func (p *API) Peek(tokenHandler tokenize.Handler) bool {
p.result = nil
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
if ok {
p.result = forkedAPI.Result()
p.tokenAPI.Reset()
}
return ok
}
// Accept checks if the upcoming input data matches the provided tokenize.Handler.
// If it does, then true will be returned and the read cursor will be moved
// forward to beyond the match that was found. Otherwise false will be
// returned and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the tokenize.Result
// using the Result() method.
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
p.result = nil
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
if ok {
forkedAPI.Merge()
p.result = p.tokenAPI.Result()
forkedAPI.Dispose()
if p.tokenAPI.FlushInput() {
p.initLoopCheck()
}
}
return ok
}
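The practical difference between Peek() and Accept(), in a short hypothetical sketch (a.Excl matches a '!', as in the hello examples):

func sketchPeekVsAccept(p *parse.API) {
	a := tokenize.A
	if p.Peek(a.Excl) { // matches, but the read cursor stays put
		p.Accept(a.Excl) // so the same '!' matches again and is now consumed
	}
}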
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) {
p.panicWhenStoppedOrInError(name)
p.checkForLoops()
if tokenHandler == nil {
callerPanic(2, "parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
}
p.result = nil
p.tokenAPI.Reset()
child := p.tokenAPI.Fork()
ok := tokenHandler(child)
return child, ok
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
// or when it has been stopped. It is used from the API methods, to
// prevent further calls to the API on these occasions.
//
// Basically, this guard helps with proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *API) panicWhenStoppedOrInError(name string) {
if !p.isStoppedOrInError() {
return
}
after := "Error()"
if p.stopped {
after = "Stop()"
}
callerPanic(2, "parse.API.%s(): Illegal call to %s() at {caller}: "+
"no calls allowed after API.%s", name, name, after)
}
func (p *API) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
// initLoopCheck clears the loop check data, a map in which we keep
// track of the lines of code from which Accept() and/or Peek() are called.
// When Accept() is called, and the parser moved forward in the input data,
// this method is called to reset the map for the new read cursor position.
func (p *API) initLoopCheck() {
p.loopCheck = map[string]bool{}
}
// checkForLoops checks if the line of code from which Accept() or Peek()
// was called has been seen before for the current read cursor position.
// If yes, then the parser is in a loop and the method will panic.
func (p *API) checkForLoops() {
filepos := callerFilepos(3)
if _, ok := p.loopCheck[filepos]; ok {
callerPanic(3, "parse.API: Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
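For illustration, a sketch of a parser that trips this check; it mirrors the zero-width-match loop bug that is documented in a test at the bottom of parse/parse_test.go in this commit:

looping := parse.New(func(p *parse.API) {
	c, a := tokenize.C, tokenize.A
	// Max(5, ...) is really MinMax(0, 5, ...), so it also matches zero runes
	// at end of input. Accept() then succeeds without moving the read cursor
	// forward, and the second call from this same line panics.
	for p.Accept(c.Max(5, a.AnyRune)) {
	}
})
looping("ends too soon") // panics: parse.API: Loop detected in parser at ...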
// Result returns the tokenize.Result struct, containing results as produced by the
// last Peek() or Accept() call.
//
// When Result() is called without first doing a Peek() or Accept(), then no
// result will be available and the method will panic.
func (p *API) Result() *tokenize.Result {
result := p.result
if p.result == nil {
callerPanic(1, "parse.API.Result(): Result() called "+
"at {caller} without calling API.Peek() or API.Accept() on beforehand")
}
return result
}
// Handle executes another parse.Handler function from within the active
// parse.Handler function.
//
// The boolean return value is true when the parser can still continue.
// It will be false when either an error was set using Error(), or the
// parser was stopped using Stop().
//
// Instead of calling another handler using this method, you can also call
// that other handler directly. However, it is generally advised to make use
// of this method, because it performs some sanity checks and it will return
// an easy-to-use boolean indicating whether the parser can continue or not.
func (p *API) Handle(parseHandler Handler) bool {
p.panicWhenStoppedOrInError("Handle")
p.panicWhenHandlerNil(parseHandler)
parseHandler(p)
return !p.isStoppedOrInError()
}
func (p *API) panicWhenHandlerNil(parseHandler Handler) {
if parseHandler == nil {
callerPanic(2, "parse.API.Handle(): Handle() called with nil input at {caller}")
}
}
// Stop tells the parser that the parsing process has been completed.
//
// When the initial parse.Handler function returns without stopping first
// and without running into an error, the method ExpectEndOfFile() is automatically
// called to verify if the end of the file was reached. If not, then things will
// end in an unexpected input error.
//
// Note:
// Even though this fallback mechanism will work in a lot of cases, try to make
// your parser explicit about things and call Stop() actively yourself.
//
// After stopping, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
func (p *API) Stop() {
p.stopped = true
}
// Error sets the error message in the API.
//
// After setting an error, no more calls to API methods are allowed.
// Calling a method in this state will result in a panic.
// TODO: reconsider the name. The error is never read back through the API, only returned to the caller, so SetError() might be a clearer name than Error().
func (p *API) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = fmt.Errorf("%s at %s", message, *p.tokenAPI.Result().Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.
//
// When it finds that the end of the file was indeed reached, then the parser
// will be stopped through Stop(). Otherwise, the unexpected input is reported
// using Expected("end of file").
func (p *API) ExpectEndOfFile() {
p.panicWhenStoppedOrInError("ExpectEndofFile")
if p.Peek(tokenize.A.EndOfFile) {
p.Stop()
} else {
p.Expected("end of file")
}
}
// Expected sets a parser error that indicates that some unexpected
// input was encountered.
//
// The 'expected' argument can be an empty string. In that case the error
// message will not contain a description of the expected input.
//
// This method automatically produces an error message for a couple of situations:
//
// • the input simply didn't match the expectation
//
// • the end of the input was reached
//
// • there was an error while reading the input.
func (p *API) Expected(expected string) {
p.panicWhenStoppedOrInError("Expected")
_, err := p.tokenAPI.NextRune()
switch {
case err == nil:
p.Error("unexpected input%s", fmtExpects(expected))
case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(expected))
default:
p.Error("unexpected error '%s'%s", err, fmtExpects(expected))
}
}
func fmtExpects(expected string) string {
if expected == "" {
return ""
}
return fmt.Sprintf(" (expected %s)", expected)
}

View File

@ -1,6 +1,6 @@
package parsekit
package parse
// This file contains some tools that are used for writing parsekit tests.
// This file contains some tools that are used for writing tests.
import (
"regexp"
@ -17,12 +17,6 @@ func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat
}
}
func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
if notExpected == actual {
t.Errorf("Unexpected value for %s: %q", forWhat, actual)
}
}
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
@ -63,38 +57,10 @@ func AssertPanic(t *testing.T, p PanicT) {
p.Function()
}
type TokenHandlerT struct {
Input string
TokenHandler tokenize.TokenHandler
MustMatch bool
Expected string
}
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
for _, test := range testSet {
AssertTokenHandler(t, test)
}
}
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output := result.String(); output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.Input)
}
}
}
type TokenMakerT struct {
Input string
TokenHandler tokenize.TokenHandler
Expected []tokenize.Token
Input string
Handler tokenize.Handler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
@ -104,7 +70,8 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
tokenizer := tokenize.New(test.Handler)
result, err := tokenizer(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {

20
parse/callerinfo.go Normal file
View File

@ -0,0 +1,20 @@
package parse
import (
"fmt"
"runtime"
"strings"
)
func callerFilepos(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line)
}
func callerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
panic(m)
}
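As an aside, the caller-reporting pattern used here can be sketched as a standalone program (all names below are made up for the example):

package main

import (
	"fmt"
	"runtime"
	"strings"
)

// reportCaller resolves the file:line of the frame 'depth' levels above its
// caller and substitutes it for the {caller} placeholder, mirroring the
// callerFilepos/callerPanic pair above (minus the panic).
func reportCaller(depth int, f string) string {
	_, file, line, _ := runtime.Caller(depth + 1)
	return strings.Replace(f, "{caller}", fmt.Sprintf("%s:%d", file, line), 1)
}

func main() {
	// Prints something like: called from /tmp/sketch/main.go:19
	fmt.Println(reportCaller(0, "called from {caller}"))
}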

10
parse/handler.go Normal file
View File

@ -0,0 +1,10 @@
// Package parse provides tooling to build a state machine-style recursive descent parser.
package parse
// Handler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A Handler function gets an API struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the Handler uses to interact with the parser.
type Handler func(*API)
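Both plain functions and methods satisfy this signature; the hello and calculator examples in this commit use the method form so that parsed data can be collected on the receiver. A hypothetical sketch of both shapes (imports elided):

// Shape 1: a plain function or closure.
func skipBlanks(p *API) { p.Accept(tokenize.A.Blanks) }

// Shape 2: a method on a state-carrying type.
type numberParser struct{ number string }

func (n *numberParser) start(p *API) {
	if p.Accept(tokenize.A.Digits) {
		n.number = p.Result().String()
	}
	p.Stop()
}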

46
parse/parse.go Normal file
View File

@ -0,0 +1,46 @@
// Package parse provides tooling to build a state machine-style recursive descent parser.
package parse
import (
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a parse run and returns an error
// (or nil when all went right).
type Func func(interface{}) error
// New instantiates a new parser.
//
// The parser is a state machine-style recursive descent parser, in which
// parse.Handler functions are used to move the state machine forward during
// parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// The startHandler argument points the parser to the parse.Handler function
// that must be executed at the start of the parsing process. From there on
// other parse.Handler functions can be invoked recursively to implement the
// parsing process.
//
// This function returns a function that can be invoked to run the parser
// on the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(startHandler Handler) Func {
if startHandler == nil {
callerPanic(1, "parsekit.parse.New(): New() called with nil input at {caller}")
}
return func(input interface{}) error {
api := &API{
tokenAPI: tokenize.NewAPI(input),
loopCheck: map[string]bool{},
}
if api.Handle(startHandler) {
// Handle returned true, indicating that parsing could still continue.
// There was no error and the parsing has not actively been Stop()-ed.
// Let's assume that we actually reached the end of the parsing successfully
// and try to make the best of it.
api.ExpectEndOfFile()
}
return api.err
}
}
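End to end, a minimal sketch of how the returned Func would be used (the error text follows the ExpectEndOfFile()/Expected() behavior described in api.go):

parser := parse.New(func(p *parse.API) {
	if p.Accept(tokenize.A.Integer) {
		fmt.Println("integer:", p.Result().String())
	}
	p.ExpectEndOfFile()
})
err := parser("42")  // prints "integer: 42"; err is nil
err = parser("42x")  // err: unexpected input (expected end of file) at line 1, column 3
_ = err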

337
parse/parse_test.go Normal file
View File

@ -0,0 +1,337 @@
package parse_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/parse"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleNew_usingAcceptedRunes() {
// Easy access to the tokenize definitions.
a := tokenize.A
matches := []string{}
parser := parse.New(func(p *parse.API) {
for p.Accept(a.AnyRune) {
matches = append(matches, p.Result().String())
}
p.ExpectEndOfFile()
})
err := parser("¡Any will dö!")
fmt.Printf("Matches = %q, Error = %v\n", matches, err)
// Output:
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}
func ExampleNew_usingTokens() {
// Easy access to the tokenize definitions.
c, a, tok := tokenize.C, tokenize.A, tokenize.T
parser := parse.New(func(p *parse.API) {
if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
fmt.Printf("Runes accepted: %q\n", p.Result().String())
fmt.Printf("Tokens:\n")
for i, token := range p.Result().Tokens() {
fmt.Printf("[%d] %s\n", i, token)
}
}
p.ExpectEndOfFile()
})
parser("¡ök!")
// Output:
// Runes accepted: "¡ök!"
// Tokens:
// [0] RUNE("¡", value = (int32)161)
// [1] RUNE("ö", value = (int32)246)
// [2] RUNE("k", value = (int32)107)
// [3] RUNE("!", value = (int32)33)
}
func ExampleAPI_Expected() {
parser := parse.New(func(p *parse.API) {
p.Expected("a thing")
})
err := parser("Whatever, this parser will never be happy...")
fmt.Printf("Error: %s\n", err)
// Output:
// Error: unexpected input (expected a thing) at start of file
}
func ExampleAPI_Accept_inIfStatement() {
parser := parse.New(func(p *parse.API) {
// When a case-insensitive match on "Yowza!" is found by the
// tokenizer, then Accept() will make the result available
// through API.Result()
if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
// Result.String() returns a string containing all
// accepted runes that were matched against.
fmt.Println(p.Result().String())
}
})
parser("YOWZA!")
// Output:
// YOWZA!
}
func ExampleAPI_Accept_inSwitchStatement() {
var result string
parser := parse.New(func(p *parse.API) {
for loop := true; loop; {
switch {
case p.Accept(tokenize.A.Rune('X')):
// NOOP, skip this rune
case p.Accept(tokenize.A.AnyRune):
result += p.Result().String()
default:
loop = false
}
}
})
parser("HXeXllXoXX, XXwoXrlXXXd!")
fmt.Println(result)
// Output:
// Hello, world!
}
func ExampleAPI_Stop() {
C, A := tokenize.C, tokenize.A
parser := parse.New(func(p *parse.API) {
fmt.Printf("First word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
p.Stop()
})
parser("Input with spaces")
// Output:
// First word: Input
}
func ExampleAPI_Stop_notCalledAndNoInputPending() {
C, A := tokenize.C, tokenize.A
parser := parse.New(func(p *parse.API) {
fmt.Printf("Word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser("Troglodyte")
fmt.Printf("Error is nil: %t\n", err == nil)
// Output:
// Word: Troglodyte
// Error is nil: true
}
func ExampleAPI_Stop_notCalledButInputPending() {
C, A := tokenize.C, tokenize.A
parser := parse.New(func(p *parse.API) {
fmt.Printf("First word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser("Input with spaces")
fmt.Printf("Error: %s\n", err)
// Output:
// First word: Input
// Error: unexpected input (expected end of file) at line 1, column 6
}
func ExampleAPI_Peek() {
// Definition of a fantasy serial number format.
C, A := tokenize.C, tokenize.A
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
// This handler is able to handle serial numbers.
serialnrHandler := func(p *parse.API) {
if p.Accept(serialnr) {
fmt.Println(p.Result().String())
}
}
// Start could function as a sort of dispatcher, handing over
// control to the correct Handler function, based on the input.
start := func(p *parse.API) {
if p.Peek(tokenize.A.Asterisk) {
p.Handle(serialnrHandler)
return
}
// ... other cases could go here ...
}
parser := parse.New(start)
parser("#XX1234")
parser("*ay432566")
parser("*ZD987112")
// Output:
// *ZD987112
}
func TestGivenNullHandler_NewPanics(t *testing.T) {
parse.AssertPanic(t, parse.PanicT{
Function: func() { parse.New(nil) },
Regexp: true,
Expect: `parsekit\.parse\.New\(\): New\(\) called ` +
`with nil input at /.*/parse_test\.go:\d+`})
}
func TestGivenNullHandler_HandlePanics(t *testing.T) {
brokenHandler := func(p *parse.API) {
p.Handle(nil)
}
parser := parse.New(brokenHandler)
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("") },
Regexp: true,
Expect: `parse\.API\.Handle\(\): Handle\(\) called with nil input ` +
`at /.*/parse_test\.go:\d+`})
}
func TestGivenNilHandler_AcceptPanics(t *testing.T) {
p := parse.New(func(p *parse.API) {
p.Accept(nil)
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parse\.API\.Accept\(\): Accept\(\) called with nil ` +
`tokenHandler argument at /.*/parse_test\.go:\d+`})
}
func TestGivenNilHandler_PeekPanics(t *testing.T) {
p := parse.New(func(p *parse.API) {
p.Peek(nil)
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parse\.API\.Peek\(\): Peek\(\) called with nil ` +
`tokenHandler argument at /.*/parse_test\.go:\d+`})
}
func TestGivenStoppedParser_HandlePanics(t *testing.T) {
otherHandler := func(p *parse.API) {
panic("This is not the handler you're looking for")
}
p := parse.New(func(p *parse.API) {
p.Stop()
p.Handle(otherHandler)
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parse\.API\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parse_test\.go:\d+: no calls allowed after API\.Stop\(\)`})
}
func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
otherHandler := func(p *parse.API) {
panic("This is not the handler you're looking for")
}
p := parse.New(func(p *parse.API) {
p.Error("It ends here")
p.Handle(otherHandler)
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parse\.API\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parse_test\.go:\d+: no calls allowed after API\.Error\(\)`})
}
func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) {
p := parse.New(func(p *parse.API) {
p.Result()
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parse\.API\.Result\(\): Result\(\) called at ` +
`/.*/parse_test.go:\d+ without first calling API.Peek\(\) or API.Accept\(\)`})
}
func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
p := parse.New(func(p *parse.API) {})
err := p("")
parse.AssertTrue(t, err == nil, "err")
}
func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) {
p := parse.New(func(p *parse.API) {})
err := p("x")
parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err")
}
type parserWithLoop struct {
loopCounter int
}
func (l *parserWithLoop) first(p *parse.API) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.second)
}
func (l *parserWithLoop) second(p *parse.API) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.third)
}
func (l *parserWithLoop) third(p *parse.API) {
if l.loopCounter++; l.loopCounter > 100 {
p.Error("Loop not detected by parsekit")
return
}
p.Accept(tokenize.A.ASCII)
p.Handle(l.first)
}
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
looper := &parserWithLoop{}
parser := parse.New(looper.first)
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("Het houdt niet op, niet vanzelf") },
Regexp: true,
Expect: `parse\.API: Loop detected in parser at /.*/parse_test.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
// that I could not easily spot in my code. It sounded so logical:
// I want to get chunks of 5 chars from the input, so I simply loop on:
//
// p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
// p.On(c.MinMax(1, 5, a.AnyRune))
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A
parser := parse.New(func(p *parse.API) {
for p.Accept(c.Max(5, a.AnyRune)) {
}
p.Stop()
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { parser("This will end soon") },
Regexp: true,
Expect: `parse\.API: Loop detected in parser at .*/parse_test.go:\d+`})
}

View File

@ -1,216 +0,0 @@
package parsekit
import (
"fmt"
"io"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// ParseAPI holds the internal state of a parse run and provides an API that
// ParseHandler methods can use to communicate with the parser.
type ParseAPI struct {
tokenAPI *tokenize.TokenAPI // the TokenAPI, used for communicating with TokenHandler functions
loopCheck map[string]bool // used for parser loop detection
result *tokenize.TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept()
err *common.Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
}
// Peek checks if the upcoming input data matches the provided TokenHandler.
// If it does, then true will be returned, false otherwise. The read cursor
// will be kept at the same position, so the next call to Peek() or Accept()
// will start from the same cursor position.
//
// After calling this method, you can retrieve the produced TokenHandlerResult
// using the ParseAPI.Result() method.
func (p *ParseAPI) Peek(tokenHandler tokenize.TokenHandler) bool {
p.result = nil
forkedTokenAPI, ok := p.invokeTokenHandler("Peek", tokenHandler)
if ok {
p.result = forkedTokenAPI.Result()
p.tokenAPI.ClearResults()
p.tokenAPI.DetachChilds()
}
return ok
}
// Accept checks if the upcoming input data matches the provided TokenHandler.
// If it does, then true will be returned, false otherwise. The read cursor
// will be moved forward to beyond the match that was found.
//
// After calling this method, you can retrieve the produced TokenHandlerResult
// using the ParseAPI.Result() method.
func (p *ParseAPI) Accept(tokenHandler tokenize.TokenHandler) bool {
p.result = nil
forkedTokenAPI, ok := p.invokeTokenHandler("Accept", tokenHandler)
if ok {
forkedTokenAPI.Merge()
p.result = p.tokenAPI.Result()
p.tokenAPI.DetachChilds()
if p.tokenAPI.FlushReader() {
p.initLoopCheck()
}
}
return ok
}
func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler tokenize.TokenHandler) (*tokenize.TokenAPI, bool) {
p.panicWhenStoppedOrInError()
p.checkForLoops()
if tokenHandler == nil {
common.CallerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name)
}
p.result = nil
p.tokenAPI.ClearResults()
child := p.tokenAPI.Fork()
ok := tokenHandler(child)
return child, ok
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
// or when it has been stopped. It is used from the ParseAPI methods, to
// prevent further calls to the ParseAPI on these occasions.
//
// Basically, this guard ensures proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *ParseAPI) panicWhenStoppedOrInError() {
if !p.isStoppedOrInError() {
return
}
called := common.CallerFunc(1)
after := "Error()"
if p.stopped {
after = "Stop()"
}
common.CallerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+
"no calls allowed after ParseAPI.%s", called, called, after)
}
func (p *ParseAPI) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
func (p *ParseAPI) initLoopCheck() {
p.loopCheck = map[string]bool{}
}
func (p *ParseAPI) checkForLoops() {
filepos := common.CallerFilePos(3)
if _, ok := p.loopCheck[filepos]; ok {
common.CallerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}")
}
p.loopCheck[filepos] = true
}
// Result returns a TokenHandlerResult struct, containing results as produced by the
// last Peek() or Accept() call.
//
// When Result() is called without first doing a Peek() or Accept(), then no
// result will be available and the method will panic.
func (p *ParseAPI) Result() *tokenize.TokenHandlerResult {
result := p.result
if p.result == nil {
common.CallerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+
"at {caller} without calling ParseAPI.Peek() or ParseAPI.Accept() on beforehand")
}
return result
}
// Handle is used to execute other ParseHandler functions from within your
// ParseHandler function.
//
// The boolean return value is true when the parser can still continue.
// It will be false when either an error was set (using ParseAPI.Error()),
// or the parser was stopped (using ParseAPI.Stop()).
func (p *ParseAPI) Handle(parseHandler ParseHandler) bool {
p.panicWhenStoppedOrInError()
p.panicWhenParseHandlerNil(parseHandler)
parseHandler(p)
return !p.isStoppedOrInError()
}
func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) {
if parseHandler == nil {
common.CallerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}")
}
}
// Stop is used by the parser implementation to tell the ParseAPI that it has
// completed the parsing process successfully.
//
// When the parser implementation returns without stopping first (and
// without running into an error), the Parser.Execute() method will call
// ParseAPI.ExpectEndOfFile() to check if the end of the file was reached.
// If not, then things will end in an unexpected input error.
// Even though this fallback mechanism will work in a lot of cases, try to make
// your parser explicit about things and call Stop() actively yourself.
//
// After stopping, no more calls to ParseAPI methods are allowed.
// Calling a method in this state will result in a panic.
func (p *ParseAPI) Stop() {
p.stopped = true
}
// Error sets the error message in the ParseAPI.
//
// After setting an error, no more calls to ParseAPI methods are allowed.
// Calling a method in this state will result in a panic.
func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = &common.Error{message, *p.tokenAPI.Result().Cursor()}
}
// ExpectEndOfFile can be used to check if the input is at end of file.
//
// When it finds that the end of the file was indeed reached, then the
// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected
// input is reported through ParseAPI.Expected() with "end of file"
// as the expectation.
func (p *ParseAPI) ExpectEndOfFile() {
p.panicWhenStoppedOrInError()
if p.Peek(tokenize.A.EndOfFile) {
p.Stop()
} else {
p.Expected("end of file")
}
}
// Expected is used to set an error that tells the user that some
// unexpected input was encountered, and what input was expected.
//
// The 'expected' argument can be an empty string. In that case the error
// message will not contain a description of the expected input.
//
// It automatically produces an error message for a couple of situations:
// 1) the input simply didn't match the expectation
// 2) the end of the input was reached
// 3) there was an error while reading the input.
func (p *ParseAPI) Expected(expected string) {
p.panicWhenStoppedOrInError()
_, err := p.tokenAPI.NextRune()
switch {
case err == nil:
p.Error("unexpected input%s", fmtExpects(expected))
case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(expected))
default:
p.Error("unexpected error '%s'%s", err, fmtExpects(expected))
}
}
func fmtExpects(expected string) string {
if expected == "" {
return ""
}
return fmt.Sprintf(" (expected %s)", expected)
}

View File

@ -1,57 +0,0 @@
package parsekit
import (
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
startHandler ParseHandler // the function that handles the very first state
}
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
// A ParseHandler function gets a ParseAPI struct as its input. This struct holds
// all the internal state for the parsing state machine and provides the
// interface that the ParseHandler uses to interact with the parser.
type ParseHandler func(*ParseAPI)
// NewParser instantiates a new Parser.
//
// The Parser is a state machine-style recursive descent parser, in which
// ParseHandler functions are used to move the state machine forward during
// parsing. This style of parser is typically used for parsing programming
// languages and structured data formats (like json, xml, toml, etc.)
//
// The startHandler argument points the Parser to the ParseHandler function
// that must be executed at the start of the parsing process.
//
// To parse input data, use the method Parser.Execute().
func NewParser(startHandler ParseHandler) *Parser {
if startHandler == nil {
common.CallerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}")
}
return &Parser{startHandler: startHandler}
}
// Execute starts the parser for the provided input.
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
//
// When an error occurs during parsing, then this error is returned, nil otherwise.
func (p *Parser) Execute(input interface{}) *common.Error {
api := &ParseAPI{
tokenAPI: tokenize.NewTokenAPI(input),
loopCheck: map[string]bool{},
}
if api.Handle(p.startHandler) {
// Handle returned true, indicating that parsing could still continue.
// There was no error and the parsing has not actively been Stop()-ed.
// Let's assume that we actually reached the end of the parsing successfully
// and try to make the best of it.
api.ExpectEndOfFile()
}
return api.err
}

View File

@ -1,330 +0,0 @@
package parsekit_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleParser_usingAcceptedRunes() {
// Easy access to the parsekit definitions.
a := tokenize.A
matches := []string{}
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.Accept(a.AnyRune) {
matches = append(matches, p.Result().String())
}
p.ExpectEndOfFile()
})
err := parser.Execute("¡Any will dö!")
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
// Output:
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}
func ExampleParser_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := tokenize.C, tokenize.A, tokenize.T
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
fmt.Printf("Runes accepted: %q\n", p.Result().String())
fmt.Printf("Token values: %s\n", p.Result().Tokens())
}
p.ExpectEndOfFile()
})
parser.Execute("¡ök!")
// Output:
// Runes accepted: "¡ök!"
// Token values: RUNE("¡", value = (int32)161) RUNE("ö", value = (int32)246) RUNE("k", value = (int32)107) RUNE("!", value = (int32)33)
}
func ExampleParseAPI_Expected() {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expected("a thing")
})
err := parser.Execute("Whatever, this parser will never be happy...")
fmt.Printf("Error: %s\n", err)
// Output:
// Error: unexpected input (expected a thing) at start of file
}
func ExampleParseAPI_Accept_inIfStatement() {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
// When a case-insensitive match on "Yowza!" is found by the
// tokenizer, then Accept() will make the result available
// through ParseAPI.Result()
if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
// Result.String() returns a string containing all
// accepted runes that were matched against.
fmt.Println(p.Result().String())
}
})
parser.Execute("YOWZA!")
// Output:
// YOWZA!
}
func ExampleParseAPI_Accept_inSwitchStatement() {
var result string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for loop := true; loop; {
switch {
case p.Accept(tokenize.A.Rune('X')):
// NOOP, skip this rune
case p.Accept(tokenize.A.AnyRune):
result += p.Result().String()
default:
loop = false
}
}
})
parser.Execute("HXeXllXoXX, XXwoXrlXXXd!")
fmt.Println(result)
// Output:
// Hello, world!
}
func ExampleParseAPI_Stop() {
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
p.Stop()
})
parser.Execute("Input with spaces")
// Output:
// First word: Input
}
func ExampleParseAPI_Stop_notCalledAndNoInputPending() {
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("Word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser.Execute("Troglodyte")
fmt.Printf("Error is nil: %t\n", err == nil)
// Output:
// Word: Troglodyte
// Error is nil: true
}
func ExampleParseAPI_Stop_notCalledButInputPending() {
C, A := tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
fmt.Printf("First word: ")
for p.Accept(C.Not(A.Space)) {
fmt.Printf("%s", p.Result())
}
fmt.Printf("\n")
})
err := parser.Execute("Input with spaces")
fmt.Printf("Error: %s\n", err)
// Output:
// First word: Input
// Error: unexpected input (expected end of file) at line 1, column 6
}
func ExampleParseAPI_Peek() {
// Definition of a fantasy serial number format.
C, A := tokenize.C, tokenize.A
serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits)
// This handler is able to handle serial numbers.
serialnrHandler := func(p *parsekit.ParseAPI) {
if p.Accept(serialnr) {
fmt.Println(p.Result().String())
}
}
// Start could function as a sort of dispatcher, handing over
// control to the correct ParseHandler function, based on the input.
start := func(p *parsekit.ParseAPI) {
if p.Peek(tokenize.A.Asterisk) {
p.Handle(serialnrHandler)
return
}
// ... other cases could go here ...
}
parser := parsekit.NewParser(start)
parser.Execute("#XX1234")
parser.Execute("*ay432566")
parser.Execute("*ZD987112")
// Output:
// *ZD987112
}
func TestGivenNullHandler_NewParserPanics(t *testing.T) {
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parsekit.NewParser(nil) },
Regexp: true,
Expect: `parsekit\.NewParser\(\): NewParser\(\) called ` +
`with nil input at /.*/parser_test\.go:\d+`})
}
func TestGivenNullHandler_HandlePanics(t *testing.T) {
brokenParseHandler := func(p *parsekit.ParseAPI) {
p.Handle(nil)
}
parser := parsekit.NewParser(brokenParseHandler)
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Handle\(\) called with nil input ` +
`at /.*/parser_test\.go:\d+`})
}
func TestGivenNilTokenHandler_AcceptPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Accept(nil)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Accept\(\): Accept\(\) called with nil ` +
`tokenHandler argument at /.*/parser_test\.go:\d+`})
}
func TestGivenNilTokenHandler_PeekPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Peek(nil)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Peek\(\): Peek\(\) called with nil ` +
`tokenHandler argument at /.*/parser_test\.go:\d+`})
}
func TestGivenStoppedParser_HandlePanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Stop()
p.Handle(otherHandler)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Stop\(\)`})
}
func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
otherHandler := func(p *parsekit.ParseAPI) {
panic("This is not the handler you're looking for")
}
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Error("It ends here")
p.Handle(otherHandler)
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` +
`at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
}
func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Result()
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { p.Execute("") },
Regexp: true,
Expect: `parsekit\.ParseAPI\.TokenHandlerResult\(\): TokenHandlerResult\(\) called at ` +
`/.*/parser_test.go:\d+ without first calling ParseAPI.Peek\(\) or ParseAPI.Accept\(\)`})
}
func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
err := p.Execute("")
parsekit.AssertTrue(t, err == nil, "err")
}
func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {})
err := p.Execute("x")
parsekit.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err")
}
type parserWithLoop struct {
loopCounter int
}
func (l *parserWithLoop) first(p *parsekit.ParseAPI) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.second)
}
func (l *parserWithLoop) second(p *parsekit.ParseAPI) {
p.Accept(tokenize.A.ASCII)
p.Handle(l.third)
}
func (l *parserWithLoop) third(p *parsekit.ParseAPI) {
if l.loopCounter++; l.loopCounter > 100 {
p.Error("Loop not detected by parsekit")
return
}
p.Accept(tokenize.A.ASCII)
p.Handle(l.first)
}
func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
looper := &parserWithLoop{}
parser := parsekit.NewParser(looper.first)
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("Het houdt niet op, niet vanzelf") },
Regexp: true,
Expect: `parsekit\.ParseAPI: Loop detected in parser at /.*/parser_test.go:\d+`})
}
// This test incorporates an actual loop bug that I dropped on myself and
// that I could not easily spot in my code. It sounded so logical:
// I want to get chunks of 5 chars from the input, so I simply loop on:
//
// p.On(c.Max(5, a.AnyRune))
//
// The problem here is that Max(5, ...) will also match when there is
// no more input, since Max(5, ...) is actually MinMax(0, 5, ...).
// Therefore the loop will never stop. Solving the loop was simple:
//
// p.On(c.MinMax(1, 5, a.AnyRune))
//
// Now the loop stops when the parser finds no more matching input data.
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.Accept(c.Max(5, a.AnyRune)) {
}
p.Stop()
})
parsekit.AssertPanic(t, parsekit.PanicT{
Function: func() { parser.Execute("This will end soon") },
Regexp: true,
Expect: `parsekit\.ParseAPI: Loop detected in parser at .*/parser_test.go:\d+`})
}
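(Editor's sketch, not part of the original commit: the corrected loop from the comment above, spelled out as a test. Because MinMax(1, 5, ...) must consume at least one rune per iteration, the parser makes progress on every pass and the loop ends cleanly at end of input.)

func TestGivenTerminatingLoopDefinition_ParserStops(t *testing.T) {
	var c, a = tokenize.C, tokenize.A
	parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
		for p.Accept(c.MinMax(1, 5, a.AnyRune)) {
			// Each pass consumes 1 to 5 runes, so progress is guaranteed.
		}
		p.Stop()
	})
	err := parser.Execute("This will end soon")
	parsekit.AssertTrue(t, err == nil, "err")
}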

38
read/assertions_test.go Normal file
View File

@ -0,0 +1,38 @@
package read_test
// This file contains some tools that are used for writing tests.
import (
"fmt"
"runtime"
"testing"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}) {
if expected != actual {
t.Errorf(
"Unexpected value at %s:\nexpected: %q\nactual: %q",
callerFilepos(1), expected, actual)
}
}
func callerFilepos(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line)
}
func AssertPanic(t *testing.T, code func(), expected string) {
defer func() {
if r := recover(); r != nil {
if expected != r.(string) {
t.Errorf(
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
expected, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", expected)
}
}()
code()
}

View File

@ -48,32 +48,17 @@ import (
"unicode/utf8"
)
// Reader wraps around a bufio.Reader and provides an additional layer of
// buffering that allows us to read the same runes over and over again.
// This is useful for implementing a parser that must be able to do lookahead
// on the input (returning to the original input position after finishing
// that lookahead).
//
// To minimize memory use, it is also possible to flush the read buffer when there is
// no more need to go back to previously read runes.
//
// The parsekit.read.Reader is used internally by tokenize.TokenAPI.
type Reader struct {
bufio *bufio.Reader // Used for ReadRune()
buffer []rune // Input buffer, holding runes that were read from input
err error // A read error, if one occurred
errOffset int // The offset in the buffer at which the read error was encountered
firstReadDone bool // Whether or not the first read was done
}
// New initializes a new reader struct, wrapped around the provided input.
// New initializes a new Buffer struct, wrapped around the provided input.
//
// The input can be any one of the following types:
// - string
// - type implementing io.Reader
// - bufio.Reader
func New(input interface{}) *Reader {
return &Reader{
//
// • string
//
// • a type implementing io.Reader
//
// • bufio.Reader
func New(input interface{}) *Buffer {
return &Buffer{
bufio: makeBufioReader(input),
}
}
@ -93,12 +78,29 @@ func makeBufioReader(input interface{}) *bufio.Reader {
}
}
// Buffer wraps around a bufio.Reader and provides an additional layer of
// buffering that allows us to read the same runes over and over again.
// This is useful for implementing a parser that must be able to do lookahead
// on the input (returning to the original input position after finishing
// that lookahead).
//
// To minimize memory use, it is also possible to flush the read buffer when there is
// no more need to go back to previously read runes.
//
// The parsekit.read.Buffer is used internally by tokenize.API.
type Buffer struct {
bufio *bufio.Reader // used for ReadRune()
buffer []rune // input buffer, holding runes that were read from input
err error // a read error, if one occurred
errOffset int // the offset in the buffer at which the read error was encountered
firstReadDone bool // whether or not the first read was done
}
// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done.
// This offset is relative to the current starting position of the Buffer.
// When starting reading, offset 0 will point at the start of the input.
// After flushing, offset 0 will point at the input up to where the flush was done.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
@ -107,9 +109,10 @@ func makeBufioReader(input interface{}) *bufio.Reader {
// When reading failed, the rune will be utf8.RuneError and the error will
// be not nil. One special read fail is actually a normal situation: end
// of file reached. In that case, the returned error will be io.EOF.
//
// Once a read error is encountered, that same read error is guaranteed to
// be returned on every subsequent read at or beyond the provided offset.
func (r *Reader) RuneAt(offset int) (rune, error) {
func (r *Buffer) RuneAt(offset int) (rune, error) {
// Re-issue a previously seen read error.
if r.err != nil && offset >= r.errOffset {
return utf8.RuneError, r.err
@ -153,12 +156,12 @@ func (r *Reader) RuneAt(offset int) (rune, error) {
const smallBufferSize = 64
// ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer.
var ErrTooLarge = errors.New("parsekit.read: too large")
var ErrTooLarge = errors.New("parsekit.read.Buffer: too large")
// grow grows the buffer to guarantee space for n more bytes.
// It returns the index where bytes should be written.
// If the buffer can't grow it will panic with ErrTooLarge.
func (r *Reader) grow(n int) {
func (r *Buffer) grow(n int) {
// Instantiate new buffer.
if r.buffer == nil {
b := smallBufferSize
@ -193,14 +196,14 @@ func makeSlice(n int) []rune {
return make([]rune, n)
}
// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
func (r *Reader) Flush(numberOfRunes int) {
// Flush deletes the provided number of runes from the start of the Buffer.
// After flushing the Buffer, offset 0 as used by RuneAt() will point to
// the rune that comes after the runes that were flushed.
// So what this basically does is turn the Buffer into a sliding window.
func (r *Buffer) Flush(numberOfRunes int) {
if numberOfRunes > len(r.buffer) {
panic(fmt.Sprintf(
"parsekit.read.Reader.Flush(): number of runes to flush (%d) "+
"parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+
"exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer)))
}
r.buffer = r.buffer[numberOfRunes:]

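(Editor's sketch, not part of the original commit: the sliding-window behavior described above in a few lines. It assumes the same imports as the read package tests below: fmt, strings and read.)

func ExampleBuffer_slidingWindow() {
	r := read.New(strings.NewReader("parsekit"))
	e, _ := r.RuneAt(4) // fills the buffer with "parse" and returns 'e'
	r.Flush(5)          // drops "parse"; offset 0 now points at 'k'
	k, _ := r.RuneAt(0) // reads 'k' at the new window start
	fmt.Printf("%c%c\n", e, k)
	// Output:
	// ek
}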
View File

@ -9,7 +9,6 @@ import (
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/read"
"github.com/stretchr/testify/assert"
)
func ExampleNew() {
@ -46,41 +45,41 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) {
}
func TestNew_UnhandledInputType_Panics(t *testing.T) {
assert.PanicsWithValue(t,
"parsekit.read.New(): no support for input of type int",
func() { read.New(12345) })
AssertPanic(t,
func() { read.New(12345) },
"parsekit.read.New(): no support for input of type int")
}
func TestReader_RuneAt(t *testing.T) {
func TestBuffer_RuneAt(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// It is possible to go back and forth while reading the input.
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0))
assert.Equal(t, "H!wH", result)
AssertEqual(t, "H!wH", result)
}
func TestReader_RuneAt_endOfFile(t *testing.T) {
func TestBuffer_RuneAt_endOfFile(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
rn, err := r.RuneAt(13)
result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
assert.Equal(t, "'<27>' EOF true", result)
AssertEqual(t, "'<27>' EOF true", result)
rn, err = r.RuneAt(20)
result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF)
assert.Equal(t, "'<27>' EOF true", result)
AssertEqual(t, "'<27>' EOF true", result)
}
func TestReader_RuneAt_invalidRune(t *testing.T) {
func TestBuffer_RuneAt_invalidRune(t *testing.T) {
r := read.New(strings.NewReader("Hello, \xcdworld!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9))
assert.Equal(t, " <20>wo", result, "result")
AssertEqual(t, " <20>wo", result)
}
func ExampleReader_RuneAt() {
func ExampleBuffer_RuneAt() {
reader := read.New(strings.NewReader("Hello, world!"))
fmt.Printf("Runes: ")
@ -104,16 +103,16 @@ func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
o, _ := r.RuneAt(1)
m, _ := r.RuneAt(2)
bom := fmt.Sprintf("%c%c%c", b, o, m)
assert.Equal(t, "Bom", bom, "first three runes")
AssertEqual(t, "Bom", bom)
}
func TestReader_Flush(t *testing.T) {
func TestBuffer_Flush(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w"
result := fmt.Sprintf("%c", at(7))
assert.Equal(t, "w", result, "first read")
AssertEqual(t, "w", result)
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
r.Flush(4)
@ -121,10 +120,10 @@ func TestReader_Flush(t *testing.T) {
// Rune 0 is now pointing at what originally was rune offset 4.
// We can continue reading from there.
result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5))
assert.Equal(t, "o, wor", result)
AssertEqual(t, "o, wor", result)
}
func ExampleReader_Flush() {
func ExampleBuffer_Flush() {
r := read.New(strings.NewReader("dog eat dog!"))
at := func(offset int) rune { c, _ := r.RuneAt(offset); return c }
@ -154,33 +153,33 @@ func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
r.RuneAt(10)
// However, we flush 12 runes, which exceeds the buffer size.
assert.PanicsWithValue(t,
"parsekit.read.Reader.Flush(): number of runes to flush "+
"(12) exceeds size of the buffer (11)",
func() { r.Flush(12) })
AssertPanic(t,
func() { r.Flush(12) },
"parsekit.read.Buffer.Flush(): number of runes to flush "+
"(12) exceeds size of the buffer (11)")
}
func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
r := read.New(strings.NewReader("Hello, world!"))
_, err := r.RuneAt(13)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(13)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(14)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
r.Flush(13)
_, err = r.RuneAt(0)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(1)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
_, err = r.RuneAt(2)
assert.Equal(t, err.Error(), "EOF")
AssertEqual(t, err.Error(), "EOF")
}
// In this test, I want to make sure that once a Reader returns an error,
// In this test, I want to make sure that once a Buffer returns an error,
// that error is cached and will be returned when data for the offset where
// the error occurred is read at a later time.
func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
input := &StubReader{
bytes: []byte{'a', 'b', 'c', 'd'},
errors: []error{
@ -192,43 +191,43 @@ func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
// Read the last available rune.
readRune, _ := r.RuneAt(3)
assert.Equal(t, 'd', readRune)
AssertEqual(t, 'd', readRune)
// Reading the next offset must result in the io.EOF error from the stub.
readRune, err := r.RuneAt(4)
assert.Equal(t, utf8.RuneError, readRune)
assert.Equal(t, io.EOF, err)
AssertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err)
// Reading even further should yield the same io.EOF error.
readRune, err = r.RuneAt(5)
assert.Equal(t, utf8.RuneError, readRune)
assert.Equal(t, io.EOF, err)
AssertEqual(t, utf8.RuneError, readRune)
AssertEqual(t, io.EOF, err)
// After an error, we must still be able to read the last rune.
readRune, _ = r.RuneAt(3)
assert.Equal(t, 'd', readRune)
AssertEqual(t, 'd', readRune)
// Flushing updates the error index too.
r.Flush(3)
// The last rune is now at offset 0.
readRune, _ = r.RuneAt(0)
assert.Equal(t, 'd', readRune)
AssertEqual(t, 'd', readRune)
// The io.EOF is now at offset 1.
_, err = r.RuneAt(1)
assert.Equal(t, io.EOF, err)
AssertEqual(t, io.EOF, err)
// Let's flush that last rune too.
r.Flush(1)
// The io.EOF is now at offset 0.
_, err = r.RuneAt(0)
assert.Equal(t, io.EOF, err)
AssertEqual(t, io.EOF, err)
// And reading beyond that offset also yields io.EOF.
_, err = r.RuneAt(1)
assert.Equal(t, io.EOF, err)
AssertEqual(t, io.EOF, err)
}
func TestInputLargerThanDefaultBufSize64(t *testing.T) {
@ -236,13 +235,13 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
r := read.New(input)
readRune, err := r.RuneAt(0)
assert.Equal(t, 'X', readRune)
AssertEqual(t, 'X', readRune)
readRune, err = r.RuneAt(size - 1)
assert.Equal(t, 'Y', readRune)
AssertEqual(t, 'Y', readRune)
readRune, err = r.RuneAt(size)
assert.Equal(t, io.EOF, err)
AssertEqual(t, io.EOF, err)
readRune, err = r.RuneAt(10)
assert.Equal(t, 'X', readRune)
AssertEqual(t, 'X', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
@ -250,9 +249,9 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
r := read.New(input)
readRune, _ := r.RuneAt(size - 200)
assert.Equal(t, 'X', readRune)
AssertEqual(t, 'X', readRune)
readRune, _ = r.RuneAt(size - 1)
assert.Equal(t, 'Y', readRune)
AssertEqual(t, 'Y', readRune)
}
func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
@ -260,7 +259,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
r := read.New(input)
readRune, _ := r.RuneAt(size - 1)
assert.Equal(t, 'Y', readRune)
AssertEqual(t, 'Y', readRune)
}
func makeLargeStubReader() (*StubReader, int) {

245
tokenize/api.go Normal file
View File

@ -0,0 +1,245 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
)
// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when not needed anymore (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// API must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported is by forking an API struct by
// calling method Fork(). This will return a forked child API, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory.
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient but, in my opinion, not very intuitive to read. It can also
// be tedious to get the cursor back to the correct position, which
// can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type API struct {
reader *read.Buffer
parent *API // parent API in case this API is a forked child
child *API // child API in case this API has a forked child
result *Result // results as produced by a Handler (runes, Tokens, cursor position)
}
// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
return &API{
reader: read.New(input),
result: newResult(),
}
}
// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.result.lastRune != nil {
callerPanic(1, "tokenize.API.NextRune(): NextRune() called at {caller} "+
"without a prior call to Accept()")
}
i.detachChild()
readRune, err := i.reader.RuneAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err
}
// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if i.result.lastRune == nil {
callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.result.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
i.result.offset++
i.result.lastRune = nil
}
// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the Handler was successful and you want to add the results
// to the parent API, you can call Merge() on the forked child.
// This will add the results to the results of the parent (runes, tokens).
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory.
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() *API {
// Cleanup current forking / reading state.
i.detachChild()
i.result.lastRune = nil
// Create the new fork.
child := &API{
reader: i.reader,
parent: i,
}
child.result = newResult()
i.syncCursorTo(child)
i.child = child
return child
}
// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset so it can immediately
// be reused for performing another match. This means that all Result data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *API) Merge() {
if i.parent == nil {
callerPanic(1, "tokenize.API.Merge(): Merge() called at {caller} on a non-forked API")
}
i.addResultsToParent()
i.syncCursorTo(i.parent)
i.clearResults()
i.detachChild()
}
func (i *API) addResultsToParent() {
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
}
func (i *API) syncCursorTo(to *API) {
to.result.offset = i.result.offset
*to.result.cursor = *i.result.cursor
}
// Reset clears the API results and - when forked - detaches the forked child.
func (i *API) Reset() {
i.clearResults()
i.detachChild()
}
// Dispose resets the API and - when it is a fork - detaches itself from its parent.
func (i *API) Dispose() {
i.Reset()
if i.parent != nil {
i.parent.detachChild()
}
}
func (i *API) clearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []*Token{}
i.result.err = nil
}
func (i *API) detachChild() {
if i.child != nil {
i.child.detachChildsRecurse()
i.child = nil
}
}
func (i *API) detachChildsRecurse() {
if i.child != nil {
i.child.detachChildsRecurse()
}
i.child = nil
i.parent = nil
}
// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own Handler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
if i.result.offset > 0 {
i.reader.Flush(i.result.offset)
i.result.offset = 0
return true
}
return false
}
// Result returns the Result struct from the API. The returned struct
// can be used to retrieve and to modify result data.
func (i *API) Result() *Result {
return i.result
}

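(Editor's sketch, not part of the original commit: the BASIC OPERATION cycle documented above, using only NewAPI, NextRune, Accept and Result. It would live in package tokenize_test with the usual fmt and tokenize imports, like the example file below.)

func ExampleAPI_basicOperation() {
	api := tokenize.NewAPI("Hi!")
	for {
		r, err := api.NextRune()
		if err != nil || r == '!' {
			break // stop reading; the pending rune is simply never accepted
		}
		api.Accept() // append the rune to the result and move the cursor
	}
	fmt.Println(api.Result().String())
	// Output:
	// Hi
}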
View File

@ -6,9 +6,9 @@ import (
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleTokenAPI_Fork() {
// This custom TokenHandler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *tokenize.TokenAPI) bool {
func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *tokenize.API) bool {
a := tokenize.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
@ -22,19 +22,19 @@ func ExampleTokenAPI_Fork() {
return false
}
// Note: a custom TokenHandler is normally not what you need.
// You can make use of the parser/combinator tooling to do things
// a lot simpler and take care of forking at the appropriate places.
// The handler from above can be replaced with:
// Note: a custom Handler is normally not what you need.
// You can make use of the parser/combinator tooling to make the
// implementation a lot simpler and to take care of forking at
// the appropriate places. The handler from above can be replaced with:
simpler := tokenize.A.RuneRange('a', 'c')
result, err := tokenize.NewTokenizer(abcHandler).Execute("another test")
result, err := tokenize.New(abcHandler)("another test")
fmt.Println(result, err)
result, err = tokenize.NewTokenizer(simpler).Execute("curious")
result, err = tokenize.New(simpler)("curious")
fmt.Println(result, err)
result, err = tokenize.NewTokenizer(abcHandler).Execute("bang on!")
result, err = tokenize.New(abcHandler)("bang on!")
fmt.Println(result, err)
result, err = tokenize.NewTokenizer(abcHandler).Execute("not a match")
result, err = tokenize.New(abcHandler)("not a match")
fmt.Println(result, err)
// Output:
@ -44,8 +44,8 @@ func ExampleTokenAPI_Fork() {
// <nil> mismatch at start of file
}
func ExampleTokenAPI_Merge() {
tokenHandler := func(t *tokenize.TokenAPI) bool {
func ExampleAPI_Merge() {
tokenHandler := func(t *tokenize.API) bool {
child1 := t.Fork()
child1.NextRune() // reads 'H'
child1.Accept()
@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() {
return true
}
result, _ := tokenize.NewTokenizer(tokenHandler).Execute("Hi mister X!")
result, _ := tokenize.New(tokenHandler)("Hi mister X!")
fmt.Println(result)
// Output:

View File

@ -1,6 +1,6 @@
package tokenize_test
// This file contains some tools that are used for writing parsekit tests.
// This file contains some tools that are used for writing tests.
import (
"regexp"
@ -17,12 +17,6 @@ func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat
}
}
// func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
// if notExpected == actual {
// t.Errorf("Unexpected value for %s: %q", forWhat, actual)
// }
// }
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
@ -63,21 +57,21 @@ func AssertPanic(t *testing.T, p PanicT) {
p.Function()
}
type TokenHandlerT struct {
Input string
TokenHandler tokenize.TokenHandler
MustMatch bool
Expected string
type HandlerT struct {
Input string
Handler tokenize.Handler
MustMatch bool
Expected string
}
func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) {
func AssertHandlers(t *testing.T, testSet []HandlerT) {
for _, test := range testSet {
AssertTokenHandler(t, test)
AssertHandler(t, test)
}
}
func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
func AssertHandler(t *testing.T, test HandlerT) {
result, err := tokenize.New(test.Handler)(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
@ -92,9 +86,9 @@ func AssertTokenHandler(t *testing.T, test TokenHandlerT) {
}
type TokenMakerT struct {
Input string
TokenHandler tokenize.TokenHandler
Expected []tokenize.Token
Input string
Handler tokenize.Handler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
@ -104,7 +98,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input)
result, err := tokenize.New(test.Handler)(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {

20
tokenize/callerinfo.go Normal file
View File

@ -0,0 +1,20 @@
package tokenize
import (
"fmt"
"runtime"
"strings"
)
func callerPanic(depth int, f string, args ...interface{}) {
filepos := callerFilepos(depth + 1)
m := fmt.Sprintf(f, args...)
m = strings.Replace(m, "{caller}", filepos, 1)
panic(m)
}
func callerFilepos(depth int) string {
// No error handling, because we call this method ourselves with safe depth values.
_, file, line, _ := runtime.Caller(depth + 1)
return fmt.Sprintf("%s:%d", file, line)
}

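(Editor's sketch, not part of the original commit: how the {caller} placeholder gets filled in. The mustBePositive helper is hypothetical and only serves to show the depth bookkeeping.)

// mustBePositive is a hypothetical validation helper inside package tokenize.
func mustBePositive(n int) {
	if n < 0 {
		// Depth 1 makes the panic message point at mustBePositive's caller.
		callerPanic(1, "tokenize.mustBePositive(): n must be >= 0, got %d at {caller}", n)
	}
}

// A call to mustBePositive(-1) from some file.go at line 12 would then panic with:
//   tokenize.mustBePositive(): n must be >= 0, got -1 at /path/to/file.go:12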
View File

@ -1,11 +1,11 @@
package common
package tokenize
import "fmt"
// Cursor represents the position of a cursor in various ways.
type Cursor struct {
Byte int // The cursor offset in bytes, relative to start of file
Rune int // The cursor offset in UTF8 runes, relative to start of file
Byte int // The cursor offset in bytes
Rune int // The cursor offset in UTF8 runes
Column int // The column at which the cursor is (0-indexed)
Line int // The line at which the cursor is (0-indexed)
}
@ -18,15 +18,11 @@ func (c Cursor) String() string {
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}
// Move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that have been skipped over. This
// method will take newlines into account to keep track of line numbers and
// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
//
// Note: when you are writing a parser using parsekit, it's unlikely
// that you will use this method directly. The parsekit package takes care
// of calling it at the correct time.
func (c *Cursor) Move(input string) *Cursor {
func (c *Cursor) move(input string) *Cursor {
c.Byte += len(input)
for _, r := range input {
c.Rune++

View File

@ -1,18 +1,16 @@
package common_test
package tokenize
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/common"
)
func ExampleCursor_Move() {
c := &common.Cursor{}
func ExampleCursor_move() {
c := &Cursor{}
fmt.Printf("after initialization : %s\n", c)
fmt.Printf("after 'some words' : %s\n", c.Move("some words"))
fmt.Printf("after '\\n' : %s\n", c.Move("\n"))
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.Move("\r\nskip\nlines"))
fmt.Printf("after 'some words' : %s\n", c.move("some words"))
fmt.Printf("after '\\n' : %s\n", c.move("\n"))
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
// Output:
// after initialization : start of file
@ -22,10 +20,10 @@ func ExampleCursor_Move() {
}
func ExampleCursor_String() {
c := &common.Cursor{}
c := &Cursor{}
fmt.Println(c.String())
c.Move("\nfoobar")
c.move("\nfoobar")
fmt.Println(c.String())
// Output:
@ -51,9 +49,9 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
c := common.Cursor{}
c := Cursor{}
for _, s := range test.input {
c.Move(s)
c.move(s)
}
if c.Byte != test.byte {
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)

View File

@ -1,40 +1,47 @@
package tokenize
// TokenHandler is the function type that is involved in turning a low level
// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool
type Handler func(t *API) bool
// Match is syntactic sugar that allows you to write a construction like
// New(handler)(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*Result, error) {
tokenizer := New(handler)
return tokenizer(input)
}
// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler {
func (handler Handler) Or(otherHandler Handler) Handler {
return MatchAny(handler, otherHandler)
}
// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler TokenHandler) Times(n int) TokenHandler {
func (handler Handler) Times(n int) Handler {
return MatchRep(n, handler)
}
// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler {
func (handler Handler) Then(otherHandler Handler) Handler {
return MatchSeq(handler, otherHandler)
}
// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler {
func (handler Handler) SeparatedBy(separatorHandler Handler) Handler {
return MatchSeparated(separatorHandler, handler)
}
// Optional is syntactic sugar that allows you to write a construction like
// MatchOpt(handler) as handler.Optional().
func (handler TokenHandler) Optional() TokenHandler {
func (handler Handler) Optional() Handler {
return MatchOpt(handler)
}

97
tokenize/handler_test.go Normal file
View File

@ -0,0 +1,97 @@
package tokenize_test
import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func TestSyntacticSugar(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
{"cccccc", a.Rune('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""},
{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
})
}
func ExampleHandler_Times() {
c, a := tokenize.C, tokenize.A
phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))
fmt.Println(phoneNumber.Match("0201234567"))
// Output:
// 0201234567 <nil>
}
func ExampleHandler_Then() {
c, a := tokenize.C, tokenize.A
phoneNumber := a.Rune('0').Then(c.Rep(9, a.Digit))
fmt.Println(phoneNumber.Match("0208888888"))
// Output:
// 0208888888 <nil>
}
func ExampleHandler_Or() {
c, a := tokenize.C, tokenize.A
phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Rep(8, a.Digit))
fmt.Println(phoneNumber.Match("+31209876543"))
fmt.Println(phoneNumber.Match("0031209876543"))
fmt.Println(phoneNumber.Match("0031020991234"))
fmt.Println(phoneNumber.Match("0031201234"))
// Output:
// +31209876543 <nil>
// 0031209876543 <nil>
// <nil> mismatch at start of file
// <nil> mismatch at start of file
}
func ExampleHandler_SeparatedBy() {
a, t := tokenize.A, tokenize.T
csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)
r, _ := csv.Match("123,456,7,8,9")
for i, token := range r.Tokens() {
fmt.Printf("[%d] %v\n", i, token)
}
// Output:
// [0] number("123", value = (int)123)
// [1] number("456", value = (int)456)
// [2] number("7", value = (int)7)
// [3] number("8", value = (int)8)
// [4] number("9", value = (int)9)
}
func ExampleHandler_Optional() {
c, a := tokenize.C, tokenize.A
spanish := c.Seq(
a.Rune('¿').Optional(),
c.OneOrMore(c.Except(a.Question, a.AnyRune)),
a.Rune('?').Optional())
fmt.Println(spanish.Match("¿Habla español María?"))
fmt.Println(spanish.Match("Sí, María habla español."))
// Output:
// ¿Habla español María? <nil>
// Sí, María habla español. <nil>
}
func ExampleHandler_Match() {
r, err := tokenize.A.IPv4.Match("001.002.003.004")
fmt.Println(r, err)
r, err = tokenize.A.IPv4.Match("1.2.3")
fmt.Println(r, err)
// Output:
// 1.2.3.4 <nil>
// <nil> mismatch at start of file
}

View File

@ -9,7 +9,7 @@ import (
func TestCombinators(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"abc", c.Not(a.Rune('b')), true, "a"},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"bcd", c.Not(a.Rune('b')), false, ""},
@ -71,25 +71,25 @@ func TestCombinatorPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A
AssertPanics(t, []PanicT{
{func() { a.RuneRange('z', 'a') }, true,
`TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
{func() { c.MinMax(-1, 1, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.MinMax(1, -1, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { c.MinMax(10, 5, a.Space) }, true,
`TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
{func() { c.Min(-10, a.Space) }, true,
`TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`},
`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.Max(-42, a.Space) }, true,
`TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`},
`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { a.IntegerBetween(10, -10) }, true,
`TokenHandler: MatchIntegerBetween definition error at /.*/tokenhandlers_builtin_test.go:\d+: max -10 must not be < min 10`},
`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
})
}
func TestAtoms(t *testing.T) {
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"dd", a.RuneRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""},
@ -226,7 +226,7 @@ func TestAtoms(t *testing.T) {
func TestIPv4Atoms(t *testing.T) {
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "00"},
{"000X", a.Octet, true, "000"},
@ -258,7 +258,7 @@ func TestIPv4Atoms(t *testing.T) {
func TestIPv6Atoms(t *testing.T) {
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"", a.IPv6, false, ""},
{"::", a.IPv6, true, "::"},
{"1::", a.IPv6, true, "1::"},
@ -287,7 +287,7 @@ func TestIPv6Atoms(t *testing.T) {
func TestModifiers(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
@ -308,10 +308,10 @@ func TestModifiers(t *testing.T) {
func TestTokenMakerErrorHandling(t *testing.T) {
var a, tok = tokenize.A, tokenize.T
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
parser := tokenize.NewTokenizer(invalid)
tokenizer := tokenize.New(invalid)
AssertPanic(t, PanicT{
func() { parser.Execute("no") }, false,
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
func() { tokenizer("no") }, false,
`Handler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
`invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`,
})
}
@ -378,19 +378,6 @@ func TestTokenMakers(t *testing.T) {
})
}
func TestSyntacticSugar(t *testing.T) {
var a = tokenize.A
AssertTokenHandlers(t, []TokenHandlerT{
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
{"cccccc", a.Rune('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""},
{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
})
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
@ -414,7 +401,7 @@ func TestCombination(t *testing.T) {
c.Opt(a.SquareClose),
)
AssertTokenHandlers(t, []TokenHandlerT{
AssertHandlers(t, []HandlerT{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},

157
tokenize/result.go Normal file
View File

@ -0,0 +1,157 @@
package tokenize
import (
"fmt"
)
// Result is a struct that is used for holding tokenizer results as produced
// by a tokenize.Handler. It also provides the API that Handlers and Parsers
// can use to store and retrieve the results.
type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []*Token // Tokens as added to the result by tokenize.Handler functions
cursor *Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Buffer's sliding window
err error // can be used by a Handler to report a specific issue with the input
}
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by tokenize.Handlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind af data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
Runes []rune // the runes that make up the token
Type interface{} // optional token type, can be any type that a parser author sees fit
Value interface{} // optional token value, of any type as well
}
func (t Token) String() string {
tokenType := ""
if t.Type != nil {
tokenType = fmt.Sprintf("%v", t.Type)
}
value := ""
if t.Value != nil {
value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value)
}
return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value)
}
// newResult initializes an empty Result struct.
func newResult() *Result {
return &Result{
runes: []rune{},
tokens: []*Token{},
cursor: &Cursor{},
}
}
// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s interface{}) {
r.ClearRunes()
r.addRunes(s)
}
// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
func (r *Result) addRunes(set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(2, "tokenize.Result.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the Result as a string.
func (r *Result) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
r.tokens = []*Token{}
}
// SetTokens replaces the Tokens from the Result with the provided tokens.
func (r *Result) SetTokens(tokens []*Token) {
r.ClearTokens()
for _, t := range tokens {
r.AddTokens(t)
}
}
// AddTokens is used to add Tokens to the Result.
func (r *Result) AddTokens(tokens ...*Token) {
r.tokens = append(r.tokens, tokens...)
}
// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token {
return r.tokens
}
// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
return r.tokens[idx].Value
}
// Cursor retrieves the read cursor from the Result. This is the first
// cursor position after the runes that were read and accepted by the Handler.
func (r *Result) Cursor() *Cursor {
return r.cursor
}

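(Editor's sketch, not part of the original commit: a custom Handler that rewrites its matched runes and attaches a Token through the Result API above. The "FIRST" token type and the use of strings.ToUpper are illustrative choices, not package conventions.)

func ExampleResult_AddTokens() {
	handler := func(t *tokenize.API) bool {
		r, err := t.NextRune()
		if err != nil {
			return false
		}
		t.Accept()
		// Rewrite the accepted rune and attach a typed token to the result.
		t.Result().SetRunes(strings.ToUpper(string(r)))
		t.Result().AddTokens(&tokenize.Token{Type: "FIRST", Runes: t.Result().Runes()})
		return true
	}
	result, _ := tokenize.New(handler)("parsekit")
	fmt.Println(result.Token(0))
	// Output:
	// FIRST("P")
}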
View File

@ -41,7 +41,7 @@ func ExampleToken() {
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
i := tokenize.NewAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string")
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
@ -53,10 +53,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := tokenize.NewTokenAPI(strings.NewReader("Testing"))
i := tokenize.NewAPI(strings.NewReader("Testing"))
i.Result().SetRunes(1234567)
},
Regexp: true,
Expect: `tokenize\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenhandlerresult_test.go:\d+`,
Expect: `tokenize\.Result\.AddRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
})
}

View File

@ -1,212 +0,0 @@
package tokenize
import (
"fmt"
"git.makaay.nl/mauricem/go-parsekit/common"
"git.makaay.nl/mauricem/go-parsekit/read"
)
// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from
// a parsekit.read.Reader and to report back tokenizing results. For easy
// lookahead support, a forking strategy is provided.
//
// BASIC OPERATION:
//
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the TokenAPI and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the TokenHandlerResult struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// TokenAPI must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported, is by forking a TokenAPI struct by
// calling method Fork(). This will return a forked child TokenAPI, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child TokenAPI can
// simply be discarded. The parent TokenAPI was never modified, so it can
// safely be used as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient, however, in my opinion, not very intuitive to read. It can also
// be tedious to get the cursor position back at the correct position, which
// can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser.
type TokenAPI struct {
reader *read.Reader
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position)
}
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(input interface{}) *TokenAPI {
return &TokenAPI{
reader: read.New(input),
result: newTokenHandlerResult(),
}
}
// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic.
func (i *TokenAPI) NextRune() (rune, error) {
if i.result.lastRune != nil {
common.CallerPanic(1, "tokenize.TokenAPI.NextRune(): NextRune() called at {caller} "+
"without a prior call to Accept()")
}
i.DetachChilds()
readRune, err := i.reader.RuneAt(i.result.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err
}
// Accept the last rune as read by NextRune() into the result buffer and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() {
if i.result.lastRune == nil {
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()")
} else if i.result.lastRune.err != nil {
common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r))
i.result.offset++
i.result.lastRune = nil
}
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
// read cursor position, but for the rest this is a fresh TokenAPI.
//
// By forking a TokenAPI, you can freely work with the forked child, without
// affecting the parent TokenAPI. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the TokenHandler was successful and you want to add the results
// to the parent TokenAPI, you can call TokenAPIold.Merge() on the forked
// child. This will add the runes in the result buffer to the result buffer of
// the parent. It also updates the read cursor position of the parent to that
// of the child.
//
// When processing failed, or you don't want to use the results as produced by that
// lookahead, the forked child can simply be discarded. You can continue to work
// with the parent TokenAPI as if nothing ever happened.
func (i *TokenAPI) Fork() *TokenAPI {
// Cleanup current forking / reading state.
i.DetachChilds()
i.result.lastRune = nil
// Create the new fork.
child := &TokenAPI{
reader: i.reader,
parent: i,
}
child.result = newTokenHandlerResult()
i.syncCursorTo(child)
i.child = child
return child
}
// Merge appends the TokenHandlerResult of a forked child TokenAPI to the TokenHandlerResult
// of its parent. The read cursor position of the parent is also updated to
// that of the forked child.
//
// After the merge operation, the child is reset so it can immediately be
// reused for performing another match. This means that all TokenHandlerResult data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *TokenAPI) Merge() {
if i.parent == nil {
common.CallerPanic(1, "tokenize.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI")
}
i.addResultsToParent()
i.syncCursorTo(i.parent)
i.ClearResults()
i.DetachChilds()
}
func (i *TokenAPI) addResultsToParent() {
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
}
func (i *TokenAPI) syncCursorTo(to *TokenAPI) {
to.result.offset = i.result.offset
*to.result.cursor = *i.result.cursor
}
func (i *TokenAPI) ClearResults() {
i.result.lastRune = nil
i.result.runes = []rune{}
i.result.tokens = []*Token{}
i.result.err = nil
}
func (i *TokenAPI) DetachChilds() {
if i.child != nil {
i.child.DetachChildsRecurse()
i.child = nil
}
}
func (i *TokenAPI) DetachChildsRecurse() {
if i.child != nil {
i.child.DetachChildsRecurse()
}
i.child = nil
i.parent = nil
}
func (i *TokenAPI) FlushReader() bool {
if i.result.offset > 0 {
i.reader.Flush(i.result.offset)
i.result.offset = 0
return true
}
return false
}
// Result returns the TokenHandlerResult data for the TokenAPI. The returned struct
// can be used to retrieve and to modify result data.
func (i *TokenAPI) Result() *TokenHandlerResult {
return i.result
}

View File

@ -1,174 +0,0 @@
package tokenize
import (
"fmt"
"strings"
"git.makaay.nl/mauricem/go-parsekit/common"
)
// TokenHandlerResult is a struct that is used for holding tokenizing results
// as produced by a TokenHandler. It also provides the API that TokenHandlers
// and Parsers can use to respectively store and access the results.
type TokenHandlerResult struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
cursor *common.Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err *common.Error // can be used by a TokenHandler to report a specific issue with the input
}
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by TokenHandlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
Runes []rune // the runes that make up the token
Type interface{} // optional token type, can be any type that a parser author sees fit
Value interface{} // optional token value, of any type as well
}
func (t Token) String() string {
tokenType := ""
if t.Type != nil {
tokenType = fmt.Sprintf("%v", t.Type)
}
value := ""
if t.Value != nil {
value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value)
}
return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value)
}
// newTokenHandlerResult initializes an empty TokenHandlerResult struct.
func newTokenHandlerResult() *TokenHandlerResult {
return &TokenHandlerResult{
runes: []rune{},
tokens: []*Token{},
cursor: &common.Cursor{},
}
}
// ClearRunes clears the runes in the TokenHandlerResult.
func (r *TokenHandlerResult) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the TokenHandlerResult with the provided input.
func (r *TokenHandlerResult) SetRunes(s interface{}) {
r.ClearRunes()
r.addRunes(s)
}
// AddRunes is used to add runes to the TokenHandlerResult.
func (r *TokenHandlerResult) AddRunes(set ...interface{}) {
r.addRunes(set...)
}
// addRunes is the shared implementation behind SetRunes and AddRunes.
func (r *TokenHandlerResult) addRunes(set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
common.CallerPanic(2, "tokenize.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s)
}
}
}
// Runes retrieves the Runes from the TokenHandlerResult.
func (r *TokenHandlerResult) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the TokenHandlerResult at the specified index.
func (r *TokenHandlerResult) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the TokenHandlerResult as a string.
func (r *TokenHandlerResult) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the TokenHandlerResult.
func (r *TokenHandlerResult) ClearTokens() {
r.tokens = []*Token{}
}
// SetTokens replaces the Tokens from the TokenHandlerResult with the provided input.
func (r *TokenHandlerResult) SetTokens(tokens []*Token) {
r.ClearTokens()
for _, t := range tokens {
r.AddToken(t)
}
}
// AddToken is used to add a Token to the TokenHandlerResult.
func (r *TokenHandlerResult) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// SliceOfTokens is an alias for the []*Token type. The method Tokens() returns
// this type. A String() method is defined for it, making it easy to format
// the tokens as a string for testing and debugging purposes.
type SliceOfTokens []*Token
func (ts SliceOfTokens) String() string {
parts := make([]string, len(ts))
for i, t := range ts {
parts[i] = t.String()
}
return strings.Join(parts, " ")
}
// Tokens retrieves the Tokens from the TokenHandlerResult.
func (r *TokenHandlerResult) Tokens() SliceOfTokens {
return r.tokens
}
// Token retrieves a single Token from the TokenHandlerResult at the specified index.
func (r *TokenHandlerResult) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the TokenHandlerResult Tokens.
func (r *TokenHandlerResult) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the TokenHandlerResult Token at the specified index.
func (r *TokenHandlerResult) Value(idx int) interface{} {
return r.tokens[idx].Value
}
// Cursor retrieves the read cursor from the TokenHandlerResult. This is the
// first cursor position after the runes that were read by the TokenHandler.
func (r *TokenHandlerResult) Cursor() *common.Cursor {
return r.cursor
}
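The result API above is easiest to see in a short, hypothetical package-internal sketch (newTokenHandlerResult() is unexported; real TokenHandlers obtain the struct via TokenAPI.Result() instead). The AddRunes call shows the three input types that the addRunes switch accepts:
r := newTokenHandlerResult()
r.AddRunes("ab", []rune{'c'}, 'd') // string, []rune and rune are all accepted
r.AddToken(&Token{Runes: r.Runes(), Type: "word", Value: "abcd"})
fmt.Println(r.String()) // abcd
fmt.Println(r.Tokens()) // word("abcd", value = (string)abcd)
fmt.Println(r.Value(0)) // abcd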

20
tokenize/tokenize.go Normal file
View File

@ -0,0 +1,20 @@
package tokenize
import (
"fmt"
)
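// Func is the function signature for a tokenizer, as created by New(). It
// runs the wrapped Handler against the input and returns the Result; on a
// failed match, the returned error is non-nil.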
type Func func(input interface{}) (*Result, error)
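// New instantiates a new tokenizer Func, based on the provided Handler.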
func New(tokenHandler Handler) Func {
return func(input interface{}) (*Result, error) {
api := NewAPI(input)
ok := tokenHandler(api)
if !ok {
err := fmt.Errorf("mismatch at %s", Cursor{})
return nil, err
}
return api.Result(), nil
}
}
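The New() constructor above replaces the Tokenizer struct that is deleted below, returning a plain function instead. A minimal usage sketch, assuming the built-in A.Integer handler used elsewhere in this commit and a String() accessor on Result like the one on the old TokenHandlerResult:
tokenizer := tokenize.New(tokenize.A.Integer)
result, err := tokenizer("42")
if err != nil {
fmt.Println("Error:", err)
} else {
fmt.Println("Result:", result.String()) // Result: 42
}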

View File

@ -1,34 +0,0 @@
package tokenize
import (
"git.makaay.nl/mauricem/go-parsekit/common"
)
// Tokenizer is the top-level struct that holds the configuration for
// a parser that is based solely on a TokenHandler function.
// The Tokenizer can be instantiated using the tokenize.NewTokenizer()
// method.
type Tokenizer struct {
handler TokenHandler
}
// NewTokenizer instantiates a new Tokenizer, based on the provided TokenHandler.
func NewTokenizer(tokenHandler TokenHandler) *Tokenizer {
return &Tokenizer{tokenHandler}
}
// Execute feeds the input to the wrapped TokenHandler function.
// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New().
//
// It returns the TokenHandler's TokenHandlerResult. When an error occurred
// during parsing, the error will be set; otherwise it will be nil.
func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *common.Error) {
api := NewTokenAPI(input)
ok := t.handler(api)
if !ok {
err := &common.Error{Message: "mismatch", Cursor: common.Cursor{}}
return nil, err
}
return api.Result(), nil
}

View File

@ -27,7 +27,7 @@ func ExampleTokenizer_Execute() {
ip := t.Str("ip", a.IPv4)
mask := t.Int8("mask", a.IPv4CIDRMask)
cidr := c.Seq(ip, a.Slash, mask)
tokenizer := tokenize.NewTokenizer(cidr)
tokenizer := tokenize.New(cidr)
for _, input := range []string{
"000.000.000.000/000",
@ -36,8 +36,8 @@ func ExampleTokenizer_Execute() {
"10.0.300.1/24",
"not an IPv4 CIDR",
} {
// Execute returns a TokenHandlerResult and an error, which is nil on success.
result, err := tokenizer.Execute(input)
// Execute returns a Result and an error, which is nil on success.
result, err := tokenizer(input)
if err == nil {
fmt.Printf("Result: %s\n", result.Tokens())
@ -46,9 +46,9 @@ func ExampleTokenizer_Execute() {
}
}
// Output:
// Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)
// Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)
// Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)
// Result: [ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)]
// Result: [ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)]
// Result: [ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)]
// Error: mismatch at start of file
// Error: mismatch at start of file
}
@ -77,7 +77,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
i.NextRune()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
})
}
@ -85,7 +85,7 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
})
}
@ -96,7 +96,7 @@ func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
i.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
@ -108,7 +108,7 @@ func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
f.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
@ -120,7 +120,7 @@ func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
f.Merge()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`})
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
@ -132,12 +132,12 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
i.Accept()
},
Regexp: true,
Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
@ -155,7 +155,7 @@ func TestAccept_UpdatesCursor(t *testing.T) {
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("X"))
i := tokenize.NewAPI(strings.NewReader("X"))
i.NextRune()
i.Accept()
r, err := i.NextRune()
@ -163,7 +163,7 @@ func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := tokenize.NewTokenAPI(strings.NewReader("X"))
i := tokenize.NewAPI(strings.NewReader("X"))
f := i.Fork()
f.NextRune()
f.Accept()
@ -174,6 +174,6 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func mkInput() *tokenize.TokenAPI {
return tokenize.NewTokenAPI("Testing")
func mkInput() *tokenize.API {
return tokenize.NewAPI("Testing")
}

View File

@ -6,7 +6,7 @@ import (
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := NewTokenAPI("Testing")
i := NewAPI("Testing")
i.NextRune()
i.Accept() // T
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
@ -31,7 +31,7 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewTokenAPI("Testing")
i := NewAPI("Testing")
i.NextRune()
i.Accept()
f1 := i.Fork()
@ -63,7 +63,7 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := NewTokenAPI("Testing")
i := NewAPI("Testing")
f1 := i.Fork()
f2 := f1.Fork()
f3 := f2.Fork()
@ -99,13 +99,13 @@ func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
i := NewTokenAPI("Testing")
i := NewAPI("Testing")
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil")
AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
i.Accept()
AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset")
AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
AssertEqual(t, 1, i.result.offset, "API.result.offset")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}