diff --git a/common/error.go b/common/error.go deleted file mode 100644 index de5c415..0000000 --- a/common/error.go +++ /dev/null @@ -1,44 +0,0 @@ -package common - -import ( - "fmt" - "runtime" - "strings" -) - -// Error is used as the error type when parsing errors occur. -// The error includes some context information to allow for useful -// error messages to the user. -type Error struct { - Message string - Cursor Cursor -} - -func (err *Error) Error() string { - if err == nil { - CallerPanic(1, "common.Error.Error(): method called with nil error at {caller}") - } - return fmt.Sprintf("%s at %s", err.Message, err.Cursor) -} - -func CallerFunc(depth int) string { - // No error handling, because we call this method ourselves with safe depth values. - pc, _, _, _ := runtime.Caller(depth + 1) - caller := runtime.FuncForPC(pc) - parts := strings.Split(caller.Name(), ".") - funcName := parts[len(parts)-1] - return funcName -} - -func callerFilepos(depth int) string { - // No error handling, because we call this method ourselves with safe depth values. - _, file, line, _ := runtime.Caller(depth + 1) - return fmt.Sprintf("%s:%d", file, line) -} - -func CallerPanic(depth int, f string, args ...interface{}) { - filepos := callerFilepos(depth + 1) - m := fmt.Sprintf(f, args...) - m = strings.Replace(m, "{caller}", filepos, 1) - panic(m) -} diff --git a/common/error_test.go b/common/error_test.go deleted file mode 100644 index 28107f7..0000000 --- a/common/error_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package common_test - -import ( - "fmt" - - "git.makaay.nl/mauricem/go-parsekit/common" -) - -func ExampleError() { - err := &common.Error{ - Message: "it broke down", - Cursor: common.Cursor{Line: 9, Column: 41}, - } - - fmt.Println(err.Error()) - fmt.Printf("%s\n", err) - // Output: - // it broke down at line 10, column 42 - // it broke down at line 10, column 42 -} diff --git a/examples/example_basiccalculator1_test.go b/examples/example_basiccalculator1_test.go index 896f1fb..3ce04c6 100644 --- a/examples/example_basiccalculator1_test.go +++ b/examples/example_basiccalculator1_test.go @@ -10,8 +10,7 @@ package examples import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) @@ -49,17 +48,17 @@ func Example_basicCalculator1() { // Input: "42+ ", got error: unexpected input (expected integer number) at line 1, column 4 } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // ComputeSimple interprets a simple calculation, consisting of only integers // and add or subtract operators. It returns the result of the calculation. // An error is returned in case the calculation failed. 
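// A hypothetical call (illustrative sketch only, reusing the function below) would look like: // // result, err := ComputeSimple("10 + 2 - 3") // if err != nil { // fmt.Println("error:", err) // return // } // fmt.Println(result) // prints: 9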
-func ComputeSimple(calculation string) (int64, *common.Error) { +func ComputeSimple(calculation string) (int64, error) { calculator := &simpleCalculator{op: +1} - parser := parsekit.NewParser(calculator.number) - err := parser.Execute(calculation) + parser := parse.New(calculator.number) + err := parser(calculation) return calculator.Result, err } @@ -76,7 +75,7 @@ var dropBlank = tokenize.M.Drop(tokenize.C.Opt(tokenize.A.Blanks)) var bareInteger = tokenize.C.Seq(dropBlank, tokenize.A.Integer, dropBlank) var int64Token = tokenize.T.Int64(nil, bareInteger) -func (c *simpleCalculator) number(p *parsekit.ParseAPI) { +func (c *simpleCalculator) number(p *parse.API) { if p.Accept(int64Token) { c.Result += c.op * p.Result().Value(0).(int64) p.Handle(c.operatorOrEndOfFile) @@ -85,7 +84,7 @@ func (c *simpleCalculator) number(p *parsekit.ParseAPI) { } } -func (c *simpleCalculator) operatorOrEndOfFile(p *parsekit.ParseAPI) { +func (c *simpleCalculator) operatorOrEndOfFile(p *parse.API) { var A = tokenize.A switch { case p.Accept(A.Add): diff --git a/examples/example_basiccalculator2_test.go b/examples/example_basiccalculator2_test.go index 1ea5fda..ad2d597 100644 --- a/examples/example_basiccalculator2_test.go +++ b/examples/example_basiccalculator2_test.go @@ -16,8 +16,7 @@ import ( "fmt" "math" - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) @@ -62,9 +61,9 @@ func Example_basicCalculator2() { // Input: "10+20-((4*10) + 17", got error: unexpected end of file (expected ')') at line 1, column 19 } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // calculator implements a recursive descent parser that is responsible for parsing // the input calculation string according to the grammar. @@ -77,15 +76,15 @@ type calculator struct { // Compute takes a calculation string as input and returns the interpreted result // value for the calculation. An error can be returned as well, in case the // calculation fails for some reason. 
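// A hypothetical call (illustrative sketch only, reusing the function below) would look like: // // value, err := Compute("(3 + 4) * 2") // if err == nil { // fmt.Println(value) // prints: 14 // }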
-func Compute(input string) (float64, *common.Error) { +func Compute(input string) (float64, error) { calc := &calculator{} - parser := parsekit.NewParser(calc.calculation) - err := parser.Execute(input) + parser := parse.New(calc.calculation) + err := parser(input) return calc.result, err } // <calculation> = <expr> <EOF> -func (calc *calculator) calculation(p *parsekit.ParseAPI) { +func (calc *calculator) calculation(p *parse.API) { if p.Handle(calc.expr) { p.ExpectEndOfFile() calc.result = calc.interpreter.result @@ -93,7 +92,7 @@ } // <expr> = ( <term> | <term> (ADD|SUB) <expr> ) -func (calc *calculator) expr(p *parsekit.ParseAPI) { +func (calc *calculator) expr(p *parse.API) { calc.interpreter.push() var A = tokenize.A @@ -111,7 +110,7 @@ } // <term> = ( <factor> | <factor> (MUL|DIV) <term> ) -func (calc *calculator) term(p *parsekit.ParseAPI) { +func (calc *calculator) term(p *parse.API) { calc.interpreter.push() var A = tokenize.A @@ -130,7 +129,7 @@ // <space> = ( (SPACE|TAB) | "" ) // <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space> -func (calc *calculator) factor(p *parsekit.ParseAPI) { +func (calc *calculator) factor(p *parse.API) { var A, T = tokenize.A, tokenize.T p.Accept(A.Blanks) switch { @@ -152,9 +151,9 @@ p.Accept(A.Blanks) } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // The computational interpreter, used by the calculator. -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― type stackFrame struct { a float64 diff --git a/examples/example_dutchpostcode_test.go b/examples/example_dutchpostcode_test.go index 65bc810..92177f9 100644 --- a/examples/example_dutchpostcode_test.go +++ b/examples/example_dutchpostcode_test.go @@ -1,5 +1,5 @@ // In this example, a Parser is created that can parse and normalize Dutch postcodes. -// The implementation uses only TokenHandler functions and does not implement a +// The implementation uses only Handler functions and does not implement a // full-fledged state-based Parser for it. package examples @@ -11,7 +11,7 @@ import ( ) func Example_dutchPostcodeUsingTokenizer() { - parser := createPostcodeTokenizer() + tokenizer := createPostcodeTokenizer() for i, input := range []string{ "1234 AB", "1234AB", "1234 ab", "1234ab", "1234", "1234A", "1234 ABC", "", "\xcd2222AB", } { - result, err := parser.Execute(input) + result, err := tokenizer(input) if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { @@ -47,27 +47,27 @@ // [8] Input: "\xcd2222AB" Error: mismatch at start of file } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― -func createPostcodeTokenizer() *tokenize.Tokenizer { - // Easy access to the parsekit definitions. +func createPostcodeTokenizer() tokenize.Func { + // Easy access to the tokenize definitions.
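+ // (As used below: C combines handlers, A matches atomic input, M modifies handler output and T produces tokens.)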
C, A, M, T := tokenize.C, tokenize.A, tokenize.M, tokenize.T - // TokenHandler functions are created and combined to satisfy these rules: - // - A Dutch postcode consists of 4 digits and 2 letters (1234XX). - // - The first digit is never a zero. - // - A space between letters and digits is optional. - // - It is good form to write the letters in upper case. - // - It is good form to use a single space between digits and letters. + // Handler functions are created and combined to satisfy these rules: + // • A Dutch postcode consists of 4 digits and 2 letters (1234XX). + // • The first digit is never a zero. + // • A space between letters and digits is optional. + // • It is good form to write the letters in upper case. + // • It is good form to use a single space between digits and letters. pcDigits := A.DigitNotZero.Then(A.Digit.Times(3)) pcLetter := A.ASCIILower.Or(A.ASCIIUpper) pcLetters := M.ToUpper(pcLetter.Times(2)) space := M.Replace(A.Blanks.Optional(), " ") postcode := C.Seq(T.Str("PCD", pcDigits), space, T.Str("PCL", pcLetters), A.EndOfFile) - // Create a Tokenizer that wraps the 'postcode' TokenHandler and allows + // Create a Tokenizer that wraps the 'postcode' Handler and allows // us to match some input against that handler. - return tokenize.NewTokenizer(postcode) + return tokenize.New(postcode) } diff --git a/examples/example_helloManyStateParser_test.go b/examples/example_helloManyStateParser_test.go index eaaddb7..392848a 100644 --- a/examples/example_helloManyStateParser_test.go +++ b/examples/example_helloManyStateParser_test.go @@ -2,8 +2,8 @@ // like "Hello, <name>!", and that extracts the name from it. // // This implementation uses a state-based Parser for it, and it does not -// implement any custom parser/combinator TokenHandler functions. Note that -// things are much easier to implement using custom TokenHandlers (see the +// implement any custom parser/combinator Handler functions. Note that +// things are much easier to implement using custom Handlers (see the // helloParserCombinator example for this). Doing this fully parser-based // implementation is mainly for your learning pleasure. // @@ -19,8 +19,7 @@ import ( "fmt" "strings" - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) @@ -68,21 +67,21 @@ // [15] Input: "hello, \t!"
Error: The name cannot be empty } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― type helloparser1 struct { greetee string } -func (h *helloparser1) Parse(input string) (string, *common.Error) { - parser := parsekit.NewParser(h.start) - err := parser.Execute(input) +func (h *helloparser1) Parse(input string) (string, error) { + parser := parse.New(h.start) + err := parser(input) return h.greetee, err } -func (h *helloparser1) start(p *parsekit.ParseAPI) { +func (h *helloparser1) start(p *parse.API) { a := tokenize.A if p.Accept(a.StrNoCase("hello")) { p.Handle(h.comma) @@ -91,7 +90,7 @@ } } -func (h *helloparser1) comma(p *parsekit.ParseAPI) { +func (h *helloparser1) comma(p *parse.API) { a := tokenize.A switch { case p.Accept(a.Blanks): @@ -103,7 +102,7 @@ } } -func (h *helloparser1) startName(p *parsekit.ParseAPI) { +func (h *helloparser1) startName(p *parse.API) { a := tokenize.A p.Accept(a.Blanks) if p.Peek(a.AnyRune) { @@ -113,7 +112,7 @@ } } -func (h *helloparser1) name(p *parsekit.ParseAPI) { +func (h *helloparser1) name(p *parse.API) { a := tokenize.A switch { case p.Peek(a.Excl): @@ -126,7 +125,7 @@ } } -func (h *helloparser1) exclamation(p *parsekit.ParseAPI) { +func (h *helloparser1) exclamation(p *parse.API) { a := tokenize.A if p.Accept(a.Excl) { p.Handle(h.end) @@ -138,7 +137,7 @@ // Here we could have used p.ExpectEndOfFile() as well, but a slightly // different route was taken to implement a more friendly 'end of greeting' // error message. -func (h *helloparser1) end(p *parsekit.ParseAPI) { +func (h *helloparser1) end(p *parse.API) { var a = tokenize.A if !p.Accept(a.EndOfFile) { p.Expected("end of greeting") diff --git a/examples/example_helloParserCombinator_test.go b/examples/example_helloParserCombinator_test.go index f7b295d..3d4ed1b 100644 --- a/examples/example_helloParserCombinator_test.go +++ b/examples/example_helloParserCombinator_test.go @@ -1,7 +1,7 @@ // In this example, a parser is created that is able to parse input that looks // like "Hello, <name>!", and that extracts the name from it. // -// The implementation uses only parser/combinator TokenHandler functions and does +// The implementation uses only parser/combinator Handler functions and does // not implement a full-fledged state-based Parser for it. If you want to see the // same kind of functionality, implemented using a Parser, take a look at the // other hello examples. package examples import ( "fmt" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) func Example_helloWorldUsingTokenizer() { - parser := createHelloTokenizer() + tokenizer := createHelloTokenizer() for i, input := range []string{ "Hello, world!", @@ -25,7 +25,7 @@ "Hello, world", "Hello,!", } { - output, err := parser.Execute(input) + output, err := tokenizer(input) if err != nil { fmt.Printf("[%d] Input: %q Error: %s\n", i, input, err) } else { @@ -42,16 +42,16 @@ // [6] Input: "Hello,!"
Error: mismatch at start of file } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― -func createHelloTokenizer() *tokenize.Tokenizer { +func createHelloTokenizer() tokenize.Func { // Easy access to parsekit definition. c, a, m := tokenize.C, tokenize.A, tokenize.M - // Using the parser/combinator support of parsekit, we create a TokenHandler function - // that does all the work. The 'greeting' TokenHandler matches the whole input and + // Using the parser/combinator support of parsekit, we create a Handler function + // that does all the work. The 'greeting' Handler matches the whole input and // drops all but the name from it. hello := a.StrNoCase("hello") comma := c.Seq(c.Opt(a.Blank), a.Comma, c.Opt(a.Blank)) @@ -63,7 +63,7 @@ func createHelloTokenizer() *tokenize.Tokenizer { Then(m.Drop(a.Excl)). Then(a.EndOfFile) - // Create a Tokenizer that wraps the 'greeting' TokenHandler and allows + // Create a Tokenizer that wraps the 'greeting' Handler and allows // us to match some input against that handler. - return tokenize.NewTokenizer(greeting) + return tokenize.New(greeting) } diff --git a/examples/example_helloSingleStateParser_test.go b/examples/example_helloSingleStateParser_test.go index 94b9d81..ffe6a48 100644 --- a/examples/example_helloSingleStateParser_test.go +++ b/examples/example_helloSingleStateParser_test.go @@ -1,9 +1,9 @@ // This is the same as the other hello examples, except that in this // implementation the state machine is implemented using a combination of some -// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls +// Handlers and only a single state, in which multiple API.On() calls // are combined to do all the work in one go. // -// Note that things are much easier to implement using custom TokenHandlers (see +// Note that things are much easier to implement using custom Handlers (see // the other helloParserCombinator example for this). Doing this implementation // is mainly for your learning pleasure. // @@ -16,8 +16,7 @@ package examples import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) @@ -66,21 +65,21 @@ func Example_helloWorldUsingParser2() { // [14] Input: "HELLO, Buster! Eat this!" Error: too much stuff going on after the closing '!' 
at line 1, column 15 } -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― // Implementation of the parser -// --------------------------------------------------------------------------- +// ――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――― type helloparser2 struct { greetee string } -func (h *helloparser2) Parse(input string) (string, *common.Error) { - parser := parsekit.NewParser(h.start) - err := parser.Execute(input) +func (h *helloparser2) Parse(input string) (string, error) { + parser := parse.New(h.start) + err := parser(input) return h.greetee, err } -func (h *helloparser2) start(p *parsekit.ParseAPI) { +func (h *helloparser2) start(p *parse.API) { c, a, m := tokenize.C, tokenize.A, tokenize.M if !p.Accept(a.StrNoCase("hello")) { p.Error("the greeting is not being friendly") diff --git a/examples/examples_state_test.go b/examples/examples_state_test.go index dcc6b9b..550c0ab 100644 --- a/examples/examples_state_test.go +++ b/examples/examples_state_test.go @@ -1,8 +1,8 @@ // In this example, we show that any type can be extended into a parser, -// filling that type with data from the ParseHandler methods. +// filling that type with data from the Handler methods. // // Here, we create a custom type 'Chunks', which is an alias -// for []string. We add a ParseHandler method directly to that type +// for []string. We add a Handler method directly to that type // and let the parsing code fill the slice with strings during parsing. package examples @@ -10,23 +10,22 @@ package examples import ( "fmt" - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/common" + "git.makaay.nl/mauricem/go-parsekit/parse" "git.makaay.nl/mauricem/go-parsekit/tokenize" ) type Chunks []string -func (l *Chunks) AddChopped(s string, chunkSize int) *common.Error { +func (l *Chunks) AddChopped(s string, chunkSize int) error { c, a := tokenize.C, tokenize.A chunkOfRunes := c.MinMax(1, chunkSize, a.AnyRune) - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { + parser := parse.New(func(p *parse.API) { for p.Accept(chunkOfRunes) { *l = append(*l, p.Result().String()) } }) - return parser.Execute(s) + return parser(s) } func Example_usingSliceAsParserState() { diff --git a/parse/api.go b/parse/api.go new file mode 100644 index 0000000..9645e5d --- /dev/null +++ b/parse/api.go @@ -0,0 +1,235 @@ +package parse + +import ( + "fmt" + "io" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +// API holds the internal state of a parse run and provides an API that +// parse.Handler functions can use to: +// +// • communicate with tokenize.Handler functions (Peek, Accept, ExpectEndOfFile, Result) +// +// • update the parser status (Error, Expected, Stop) +// +// • call other parse.Handler functions, the core of recursive-descent parsing (Handle) +type API struct { + tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions + result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek() + loopCheck map[string]bool // used for parser loop detection + err error // parse error, retrieved by Error(), using API methods is denied when set + stopped bool // a boolean set to true by Stop(), using API methods is denied when true +} + +// Peek checks if the upcoming input data matches the provided tokenize.Handler. +// If it does, then true will be returned, false otherwise. 
The read cursor +// will be kept at the same position, so the next call to Peek() or Accept() +// will start from the same cursor position. +// +// After calling this method, you can retrieve the produced tokenize.Result +// struct using the Result() method. +func (p *API) Peek(tokenHandler tokenize.Handler) bool { + p.result = nil + forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) + if ok { + p.result = forkedAPI.Result() + p.tokenAPI.Reset() + } + return ok +} + +// Accept checks if the upcoming input data matches the provided tokenize.Handler. +// If it does, then true will be returned and the read cursor will be moved +// forward to beyond the match that was found. Otherwise false will be returned +// and the read cursor will stay at the same position. +// +// After calling this method, you can retrieve the produced tokenize.Result +// using the Result() method. +func (p *API) Accept(tokenHandler tokenize.Handler) bool { + p.result = nil + forkedAPI, ok := p.invokeHandler("Accept", tokenHandler) + if ok { + forkedAPI.Merge() + p.result = p.tokenAPI.Result() + forkedAPI.Dispose() + if p.tokenAPI.FlushInput() { + p.initLoopCheck() + } + } + return ok +} + +func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (*tokenize.API, bool) { + p.panicWhenStoppedOrInError(name) + p.checkForLoops() + if tokenHandler == nil { + callerPanic(2, "parse.API.%s(): %s() called with nil tokenHandler argument at {caller}", name, name) + } + + p.result = nil + p.tokenAPI.Reset() + child := p.tokenAPI.Fork() + ok := tokenHandler(child) + + return child, ok +} + +// panicWhenStoppedOrInError will panic when the parser has produced an error +// or when it has been stopped. It is used from the API methods to +// prevent further calls to the API on these occasions. +// +// Basically, this guard helps with proper coding of parsers, making sure +// that clean routes are followed. You can consider this check a runtime +// unit test. +func (p *API) panicWhenStoppedOrInError(name string) { + if !p.isStoppedOrInError() { + return + } + + after := "Error()" + if p.stopped { + after = "Stop()" + } + + callerPanic(2, "parse.API.%s(): Illegal call to %s() at {caller}: "+ + "no calls allowed after API.%s", name, name, after) +} + +func (p *API) isStoppedOrInError() bool { + return p.stopped || p.err != nil +} + +// initLoopCheck clears the loop check data, a map in which we keep +// track of the lines of code from which Accept() and/or Peek() are called. +// When Accept() is called, and the parser moved forward in the input data, +// this method is called to reset the map for the new read cursor position. +func (p *API) initLoopCheck() { + p.loopCheck = map[string]bool{} +} + +// checkForLoops checks if the line of code from which Accept() or Peek() +// was called has been seen before for the current read cursor position. +// If yes, then the parser is in a loop and the method will panic. +func (p *API) checkForLoops() { + filepos := callerFilepos(3) + if _, ok := p.loopCheck[filepos]; ok { + callerPanic(3, "parse.API: Loop detected in parser at {caller}") + } + p.loopCheck[filepos] = true +} + +// Result returns the tokenize.Result struct, containing results as produced by the +// last Peek() or Accept() call. +// +// When Result() is called without first doing a Peek() or Accept(), then no +// result will be available and the method will panic.
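+// To illustrate how Peek(), Accept() and Result() work together, a handler could be written like this (an illustrative sketch; handleExclamation is a hypothetical name): +// +// func handleExclamation(p *parse.API) { +// a := tokenize.A +// if p.Peek(a.Excl) { // look ahead; the read cursor stays put +// p.Accept(a.Excl) // same match, but now the input is consumed +// fmt.Println(p.Result()) // the result of the last Accept() call +// } +// }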
+func (p *API) Result() *tokenize.Result { + result := p.result + if p.result == nil { + callerPanic(1, "parse.API.Result(): Result() called "+ + "at {caller} without calling API.Peek() or API.Accept() on beforehand") + } + return result +} + +// Handle executes another parse.Handler function from within the active +// parse.Handler function. +// +// The boolean return value is true when the parser can still continue. +// It will be false when either an error was set using Error(), or the +// parser was stopped using Stop(). +// +// Instead of calling another handler using this method, you can also call +// that other handler directly. However, it is generally advised to make use +// of this method, because it performs some sanity checks and it will return +// an easy-to-use boolean indicating whether the parser can continue or not. +func (p *API) Handle(parseHandler Handler) bool { + p.panicWhenStoppedOrInError("Handle") + p.panicWhenHandlerNil(parseHandler) + parseHandler(p) + return !p.isStoppedOrInError() +} + +func (p *API) panicWhenHandlerNil(parseHandler Handler) { + if parseHandler == nil { + callerPanic(2, "parse.API.Handle(): Handle() called with nil input at {caller}") + } +} + +// Stop tells the parser that the parsing process has been completed. +// +// When the initial parse.Handler function returns without stopping first +// and without running into an error, the method ExpectEndOfFile() is automatically +// called to verify if the end of the file was reached. If not, then things will +// end in an unexpected input error. +// +// Note: +// Even though this fallback mechanism will work in a lot of cases, try to make +// your parser explicit about things and call Stop() actively yourself. +// +// After stopping, no more calls to API methods are allowed. +// Calling a method in this state will result in a panic. +func (p *API) Stop() { + p.stopped = true +} + +// Error sets the error message in the API. +// +// After setting an error, no more calls to API methods are allowed. +// Calling a method in this state will result in a panic. +// TODO: the error can only be returned from a parse run, never read back +// through this API, so a name like SetError() might express the intent better. +func (p *API) Error(format string, args ...interface{}) { + // No call to p.panicWhenStoppedOrInError(), to allow a parser to + // set a different error message when needed. + message := fmt.Sprintf(format, args...) + p.err = fmt.Errorf("%s at %s", message, *p.tokenAPI.Result().Cursor()) +} + +// ExpectEndOfFile can be used to check if the input is at end of file. +// +// When it finds that the end of the file was indeed reached, then the parser +// will be stopped through Stop(). Otherwise, the unexpected input is reported +// using Expected("end of file"). +func (p *API) ExpectEndOfFile() { + p.panicWhenStoppedOrInError("ExpectEndOfFile") + if p.Peek(tokenize.A.EndOfFile) { + p.Stop() + } else { + p.Expected("end of file") + } +} + +// Expected sets a parser error that indicates that some unexpected +// input was encountered. +// +// The 'expected' argument can be an empty string. In that case the error +// message will not contain a description of the expected input. +// +// This method automatically produces an error message for a couple of situations: +// +// • the input simply didn't match the expectation +// +// • the end of the input was reached +// +// • there was an error while reading the input.
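+// For reference, the first two situations produce messages like these (taken from the example output elsewhere in this change): +// +// unexpected input (expected integer number) at line 1, column 4 +// unexpected end of file (expected ')') at line 1, column 19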
+func (p *API) Expected(expected string) { + p.panicWhenStoppedOrInError("Expected") + _, err := p.tokenAPI.NextRune() + switch { + case err == nil: + p.Error("unexpected input%s", fmtExpects(expected)) + case err == io.EOF: + p.Error("unexpected end of file%s", fmtExpects(expected)) + default: + p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) + } +} + +func fmtExpects(expected string) string { + if expected == "" { + return "" + } + return fmt.Sprintf(" (expected %s)", expected) +} diff --git a/assertions_test.go b/parse/assertions_test.go similarity index 64% rename from assertions_test.go rename to parse/assertions_test.go index 7d9a365..dcd8fc8 100644 --- a/assertions_test.go +++ b/parse/assertions_test.go @@ -1,6 +1,6 @@ -package parsekit +package parse -// This file contains some tools that are used for writing parsekit tests. +// This file contains some tools that are used for writing tests. import ( "regexp" @@ -17,12 +17,6 @@ func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat } } -func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) { - if notExpected == actual { - t.Errorf("Unexpected value for %s: %q", forWhat, actual) - } -} - func AssertTrue(t *testing.T, b bool, assertion string) { if !b { t.Errorf("Assertion %s is false", assertion) @@ -63,38 +57,10 @@ func AssertPanic(t *testing.T, p PanicT) { p.Function() } -type TokenHandlerT struct { - Input string - TokenHandler tokenize.TokenHandler - MustMatch bool - Expected string -} - -func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { - for _, test := range testSet { - AssertTokenHandler(t, test) - } -} - -func AssertTokenHandler(t *testing.T, test TokenHandlerT) { - result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) - if test.MustMatch { - if err != nil { - t.Errorf("Test %q failed with error: %s", test.Input, err) - } else if output := result.String(); output != test.Expected { - t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output) - } - } else { - if err == nil { - t.Errorf("Test %q failed: should not match, but it did", test.Input) - } - } -} - type TokenMakerT struct { - Input string - TokenHandler tokenize.TokenHandler - Expected []tokenize.Token + Input string + Handler tokenize.Handler + Expected []tokenize.Token } func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { @@ -104,7 +70,8 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { } func AssertTokenMaker(t *testing.T, test TokenMakerT) { - result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) + tokenizer := tokenize.New(test.Handler) + result, err := tokenizer(test.Input) if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) } else { diff --git a/parse/callerinfo.go b/parse/callerinfo.go new file mode 100644 index 0000000..a9a9d0f --- /dev/null +++ b/parse/callerinfo.go @@ -0,0 +1,20 @@ +package parse + +import ( + "fmt" + "runtime" + "strings" +) + +func callerFilepos(depth int) string { + // No error handling, because we call this method ourselves with safe depth values. + _, file, line, _ := runtime.Caller(depth + 1) + return fmt.Sprintf("%s:%d", file, line) +} + +func callerPanic(depth int, f string, args ...interface{}) { + filepos := callerFilepos(depth + 1) + m := fmt.Sprintf(f, args...) 
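+ // Substitute the first {caller} placeholder in the formatted message with the caller's file:line position.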
+ m = strings.Replace(m, "{caller}", filepos, 1) + panic(m) +} diff --git a/parse/handler.go b/parse/handler.go new file mode 100644 index 0000000..3e32639 --- /dev/null +++ b/parse/handler.go @@ -0,0 +1,10 @@ +// Package parse provides tooling to build a state machine-style recursive descent parser. +package parse + +// Handler defines the type of function that must be implemented to handle +// a parsing state in a Parser state machine. +// +// A Handler function gets an API struct as its input. This struct holds +// all the internal state for the parsing state machine and provides the +// interface that the Handler uses to interact with the parser. +type Handler func(*API) diff --git a/parse/parse.go b/parse/parse.go new file mode 100644 index 0000000..7dbb28b --- /dev/null +++ b/parse/parse.go @@ -0,0 +1,46 @@ +// Package parse provides tooling to build a state machine-style recursive descent parser. +package parse + +import ( + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +// Func is the function signature as returned by New: a function that takes +// any supported type of input, executes a parse run and returns an error +// (or nil when all went right). +type Func func(interface{}) error + +// New instantiates a new parser. +// +// The parser is a state machine-style recursive descent parser, in which +// parse.Handler functions are used to move the state machine forward during +// parsing. This style of parser is typically used for parsing programming +// languages and structured data formats (like json, xml, toml, etc.) +// +// The startHandler argument points the parser to the parse.Handler function +// that must be executed at the start of the parsing process. From there on +// other parse.Handler functions can be invoked recursively to implement the +// parsing process. +// +// This function returns a function that can be invoked to run the parser +// on the provided input data. For an overview of allowed inputs, take a +// look at the documentation for parsekit.read.New(). +func New(startHandler Handler) Func { + if startHandler == nil { + callerPanic(1, "parsekit.parse.New(): New() called with nil input at {caller}") + } + return func(input interface{}) error { + api := &API{ + tokenAPI: tokenize.NewAPI(input), + loopCheck: map[string]bool{}, + } + if api.Handle(startHandler) { + // Handle returned true, indicating that parsing could still continue. + // There was no error and the parsing has not actively been Stop()-ed. + // Let's assume that we actually reached the end of the parsing successfully + // and try to make the best of it. + api.ExpectEndOfFile() + } + return api.err + } +} diff --git a/parse/parse_test.go b/parse/parse_test.go new file mode 100644 index 0000000..c49299e --- /dev/null +++ b/parse/parse_test.go @@ -0,0 +1,337 @@ +package parse_test + +import ( + "fmt" + "testing" + + "git.makaay.nl/mauricem/go-parsekit/parse" + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +func ExampleNew_usingAcceptedRunes() { + // Easy access to the tokenize definitions. + a := tokenize.A + + matches := []string{} + + parser := parse.New(func(p *parse.API) { + for p.Accept(a.AnyRune) { + matches = append(matches, p.Result().String()) + } + p.ExpectEndOfFile() + }) + err := parser("¡Any will dö!") + + fmt.Printf("Matches = %q, Error = %v\n", matches, err) + // Output: + // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil> +} + +func ExampleNew_usingTokens() { + // Easy access to the tokenize definitions.
+ c, a, tok := tokenize.C, tokenize.A, tokenize.T + + parser := parse.New(func(p *parse.API) { + if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) { + fmt.Printf("Runes accepted: %q\n", p.Result().String()) + fmt.Printf("Tokens:\n") + for i, token := range p.Result().Tokens() { + fmt.Printf("[%d] %s\n", i, token) + } + } + p.ExpectEndOfFile() + }) + parser("¡ök!") + + // Output: + // Runes accepted: "¡ök!" + // Tokens: + // [0] RUNE("¡", value = (int32)161) + // [1] RUNE("ö", value = (int32)246) + // [2] RUNE("k", value = (int32)107) + // [3] RUNE("!", value = (int32)33) +} + +func ExampleAPI_Expected() { + parser := parse.New(func(p *parse.API) { + p.Expected("a thing") + }) + err := parser("Whatever, this parser will never be happy...") + fmt.Printf("Error: %s\n", err) + + // Output: + // Error: unexpected input (expected a thing) at start of file +} + +func ExampleAPI_Accept_inIfStatement() { + parser := parse.New(func(p *parse.API) { + // When a case-insensitive match on "Yowza!" is found by the + // tokenizer, then Accept() will make the result available + // through API.Result() + if p.Accept(tokenize.A.StrNoCase("Yowza!")) { + // Result.String() returns a string containing all + // accepted runes that were matched against. + fmt.Println(p.Result().String()) + } + }) + parser("YOWZA!") + + // Output: + // YOWZA! +} + +func ExampleAPI_Accept_inSwitchStatement() { + var result string + parser := parse.New(func(p *parse.API) { + for loop := true; loop; { + switch { + case p.Accept(tokenize.A.Rune('X')): + // NOOP, skip this rune + case p.Accept(tokenize.A.AnyRune): + result += p.Result().String() + default: + loop = false + } + } + }) + parser("HXeXllXoXX, XXwoXrlXXXd!") + fmt.Println(result) + + // Output: + // Hello, world! +} + +func ExampleAPI_Stop() { + C, A := tokenize.C, tokenize.A + + parser := parse.New(func(p *parse.API) { + fmt.Printf("First word: ") + for p.Accept(C.Not(A.Space)) { + fmt.Printf("%s", p.Result()) + } + p.Stop() + }) + parser("Input with spaces") + + // Output: + // First word: Input +} + +func ExampleAPI_Stop_notCalledAndNoInputPending() { + C, A := tokenize.C, tokenize.A + + parser := parse.New(func(p *parse.API) { + fmt.Printf("Word: ") + for p.Accept(C.Not(A.Space)) { + fmt.Printf("%s", p.Result()) + } + fmt.Printf("\n") + }) + err := parser("Troglodyte") + fmt.Printf("Error is nil: %t\n", err == nil) + + // Output: + // Word: Troglodyte + // Error is nil: true +} + +func ExampleAPI_Stop_notCalledButInputPending() { + C, A := tokenize.C, tokenize.A + + parser := parse.New(func(p *parse.API) { + fmt.Printf("First word: ") + for p.Accept(C.Not(A.Space)) { + fmt.Printf("%s", p.Result()) + } + fmt.Printf("\n") + }) + err := parser("Input with spaces") + fmt.Printf("Error: %s\n", err) + + // Output: + // First word: Input + // Error: unexpected input (expected end of file) at line 1, column 6 +} + +func ExampleAPI_Peek() { + // Definition of a fantasy serial number format. + C, A := tokenize.C, tokenize.A + serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits) + + // This handler is able to handle serial numbers. + serialnrHandler := func(p *parse.API) { + if p.Accept(serialnr) { + fmt.Println(p.Result().String()) + } + } + + // Start could function as a sort of dispatcher, handing over + // control to the correct Handler function, based on the input. + start := func(p *parse.API) { + if p.Peek(tokenize.A.Asterisk) { + p.Handle(serialnrHandler) + return + } + // ... other cases could go here ... 
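+ // A hypothetical second branch could dispatch on a leading digit in the same way (sketch only; bareNumberHandler is not defined in this example): +// +// if p.Peek(tokenize.A.Digit) { +// p.Handle(bareNumberHandler) +// return +// }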
+ } + + parser := parse.New(start) + parser("#XX1234") + parser("*ay432566") + parser("*ZD987112") + + // Output: + // *ZD987112 +} + +func TestGivenNullHandler_NewPanics(t *testing.T) { + parse.AssertPanic(t, parse.PanicT{ + Function: func() { parse.New(nil) }, + Regexp: true, + Expect: `parsekit\.parse\.New\(\): New\(\) called ` + + `with nil input at /.*/parse_test\.go:\d+`}) +} + +func TestGivenNullHandler_HandlePanics(t *testing.T) { + brokenHandler := func(p *parse.API) { + p.Handle(nil) + } + parser := parse.New(brokenHandler) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { parser("") }, + Regexp: true, + Expect: `parse\.API\.Handle\(\): Handle\(\) called with nil input ` + + `at /.*/parse_test\.go:\d+`}) +} +func TestGivenNilHandler_AcceptPanics(t *testing.T) { + p := parse.New(func(p *parse.API) { + p.Accept(nil) + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { p("") }, + Regexp: true, + Expect: `parse\.API\.Accept\(\): Accept\(\) called with nil ` + + `tokenHandler argument at /.*/parse_test\.go:\d+`}) +} + +func TestGivenNilHandler_PeekPanics(t *testing.T) { + p := parse.New(func(p *parse.API) { + p.Peek(nil) + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { p("") }, + Regexp: true, + Expect: `parse\.API\.Peek\(\): Peek\(\) called with nil ` + + `tokenHandler argument at /.*/parse_test\.go:\d+`}) +} + +func TestGivenStoppedParser_HandlePanics(t *testing.T) { + otherHandler := func(p *parse.API) { + panic("This is not the handler you're looking for") + } + p := parse.New(func(p *parse.API) { + p.Stop() + p.Handle(otherHandler) + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { p("") }, + Regexp: true, + Expect: `parse\.API\.Handle\(\): Illegal call to Handle\(\) ` + + `at /.*/parse_test\.go:\d+: no calls allowed after API\.Stop\(\)`}) +} + +func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) { + otherHandler := func(p *parse.API) { + panic("This is not the handler you're looking for") + } + p := parse.New(func(p *parse.API) { + p.Error("It ends here") + p.Handle(otherHandler) + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { p("") }, + Regexp: true, + Expect: `parse\.API\.Handle\(\): Illegal call to Handle\(\) ` + + `at /.*/parse_test\.go:\d+: no calls allowed after API\.Error\(\)`}) +} + +func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) { + p := parse.New(func(p *parse.API) { + p.Result() + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { p("") }, + Regexp: true, + Expect: `parse\.API\.Result\(\): Result\(\) called at ` + + `/.*/parse_test.go:\d+ without calling API.Peek\(\) or API.Accept\(\) on beforehand`}) +} + +func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) { + p := parse.New(func(p *parse.API) {}) + err := p("") + parse.AssertTrue(t, err == nil, "err") +} + +func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) { + p := parse.New(func(p *parse.API) {}) + err := p("x") + parse.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), "err") +} + +type parserWithLoop struct { + loopCounter int +} + +func (l *parserWithLoop) first(p *parse.API) { + p.Accept(tokenize.A.ASCII) + p.Handle(l.second) +} + +func (l *parserWithLoop) second(p *parse.API) { + p.Accept(tokenize.A.ASCII) + p.Handle(l.third) +} + +func (l *parserWithLoop) third(p *parse.API) { + if l.loopCounter++; l.loopCounter > 100 { + p.Error("Loop not detected by parsekit") + return + } + 
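+ // Accept a single rune and route back to first(), closing the handler cycle that the loop detection in Accept()/Peek() must catch.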
p.Accept(tokenize.A.ASCII) + p.Handle(l.first) +} + +func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { + looper := &parserWithLoop{} + parser := parse.New(looper.first) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { parser("Het houdt niet op, niet vanzelf") }, + Regexp: true, + Expect: `parse\.API: Loop detected in parser at /.*/parse_test.go:\d+`}) +} + +// This test incorporates an actual loop bug that I dropped on myself and +// that I could not easily spot in my code. It sounded so logical: +// I want to get chunks of 5 chars from the input, so I simply loop on: +// +// p.On(c.Max(5, a.AnyRune)) +// +// The problem here is that Max(5, ...) will also match when there is +// no more input, since Max(5, ...) is actually MinMax(0, 5, ...). +// Therefore the loop will never stop. Solving the loop was simple: +// +// p.On(c.MinMax(1, 5, a.AnyRune)) +// +// Now the loop stops when the parser finds no more matching input data. +func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { + var c, a = tokenize.C, tokenize.A + parser := parse.New(func(p *parse.API) { + for p.Accept(c.Max(5, a.AnyRune)) { + } + p.Stop() + }) + parse.AssertPanic(t, parse.PanicT{ + Function: func() { parser("This will end soon") }, + Regexp: true, + Expect: `parse\.API: Loop detected in parser at .*/parse_test.go:\d+`}) +} diff --git a/parseapi.go b/parseapi.go deleted file mode 100644 index c7cc242..0000000 --- a/parseapi.go +++ /dev/null @@ -1,216 +0,0 @@ -package parsekit - -import ( - "fmt" - "io" - - "git.makaay.nl/mauricem/go-parsekit/common" - "git.makaay.nl/mauricem/go-parsekit/tokenize" -) - -// ParseAPI holds the internal state of a parse run and provides an API that -// ParseHandler methods can use to communicate with the parser. -type ParseAPI struct { - tokenAPI *tokenize.TokenAPI // the TokenAPI, used for communicating with TokenHandler functions - loopCheck map[string]bool // used for parser loop detection - result *tokenize.TokenHandlerResult // Last TokenHandler result as produced by On(...).Accept() - err *common.Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored - stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored -} - -// Peek checks if the upcoming input data matches the provided TokenHandler. -// If it does, then true will be returned, false otherwise. The read cursor -// will be kept at the same position, so the next call to Peek() or Accept() -// will start from the same cursor position. -// -// After calling this method, you can retrieve the produced TokenHandlerResult -// using the ParseAPI.Result() method. -func (p *ParseAPI) Peek(tokenHandler tokenize.TokenHandler) bool { - p.result = nil - forkedTokenAPI, ok := p.invokeTokenHandler("Peek", tokenHandler) - if ok { - p.result = forkedTokenAPI.Result() - p.tokenAPI.ClearResults() - p.tokenAPI.DetachChilds() - } - return ok -} - -// Accept checks if the upcoming input data matches the provided TokenHandler. -// If it does, then true will be returned, false otherwise. The read cursor -// will be moved forward to beyond the match that was found. -// -// After calling this method, you can retrieve the produced TokenHandlerResult -// using the ParseAPI.Result() method. 
-func (p *ParseAPI) Accept(tokenHandler tokenize.TokenHandler) bool { - p.result = nil - forkedTokenAPI, ok := p.invokeTokenHandler("Accept", tokenHandler) - if ok { - forkedTokenAPI.Merge() - p.result = p.tokenAPI.Result() - p.tokenAPI.DetachChilds() - if p.tokenAPI.FlushReader() { - p.initLoopCheck() - } - } - return ok -} - -func (p *ParseAPI) invokeTokenHandler(name string, tokenHandler tokenize.TokenHandler) (*tokenize.TokenAPI, bool) { - p.panicWhenStoppedOrInError() - p.checkForLoops() - if tokenHandler == nil { - common.CallerPanic(2, "parsekit.ParseAPI.%s(): %s() called with nil tokenHandler argument at {caller}", name, name) - } - - p.result = nil - p.tokenAPI.ClearResults() - child := p.tokenAPI.Fork() - ok := tokenHandler(child) - - return child, ok -} - -// panicWhenStoppedOrInError will panic when the parser has produced an error -// or when it has been stopped. It is used from the ParseAPI methods, to -// prevent further calls to the ParseAPI on these occasions. -// -// Basically, this guard ensures proper coding of parsers, making sure -// that clean routes are followed. You can consider this check a runtime -// unit test. -func (p *ParseAPI) panicWhenStoppedOrInError() { - if !p.isStoppedOrInError() { - return - } - - called := common.CallerFunc(1) - - after := "Error()" - if p.stopped { - after = "Stop()" - } - - common.CallerPanic(2, "parsekit.ParseAPI.%s(): Illegal call to %s() at {caller}: "+ - "no calls allowed after ParseAPI.%s", called, called, after) -} - -func (p *ParseAPI) isStoppedOrInError() bool { - return p.stopped || p.err != nil -} - -func (p *ParseAPI) initLoopCheck() { - p.loopCheck = map[string]bool{} -} - -func (p *ParseAPI) checkForLoops() { - filepos := common.CallerFilePos(3) - if _, ok := p.loopCheck[filepos]; ok { - common.CallerPanic(3, "parsekit.ParseAPI: Loop detected in parser at {caller}") - } - p.loopCheck[filepos] = true -} - -// Result returns a TokenHandlerResult struct, containing results as produced by the -// last Peek() or Accept() call. -// -// When Result() is called without first doing a Peek() or Accept(), then no -// result will be available and the method will panic. -func (p *ParseAPI) Result() *tokenize.TokenHandlerResult { - result := p.result - if p.result == nil { - common.CallerPanic(1, "parsekit.ParseAPI.TokenHandlerResult(): TokenHandlerResult() called "+ - "at {caller} without calling ParseAPI.Peek() or ParseAPI.Accept() on beforehand") - } - return result -} - -// Handle is used to execute other ParseHandler functions from within your -// ParseHandler function. -// -// The boolean return value is true when the parser can still continue. -// It will be false when either an error was set (using ParseAPI.Error()), -// or the parser was stopped (using ParseAPI.Stop()). -func (p *ParseAPI) Handle(parseHandler ParseHandler) bool { - p.panicWhenStoppedOrInError() - p.panicWhenParseHandlerNil(parseHandler) - parseHandler(p) - return !p.isStoppedOrInError() -} - -func (p *ParseAPI) panicWhenParseHandlerNil(parseHandler ParseHandler) { - if parseHandler == nil { - common.CallerPanic(2, "parsekit.ParseAPI.Handle(): Handle() called with nil input at {caller}") - } -} - -// Stop is used by the parser impementation to tell the ParseAPI that it has -// completed the parsing process successfully. -// -// When the parser implementation returns without stopping first (and -// without running into an error), the Parser.Execute() will call -// ParserAPI.ExpectEndOfFile() to check if the end of the file was reached. 
-// If not, then things will end in an unexpected input error. -// Even though this fallback mechanism will work in a lot of cases, try to make -// your parser explicit about things and call Stop() actively yourself. -// -// After stopping, no more calls to ParseAPI methods are allowed. -// Calling a method in this state will result in a panic. -func (p *ParseAPI) Stop() { - p.stopped = true -} - -// Error sets the error message in the ParseAPI. -// -// After setting an error, no more calls to ParseAPI methods are allowed. -// Calling a method in this state will result in a panic. -func (p *ParseAPI) Error(format string, args ...interface{}) { - // No call to p.panicWhenStoppedOrInError(), to allow a parser to - // set a different error message when needed. - message := fmt.Sprintf(format, args...) - p.err = &common.Error{message, *p.tokenAPI.Result().Cursor()} -} - -// ExpectEndOfFile can be used to check if the input is at end of file. -// -// When it finds that the end of the file was indeed reached, then the -// parser will be stopped through ParseAPI.Stop(). Otherwise unexpected -// input is reported through ParseAPI.Expected() with "end of file" -// as the expectation. -func (p *ParseAPI) ExpectEndOfFile() { - p.panicWhenStoppedOrInError() - if p.Peek(tokenize.A.EndOfFile) { - p.Stop() - } else { - p.Expected("end of file") - } -} - -// Expected is used to set an error that tells the user that some -// unexpected input was encountered, and what input was expected. -// -// The 'expected' argument can be an empty string. In that case the error -// message will not contain a description of the expected input. -// -// It automatically produces an error message for a couple of situations: -// 1) the input simply didn't match the expectation -// 2) the end of the input was reached -// 3) there was an error while reading the input. -func (p *ParseAPI) Expected(expected string) { - p.panicWhenStoppedOrInError() - _, err := p.tokenAPI.NextRune() - switch { - case err == nil: - p.Error("unexpected input%s", fmtExpects(expected)) - case err == io.EOF: - p.Error("unexpected end of file%s", fmtExpects(expected)) - default: - p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) - } -} - -func fmtExpects(expected string) string { - if expected == "" { - return "" - } - return fmt.Sprintf(" (expected %s)", expected) -} diff --git a/parser.go b/parser.go deleted file mode 100644 index 877b6fa..0000000 --- a/parser.go +++ /dev/null @@ -1,57 +0,0 @@ -package parsekit - -import ( - "git.makaay.nl/mauricem/go-parsekit/common" - "git.makaay.nl/mauricem/go-parsekit/tokenize" -) - -// Parser is the top-level struct that holds the configuration for a parser. -// The Parser can be instantiated using the parsekit.NewParser() method. -type Parser struct { - startHandler ParseHandler // the function that handles the very first state -} - -// ParseHandler defines the type of function that must be implemented to handle -// a parsing state in a Parser state machine. -// -// A ParseHandler function gets a ParseAPI struct as its input. This struct holds -// all the internal state for the parsing state machine and provides the -// interface that the ParseHandler uses to interact with the parser. -type ParseHandler func(*ParseAPI) - -// NewParser instantiates a new Parser. -// -// The Parser is a state machine-style recursive descent parser, in which -// ParseHandler functions are used to move the state machine forward during -// parsing. 
This style of parser is typically used for parsing programming -// languages and structured data formats (like json, xml, toml, etc.) -// -// The startHandler argument points the Parser to the ParseHandler function -// that must be executed at the start of the parsing process. -// -// To parse input data, use the method Parser.Execute(). -func NewParser(startHandler ParseHandler) *Parser { - if startHandler == nil { - common.CallerPanic(1, "parsekit.NewParser(): NewParser() called with nil input at {caller}") - } - return &Parser{startHandler: startHandler} -} - -// Execute starts the parser for the provided input. -// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). -// -// When an error occurs during parsing, then this error is returned, nil otherwise. -func (p *Parser) Execute(input interface{}) *common.Error { - api := &ParseAPI{ - tokenAPI: tokenize.NewTokenAPI(input), - loopCheck: map[string]bool{}, - } - if api.Handle(p.startHandler) { - // Handle returned true, indicating that parsing could still continue. - // There was no error and that the parsing has not actively been Stop()-ed. - // Let's assume that we actually reached the end of the parsing successfully - // and try to make the best of it. - api.ExpectEndOfFile() - } - return api.err -} diff --git a/parser_test.go b/parser_test.go deleted file mode 100644 index cb32552..0000000 --- a/parser_test.go +++ /dev/null @@ -1,330 +0,0 @@ -package parsekit_test - -import ( - "fmt" - "testing" - - "git.makaay.nl/mauricem/go-parsekit" - "git.makaay.nl/mauricem/go-parsekit/tokenize" -) - -func ExampleParser_usingAcceptedRunes() { - // Easy access to the parsekit definitions. - a := tokenize.A - - matches := []string{} - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - for p.Accept(a.AnyRune) { - matches = append(matches, p.Result().String()) - } - p.ExpectEndOfFile() - }) - err := parser.Execute("¡Any will dö!") - - fmt.Printf("Matches = %q, Error = %s\n", matches, err) - // Output: - // Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = -} - -func ExampleParser_usingTokens() { - // Easy access to the parsekit definitions. - c, a, tok := tokenize.C, tokenize.A, tokenize.T - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) { - fmt.Printf("Runes accepted: %q\n", p.Result().String()) - fmt.Printf("Token values: %s\n", p.Result().Tokens()) - } - p.ExpectEndOfFile() - }) - parser.Execute("¡ök!") - - // Output: - // Runes accepted: "¡ök!" - // Token values: RUNE("¡", value = (int32)161) RUNE("ö", value = (int32)246) RUNE("k", value = (int32)107) RUNE("!", value = (int32)33) -} - -func ExampleParseAPI_Expected() { - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Expected("a thing") - }) - err := parser.Execute("Whatever, this parser will never be happy...") - fmt.Printf("Error: %s\n", err) - - // Output: - // Error: unexpected input (expected a thing) at start of file -} - -func ExampleParseAPI_Accept_inIfStatement() { - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - // When a case-insensitive match on "Yowza!" is found by the - // tokenizer, then Accept() will make the result available - // through ParseAPI.Result() - if p.Accept(tokenize.A.StrNoCase("Yowza!")) { - // Result.String() returns a string containing all - // accepted runes that were matched against. - fmt.Println(p.Result().String()) - } - }) - parser.Execute("YOWZA!") - - // Output: - // YOWZA! 
-} - -func ExampleParseAPI_Accept_inSwitchStatement() { - var result string - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - for loop := true; loop; { - switch { - case p.Accept(tokenize.A.Rune('X')): - // NOOP, skip this rune - case p.Accept(tokenize.A.AnyRune): - result += p.Result().String() - default: - loop = false - } - } - }) - parser.Execute("HXeXllXoXX, XXwoXrlXXXd!") - fmt.Println(result) - - // Output: - // Hello, world! -} - -func ExampleParseAPI_Stop() { - C, A := tokenize.C, tokenize.A - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - fmt.Printf("First word: ") - for p.Accept(C.Not(A.Space)) { - fmt.Printf("%s", p.Result()) - } - p.Stop() - }) - parser.Execute("Input with spaces") - - // Output: - // First word: Input -} - -func ExampleParseAPI_Stop_notCalledAndNoInputPending() { - C, A := tokenize.C, tokenize.A - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - fmt.Printf("Word: ") - for p.Accept(C.Not(A.Space)) { - fmt.Printf("%s", p.Result()) - } - fmt.Printf("\n") - }) - err := parser.Execute("Troglodyte") - fmt.Printf("Error is nil: %t\n", err == nil) - - // Output: - // Word: Troglodyte - // Error is nil: true -} - -func ExampleParseAPI_Stop_notCalledButInputPending() { - C, A := tokenize.C, tokenize.A - - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - fmt.Printf("First word: ") - for p.Accept(C.Not(A.Space)) { - fmt.Printf("%s", p.Result()) - } - fmt.Printf("\n") - }) - err := parser.Execute("Input with spaces") - fmt.Printf("Error: %s\n", err) - - // Output: - // First word: Input - // Error: unexpected input (expected end of file) at line 1, column 6 -} - -func ExampleParseAPI_Peek() { - // Definition of a fantasy serial number format. - C, A := tokenize.C, tokenize.A - serialnr := C.Seq(A.Asterisk, A.ASCIIUpper, A.ASCIIUpper, A.Digits) - - // This handler is able to handle serial numbers. - serialnrHandler := func(p *parsekit.ParseAPI) { - if p.Accept(serialnr) { - fmt.Println(p.Result().String()) - } - } - - // Start could function as a sort of dispatcher, handing over - // control to the correct ParseHandler function, based on the input. - start := func(p *parsekit.ParseAPI) { - if p.Peek(tokenize.A.Asterisk) { - p.Handle(serialnrHandler) - return - } - // ... other cases could go here ... 
- } - - parser := parsekit.NewParser(start) - parser.Execute("#XX1234") - parser.Execute("*ay432566") - parser.Execute("*ZD987112") - - // Output: - // *ZD987112 -} - -func TestGivenNullHandler_NewParserPanics(t *testing.T) { - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parsekit.NewParser(nil) }, - Regexp: true, - Expect: `parsekit\.NewParser\(\): NewParser\(\) called ` + - `with nil input at /.*/parser_test\.go:\d+`}) -} - -func TestGivenNullHandler_HandlePanics(t *testing.T) { - brokenParseHandler := func(p *parsekit.ParseAPI) { - p.Handle(nil) - } - parser := parsekit.NewParser(brokenParseHandler) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.Handle\(\): Handle\(\) called with nil input ` + - `at /.*/parser_test\.go:\d+`}) -} -func TestGivenNilTokenHandler_AcceptPanics(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Accept(nil) - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { p.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.Accept\(\): Accept\(\) called with nil ` + - `tokenHandler argument at /.*/parser_test\.go:\d+`}) -} - -func TestGivenNilTokenHandler_PeekPanics(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Peek(nil) - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { p.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.Peek\(\): Peek\(\) called with nil ` + - `tokenHandler argument at /.*/parser_test\.go:\d+`}) -} - -func TestGivenStoppedParser_HandlePanics(t *testing.T) { - otherHandler := func(p *parsekit.ParseAPI) { - panic("This is not the handler you're looking for") - } - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Stop() - p.Handle(otherHandler) - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { p.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` + - `at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Stop\(\)`}) -} - -func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) { - otherHandler := func(p *parsekit.ParseAPI) { - panic("This is not the handler you're looking for") - } - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Error("It ends here") - p.Handle(otherHandler) - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { p.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.Handle\(\): Illegal call to Handle\(\) ` + - `at /.*/parser_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`}) -} - -func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) { - p.Result() - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { p.Execute("") }, - Regexp: true, - Expect: `parsekit\.ParseAPI\.TokenHandlerResult\(\): TokenHandlerResult\(\) called at ` + - `/.*/parser_test.go:\d+ without calling ParseAPI.Peek\(\) or ParseAPI.Accept\(\) on beforehand`}) -} - -func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) {}) - err := p.Execute("") - parsekit.AssertTrue(t, err == nil, "err") -} - -func TestGivenParserWhichIsNotStopped_WithMoreInput_ProducesError(t *testing.T) { - p := parsekit.NewParser(func(p *parsekit.ParseAPI) {}) - err := p.Execute("x") - parsekit.AssertEqual(t, "unexpected input (expected end of file) at start of file", err.Error(), 
"err") -} - -type parserWithLoop struct { - loopCounter int -} - -func (l *parserWithLoop) first(p *parsekit.ParseAPI) { - p.Accept(tokenize.A.ASCII) - p.Handle(l.second) -} - -func (l *parserWithLoop) second(p *parsekit.ParseAPI) { - p.Accept(tokenize.A.ASCII) - p.Handle(l.third) -} - -func (l *parserWithLoop) third(p *parsekit.ParseAPI) { - if l.loopCounter++; l.loopCounter > 100 { - p.Error("Loop not detected by parsekit") - return - } - p.Accept(tokenize.A.ASCII) - p.Handle(l.first) -} - -func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) { - looper := &parserWithLoop{} - parser := parsekit.NewParser(looper.first) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("Het houdt niet op, niet vanzelf") }, - Regexp: true, - Expect: `parsekit\.ParseAPI: Loop detected in parser at /.*/parser_test.go:\d+`}) -} - -// This test incorporates an actual loop bug that I dropped on myself and -// that I could not easily spot in my code. It sounded so logical: -// I want to get chunks of 5 chars from the input, so I simply loop on: -// -// p.On(c.Max(5, a.AnyRune)) -// -// The problem here is that Max(5, ...) will also match when there is -// no more input, since Max(5, ...) is actually MinMax(0, 5, ...). -// Therefore the loop will never stop. Solving the loop was simple: -// -// p.On(c.MinMax(1, 5, a.AnyRune)) -// -// Now the loop stops when the parser finds no more matching input data. -func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) { - var c, a = tokenize.C, tokenize.A - parser := parsekit.NewParser(func(p *parsekit.ParseAPI) { - for p.Accept(c.Max(5, a.AnyRune)) { - } - p.Stop() - }) - parsekit.AssertPanic(t, parsekit.PanicT{ - Function: func() { parser.Execute("This will end soon") }, - Regexp: true, - Expect: `parsekit\.ParseAPI: Loop detected in parser at .*/parser_test.go:\d+`}) -} diff --git a/read/assertions_test.go b/read/assertions_test.go new file mode 100644 index 0000000..cf39026 --- /dev/null +++ b/read/assertions_test.go @@ -0,0 +1,38 @@ +package read_test + +// This file contains some tools that are used for writing tests. + +import ( + "fmt" + "runtime" + "testing" +) + +func AssertEqual(t *testing.T, expected interface{}, actual interface{}) { + if expected != actual { + t.Errorf( + "Unexpected value at %s:\nexpected: %q\nactual: %q", + callerFilepos(1), expected, actual) + } +} + +func callerFilepos(depth int) string { + // No error handling, because we call this method ourselves with safe depth values. + _, file, line, _ := runtime.Caller(depth + 1) + return fmt.Sprintf("%s:%d", file, line) +} + +func AssertPanic(t *testing.T, code func(), expected string) { + defer func() { + if r := recover(); r != nil { + if expected != r.(string) { + t.Errorf( + "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q", + expected, r) + } + } else { + t.Errorf("Function did not panic (expected panic message: %s)", expected) + } + }() + code() +} diff --git a/read/reader.go b/read/read.go similarity index 81% rename from read/reader.go rename to read/read.go index 6d9afd8..544f437 100644 --- a/read/reader.go +++ b/read/read.go @@ -48,32 +48,17 @@ import ( "unicode/utf8" ) -// Reader wraps around a bufio.Reader and provides an additional layer of -// buffering that allows us to read the same runes over and over again. -// This is useful for implementing a parser that must be able to do lookahead -// on the input, returning to the original input position after finishing -// that lookahead). 
-//
-// To minimze memory use, it is also possible to flush the read buffer when there is
-// no more need to go back to previously read runes.
-//
-// The parserkit.reader.Reader is used internally by tokenize.TokenAPI.
-type Reader struct {
-	bufio         *bufio.Reader // Used for ReadRune()
-	buffer        []rune        // Input buffer, holding runes that were read from input
-	err           error         // A read error, if one occurred
-	errOffset     int           // The offset in the buffer at which the read error was encountered
-	firstReadDone bool          // Whether or not the first read was done
-}
-
-// New initializes a new reader struct, wrapped around the provided input.
+// New initializes a new Buffer struct, wrapped around the provided input.
 //
 // The input can be any one of the following types:
-// - string
-// - type implementing io.Reader
-// - bufio.Reader
-func New(input interface{}) *Reader {
-	return &Reader{
+//
+// • string
+//
+// • a type implementing io.Reader
+//
+// • bufio.Reader
+func New(input interface{}) *Buffer {
+	return &Buffer{
 		bufio: makeBufioReader(input),
 	}
 }
@@ -93,12 +78,29 @@ func makeBufioReader(input interface{}) *bufio.Reader {
 	}
 }
 
+// Buffer wraps around a bufio.Reader and provides an additional layer of
+// buffering that allows us to read the same runes over and over again.
+// This is useful for implementing a parser that must be able to do lookahead
+// on the input, returning to the original input position after finishing
+// that lookahead.
+//
+// To minimize memory use, it is also possible to flush the read buffer when there is
+// no more need to go back to previously read runes.
+//
+// The parsekit.read.Buffer is used internally by tokenize.API.
+type Buffer struct {
+	bufio         *bufio.Reader // used for ReadRune()
+	buffer        []rune        // input buffer, holding runes that were read from input
+	err           error         // a read error, if one occurred
+	errOffset     int           // the offset in the buffer at which the read error was encountered
+	firstReadDone bool          // whether or not the first read was done
+}
+
 // RuneAt reads the rune at the provided rune offset.
 //
-// This offset is relative to the current starting position of the buffer in
-// the reader. When starting reading, offset 0 will point at the start of the
-// input. After flushing, offset 0 will point at the input up to where
-// the flush was done.
+// This offset is relative to the current starting position of the Buffer.
+// When starting reading, offset 0 will point at the start of the input.
+// After flushing, offset 0 will point at the input up to where the flush was done.
 //
 // The error return value will be nil when reading was successful.
 // When an invalid rune is encountered on the input, the error will be nil,
@@ -107,9 +109,10 @@ func makeBufioReader(input interface{}) *bufio.Reader {
 // When reading failed, the rune will be utf8.RuneError and the error will
 // not be nil. One special read failure is actually a normal situation: end
 // of file reached. In that case, the returned error will be io.EOF.
+//
 // Once a read error is encountered, that same read error is guaranteed to
 // be returned on every subsequent read at or beyond the provided offset.
-func (r *Reader) RuneAt(offset int) (rune, error) {
+func (r *Buffer) RuneAt(offset int) (rune, error) {
 	// Re-issue a previously seen read error.
if r.err != nil && offset >= r.errOffset { return utf8.RuneError, r.err @@ -153,12 +156,12 @@ func (r *Reader) RuneAt(offset int) (rune, error) { const smallBufferSize = 64 // ErrTooLarge is passed to panic if memory cannot be allocated to store data in a buffer. -var ErrTooLarge = errors.New("parsekit.read: too large") +var ErrTooLarge = errors.New("parsekit.read.Buffer: too large") // grow grows the buffer to guarantee space for n more bytes. // It returns the index where bytes should be written. // If the buffer can't grow it will panic with ErrTooLarge. -func (r *Reader) grow(n int) { +func (r *Buffer) grow(n int) { // Instantiate new buffer. if r.buffer == nil { b := smallBufferSize @@ -193,14 +196,14 @@ func makeSlice(n int) []rune { return make([]rune, n) } -// Flush deletes the provided number of runes from the start of the -// reader buffer. After flushing the buffer, offset 0 as used by RuneAt() -// will point to the rune that comes after the flushed runes. -// So what this basically does is turn the Reader into a sliding window. -func (r *Reader) Flush(numberOfRunes int) { +// Flush deletes the provided number of runes from the start of the Buffer. +// After flushing the Buffer, offset 0 as used by RuneAt() will point to +// the rune that comes after the runes that were flushed. +// So what this basically does, is turn the Buffer into a sliding window. +func (r *Buffer) Flush(numberOfRunes int) { if numberOfRunes > len(r.buffer) { panic(fmt.Sprintf( - "parsekit.read.Reader.Flush(): number of runes to flush (%d) "+ + "parsekit.read.Buffer.Flush(): number of runes to flush (%d) "+ "exceeds size of the buffer (%d)", numberOfRunes, len(r.buffer))) } r.buffer = r.buffer[numberOfRunes:] diff --git a/read/reader_test.go b/read/read_test.go similarity index 77% rename from read/reader_test.go rename to read/read_test.go index 28f9c45..06468ca 100644 --- a/read/reader_test.go +++ b/read/read_test.go @@ -9,7 +9,6 @@ import ( "unicode/utf8" "git.makaay.nl/mauricem/go-parsekit/read" - "github.com/stretchr/testify/assert" ) func ExampleNew() { @@ -46,41 +45,41 @@ func TestNew_VariousInputTypesCanBeUsed(t *testing.T) { } func TestNew_UnhandledInputType_Panics(t *testing.T) { - assert.PanicsWithValue(t, - "parsekit.read.New(): no support for input of type int", - func() { read.New(12345) }) + AssertPanic(t, + func() { read.New(12345) }, + "parsekit.read.New(): no support for input of type int") } -func TestReader_RuneAt(t *testing.T) { +func TestBuffer_RuneAt(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // It is possible to go back and forth while reading the input. 
result := fmt.Sprintf("%c%c%c%c", at(0), at(12), at(7), at(0)) - assert.Equal(t, "H!wH", result) + AssertEqual(t, "H!wH", result) } -func TestReader_RuneAt_endOfFile(t *testing.T) { +func TestBuffer_RuneAt_endOfFile(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) rn, err := r.RuneAt(13) result := fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) - assert.Equal(t, "'�' EOF true", result) + AssertEqual(t, "'�' EOF true", result) rn, err = r.RuneAt(20) result = fmt.Sprintf("%q %s %t", rn, err, err == io.EOF) - assert.Equal(t, "'�' EOF true", result) + AssertEqual(t, "'�' EOF true", result) } -func TestReader_RuneAt_invalidRune(t *testing.T) { +func TestBuffer_RuneAt_invalidRune(t *testing.T) { r := read.New(strings.NewReader("Hello, \xcdworld!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } result := fmt.Sprintf("%c%c%c%c", at(6), at(7), at(8), at(9)) - assert.Equal(t, " �wo", result, "result") + AssertEqual(t, " �wo", result) } -func ExampleReader_RuneAt() { +func ExampleBuffer_RuneAt() { reader := read.New(strings.NewReader("Hello, world!")) fmt.Printf("Runes: ") @@ -104,16 +103,16 @@ func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) { o, _ := r.RuneAt(1) m, _ := r.RuneAt(2) bom := fmt.Sprintf("%c%c%c", b, o, m) - assert.Equal(t, "Bom", bom, "first three runes") + AssertEqual(t, "Bom", bom) } -func TestReader_Flush(t *testing.T) { +func TestBuffer_Flush(t *testing.T) { r := read.New(strings.NewReader("Hello, world!")) at := func(i int) rune { r, _ := r.RuneAt(i); return r } // Fills the buffer with the first 8 runes on the input: "Hello, w" result := fmt.Sprintf("%c", at(7)) - assert.Equal(t, "w", result, "first read") + AssertEqual(t, "w", result) // Now flush the first 4 runes from the buffer (dropping "Hell" from it) r.Flush(4) @@ -121,10 +120,10 @@ func TestReader_Flush(t *testing.T) { // Rune 0 is now pointing at what originally was rune offset 4. // We can continue reading from there. result = fmt.Sprintf("%c%c%c%c%c%c", at(0), at(1), at(2), at(3), at(4), at(5)) - assert.Equal(t, "o, wor", result) + AssertEqual(t, "o, wor", result) } -func ExampleReader_Flush() { +func ExampleBuffer_Flush() { r := read.New(strings.NewReader("dog eat dog!")) at := func(offset int) rune { c, _ := r.RuneAt(offset); return c } @@ -154,33 +153,33 @@ func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) { r.RuneAt(10) // However, we flush 12 runes, which exceeds the buffer size. 
-	assert.PanicsWithValue(t,
-		"parsekit.read.Reader.Flush(): number of runes to flush "+
-			"(12) exceeds size of the buffer (11)",
-		func() { r.Flush(12) })
+	AssertPanic(t,
+		func() { r.Flush(12) },
+		"parsekit.read.Buffer.Flush(): number of runes to flush "+
+			"(12) exceeds size of the buffer (11)")
 }
 
 func TestGivenEOFFollowedByFlush_EOFCanStillBeRead(t *testing.T) {
 	r := read.New(strings.NewReader("Hello, world!"))
 
 	_, err := r.RuneAt(13)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 	_, err = r.RuneAt(13)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 	_, err = r.RuneAt(14)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 
 	r.Flush(13)
 
 	_, err = r.RuneAt(0)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 	_, err = r.RuneAt(1)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 	_, err = r.RuneAt(2)
-	assert.Equal(t, err.Error(), "EOF")
+	AssertEqual(t, err.Error(), "EOF")
 }
 
-// In this test, I want to make sure that once a Reader returns an error,
+// In this test, I want to make sure that once a Buffer returns an error,
 // that error is cached and will be returned when data for the offset where
 // the error occurred is read at a later time.
-func TestGivenErrorFromReader_ErrorIsCached(t *testing.T) {
+func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
 	input := &StubReader{
 		bytes: []byte{'a', 'b', 'c', 'd'},
 		errors: []error{
@@ -192,43 +191,43 @@ func TestGivenErrorFromBuffer_ErrorIsCached(t *testing.T) {
 
 	// Read the last available rune.
 	readRune, _ := r.RuneAt(3)
-	assert.Equal(t, 'd', readRune)
+	AssertEqual(t, 'd', readRune)
 
 	// Reading the next offset must result in the io.EOF error from the stub.
 	readRune, err := r.RuneAt(4)
-	assert.Equal(t, utf8.RuneError, readRune)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, utf8.RuneError, readRune)
+	AssertEqual(t, io.EOF, err)
 
 	// Reading even further should yield the same io.EOF error.
 	readRune, err = r.RuneAt(5)
-	assert.Equal(t, utf8.RuneError, readRune)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, utf8.RuneError, readRune)
+	AssertEqual(t, io.EOF, err)
 
 	// After an error, we must still be able to read the last rune.
 	readRune, _ = r.RuneAt(3)
-	assert.Equal(t, 'd', readRune)
+	AssertEqual(t, 'd', readRune)
 
 	// Flushing updates the error index too.
 	r.Flush(3)
 
 	// The last rune is now at offset 0.
 	readRune, _ = r.RuneAt(0)
-	assert.Equal(t, 'd', readRune)
+	AssertEqual(t, 'd', readRune)
 
 	// The io.EOF is now at offset 1.
 	_, err = r.RuneAt(1)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, io.EOF, err)
 
 	// Let's flush that last rune too.
 	r.Flush(1)
 
 	// The io.EOF is now at offset 0.
 	_, err = r.RuneAt(0)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, io.EOF, err)
 
 	// And reading beyond that offset also yields io.EOF.
 	_, err = r.RuneAt(1)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, io.EOF, err)
 }
 
 func TestInputLargerThanDefaultBufSize64(t *testing.T) {
@@ -236,13 +235,13 @@ func TestInputLargerThanDefaultBufSize64(t *testing.T) {
 	r := read.New(input)
 
 	readRune, err := r.RuneAt(0)
-	assert.Equal(t, 'X', readRune)
+	AssertEqual(t, 'X', readRune)
 	readRune, err = r.RuneAt(size - 1)
-	assert.Equal(t, 'Y', readRune)
+	AssertEqual(t, 'Y', readRune)
 	readRune, err = r.RuneAt(size)
-	assert.Equal(t, io.EOF, err)
+	AssertEqual(t, io.EOF, err)
 	readRune, err = r.RuneAt(10)
-	assert.Equal(t, 'X', readRune)
+	AssertEqual(t, 'X', readRune)
 }
 
 func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *testing.T) {
@@ -250,9 +249,9 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadLargerThanBufSize64(t *tes
 	r := read.New(input)
 
 	readRune, _ := r.RuneAt(size - 200)
-	assert.Equal(t, 'X', readRune)
+	AssertEqual(t, 'X', readRune)
 	readRune, _ = r.RuneAt(size - 1)
-	assert.Equal(t, 'Y', readRune)
+	AssertEqual(t, 'Y', readRune)
 }
 
 func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
@@ -260,7 +259,7 @@ func TestInputLargerThanDefaultBufSize64_WithFirstReadToLastByte(t *testing.T) {
 	r := read.New(input)
 
 	readRune, _ := r.RuneAt(size - 1)
-	assert.Equal(t, 'Y', readRune)
+	AssertEqual(t, 'Y', readRune)
 }
 
 func makeLargeStubReader() (*StubReader, int) {
diff --git a/tokenize/api.go b/tokenize/api.go
new file mode 100644
index 0000000..8b6ce9c
--- /dev/null
+++ b/tokenize/api.go
@@ -0,0 +1,245 @@
+package tokenize
+
+import (
+	"fmt"
+
+	"git.makaay.nl/mauricem/go-parsekit/read"
+)
+
+// API holds the internal state of a tokenizer run and provides an API that
+// tokenize.Handler functions can use to:
+//
+// • read and accept runes from the input (NextRune, Accept)
+//
+// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
+//
+// • flush already read input data when not needed anymore (FlushInput)
+//
+// • retrieve the tokenizer Result struct (Result) to read or modify the results
+//
+// BASIC OPERATION:
+//
+// To retrieve the next rune from the API, call the NextRune() method.
+//
+// When the rune is to be accepted as input, call the method Accept(). The rune
+// is then added to the result runes of the API and the read cursor is moved
+// forward.
+//
+// By invoking NextRune() + Accept() multiple times, the result can be extended
+// with as many runes as needed. Runes collected this way can later on be
+// retrieved using the method Result().Runes().
+//
+// It is mandatory to call Accept() after retrieving a rune, before calling
+// NextRune() again. Failing to do so will result in a panic.
+//
+// Next to adding runes to the result, it is also possible to modify the
+// stored runes or to add lexical Tokens to the result. For all things
+// concerning results, take a look at the Result struct, which
+// can be accessed through the method Result().
+//
+// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
+//
+// Sometimes, we must be able to perform a lookahead, which might either
+// succeed or fail. In case of a failing lookahead, the state of the
+// API must be brought back to the original state, so we can try
+// a different route.
+//
+// The way in which this is supported is by forking an API struct by
+// calling the method Fork(). This will return a forked child API, with
+// empty result data, but using the same read cursor position as the
+// forked parent.
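+//
+// As a rough sketch of the intended usage (the handler name is made up for
+// illustration; the method calls are the ones documented in this comment):
+//
+//	func parenOpen(t *API) bool {
+//		child := t.Fork()          // fork, so the parent API is not affected
+//		r, err := child.NextRune() // look ahead one rune
+//		if err != nil || r != '(' {
+//			return false // no match; the parent API was never touched
+//		}
+//		child.Accept() // add the rune to the child's results
+//		child.Merge()  // lookahead succeeded: feed the results to the parent
+//		return true
+//	}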
+//
+// After forking, the same interface as described for BASIC OPERATION can be
+// used to fill the results. When the lookahead was successful, then
+// Merge() can be called on the forked child to append the child's results
+// to the parent's results, and to move the read cursor position to that
+// of the child.
+//
+// When the lookahead was unsuccessful, then the forked child API can be
+// disposed by calling Dispose() on the forked child. This is not mandatory.
+// Garbage collection will take care of this automatically.
+// The parent API was never modified, so it can safely be used after disposal
+// as if the lookahead never happened.
+//
+// Opinionated note:
+// Many tokenizers/parsers take a different approach to lookaheads by using
+// peeks and by moving the read cursor position back and forth, or by putting
+// read input back on the input stream. That often leads to code that is
+// efficient but, in my opinion, not very intuitive to read. It can also
+// be tedious to get the cursor back at the correct position, which
+// can lead to hard-to-track bugs. I much prefer this forking method, since
+// no bookkeeping has to be implemented when implementing a parser.
+type API struct {
+	reader *read.Buffer
+	parent *API    // parent API in case this API is a forked child
+	child  *API    // child API in case this API has a forked child
+	result *Result // results as produced by a Handler (runes, Tokens, cursor position)
+}
+
+// NewAPI initializes a new API struct, wrapped around the provided input.
+// For an overview of allowed inputs, take a look at the documentation
+// for parsekit.read.New().
+func NewAPI(input interface{}) *API {
+	return &API{
+		reader: read.New(input),
+		result: newResult(),
+	}
+}
+
+// NextRune returns the rune at the current read offset.
+//
+// When an invalid UTF8 rune is encountered on the input, it is replaced with
+// the utf8.RuneError rune. It's up to the caller to handle this as an error
+// when needed.
+//
+// After reading a rune it must be Accept()-ed to move the read cursor forward
+// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
+// without explicitly accepting, this method will panic. You can see this as a
+// built-in unit test, enforcing correct serialization of API method calls.
+func (i *API) NextRune() (rune, error) {
+	if i.result.lastRune != nil {
+		callerPanic(1, "tokenize.API.NextRune(): NextRune() called at {caller} "+
+			"without a prior call to Accept()")
+	}
+	i.detachChild()
+
+	readRune, err := i.reader.RuneAt(i.result.offset)
+	i.result.lastRune = &runeInfo{r: readRune, err: err}
+	return readRune, err
+}
+
+// Accept the last rune as read by NextRune() into the Result runes and move
+// the cursor forward.
+//
+// It is not allowed to call Accept() when the previous call to NextRune()
+// returned an error. Calling Accept() in such a case will result in a panic.
+func (i *API) Accept() {
+	if i.result.lastRune == nil {
+		callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller} without first calling NextRune()")
+	} else if i.result.lastRune.err != nil {
+		callerPanic(1, "tokenize.API.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed")
+	}
+	i.result.runes = append(i.result.runes, i.result.lastRune.r)
+	i.result.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
+	i.result.offset++
+	i.result.lastRune = nil
+}
+
+// Fork forks off a child of the API struct.
+// It will reuse the same read buffer and cursor position, but for the rest
+// this is a fresh API.
+//
+// By forking an API, you can freely work with the forked child, without
+// affecting the parent API. This is for example useful when you must perform
+// some form of lookahead.
+//
+// When processing of the Handler was successful and you want to add the results
+// to the parent API, you can call Merge() on the forked child.
+// This will add the results to the results of the parent (runes, tokens).
+// It also updates the read cursor position of the parent to that of the child.
+//
+// When the lookahead was unsuccessful, then the forked child API can be
+// disposed by calling Dispose() on the forked child. This is not mandatory.
+// Garbage collection will take care of this automatically.
+// The parent API was never modified, so it can safely be used after disposal
+// as if the lookahead never happened.
+func (i *API) Fork() *API {
+	// Cleanup current forking / reading state.
+	i.detachChild()
+	i.result.lastRune = nil
+
+	// Create the new fork.
+	child := &API{
+		reader: i.reader,
+		parent: i,
+	}
+	child.result = newResult()
+	i.syncCursorTo(child)
+	i.child = child
+	return child
+}
+
+// Merge appends the results of a forked child API (runes, tokens) to the
+// results of its parent. The read cursor of the parent is also updated
+// to that of the forked child.
+//
+// After the merge operation, the child results are reset so it can immediately
+// be reused for performing another match. This means that all Result data are
+// cleared, but the read cursor position is kept at its current position.
+// This allows a child to feed results in chunks to its parent.
+func (i *API) Merge() {
+	if i.parent == nil {
+		callerPanic(1, "tokenize.API.Merge(): Merge() called at {caller} on a non-forked API")
+	}
+	i.addResultsToParent()
+	i.syncCursorTo(i.parent)
+	i.clearResults()
+	i.detachChild()
+}
+
+func (i *API) addResultsToParent() {
+	i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
+	i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
+}
+
+func (i *API) syncCursorTo(to *API) {
+	to.result.offset = i.result.offset
+	*to.result.cursor = *i.result.cursor
+}
+
+// Reset clears the API results and - when forked - detaches the forked child.
+func (i *API) Reset() {
+	i.clearResults()
+	i.detachChild()
+}
+
+// Dispose resets the API and - when it is a fork - detaches itself from its parent.
+func (i *API) Dispose() {
+	i.Reset()
+	if i.parent != nil {
+		i.parent.detachChild()
+	}
+}
+
+func (i *API) clearResults() {
+	i.result.lastRune = nil
+	i.result.runes = []rune{}
+	i.result.tokens = []*Token{}
+	i.result.err = nil
+}
+
+func (i *API) detachChild() {
+	if i.child != nil {
+		i.child.detachChildsRecurse()
+		i.child = nil
+	}
+}
+
+func (i *API) detachChildsRecurse() {
+	if i.child != nil {
+		i.child.detachChildsRecurse()
+	}
+	i.child = nil
+	i.parent = nil
+}
+
+// FlushInput flushes processed input data from the read.Buffer.
+// In this context 'processed' means all runes that were read using NextRune()
+// and that were added to the results using Accept().
+//
+// Note:
+// When writing your own Handler, you normally won't have to call this
+// method yourself. It is automatically called by parsekit when needed.
+func (i *API) FlushInput() bool {
+	if i.result.offset > 0 {
+		i.reader.Flush(i.result.offset)
+		i.result.offset = 0
+		return true
+	}
+	return false
+}
+
+// Result returns the Result struct from the API.
The returned struct +// can be used to retrieve and to modify result data. +func (i *API) Result() *Result { + return i.result +} diff --git a/tokenize/tokenapi_example_test.go b/tokenize/api_test.go similarity index 56% rename from tokenize/tokenapi_example_test.go rename to tokenize/api_test.go index bf2ae90..a21d2e1 100644 --- a/tokenize/tokenapi_example_test.go +++ b/tokenize/api_test.go @@ -6,9 +6,9 @@ import ( "git.makaay.nl/mauricem/go-parsekit/tokenize" ) -func ExampleTokenAPI_Fork() { - // This custom TokenHandler checks for input 'a', 'b' or 'c'. - abcHandler := func(t *tokenize.TokenAPI) bool { +func ExampleAPI_Fork() { + // This custom Handler checks for input 'a', 'b' or 'c'. + abcHandler := func(t *tokenize.API) bool { a := tokenize.A for _, r := range []rune{'a', 'b', 'c'} { child := t.Fork() // fork, so we won't change parent t @@ -22,19 +22,19 @@ func ExampleTokenAPI_Fork() { return false } - // Note: a custom TokenHandler is normally not what you need. - // You can make use of the parser/combinator tooling to do things - // a lot simpler and take care of forking at the appropriate places. - // The handler from above can be replaced with: + // Note: a custom Handler is normally not what you need. + // You can make use of the parser/combinator tooling to make the + // implementation a lot simpler and to take care of forking at + // the appropriate places. The handler from above can be replaced with: simpler := tokenize.A.RuneRange('a', 'c') - result, err := tokenize.NewTokenizer(abcHandler).Execute("another test") + result, err := tokenize.New(abcHandler)("another test") fmt.Println(result, err) - result, err = tokenize.NewTokenizer(simpler).Execute("curious") + result, err = tokenize.New(simpler)("curious") fmt.Println(result, err) - result, err = tokenize.NewTokenizer(abcHandler).Execute("bang on!") + result, err = tokenize.New(abcHandler)("bang on!") fmt.Println(result, err) - result, err = tokenize.NewTokenizer(abcHandler).Execute("not a match") + result, err = tokenize.New(abcHandler)("not a match") fmt.Println(result, err) // Output: @@ -44,8 +44,8 @@ func ExampleTokenAPI_Fork() { // mismatch at start of file } -func ExampleTokenAPI_Merge() { - tokenHandler := func(t *tokenize.TokenAPI) bool { +func ExampleAPI_Merge() { + tokenHandler := func(t *tokenize.API) bool { child1 := t.Fork() child1.NextRune() // reads 'H' child1.Accept() @@ -62,7 +62,7 @@ func ExampleTokenAPI_Merge() { return true } - result, _ := tokenize.NewTokenizer(tokenHandler).Execute("Hi mister X!") + result, _ := tokenize.New(tokenHandler)("Hi mister X!") fmt.Println(result) // Output: diff --git a/tokenize/assertions_test.go b/tokenize/assertions_test.go index 0b7477c..6c48ee5 100644 --- a/tokenize/assertions_test.go +++ b/tokenize/assertions_test.go @@ -1,6 +1,6 @@ package tokenize_test -// This file contains some tools that are used for writing parsekit tests. +// This file contains some tools that are used for writing tests. 
import ( "regexp" @@ -17,12 +17,6 @@ func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat } } -// func AssertNotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) { -// if notExpected == actual { -// t.Errorf("Unexpected value for %s: %q", forWhat, actual) -// } -// } - func AssertTrue(t *testing.T, b bool, assertion string) { if !b { t.Errorf("Assertion %s is false", assertion) @@ -63,21 +57,21 @@ func AssertPanic(t *testing.T, p PanicT) { p.Function() } -type TokenHandlerT struct { - Input string - TokenHandler tokenize.TokenHandler - MustMatch bool - Expected string +type HandlerT struct { + Input string + Handler tokenize.Handler + MustMatch bool + Expected string } -func AssertTokenHandlers(t *testing.T, testSet []TokenHandlerT) { +func AssertHandlers(t *testing.T, testSet []HandlerT) { for _, test := range testSet { - AssertTokenHandler(t, test) + AssertHandler(t, test) } } -func AssertTokenHandler(t *testing.T, test TokenHandlerT) { - result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) +func AssertHandler(t *testing.T, test HandlerT) { + result, err := tokenize.New(test.Handler)(test.Input) if test.MustMatch { if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) @@ -92,9 +86,9 @@ func AssertTokenHandler(t *testing.T, test TokenHandlerT) { } type TokenMakerT struct { - Input string - TokenHandler tokenize.TokenHandler - Expected []tokenize.Token + Input string + Handler tokenize.Handler + Expected []tokenize.Token } func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { @@ -104,7 +98,7 @@ func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) { } func AssertTokenMaker(t *testing.T, test TokenMakerT) { - result, err := tokenize.NewTokenizer(test.TokenHandler).Execute(test.Input) + result, err := tokenize.New(test.Handler)(test.Input) if err != nil { t.Errorf("Test %q failed with error: %s", test.Input, err) } else { diff --git a/tokenize/callerinfo.go b/tokenize/callerinfo.go new file mode 100644 index 0000000..604dfb2 --- /dev/null +++ b/tokenize/callerinfo.go @@ -0,0 +1,20 @@ +package tokenize + +import ( + "fmt" + "runtime" + "strings" +) + +func callerPanic(depth int, f string, args ...interface{}) { + filepos := callerFilepos(depth + 1) + m := fmt.Sprintf(f, args...) + m = strings.Replace(m, "{caller}", filepos, 1) + panic(m) +} + +func callerFilepos(depth int) string { + // No error handling, because we call this method ourselves with safe depth values. + _, file, line, _ := runtime.Caller(depth + 1) + return fmt.Sprintf("%s:%d", file, line) +} diff --git a/common/cursor.go b/tokenize/cursor.go similarity index 52% rename from common/cursor.go rename to tokenize/cursor.go index 77e8c8e..5c1ea9f 100644 --- a/common/cursor.go +++ b/tokenize/cursor.go @@ -1,11 +1,11 @@ -package common +package tokenize import "fmt" // Cursor represents the position of a cursor in various ways. type Cursor struct { - Byte int // The cursor offset in bytes, relative to start of file - Rune int // The cursor offset in UTF8 runes, relative to start of file + Byte int // The cursor offset in bytes + Rune int // The cursor offset in UTF8 runes Column int // The column at which the cursor is (0-indexed) Line int // The line at which the cursor is (0-indexed) } @@ -18,15 +18,11 @@ func (c Cursor) String() string { return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1) } -// Move updates the position of the cursor, based on the provided input string. 
-// The input string represents the runes that have been skipped over. This
-// method will take newlines into account to keep track of line numbers and
+// move updates the position of the cursor, based on the provided input string.
+// The input string represents the runes that the cursor must be moved over.
+// This method will take newlines into account to keep track of line numbers and
 // column positions automatically.
-//
-// Note: when you are writing a parser using parsekit, it's unlikely
-// that you will use this method directly. The parsekit package takes care
-// of calling it at the correct time.
-func (c *Cursor) Move(input string) *Cursor {
+func (c *Cursor) move(input string) *Cursor {
 	c.Byte += len(input)
 	for _, r := range input {
 		c.Rune++
diff --git a/common/cursor_test.go b/tokenize/cursor_test.go
similarity index 80%
rename from common/cursor_test.go
rename to tokenize/cursor_test.go
index ff29405..49ddf2c 100644
--- a/common/cursor_test.go
+++ b/tokenize/cursor_test.go
@@ -1,18 +1,16 @@
-package common_test
+package tokenize
 
 import (
 	"fmt"
 	"testing"
-
-	"git.makaay.nl/mauricem/go-parsekit/common"
 )
 
-func ExampleCursor_Move() {
-	c := &common.Cursor{}
+func ExampleCursor_move() {
+	c := &Cursor{}
 
 	fmt.Printf("after initialization : %s\n", c)
-	fmt.Printf("after 'some words'   : %s\n", c.Move("some words"))
-	fmt.Printf("after '\\n'           : %s\n", c.Move("\n"))
-	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.Move("\r\nskip\nlines"))
+	fmt.Printf("after 'some words'   : %s\n", c.move("some words"))
+	fmt.Printf("after '\\n'           : %s\n", c.move("\n"))
+	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
 
 	// Output:
 	// after initialization : start of file
@@ -22,10 +20,10 @@ }
 
 func ExampleCursor_String() {
-	c := &common.Cursor{}
+	c := &Cursor{}
 	fmt.Println(c.String())
 
-	c.Move("\nfoobar")
+	c.move("\nfoobar")
 	fmt.Println(c.String())
 
 	// Output:
@@ -51,9 +49,9 @@ func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
 		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
 		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
 	} {
-		c := common.Cursor{}
+		c := Cursor{}
 		for _, s := range test.input {
-			c.Move(s)
+			c.move(s)
 		}
 		if c.Byte != test.byte {
 			t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
diff --git a/tokenize/tokenhandler.go b/tokenize/handler.go
similarity index 59%
rename from tokenize/tokenhandler.go
rename to tokenize/handler.go
index 1d1c891..3a12708 100644
--- a/tokenize/tokenhandler.go
+++ b/tokenize/handler.go
@@ -1,40 +1,47 @@
 package tokenize
 
-// TokenHandler is the function type that is involved in turning a low level
+// Handler is the function type that is involved in turning a low level
 // stream of UTF8 runes into lexical tokens. Its purpose is to check if input
 // data matches some kind of pattern and to report back the results.
 //
-// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
-// indicate whether or not it found a match on the input. The TokenAPI is used
+// A Handler function gets an API as its input and returns a boolean to
+// indicate whether or not it found a match on the input. The API is used
 // for retrieving input data to match against and for reporting back results.
-type TokenHandler func(t *TokenAPI) bool
+type Handler func(t *API) bool
+
+// Match is syntactic sugar that allows you to write a construction like
+// New(handler)(input) as handler.Match(input).
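+//
+// For example (an illustrative sketch, using the A.Integer handler that is
+// defined elsewhere in this package):
+//
+//	result, err := tokenize.A.Integer.Match("42")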
+func (handler Handler) Match(input interface{}) (*Result, error) { + tokenizer := New(handler) + return tokenizer(input) +} // Or is syntactic sugar that allows you to write a construction like // MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2). -func (handler TokenHandler) Or(otherHandler TokenHandler) TokenHandler { +func (handler Handler) Or(otherHandler Handler) Handler { return MatchAny(handler, otherHandler) } // Times is syntactic sugar that allows you to write a construction like // MatchRep(3, handler) as handler.Times(3). -func (handler TokenHandler) Times(n int) TokenHandler { +func (handler Handler) Times(n int) Handler { return MatchRep(n, handler) } // Then is syntactic sugar that allows you to write a construction like // MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3). -func (handler TokenHandler) Then(otherHandler TokenHandler) TokenHandler { +func (handler Handler) Then(otherHandler Handler) Handler { return MatchSeq(handler, otherHandler) } // SeparatedBy is syntactic sugar that allows you to write a construction like // MatchSeparated(handler, separator) as handler.SeparatedBy(separator). -func (handler TokenHandler) SeparatedBy(separatorHandler TokenHandler) TokenHandler { +func (handler Handler) SeparatedBy(separatorHandler Handler) Handler { return MatchSeparated(separatorHandler, handler) } // Optional is syntactic sugar that allows you to write a construction like // MatchOpt(handler) as handler.Optional(). -func (handler TokenHandler) Optional() TokenHandler { +func (handler Handler) Optional() Handler { return MatchOpt(handler) } diff --git a/tokenize/handler_test.go b/tokenize/handler_test.go new file mode 100644 index 0000000..ad22c3b --- /dev/null +++ b/tokenize/handler_test.go @@ -0,0 +1,97 @@ +package tokenize_test + +import ( + "fmt" + "testing" + + "git.makaay.nl/mauricem/go-parsekit/tokenize" +) + +func TestSyntacticSugar(t *testing.T) { + var a = tokenize.A + AssertHandlers(t, []HandlerT{ + {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"}, + {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"}, + {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"}, + {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""}, + {"cccccc", a.Rune('c').Optional(), true, "c"}, + {"dddddd", a.Rune('c').Optional(), true, ""}, + {"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"}, + }) +} + +func ExampleHandler_Times() { + c, a := tokenize.C, tokenize.A + phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9)) + + fmt.Println(phoneNumber.Match("0201234567")) + // Output: + // 0201234567 +} + +func ExampleHandler_Then() { + c, a := tokenize.C, tokenize.A + phoneNumber := a.Rune('0').Then(c.Rep(9, a.Digit)) + + fmt.Println(phoneNumber.Match("0208888888")) + // Output: + // 0208888888 +} + +func ExampleHandler_Or() { + c, a := tokenize.C, tokenize.A + phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Rep(8, a.Digit)) + + fmt.Println(phoneNumber.Match("+31209876543")) + fmt.Println(phoneNumber.Match("0031209876543")) + fmt.Println(phoneNumber.Match("0031020991234")) + fmt.Println(phoneNumber.Match("0031201234")) + // Output: + // +31209876543 + // 0031209876543 + // mismatch at start of file + // mismatch at start of file +} + +func ExampleHandler_SeparatedBy() { + a, t := tokenize.A, tokenize.T + csv := t.Int("number", a.Digits).SeparatedBy(a.Comma) + + r, _ := csv.Match("123,456,7,8,9") + for i, token := range r.Tokens() { + 
fmt.Printf("[%d] %v\n", i, token) + } + // Output: + // [0] number("123", value = (int)123) + // [1] number("456", value = (int)456) + // [2] number("7", value = (int)7) + // [3] number("8", value = (int)8) + // [4] number("9", value = (int)9) +} + +func ExampleHandler_Optional() { + c, a := tokenize.C, tokenize.A + + spanish := c.Seq( + a.Rune('¿').Optional(), + c.OneOrMore(c.Except(a.Question, a.AnyRune)), + a.Rune('?').Optional()) + + fmt.Println(spanish.Match("¿Habla español María?")) + fmt.Println(spanish.Match("Sí, María habla español.")) + // Output: + // ¿Habla español María? + // Sí, María habla español. +} + +func ExampleHandler_Match() { + r, err := tokenize.A.IPv4.Match("001.002.003.004") + fmt.Println(r, err) + + r, err = tokenize.A.IPv4.Match("1.2.3") + fmt.Println(r, err) + + // Output: + // 1.2.3.4 + // mismatch at start of file +} diff --git a/tokenize/tokenhandlers_builtin.go b/tokenize/handlers_builtin.go similarity index 55% rename from tokenize/tokenhandlers_builtin.go rename to tokenize/handlers_builtin.go index 9fd84bf..d01c136 100644 --- a/tokenize/tokenhandlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -9,37 +9,35 @@ import ( "strings" "unicode" "unicode/utf8" - - "git.makaay.nl/mauricem/go-parsekit/common" ) // C provides convenient access to a range of parser/combinators that can be -// used to construct TokenHandler functions. +// used to construct Handler functions. // // Parser/combinators are so called higher order functions that take in one -// or more other TokenHandlers and output a new TokenHandler. They can be -// used to combine TokenHandlers in useful ways to create new more complex -// TokenHandlers. +// or more other Handler functions and output a new Handler. They can be +// used to combine Handler functions in useful ways to create new more complex +// Handler functions. // // When using C in your own parser, then it is advised to create a variable -// to reference it: +// to reference it, for example: // -// var c = tokenize.C +// c := tokenize.C // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var C = struct { - Any func(...TokenHandler) TokenHandler - Not func(TokenHandler) TokenHandler - Opt func(TokenHandler) TokenHandler - Seq func(...TokenHandler) TokenHandler - Rep func(times int, handler TokenHandler) TokenHandler - Min func(min int, handler TokenHandler) TokenHandler - Max func(max int, handler TokenHandler) TokenHandler - ZeroOrMore func(TokenHandler) TokenHandler - OneOrMore func(TokenHandler) TokenHandler - MinMax func(min int, max int, handler TokenHandler) TokenHandler - Separated func(separated TokenHandler, separator TokenHandler) TokenHandler - Except func(except TokenHandler, handler TokenHandler) TokenHandler + Any func(...Handler) Handler + Not func(Handler) Handler + Opt func(Handler) Handler + Seq func(...Handler) Handler + Rep func(times int, handler Handler) Handler + Min func(min int, handler Handler) Handler + Max func(max int, handler Handler) Handler + ZeroOrMore func(Handler) Handler + OneOrMore func(Handler) Handler + MinMax func(min int, max int, handler Handler) Handler + Separated func(separated Handler, separator Handler) Handler + Except func(except Handler, handler Handler) Handler }{ Opt: MatchOpt, Any: MatchAny, @@ -60,91 +58,91 @@ var C = struct { // When using A in your own parser, then it is advised to create a variable // to reference it: // -// var a = tokenize.A +// a := tokenize.A // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. 
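+//
+// For example (an illustrative sketch; the variable name is made up):
+//
+//	a := tokenize.A
+//	alphanumeric := a.Letter.Or(a.Digit)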
var A = struct { - Rune func(rune) TokenHandler - Runes func(...rune) TokenHandler - RuneRange func(rune, rune) TokenHandler - Str func(string) TokenHandler - StrNoCase func(string) TokenHandler - EndOfFile TokenHandler - AnyRune TokenHandler - ValidRune TokenHandler - Space TokenHandler - Tab TokenHandler - CR TokenHandler - LF TokenHandler - CRLF TokenHandler - Excl TokenHandler - DoubleQuote TokenHandler - Hash TokenHandler - Dollar TokenHandler - Percent TokenHandler - Amp TokenHandler - SingleQuote TokenHandler - RoundOpen TokenHandler - LeftParen TokenHandler - RoundClose TokenHandler - RightParen TokenHandler - Asterisk TokenHandler - Multiply TokenHandler - Plus TokenHandler - Add TokenHandler - Comma TokenHandler - Minus TokenHandler - Subtract TokenHandler - Dot TokenHandler - Slash TokenHandler - Divide TokenHandler - Colon TokenHandler - Semicolon TokenHandler - AngleOpen TokenHandler - LessThan TokenHandler - Equal TokenHandler - AngleClose TokenHandler - GreaterThan TokenHandler - Question TokenHandler - At TokenHandler - SquareOpen TokenHandler - Backslash TokenHandler - SquareClose TokenHandler - Caret TokenHandler - Underscore TokenHandler - Backquote TokenHandler - CurlyOpen TokenHandler - Pipe TokenHandler - CurlyClose TokenHandler - Tilde TokenHandler - Newline TokenHandler - Blank TokenHandler - Blanks TokenHandler - Whitespace TokenHandler - EndOfLine TokenHandler - Digit TokenHandler - DigitNotZero TokenHandler - Digits TokenHandler - Float TokenHandler - Boolean TokenHandler - Integer TokenHandler - Signed func(TokenHandler) TokenHandler - IntegerBetween func(min int64, max int64) TokenHandler - ASCII TokenHandler - ASCIILower TokenHandler - ASCIIUpper TokenHandler - Letter TokenHandler - Lower TokenHandler - Upper TokenHandler - HexDigit TokenHandler - Octet TokenHandler - IPv4 TokenHandler - IPv4CIDRMask TokenHandler - IPv4Netmask TokenHandler - IPv4Net TokenHandler - IPv6 TokenHandler - IPv6CIDRMask TokenHandler - IPv6Net TokenHandler + Rune func(rune) Handler + Runes func(...rune) Handler + RuneRange func(rune, rune) Handler + Str func(string) Handler + StrNoCase func(string) Handler + EndOfFile Handler + AnyRune Handler + ValidRune Handler + Space Handler + Tab Handler + CR Handler + LF Handler + CRLF Handler + Excl Handler + DoubleQuote Handler + Hash Handler + Dollar Handler + Percent Handler + Amp Handler + SingleQuote Handler + RoundOpen Handler + LeftParen Handler + RoundClose Handler + RightParen Handler + Asterisk Handler + Multiply Handler + Plus Handler + Add Handler + Comma Handler + Minus Handler + Subtract Handler + Dot Handler + Slash Handler + Divide Handler + Colon Handler + Semicolon Handler + AngleOpen Handler + LessThan Handler + Equal Handler + AngleClose Handler + GreaterThan Handler + Question Handler + At Handler + SquareOpen Handler + Backslash Handler + SquareClose Handler + Caret Handler + Underscore Handler + Backquote Handler + CurlyOpen Handler + Pipe Handler + CurlyClose Handler + Tilde Handler + Newline Handler + Blank Handler + Blanks Handler + Whitespace Handler + EndOfLine Handler + Digit Handler + DigitNotZero Handler + Digits Handler + Float Handler + Boolean Handler + Integer Handler + Signed func(Handler) Handler + IntegerBetween func(min int64, max int64) Handler + ASCII Handler + ASCIILower Handler + ASCIIUpper Handler + Letter Handler + Lower Handler + Upper Handler + HexDigit Handler + Octet Handler + IPv4 Handler + IPv4CIDRMask Handler + IPv4Netmask Handler + IPv4Net Handler + IPv6 Handler + IPv6CIDRMask Handler + 
IPv6Net Handler }{ Rune: MatchRune, Runes: MatchRunes, @@ -229,28 +227,28 @@ var A = struct { } // M provides convenient access to a range of modifiers (which in their nature are -// parser/combinators) that can be used when creating TokenHandler functions. +// parser/combinators) that can be used when creating Handler functions. // -// In parsekit, a modifier is defined as a TokenHandler function that modifies the -// resulting output of another TokenHandler in some way. It does not do any matching +// In parsekit, a modifier is defined as a Handler function that modifies the +// resulting output of another Handler in some way. It does not do any matching // against input of its own. // // When using M in your own parser, then it is advised to create a variable // to reference it: // -// var m = tokenize.M +// m := tokenize.M // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. var M = struct { - Drop func(TokenHandler) TokenHandler - Trim func(handler TokenHandler, cutset string) TokenHandler - TrimLeft func(handler TokenHandler, cutset string) TokenHandler - TrimRight func(handler TokenHandler, cutset string) TokenHandler - TrimSpace func(handler TokenHandler) TokenHandler - ToLower func(TokenHandler) TokenHandler - ToUpper func(TokenHandler) TokenHandler - Replace func(handler TokenHandler, replaceWith string) TokenHandler - ByCallback func(TokenHandler, func(string) string) TokenHandler + Drop func(Handler) Handler + Trim func(handler Handler, cutset string) Handler + TrimLeft func(handler Handler, cutset string) Handler + TrimRight func(handler Handler, cutset string) Handler + TrimSpace func(handler Handler) Handler + ToLower func(Handler) Handler + ToUpper func(Handler) Handler + Replace func(handler Handler, replaceWith string) Handler + ByCallback func(Handler, func(string) string) Handler }{ Drop: ModifyDrop, Trim: ModifyTrim, @@ -264,34 +262,34 @@ var M = struct { } // T provides convenient access to a range of Token producers (which in their -// nature are parser/combinators) that can be used when creating TokenHandler +// nature are parser/combinators) that can be used when creating Handler // functions. // // When using T in your own parser, then it is advised to create a variable // to reference it: // -// var t = tokenize.T +// t := tokenize.T // // Doing so saves you a lot of typing, and it makes your code a lot cleaner. 
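+//
+// For example (an illustrative sketch, following the same pattern as the
+// package's own tests; the variable name is made up):
+//
+//	t := tokenize.T
+//	number := t.Int("number", tokenize.A.Digits)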
var T = struct { - Str func(interface{}, TokenHandler) TokenHandler - StrInterpreted func(interface{}, TokenHandler) TokenHandler - Byte func(interface{}, TokenHandler) TokenHandler - Rune func(interface{}, TokenHandler) TokenHandler - Int func(interface{}, TokenHandler) TokenHandler - Int8 func(interface{}, TokenHandler) TokenHandler - Int16 func(interface{}, TokenHandler) TokenHandler - Int32 func(interface{}, TokenHandler) TokenHandler - Int64 func(interface{}, TokenHandler) TokenHandler - Uint func(interface{}, TokenHandler) TokenHandler - Uint8 func(interface{}, TokenHandler) TokenHandler - Uint16 func(interface{}, TokenHandler) TokenHandler - Uint32 func(interface{}, TokenHandler) TokenHandler - Uint64 func(interface{}, TokenHandler) TokenHandler - Float32 func(interface{}, TokenHandler) TokenHandler - Float64 func(interface{}, TokenHandler) TokenHandler - Boolean func(interface{}, TokenHandler) TokenHandler - ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler + Str func(interface{}, Handler) Handler + StrInterpreted func(interface{}, Handler) Handler + Byte func(interface{}, Handler) Handler + Rune func(interface{}, Handler) Handler + Int func(interface{}, Handler) Handler + Int8 func(interface{}, Handler) Handler + Int16 func(interface{}, Handler) Handler + Int32 func(interface{}, Handler) Handler + Int64 func(interface{}, Handler) Handler + Uint func(interface{}, Handler) Handler + Uint8 func(interface{}, Handler) Handler + Uint16 func(interface{}, Handler) Handler + Uint32 func(interface{}, Handler) Handler + Uint64 func(interface{}, Handler) Handler + Float32 func(interface{}, Handler) Handler + Float64 func(interface{}, Handler) Handler + Boolean func(interface{}, Handler) Handler + ByCallback func(Handler, func(t *API) *Token) Handler }{ Str: MakeStrLiteralToken, StrInterpreted: MakeStrInterpretedToken, @@ -313,64 +311,64 @@ var T = struct { ByCallback: MakeTokenByCallback, } -// MatchRune creates a TokenHandler function that matches against the provided rune. -func MatchRune(expected rune) TokenHandler { +// MatchRune creates a Handler function that matches against the provided rune. +func MatchRune(expected rune) Handler { return MatchRuneByCallback(func(r rune) bool { return r == expected }) } -// MatchRunes creates a TokenHandler function that checks if the input matches +// MatchRunes creates a Handler function that checks if the input matches // one of the provided runes. -func MatchRunes(expected ...rune) TokenHandler { +func MatchRunes(expected ...rune) Handler { s := string(expected) return MatchRuneByCallback(func(r rune) bool { return strings.ContainsRune(s, r) }) } -// MatchRuneRange creates a TokenHandler function that checks if the input +// MatchRuneRange creates a Handler function that checks if the input // matches the provided rune range. The rune range is defined by a start and // an end rune, inclusive, so: // // MatchRuneRange('g', 'k') // -// creates a TokenHandler that will match any of 'g', 'h', 'i', 'j' or 'k'. -func MatchRuneRange(start rune, end rune) TokenHandler { +// creates a Handler that will match any of 'g', 'h', 'i', 'j' or 'k'. 
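+//
+// An illustrative use (the variable name is made up):
+//
+//	hexLetter := MatchRuneRange('a', 'f')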
+func MatchRuneRange(start rune, end rune) Handler {
 	if end < start {
-		common.CallerPanic(1, "TokenHandler: MatchRuneRange definition error at {caller}: start %q must not be < end %q", start, end)
+		callerPanic(1, "Handler: MatchRuneRange definition error at {caller}: start %q must not be > end %q", start, end)
 	}
 	return MatchRuneByCallback(func(r rune) bool { return r >= start && r <= end })
 }
 
-// MatchBlank creates a TokenHandler that matches one rune from the input
+// MatchBlank creates a Handler that matches one rune from the input
 // against blank characters, meaning tabs and spaces.
 //
 // When you need whitespace matching, which also includes characters like
 // newlines, then take a look at MatchWhitespace().
-func MatchBlank() TokenHandler {
+func MatchBlank() Handler {
 	return MatchRuneByCallback(func(r rune) bool { return r == ' ' || r == '\t' })
 }
 
-// MatchBlanks creates a TokenHandler that matches the input against one
+// MatchBlanks creates a Handler that matches the input against one
 // or more blank characters, meaning tabs and spaces.
 //
 // When you need whitespace matching, which also includes characters like
 // newlines, then make use of MatchWhitespace().
-func MatchBlanks() TokenHandler {
+func MatchBlanks() Handler {
 	return MatchOneOrMore(MatchBlank())
 }
 
-// MatchWhitespace creates a TokenHandler that matches the input against one or more
+// MatchWhitespace creates a Handler that matches the input against one or more
 // whitespace characters, as defined by unicode.
-func MatchWhitespace() TokenHandler {
+func MatchWhitespace() Handler {
 	return MatchOneOrMore(MatchRuneByCallback(unicode.IsSpace))
 }
 
-// MatchRuneByCallback creates a TokenHandler that matches a single rune from the
+// MatchRuneByCallback creates a Handler that matches a single rune from the
 // input against the provided callback function. When the callback returns true,
 // it is considered a match.
 //
 // Note that the callback function matches the signature of the unicode.Is* functions,
 // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
-func MatchRuneByCallback(callback func(rune) bool) TokenHandler {
-	return func(t *TokenAPI) bool {
+func MatchRuneByCallback(callback func(rune) bool) Handler {
+	return func(t *API) bool {
 		input, err := t.NextRune()
 		if err == nil && callback(input) {
 			t.Accept()
@@ -380,24 +378,24 @@ func MatchRuneByCallback(callback func(rune) bool) Handler {
 	}
 }
 
-// MatchEndOfLine creates a TokenHandler that matches a newline ("\r\n" or "\n") or EOF.
-func MatchEndOfLine() TokenHandler {
+// MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF.
+func MatchEndOfLine() Handler {
 	return MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile())
 }
 
-// MatchStr creates a TokenHandler that matches the input against the provided string.
-func MatchStr(expected string) TokenHandler {
-	var handlers = []TokenHandler{}
+// MatchStr creates a Handler that matches the input against the provided string.
+func MatchStr(expected string) Handler {
+	var handlers = []Handler{}
 	for _, r := range expected {
 		handlers = append(handlers, MatchRune(r))
 	}
 	return MatchSeq(handlers...)
 }
 
-// MatchStrNoCase creates a TokenHandler that matches the input against the
+// MatchStrNoCase creates a Handler that matches the input against the
 // provided string in a case-insensitive manner.
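+//
+// An illustrative use (the variable name is made up):
+//
+//	yes := MatchStrNoCase("yes") // matches "yes", "YES", "Yes", "yEs", ...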
-func MatchStrNoCase(expected string) TokenHandler {
-	var handlers = []TokenHandler{}
+func MatchStrNoCase(expected string) Handler {
+	var handlers = []Handler{}
 	for _, r := range expected {
 		u := unicode.ToUpper(r)
 		l := unicode.ToLower(r)
@@ -406,19 +404,19 @@ func MatchStrNoCase(expected string) TokenHandler {
 	return MatchSeq(handlers...)
 }
 
-// MatchOpt creates a TokenHandler that makes the provided TokenHandler optional.
-// When the provided TokenHandler applies, then its output is used, otherwise
+// MatchOpt creates a Handler that makes the provided Handler optional.
+// When the provided Handler applies, its output is used; otherwise
 // no output is generated, but a successful match is still reported
 // (with an empty result).
-func MatchOpt(handler TokenHandler) TokenHandler {
+func MatchOpt(handler Handler) Handler {
 	return MatchMinMax(0, 1, handler)
 }
 
-// MatchSeq creates a TokenHandler that checks if the provided TokenHandlers can be
-// applied in their exact order. Only if all TokenHandlers apply, the sequence
+// MatchSeq creates a Handler that checks if the provided Handlers can be
+// applied in their exact order. Only if all Handlers apply does the sequence
 // report a successful match.
-func MatchSeq(handlers ...TokenHandler) TokenHandler {
-	return func(t *TokenAPI) bool {
+func MatchSeq(handlers ...Handler) Handler {
+	return func(t *API) bool {
 		child := t.Fork()
 		for _, handler := range handlers {
 			subchild := child.Fork()
@@ -432,11 +430,11 @@ func MatchSeq(handlers ...TokenHandler) TokenHandler {
 	}
 }
 
-// MatchAny creates a TokenHandler that checks if any of the provided TokenHandlers
-// can be applied. They are applied in their provided order. The first TokenHandler
+// MatchAny creates a Handler that checks if any of the provided Handlers
+// can be applied. They are applied in their provided order. The first Handler
 // that applies is used for reporting back a match.
-func MatchAny(handlers ...TokenHandler) TokenHandler {
-	return func(t *TokenAPI) bool {
+func MatchAny(handlers ...Handler) Handler {
+	return func(t *API) bool {
 		for _, handler := range handlers {
 			child := t.Fork()
 			if handler(child) {
@@ -448,11 +446,11 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
 	}
 }
 
-// MatchNot creates a TokenHandler that checks if the provided TokenHandler applies to
+// MatchNot creates a Handler that checks if the provided Handler applies to
 // the current input. If it does, then a failed match will be reported. If it
 // does not, then the next rune from the input will be reported as a match.
-func MatchNot(handler TokenHandler) TokenHandler {
-	return func(t *TokenAPI) bool {
+func MatchNot(handler Handler) Handler {
+	return func(t *API) bool {
 		if handler(t.Fork()) {
 			return false
 		}
@@ -465,7 +463,7 @@ func MatchNot(handler TokenHandler) TokenHandler {
 	}
 }
 
-// MatchRep creates a TokenHandler that checks if the provided TokenHandler can be
+// MatchRep creates a Handler that checks if the provided Handler can be
 // applied exactly the provided number of times.
 //
 // Note that the input can contain more than the provided number of matches, e.g.:
@@ -478,62 +476,62 @@ func MatchNot(handler TokenHandler) TokenHandler {
 // Another way to use this method, is by applying the following syntactic sugar:
 //
 //   MatchRune('X').Times(4)
-func MatchRep(times int, handler TokenHandler) TokenHandler {
+func MatchRep(times int, handler Handler) Handler {
 	return matchMinMax(times, times, handler, "MatchRep")
 }
 
-// MatchMin creates a TokenHandler that checks if the provided TokenHandler can be
+// MatchMin creates a Handler that checks if the provided Handler can be
 // applied at least the provided minimum number of times.
 // When more matches are possible, these will be included in the output.
-func MatchMin(min int, handler TokenHandler) TokenHandler {
+func MatchMin(min int, handler Handler) Handler {
 	if min < 0 {
-		common.CallerPanic(1, "TokenHandler: MatchMin definition error at {caller}: min must be >= 0")
+		callerPanic(1, "Handler: MatchMin definition error at {caller}: min must be >= 0")
 	}
 	return matchMinMax(min, -1, handler, "MatchMin")
}
 
-// MatchMax creates a TokenHandler that checks if the provided TokenHandler can be
+// MatchMax creates a Handler that checks if the provided Handler can be
 // applied at most the provided maximum number of times.
 // Matches up to the provided maximum will be included in the output.
 // Zero matches are considered a successful match.
-func MatchMax(max int, handler TokenHandler) TokenHandler {
+func MatchMax(max int, handler Handler) Handler {
 	if max < 0 {
-		common.CallerPanic(1, "TokenHandler: MatchMax definition error at {caller}: max must be >= 0")
+		callerPanic(1, "Handler: MatchMax definition error at {caller}: max must be >= 0")
 	}
 	return matchMinMax(0, max, handler, "MatchMax")
 }
 
-// MatchZeroOrMore creates a TokenHandler that checks if the provided TokenHandler can
+// MatchZeroOrMore creates a Handler that checks if the provided Handler can
 // be applied zero or more times. All matches will be included in the output.
 // Zero matches are considered a successful match.
-func MatchZeroOrMore(handler TokenHandler) TokenHandler {
+func MatchZeroOrMore(handler Handler) Handler {
 	return matchMinMax(0, -1, handler, "MatchZeroOrMore")
 }
 
-// MatchOneOrMore creates a TokenHandler that checks if the provided TokenHandler can
+// MatchOneOrMore creates a Handler that checks if the provided Handler can
 // be applied one or more times. All matches will be included in the output.
-func MatchOneOrMore(handler TokenHandler) TokenHandler {
+func MatchOneOrMore(handler Handler) Handler {
 	return matchMinMax(1, -1, handler, "MatchOneOrMore")
 }
 
-// MatchMinMax creates a TokenHandler that checks if the provided TokenHandler can
+// MatchMinMax creates a Handler that checks if the provided Handler can
 // be applied between the provided minimum and maximum number of times,
 // inclusive. All matches will be included in the output.
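A sketch of the repetition combinators above in use (illustration only, not part of the patch; that matching is greedy up to the maximum is inferred from the MatchRep test case, so treat the exact output as an assumption):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		// Accept between 2 and 4 'a' runes, inclusive.
		handler := tokenize.MatchMinMax(2, 4, tokenize.MatchRune('a'))
		api := tokenize.NewAPI(strings.NewReader("aaaaaa"))
		fmt.Println(handler(api), api.Result().String()) // true aaaa
	}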
-func MatchMinMax(min int, max int, handler TokenHandler) TokenHandler {
+func MatchMinMax(min int, max int, handler Handler) Handler {
 	if max < 0 {
-		common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: max must be >= 0")
+		callerPanic(1, "Handler: MatchMinMax definition error at {caller}: max must be >= 0")
 	}
 	if min < 0 {
-		common.CallerPanic(1, "TokenHandler: MatchMinMax definition error at {caller}: min must be >= 0")
+		callerPanic(1, "Handler: MatchMinMax definition error at {caller}: min must be >= 0")
 	}
 	return matchMinMax(min, max, handler, "MatchMinMax")
 }
 
-func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandler {
+func matchMinMax(min int, max int, handler Handler, name string) Handler {
 	if max >= 0 && min > max {
-		common.CallerPanic(2, "TokenHandler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
+		callerPanic(2, "Handler: %s definition error at {caller}: max %d must not be < min %d", name, max, min)
 	}
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		total := 0
 		// Check for the minimum required amount of matches.
 		for total < min {
@@ -559,20 +557,20 @@ func matchMinMax(min int, max int, handler TokenHandler, name string) TokenHandl
 	}
 }
 
-// MatchSeparated creates a TokenHandler that checks for a pattern of one or more
-// TokenHandlers of one type (the separated), separated by TokenHandler of another type
+// MatchSeparated creates a Handler that checks for a pattern of one or more
+// Handlers of one type (the separated), separated by a Handler of another type
 // (the separator). All matches (separated + separator) are included in the
 // output.
-func MatchSeparated(separator TokenHandler, separated TokenHandler) TokenHandler {
+func MatchSeparated(separator Handler, separated Handler) Handler {
 	return MatchSeq(separated, MatchZeroOrMore(MatchSeq(separator, separated)))
 }
 
-// MatchExcept creates a TokenHandler that checks if the provided TokenHandler can be
-// applied to the upcoming input. It also checks if the except TokenHandler can be
-// applied. If the handler applies, but the except TokenHandler as well, then the match
+// MatchExcept creates a Handler that checks if the provided Handler can be
+// applied to the upcoming input. It also checks if the except Handler can be
+// applied. If the handler applies, but the except Handler does as well, then the match
 // as a whole will be treated as a mismatch.
-func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
-	return func(t *TokenAPI) bool {
+func MatchExcept(except Handler, handler Handler) Handler {
+	return func(t *API) bool {
 		if except(t.Fork()) {
 			return false
 		}
@@ -580,26 +578,26 @@ func MatchExcept(except TokenHandler, handler TokenHandler) TokenHandler {
 	}
 }
 
-// MatchSigned creates a TokenHandler that checks if the provided TokenHandler is
+// MatchSigned creates a Handler that checks if the provided Handler is
 // prefixed by an optional '+' or '-' sign. This can be used to turn numeric
 // atoms into a signed version, e.g.
 //
 //   C.Signed(A.Integer)
-func MatchSigned(handler TokenHandler) TokenHandler {
+func MatchSigned(handler Handler) Handler {
 	sign := MatchOpt(MatchAny(MatchRune('+'), MatchRune('-')))
 	return MatchSeq(sign, handler)
 }
 
-// MatchIntegerBetween creates a TokenHandler that checks for an integer
+// MatchIntegerBetween creates a Handler that checks for an integer
 // value between the provided min and max boundaries (inclusive).
// It uses an int64 for checking internally, so you can check values // ranging from -9223372036854775808 to 9223372036854775807. -func MatchIntegerBetween(min int64, max int64) TokenHandler { +func MatchIntegerBetween(min int64, max int64) Handler { if max < min { - common.CallerPanic(1, "TokenHandler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min) + callerPanic(1, "Handler: MatchIntegerBetween definition error at {caller}: max %d must not be < min %d", max, min) } digits := MatchSigned(MatchDigits()) - return func(t *TokenAPI) bool { + return func(t *API) bool { if !digits(t) { return false } @@ -611,22 +609,22 @@ func MatchIntegerBetween(min int64, max int64) TokenHandler { } } -// MatchEndOfFile creates a TokenHandler that checks if the end of the input data -// has been reached. This TokenHandler will never produce output. It only reports +// MatchEndOfFile creates a Handler that checks if the end of the input data +// has been reached. This Handler will never produce output. It only reports // a successful or a failing match through its boolean return value. -func MatchEndOfFile() TokenHandler { - return func(t *TokenAPI) bool { +func MatchEndOfFile() Handler { + return func(t *API) bool { child := t.Fork() _, err := child.NextRune() return err == io.EOF } } -// MatchAnyRune creates a TokenHandler function that checks if a rune can be +// MatchAnyRune creates a Handler function that checks if a rune can be // read from the input. Invalid runes on the input are replaced with the UTF8 // replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �. -func MatchAnyRune() TokenHandler { - return func(t *TokenAPI) bool { +func MatchAnyRune() Handler { + return func(t *API) bool { _, err := t.NextRune() if err == nil { t.Accept() @@ -636,10 +634,10 @@ func MatchAnyRune() TokenHandler { } } -// MatchValidRune creates a TokenHandler function that checks if a valid +// MatchValidRune creates a Handler function that checks if a valid // UTF8 rune can be read from the input. -func MatchValidRune() TokenHandler { - return func(t *TokenAPI) bool { +func MatchValidRune() Handler { + return func(t *API) bool { r, err := t.NextRune() if err == nil && r != utf8.RuneError { t.Accept() @@ -649,98 +647,98 @@ func MatchValidRune() TokenHandler { } } -// MatchDigit creates a TokenHandler that checks if a single digit can be read +// MatchDigit creates a Handler that checks if a single digit can be read // from the input. -func MatchDigit() TokenHandler { +func MatchDigit() Handler { return MatchRuneRange('0', '9') } -// MatchDigits creates a TokenHandler that checks if one or more digits can be read +// MatchDigits creates a Handler that checks if one or more digits can be read // from the input. -func MatchDigits() TokenHandler { +func MatchDigits() Handler { return MatchOneOrMore(MatchDigit()) } -// MatchDigitNotZero creates a TokenHandler that checks if a single digit not equal +// MatchDigitNotZero creates a Handler that checks if a single digit not equal // to zero '0' can be read from the input. -func MatchDigitNotZero() TokenHandler { +func MatchDigitNotZero() Handler { return MatchRuneRange('1', '9') } -// MatchInteger creates a TokenHandler function that checks if a valid integer +// MatchInteger creates a Handler function that checks if a valid integer // can be read from the input. In line with Go, an integer cannot start with // a zero. Starting with a zero is used to indicate other bases, like octal or // hexadecimal. 
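The leading-zero rule described above can be seen in a small sketch (illustration only, not part of the patch):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		integer := tokenize.MatchInteger()
		for _, input := range []string{"123", "0123"} {
			api := tokenize.NewAPI(strings.NewReader(input))
			if integer(api) {
				// "123" matches fully; on "0123" only the "0" matches,
				// since a leading zero signals another base.
				fmt.Printf("%q -> %q\n", input, api.Result().String())
			}
		}
	}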
-func MatchInteger() TokenHandler {
+func MatchInteger() Handler {
 	justZero := MatchRune('0')
 	integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
 	return MatchAny(integer, justZero)
 }
 
-// MatchFloat creates a TokenHandler function that checks if a valid float value
+// MatchFloat creates a Handler function that checks if a valid float value
 // can be read from the input. In case the fractional part is missing, this
-// TokenHandler will report a match, so both "123" and "123.123" will match.
-func MatchFloat() TokenHandler {
+// Handler will report a match, so both "123" and "123.123" will match.
+func MatchFloat() Handler {
 	digits := MatchDigits()
 	return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
 }
 
-// MatchBoolean creates a TokenHandler function that checks if a boolean
+// MatchBoolean creates a Handler function that checks if a boolean
 // value can be read from the input. It supports the boolean values as understood
 // by Go's strconv.ParseBool() function.
 //
 // True values: true, TRUE, True, 1, t, T
 //
 // False values: false, FALSE, False, 0, f, F
-func MatchBoolean() TokenHandler {
+func MatchBoolean() Handler {
 	trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T'))
 	falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F'))
 	return MatchAny(trues, falses)
 }
 
-// MatchASCII creates a TokenHandler function that matches against any
+// MatchASCII creates a Handler function that matches against any
 // ASCII value on the input.
-func MatchASCII() TokenHandler {
+func MatchASCII() Handler {
 	return MatchRuneRange('\x00', '\x7F')
 }
 
-// MatchASCIILower creates a TokenHandler function that matches against any
+// MatchASCIILower creates a Handler function that matches against any
 // lower case ASCII letter on the input (a - z).
-func MatchASCIILower() TokenHandler {
+func MatchASCIILower() Handler {
 	return MatchRuneRange('a', 'z')
 }
 
-// MatchASCIIUpper creates a TokenHandler function that matches against any
+// MatchASCIIUpper creates a Handler function that matches against any
 // upper case ASCII letter on the input (A - Z).
-func MatchASCIIUpper() TokenHandler {
+func MatchASCIIUpper() Handler {
 	return MatchRuneRange('A', 'Z')
 }
 
-// MatchUnicodeLetter creates a TokenHandler function that matches against any
+// MatchUnicodeLetter creates a Handler function that matches against any
 // unicode letter on the input (see unicode.IsLetter(rune)).
-func MatchUnicodeLetter() TokenHandler {
+func MatchUnicodeLetter() Handler {
 	return MatchRuneByCallback(unicode.IsLetter)
 }
 
-// MatchUnicodeUpper creates a TokenHandler function that matches against any
+// MatchUnicodeUpper creates a Handler function that matches against any
 // upper case unicode letter on the input (see unicode.IsUpper(rune)).
-func MatchUnicodeUpper() TokenHandler {
+func MatchUnicodeUpper() Handler {
 	return MatchRuneByCallback(unicode.IsUpper)
 }
 
-// MatchUnicodeLower creates a TokenHandler function that matches against any
+// MatchUnicodeLower creates a Handler function that matches against any
 // lower case unicode letter on the input (see unicode.IsLower(rune)).
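Note how the multi-rune alternatives in MatchBoolean above are listed before the single-rune ones, so "True" is consumed as a whole rather than stopping at "T". A sketch (illustration only, not part of the patch):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		boolean := tokenize.MatchBoolean()
		api := tokenize.NewAPI(strings.NewReader("True"))
		if boolean(api) {
			fmt.Println(api.Result().String()) // True, not just T
		}
	}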
-func MatchUnicodeLower() TokenHandler {
+func MatchUnicodeLower() Handler {
 	return MatchRuneByCallback(unicode.IsLower)
 }
 
-// MatchHexDigit creates a TokenHandler function that check if a single hexadecimal
+// MatchHexDigit creates a Handler function that checks if a single hexadecimal
 // digit can be read from the input.
-func MatchHexDigit() TokenHandler {
+func MatchHexDigit() Handler {
 	return MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F'))
 }
 
-// MatchOctet creates a TokenHandler function that checks if a valid octet value
+// MatchOctet creates a Handler function that checks if a valid octet value
 // can be read from the input (octet = byte value representation, with a value
 // between 0 and 255 inclusive). It only looks at the first 1 to 3 upcoming
 // digits, not if there's a non-digit after it, meaning that "123255" would be
@@ -748,9 +746,9 @@ func MatchHexDigit() TokenHandler {
 //
 // When the normalize parameter is set to true, then leading zeroes will be
 // stripped from the octet.
-func MatchOctet(normalize bool) TokenHandler {
+func MatchOctet(normalize bool) Handler {
 	max3Digits := MatchMinMax(1, 3, MatchDigit())
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		if !max3Digits(t) {
 			return false
 		}
@@ -769,24 +767,24 @@ func MatchOctet(normalize bool) TokenHandler {
 	}
 }
 
-// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
+// MatchIPv4 creates a Handler function that checks if a valid IPv4
 // IP address value can be read from the input.
 //
 // When the normalize parameter is true, IP-addresses that look like
 // "192.168.001.012" will be normalized to "192.168.1.12".
-func MatchIPv4(normalize bool) TokenHandler {
+func MatchIPv4(normalize bool) Handler {
 	octet := MatchOctet(normalize)
 	dot := MatchRune('.')
 	return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
 }
 
-// MatchIPv4CIDRMask creates a TokenHandler function that checks if a
+// MatchIPv4CIDRMask creates a Handler function that checks if a
 // valid IPv4 CIDR mask (0 - 32) value can be read from the input.
-func MatchIPv4CIDRMask(normalize bool) TokenHandler {
+func MatchIPv4CIDRMask(normalize bool) Handler {
 	return matchCIDRMask(32, normalize)
 }
 
-// MatchIPv4Netmask creates a TokenHandler function that checks if a valid
+// MatchIPv4Netmask creates a Handler function that checks if a valid
 // IPv4 netmask can be read from input (e.g. 255.255.255.0).
 // Only a netmask in canonical form is accepted (meaning that in binary form
 // it starts with zero or more 1-bits, followed by only 0-bits up to the
@@ -794,12 +792,12 @@ func MatchIPv4CIDRMask(normalize bool) TokenHandler {
 //
 // When the normalize parameter is true, netmasks that look like
 // "255.255.192.000" will be normalized to "255.255.192.0".
-func MatchIPv4Netmask(normalize bool) TokenHandler {
+func MatchIPv4Netmask(normalize bool) Handler {
 	octet := MakeUint8Token(nil, MatchOctet(normalize))
 	dot := MatchRune('.')
 	netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
 
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		if !netmask(t) {
 			return false
 		}
@@ -817,14 +815,14 @@ func MatchIPv4Netmask(normalize bool) TokenHandler {
 	}
 }
 
-// MatchIPv4Net creates a TokenHandler function that checks the input for an
+// MatchIPv4Net creates a Handler function that checks the input for an
 // IPv4 + mask input. Both <ip>/<cidrmask> (e.g. 192.168.0.1/24) and <ip>/<netmask>
 // (e.g. 172.16.10.254/255.255.192.0) are acceptable.
 //
 // When the normalize parameter is true, then the IP address and the mask are
 // normalized. The mask will be normalized to cidr, so the above example would
 // be normalized to 172.16.10.254/18.
-func MatchIPv4Net(normalize bool) TokenHandler {
+func MatchIPv4Net(normalize bool) Handler {
 	ip := MakeStrLiteralToken("ip", MatchIPv4(normalize))
 	slash := MatchRune('/')
 	mask := MatchAny(
@@ -832,7 +830,7 @@ func MatchIPv4Net(normalize bool) TokenHandler {
 		MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
 	ipnet := MatchSeq(ip, slash, mask)
 
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		if !ipnet(t) {
 			return false
 		}
@@ -858,14 +856,14 @@ func MatchIPv4Net(normalize bool) TokenHandler {
 	}
 }
 
-// MatchIPv6 creates a TokenHandler function that checks if an IPv6 address
+// MatchIPv6 creates a Handler function that checks if an IPv6 address
 // can be read from the input.
-func MatchIPv6(normalize bool) TokenHandler {
+func MatchIPv6(normalize bool) Handler {
 	hextet := MatchMinMax(1, 4, MatchHexDigit())
 	colon := MatchRune(':')
 	empty := MatchSeq(colon, colon)
 
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		nrOfHextets := 0
 		for nrOfHextets < 8 {
 			if hextet(t) {
@@ -894,20 +892,20 @@ func MatchIPv6(normalize bool) TokenHandler {
 	}
 }
 
-// MatchIPv6CIDRMask creates a TokenHandler function that checks if a
+// MatchIPv6CIDRMask creates a Handler function that checks if a
 // valid IPv6 CIDR mask (0 - 128) value can be read from the input.
-func MatchIPv6CIDRMask(normalize bool) TokenHandler {
+func MatchIPv6CIDRMask(normalize bool) Handler {
 	return matchCIDRMask(128, normalize)
 }
 
-func matchCIDRMask(bits int64, normalize bool) TokenHandler {
+func matchCIDRMask(bits int64, normalize bool) Handler {
 	mask := MatchIntegerBetween(0, bits)
 	if !normalize {
 		return mask
 	}
-	return func(t *TokenAPI) bool {
+	return func(t *API) bool {
 		if !mask(t) {
 			return false
 		}
@@ -918,22 +916,22 @@ func matchCIDRMask(bits int64, normalize bool) TokenHandler {
 	}
 }
 
-// MatchIPv6Net creates a TokenHandler function that checks the input for an
+// MatchIPv6Net creates a Handler function that checks the input for an
 // IPv6 + mask input, e.g. fe80:0:0:0:0216:3eff:fe96:0002/64.
 //
 // When the normalize parameter is true, then the IP address and the mask are
 // normalized. The above example would be normalized to fe80::216:3eff:fe96:2/64.
-func MatchIPv6Net(normalize bool) TokenHandler {
+func MatchIPv6Net(normalize bool) Handler {
 	ip := MatchIPv6(normalize)
 	slash := MatchRune('/')
 	mask := MatchIPv6CIDRMask(normalize)
 	return MatchSeq(ip, slash, mask)
 }
 
-// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
-// If it does, then its output is discarded completely.
+// ModifyDrop creates a Handler that checks if the provided Handler applies.
+// If it does, then its output is disposed of completely.
 //
-// Note that if the TokenHandler does not apply, a mismatch will be reported back,
+// Note that if the Handler does not apply, a mismatch will be reported back,
 // even though we would have dropped the output anyway. So if you would like
 // to drop optional blanks (spaces and tabs), then use something like:
 //
@@ -946,34 +944,34 @@ func MatchIPv6Net(normalize bool) TokenHandler {
 // Since A.Blanks is defined as "1 or more spaces and/or tabs", the input
 // string "bork" would not match against the second form, but " bork" would.
 // In both cases, it would match the first form.
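The first form discussed above looks like this in practice (a sketch using the C/A/M aliases from this package, mirroring a pattern from the tests below; illustration only, not part of the patch):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		c, a, m := tokenize.C, tokenize.A, tokenize.M
		// Optional blanks, dropped from the output; matches with or
		// without leading blanks.
		handler := c.Seq(m.Drop(c.Opt(a.Blanks)), a.Str("bork"))
		api := tokenize.NewAPI(strings.NewReader("   bork"))
		if handler(api) {
			fmt.Println(api.Result().String()) // bork
		}
	}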
-func ModifyDrop(handler TokenHandler) TokenHandler {
+func ModifyDrop(handler Handler) Handler {
 	return ModifyByCallback(handler, func(s string) string { return "" })
 }
 
-// ModifyTrim creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyTrim creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is taken and characters from the provided
 // cutset are trimmed from both the left and the right of the output.
-func ModifyTrim(handler TokenHandler, cutset string) TokenHandler {
+func ModifyTrim(handler Handler, cutset string) Handler {
 	return modifyTrim(handler, cutset, true, true)
 }
 
-// ModifyTrimLeft creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyTrimLeft creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is taken and characters from the provided
 // cutset are trimmed from the left of the output.
-func ModifyTrimLeft(handler TokenHandler, cutset string) TokenHandler {
+func ModifyTrimLeft(handler Handler, cutset string) Handler {
 	return modifyTrim(handler, cutset, true, false)
 }
 
-// ModifyTrimRight creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyTrimRight creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is taken and characters from the provided
 // cutset are trimmed from the right of the output.
-func ModifyTrimRight(handler TokenHandler, cutset string) TokenHandler {
+func ModifyTrimRight(handler Handler, cutset string) Handler {
 	return modifyTrim(handler, cutset, false, true)
 }
 
-func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bool) TokenHandler {
+func modifyTrim(handler Handler, cutset string, trimLeft bool, trimRight bool) Handler {
 	modfunc := func(s string) string {
 		if trimLeft {
 			s = strings.TrimLeft(s, cutset)
@@ -986,42 +984,42 @@ func modifyTrim(handler TokenHandler, cutset string, trimLeft bool, trimRight bo
 	return ModifyByCallback(handler, modfunc)
 }
 
-// ModifyTrimSpace creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyTrimSpace creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is taken and all leading and trailing whitespace characters,
 // as defined by Unicode, are removed from it.
-func ModifyTrimSpace(handler TokenHandler) TokenHandler {
+func ModifyTrimSpace(handler Handler) Handler {
 	return ModifyByCallback(handler, strings.TrimSpace)
 }
 
-// ModifyToUpper creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyToUpper creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is converted to upper case.
-func ModifyToUpper(handler TokenHandler) TokenHandler {
+func ModifyToUpper(handler Handler) Handler {
 	return ModifyByCallback(handler, strings.ToUpper)
 }
 
-// ModifyToLower creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyToLower creates a Handler that checks if the provided Handler applies.
 // If it does, then its output is converted to lower case.
-func ModifyToLower(handler TokenHandler) TokenHandler {
+func ModifyToLower(handler Handler) Handler {
 	return ModifyByCallback(handler, strings.ToLower)
 }
 
-// ModifyReplace creates a TokenHandler that checks if the provided TokenHandler applies.
+// ModifyReplace creates a Handler that checks if the provided Handler applies. // If it does, then its output is replaced by the provided string. -func ModifyReplace(handler TokenHandler, replaceWith string) TokenHandler { +func ModifyReplace(handler Handler, replaceWith string) Handler { return ModifyByCallback(handler, func(string) string { return replaceWith }) } -// ModifyByCallback creates a TokenHandler that checks if the provided TokenHandler applies. +// ModifyByCallback creates a Handler that checks if the provided Handler applies. // If it does, then its output is taken and it is fed to the provided modfunc. // This is a simple function that takes a string on input and returns a possibly // modified string on output. The return value of the modfunc will replace the // resulting output. -func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHandler { - return func(t *TokenAPI) bool { +func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { + return func(t *API) bool { child := t.Fork() if handler(child) { s := modfunc(child.Result().String()) @@ -1033,24 +1031,24 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa } } -// MakeStrLiteralToken creates a TokenHandler that will add a Token to the -// TokenHandlerResult, for which the Token.Value is set to a string-typed +// MakeStrLiteralToken creates a Handler that will add a Token to the +// Result, for which the Token.Value is set to a string-typed // representation of the read Runes. This string is literal, meaning that an // escape sequence like "\n" is kept as-is (a backslash character, followed by // an 'n'-character). -func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler { - return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { +func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { + return MakeTokenByCallback(handler, func(t *API) *Token { literal := t.Result().String() return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal} }) } -// MakeStrInterpretedToken creates a TokenHandler that will add a Token to the -// TokenHandlerResult, for which the Token.Value is set to a string-typed +// MakeStrInterpretedToken creates a Handler that will add a Token to the +// Result, for which the Token.Value is set to a string-typed // representation of the read Runes. This string is interpreted, meaning that an // escape sequence like "\n" is translated to an actual newline control character -func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler { - return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { +func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler { + return MakeTokenByCallback(handler, func(t *API) *Token { // TODO ERROR HANDLING interpreted, _ := interpretString(t.Result().String()) return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted} @@ -1070,40 +1068,40 @@ func interpretString(str string) (string, error) { return sb.String(), nil } -// MakeRuneToken creates a TokenHandler that will add a Token to the -// TokenHandlerResult, for which the Token.Value is set to a Rune-representation +// MakeRuneToken creates a Handler that will add a Token to the +// Result, for which the Token.Value is set to a Rune-representation // of the read Rune. 
-func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
-	return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
+func MakeRuneToken(toktype interface{}, handler Handler) Handler {
+	return MakeTokenByCallback(handler, func(t *API) *Token {
 		// TODO ERROR HANDLING --- not a 1 rune input
 		return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
 	})
 }
 
-// MakeByteToken creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to a Byte-representation
+// MakeByteToken creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a Byte-representation
 // of the read Rune.
-func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
-	return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
+func MakeByteToken(toktype interface{}, handler Handler) Handler {
+	return MakeTokenByCallback(handler, func(t *API) *Token {
 		// TODO ERROR HANDLING --- not a 1 byte input
 		return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
 	})
 }
 
-// MakeIntToken creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an int-representation
+// MakeIntToken creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to an int-representation
 // of the read Runes.
-func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeIntToken(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) { return strconv.Atoi(s) })
 }
 
-// MakeInt8Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an int8-representation
+// MakeInt8Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to an int8-representation
 // of the read Runes.
 // TODO allow other Go types for oct and hex too.
-func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeInt8Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseInt(s, 10, 8)
@@ -1114,10 +1112,10 @@ func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeInt16Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an int16-representation
+// MakeInt16Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to an int16-representation
 // of the read Runes.
-func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeInt16Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseInt(s, 10, 16)
@@ -1128,10 +1126,10 @@ func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeInt32Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an int32-representation
+// MakeInt32Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to an int32-representation
 // of the read Runes.
-func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeInt32Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseInt(s, 10, 32)
@@ -1142,10 +1140,10 @@ func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeInt64Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an int64-representation
+// MakeInt64Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to an int64-representation
 // of the read Runes.
-func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeInt64Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseInt(s, 10, 64)
@@ -1156,10 +1154,10 @@ func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeUintToken creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an uint-representation
+// MakeUintToken creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a uint-representation
 // of the read Runes.
-func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeUintToken(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseUint(s, 10, 0)
@@ -1170,11 +1168,11 @@ func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeUint8Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an uint8-representation
+// MakeUint8Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a uint8-representation
 // of the read Runes.
 // TODO allow other Go types for oct and hex too.
-func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeUint8Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseUint(s, 10, 8)
@@ -1185,10 +1183,10 @@ func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeUint16Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an uint16-representation
+// MakeUint16Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a uint16-representation
 // of the read Runes.
-func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeUint16Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseUint(s, 10, 16)
@@ -1199,10 +1197,10 @@ func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeUint32Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an uint32-representation
+// MakeUint32Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a uint32-representation
 // of the read Runes.
-func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeUint32Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseUint(s, 10, 32)
@@ -1213,10 +1211,10 @@ func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeUint64Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an uint64-representation
+// MakeUint64Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a uint64-representation
 // of the read Runes.
-func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeUint64Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseUint(s, 10, 64)
@@ -1227,10 +1225,10 @@ func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeFloat32Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an float32-representation
+// MakeFloat32Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a float32-representation
 // of the read Runes.
-func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeFloat32Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseFloat(s, 32)
@@ -1241,10 +1239,10 @@ func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeFloat64Token creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an float64-representation
+// MakeFloat64Token creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a float64-representation
 // of the read Runes.
-func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
+func MakeFloat64Token(toktype interface{}, handler Handler) Handler {
 	return makeStrconvToken(toktype, handler,
 		func(s string) (interface{}, error) {
 			value, err := strconv.ParseFloat(s, 64)
@@ -1255,10 +1253,10 @@ func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
 		})
 }
 
-// MakeBooleanToken creates a TokenHandler that will add a Token to the
-// TokenHandlerResult, for which the Token.Value is set to an bool-representation
+// MakeBooleanToken creates a Handler that will add a Token to the
+// Result, for which the Token.Value is set to a bool-representation
 // of the read Runes.
-func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler { +func MakeBooleanToken(toktype interface{}, handler Handler) Handler { return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) { value, err := strconv.ParseBool(s) @@ -1269,17 +1267,17 @@ func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler { }) } -func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler { +func makeStrconvToken(toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { pc, _, _, _ := runtime.Caller(1) fullName := runtime.FuncForPC(pc).Name() parts := strings.Split(fullName, ".") name := parts[len(parts)-1] - return MakeTokenByCallback(handler, func(t *TokenAPI) *Token { + return MakeTokenByCallback(handler, func(t *API) *Token { value, err := convert(t.Result().String()) if err != nil { // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? panic(fmt.Sprintf( - "TokenHandler error: %s cannot handle input %q: %s "+ + "Handler error: %s cannot handle input %q: %s "+ "(only use a type conversion token maker, when the input has been "+ "validated on beforehand)", name, t.Result().String(), err)) } @@ -1287,15 +1285,15 @@ func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s }) } -// MakeTokenByCallback creates a TokenHandler that will add a Token to the -// TokenHandlerResult, for which the Token is to be generated by the provided -// callback function. The function gets the current TokenAPI as its input and +// MakeTokenByCallback creates a Handler that will add a Token to the +// Result, for which the Token is to be generated by the provided +// callback function. The function gets the current API as its input and // must return a complete Token. 
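A custom token maker built on MakeTokenByCallback might look like the sketch below (illustration only, not part of the patch; the Token fields are taken from the Token struct introduced in result.go further down, and "NUMBER" is a hypothetical token type):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		a := tokenize.A
		// Wrap the integer atom in a token that carries its string value.
		number := tokenize.MakeTokenByCallback(a.Integer, func(t *tokenize.API) *tokenize.Token {
			return &tokenize.Token{Type: "NUMBER", Runes: t.Result().Runes(), Value: t.Result().String()}
		})
		api := tokenize.NewAPI(strings.NewReader("42 apples"))
		if number(api) {
			fmt.Println(api.Result().Token(0)) // NUMBER("42", value = (string)42)
		}
	}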
-func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler { - return func(t *TokenAPI) bool { +func MakeTokenByCallback(handler Handler, callback func(t *API) *Token) Handler { + return func(t *API) bool { child := t.Fork() if handler(child) { - t.Result().AddToken(callback(child)) + t.Result().AddTokens(callback(child)) child.Merge() return true } diff --git a/tokenize/tokenhandlers_builtin_test.go b/tokenize/handlers_builtin_test.go similarity index 90% rename from tokenize/tokenhandlers_builtin_test.go rename to tokenize/handlers_builtin_test.go index 080ab28..520920d 100644 --- a/tokenize/tokenhandlers_builtin_test.go +++ b/tokenize/handlers_builtin_test.go @@ -9,7 +9,7 @@ import ( func TestCombinators(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"abc", c.Not(a.Rune('b')), true, "a"}, {"bcd", c.Not(a.Rune('b')), false, ""}, {"bcd", c.Not(a.Rune('b')), false, ""}, @@ -71,25 +71,25 @@ func TestCombinatorPanics(t *testing.T) { var c, a = tokenize.C, tokenize.A AssertPanics(t, []PanicT{ {func() { a.RuneRange('z', 'a') }, true, - `TokenHandler: MatchRuneRange definition error at /.*/tokenhandlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, + `Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`}, {func() { c.MinMax(-1, 1, a.Space) }, true, - `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, + `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, {func() { c.MinMax(1, -1, a.Space) }, true, - `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, + `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`}, {func() { c.MinMax(10, 5, a.Space) }, true, - `TokenHandler: MatchMinMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max 5 must not be < min 10`}, + `Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`}, {func() { c.Min(-10, a.Space) }, true, - `TokenHandler: MatchMin definition error at /.*/tokenhandlers_builtin_test\.go:\d+: min must be >= 0`}, + `Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`}, {func() { c.Max(-42, a.Space) }, true, - `TokenHandler: MatchMax definition error at /.*/tokenhandlers_builtin_test\.go:\d+: max must be >= 0`}, + `Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`}, {func() { a.IntegerBetween(10, -10) }, true, - `TokenHandler: MatchIntegerBetween definition error at /.*/tokenhandlers_builtin_test.go:\d+: max -10 must not be < min 10`}, + `Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`}, }) } func TestAtoms(t *testing.T) { var a = tokenize.A - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"dd", a.RuneRange('b', 'e'), true, "d"}, {"ee", a.RuneRange('b', 'e'), true, "e"}, {"ff", a.RuneRange('b', 'e'), false, ""}, @@ -226,7 +226,7 @@ func TestAtoms(t *testing.T) { func TestIPv4Atoms(t *testing.T) { var a = tokenize.A - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"0X", a.Octet, true, "0"}, {"00X", a.Octet, true, "00"}, {"000X", a.Octet, true, "000"}, @@ -258,7 +258,7 @@ func TestIPv4Atoms(t *testing.T) { func 
TestIPv6Atoms(t *testing.T) { var a = tokenize.A - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"", a.IPv6, false, ""}, {"::", a.IPv6, true, "::"}, {"1::", a.IPv6, true, "1::"}, @@ -287,7 +287,7 @@ func TestIPv6Atoms(t *testing.T) { func TestModifiers(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"}, {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"}, {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"}, @@ -308,10 +308,10 @@ func TestModifiers(t *testing.T) { func TestTokenMakerErrorHandling(t *testing.T) { var a, tok = tokenize.A, tokenize.T invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool() - parser := tokenize.NewTokenizer(invalid) + tokenizer := tokenize.New(invalid) AssertPanic(t, PanicT{ - func() { parser.Execute("no") }, false, - `TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + + func() { tokenizer("no") }, false, + `Handler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` + `invalid syntax (only use a type conversion token maker, when the input has been validated on beforehand)`, }) } @@ -378,19 +378,6 @@ func TestTokenMakers(t *testing.T) { }) } -func TestSyntacticSugar(t *testing.T) { - var a = tokenize.A - AssertTokenHandlers(t, []TokenHandlerT{ - {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"}, - {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"}, - {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"}, - {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""}, - {"cccccc", a.Rune('c').Optional(), true, "c"}, - {"dddddd", a.Rune('c').Optional(), true, ""}, - {"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"}, - }) -} - // I know, this is hell, but that's the whole point for this test :-> func TestCombination(t *testing.T) { var c, a, m = tokenize.C, tokenize.A, tokenize.M @@ -414,7 +401,7 @@ func TestCombination(t *testing.T) { c.Opt(a.SquareClose), ) - AssertTokenHandlers(t, []TokenHandlerT{ + AssertHandlers(t, []HandlerT{ {"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"}, {"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"}, {">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"}, diff --git a/tokenize/result.go b/tokenize/result.go new file mode 100644 index 0000000..79d141b --- /dev/null +++ b/tokenize/result.go @@ -0,0 +1,157 @@ +package tokenize + +import ( + "fmt" +) + +// Result is a struct that is used for holding tokenizer results as produced +// by a tokenize.Handler. It also provides the API that Handlers and Parsers +// can use to store and retrieve the results. +type Result struct { + lastRune *runeInfo // Information about the last rune read using NextRune() + runes []rune // runes as added to the result by tokenize.Handler functions + tokens []*Token // Tokens as added to the result by tokenize.Handler functions + cursor *Cursor // current read cursor position, relative to the start of the file + offset int // current rune offset relative to the Reader's sliding window + err error // can be used by a Handler to report a specific issue with the input +} + +type runeInfo struct { + r rune + err error +} + +// Token defines a lexical token as produced by tokenize.Handlers. 
+//
+// The only mandatory data in a Token are the Runes. The Type and Value fields
+// are optional fields that can be filled with data at will.
+//
+// The use of the Type field is to let a tokenizer communicate to
+// the parser what type of token it's handling.
+//
+// The use of the Value field is to store any kind of data along with the token.
+// One use of this can be found in the built-in token maker functions like
+// MakeInt8Token(), which store an interpreted version of the input string
+// in the Value field.
+type Token struct {
+	Runes []rune      // the runes that make up the token
+	Type  interface{} // optional token type, can be any type that a parser author sees fit
+	Value interface{} // optional token value, of any type as well
+}
+
+func (t Token) String() string {
+	tokenType := ""
+	if t.Type != nil {
+		tokenType = fmt.Sprintf("%v", t.Type)
+	}
+
+	value := ""
+	if t.Value != nil {
+		value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value)
+	}
+
+	return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value)
+}
+
+// newResult initializes an empty Result struct.
+func newResult() *Result {
+	return &Result{
+		runes:  []rune{},
+		tokens: []*Token{},
+		cursor: &Cursor{},
+	}
+}
+
+// ClearRunes clears the runes in the Result.
+func (r *Result) ClearRunes() {
+	r.runes = []rune{}
+}
+
+// SetRunes replaces the Runes from the Result with the provided input.
+func (r *Result) SetRunes(s interface{}) {
+	r.ClearRunes()
+	r.addRunes(s)
+}
+
+// AddRunes is used to add runes to the Result.
+func (r *Result) AddRunes(set ...interface{}) {
+	r.addRunes(set...)
+}
+
+func (r *Result) addRunes(set ...interface{}) {
+	for _, s := range set {
+		switch s := s.(type) {
+		case string:
+			r.runes = append(r.runes, []rune(s)...)
+		case []rune:
+			r.runes = append(r.runes, s...)
+		case rune:
+			r.runes = append(r.runes, s)
+		default:
+			callerPanic(2, "tokenize.Result.AddRunes(): unsupported type '%T' used at {caller}", s)
+		}
+	}
+}
+
+// Runes retrieves the Runes from the Result.
+func (r *Result) Runes() []rune {
+	return r.runes
+}
+
+// Rune retrieves a single rune from the Result at the specified index.
+func (r *Result) Rune(idx int) rune {
+	return r.runes[idx]
+}
+
+// String returns the Runes from the Result as a string.
+func (r *Result) String() string {
+	return string(r.runes)
+}
+
+// ClearTokens clears the tokens in the Result.
+func (r *Result) ClearTokens() {
+	r.tokens = []*Token{}
+}
+
+// SetTokens replaces the Tokens from the Result with the provided tokens.
+func (r *Result) SetTokens(tokens []*Token) {
+	r.ClearTokens()
+	for _, t := range tokens {
+		r.AddTokens(t)
+	}
+}
+
+// AddTokens is used to add Tokens to the Result.
+func (r *Result) AddTokens(tokens ...*Token) {
+	r.tokens = append(r.tokens, tokens...)
+}
+
+// Tokens retrieves the Tokens from the Result.
+func (r *Result) Tokens() []*Token {
+	return r.tokens
+}
+
+// Token retrieves a single Token from the Result at the specified index.
+func (r *Result) Token(idx int) *Token {
+	return r.tokens[idx]
+}
+
+// Values retrieves a slice containing only the Values for the Result Tokens.
+func (r *Result) Values() []interface{} {
+	values := make([]interface{}, len(r.tokens))
+	for i, tok := range r.tokens {
+		values[i] = tok.Value
+	}
+	return values
+}
+
+// Value retrieves a single Value from the Result Token at the specified index.
+func (r *Result) Value(idx int) interface{} {
+	return r.tokens[idx].Value
+}
+
+// Cursor retrieves the read cursor from the Result.
This is the first +// cursor position after the runes that were read and accepted by the Handler. +func (r *Result) Cursor() *Cursor { + return r.cursor +} diff --git a/tokenize/tokenhandlerresult_test.go b/tokenize/result_test.go similarity index 83% rename from tokenize/tokenhandlerresult_test.go rename to tokenize/result_test.go index 6972884..c810d6e 100644 --- a/tokenize/tokenhandlerresult_test.go +++ b/tokenize/result_test.go @@ -41,7 +41,7 @@ func ExampleToken() { } func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { - i := tokenize.NewTokenAPI(strings.NewReader("Testing")) + i := tokenize.NewAPI(strings.NewReader("Testing")) i.Result().SetRunes("string") AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input") i.Result().SetRunes([]rune("rune slice")) @@ -53,10 +53,10 @@ func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) { func TestSetResult_PanicsOnUnhandledInput(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := tokenize.NewTokenAPI(strings.NewReader("Testing")) + i := tokenize.NewAPI(strings.NewReader("Testing")) i.Result().SetRunes(1234567) }, Regexp: true, - Expect: `tokenize\.TokenHandlerResult\.AddRunes\(\): unsupported type 'int' used at /.*/tokenhandlerresult_test.go:\d+`, + Expect: `tokenize\.Result\.AddRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`, }) } diff --git a/tokenize/tokenapi.go b/tokenize/tokenapi.go deleted file mode 100644 index 7419f2c..0000000 --- a/tokenize/tokenapi.go +++ /dev/null @@ -1,212 +0,0 @@ -package tokenize - -import ( - "fmt" - - "git.makaay.nl/mauricem/go-parsekit/common" - "git.makaay.nl/mauricem/go-parsekit/read" -) - -// TokenAPI wraps a parsekit.reader and its purpose is to retrieve data from -// a parsekit.read.Reader and to report back tokenizing results. For easy -// lookahead support, a forking strategy is provided. -// -// BASIC OPERATION: -// -// To retrieve the next rune from the TokenAPI, call the NextRune() method. -// -// When the rune is to be accepted as input, call the method Accept(). The rune -// is then added to the result runes of the TokenAPI and the read cursor is moved -// forward. -// -// By invoking NextRune() + Accept() multiple times, the result can be extended -// with as many runes as needed. Runes collected this way can later on be -// retrieved using the method Result().Runes(). -// -// It is mandatory to call Accept() after retrieving a rune, before calling -// NextRune() again. Failing to do so will result in a panic. -// -// Next to adding runes to the result, it is also possible to modify the -// stored runes or to add lexical Tokens to the result. For all things -// concerning results, take a look at the TokenHandlerResult struct, which -// can be accessed though the method Result(). -// -// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT: -// -// Sometimes, we must be able to perform a lookahead, which might either -// succeed or fail. In case of a failing lookahead, the state of the -// TokenAPI must be brought back to the original state, so we can try -// a different route. -// -// The way in which this is supported, is by forking a TokenAPI struct by -// calling method Fork(). This will return a forked child TokenAPI, with -// empty result data, but using the same read cursor position as the -// forked parent. -// -// After forking, the same interface as described for BASIC OPERATION can be -// used to fill the results. 
When the lookahead was successful, then -// Merge() can be called on the forked child to append the child's results -// to the parent's results, and to move the read cursor position to that -// of the child. -// -// When the lookahead was unsuccessful, then the forked child TokenAPI can -// simply be discarded. The parent TokenAPI was never modified, so it can -// safely be used as if the lookahead never happened. -// -// Opinionized note: -// Many tokenizers/parsers take a different approach on lookaheads by using -// peeks and by moving the read cursor position back and forth, or by putting -// read input back on the input stream. That often leads to code that is -// efficient, however, in my opinion, not very intuitive to read. It can also -// be tedious to get the cursor position back at the correct position, which -// can lead to hard to track bugs. I much prefer this forking method, since -// no bookkeeping has to be implemented when implementing a parser. -type TokenAPI struct { - reader *read.Reader - parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child - child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent - result *TokenHandlerResult // results as produced by a TokenHandler (runes, Tokens, cursor position) -} - -// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader. -func NewTokenAPI(input interface{}) *TokenAPI { - return &TokenAPI{ - reader: read.New(input), - result: newTokenHandlerResult(), - } -} - -// NextRune returns the rune at the current read offset. -// -// When an invalid UTF8 rune is encountered on the input, it is replaced with -// the utf.RuneError rune. It's up to the caller to handle this as an error -// when needed. -// -// After reading a rune it must be Accept()-ed to move the read cursor forward -// to the next rune. Doing so is mandatory. When doing a second call to NextRune() -// without explicitly accepting, this method will panic. -func (i *TokenAPI) NextRune() (rune, error) { - if i.result.lastRune != nil { - common.CallerPanic(1, "tokenize.TokenAPI.NextRune(): NextRune() called at {caller} "+ - "without a prior call to Accept()") - } - i.DetachChilds() - - readRune, err := i.reader.RuneAt(i.result.offset) - i.result.lastRune = &runeInfo{r: readRune, err: err} - return readRune, err -} - -// Accept the last rune as read by NextRune() into the result buffer and move -// the cursor forward. -// -// It is not allowed to call Accept() when the previous call to NextRune() -// returned an error. Calling Accept() in such case will result in a panic. -func (i *TokenAPI) Accept() { - if i.result.lastRune == nil { - common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller} without first calling NextRune()") - } else if i.result.lastRune.err != nil { - common.CallerPanic(1, "tokenize.TokenAPI.Accept(): Accept() called at {caller}, but the prior call to NextRune() failed") - } - i.result.runes = append(i.result.runes, i.result.lastRune.r) - i.result.cursor.Move(fmt.Sprintf("%c", i.result.lastRune.r)) - i.result.offset++ - i.result.lastRune = nil -} - -// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and -// read cursor position, but for the rest this is a fresh TokenAPI. -// -// By forking a TokenAPI, you can freely work with the forked child, without -// affecting the parent TokenAPI. This is for example useful when you must perform -// some form of lookahead. 
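The fork/merge flow described in this removed documentation carries over unchanged to the renamed API type; a sketch with the new names (illustration only, not part of the patch):

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		api := tokenize.NewAPI(strings.NewReader("abc"))
		child := api.Fork()
		if r, err := child.NextRune(); err == nil && r == 'a' {
			child.Accept()
			child.Merge() // lookahead succeeded: move results + cursor to the parent
		}
		fmt.Println(api.Result().String()) // a
	}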
-// -// When processing of the TokenHandler was successful and you want to add the results -// to the parent TokenAPI, you can call TokenAPI.Merge() on the forked -// child. This will add the runes in the result buffer to the result buffer of -// the parent. It also updates the read cursor position of the parent to that -// of the child. -// -// When processing failed, or you don't want to use the results as produced by that -// lookahead, the forked child can simply be discarded. You can continue to work -// with the parent TokenAPI as if nothing ever happened. -func (i *TokenAPI) Fork() *TokenAPI { - // Clean up the current forking / reading state. - i.DetachChilds() - i.result.lastRune = nil - - // Create the new fork. - child := &TokenAPI{ - reader: i.reader, - parent: i, - } - child.result = newTokenHandlerResult() - i.syncCursorTo(child) - i.child = child - return child -} - -// Merge appends the TokenHandlerResult of a forked child TokenAPI to the TokenHandlerResult -// of its parent. The read cursor position of the parent is also updated to -// that of the forked child. -// -// After the merge operation, the child is reset so it can immediately be -// reused for performing another match. This means that all TokenHandlerResult data are -// cleared, but the read cursor position is kept at its current position. -// This allows a child to feed results in chunks to its parent. -func (i *TokenAPI) Merge() { - if i.parent == nil { - common.CallerPanic(1, "tokenize.TokenAPI.Merge(): Merge() called at {caller} on a non-forked TokenAPI") - } - i.addResultsToParent() - i.syncCursorTo(i.parent) - i.ClearResults() - i.DetachChilds() -} - -func (i *TokenAPI) addResultsToParent() { - i.parent.result.runes = append(i.parent.result.runes, i.result.runes...) - i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...) -} - -func (i *TokenAPI) syncCursorTo(to *TokenAPI) { - to.result.offset = i.result.offset - *to.result.cursor = *i.result.cursor -} - -func (i *TokenAPI) ClearResults() { - i.result.lastRune = nil - i.result.runes = []rune{} - i.result.tokens = []*Token{} - i.result.err = nil -} - -func (i *TokenAPI) DetachChilds() { - if i.child != nil { - i.child.DetachChildsRecurse() - i.child = nil - } -} - -func (i *TokenAPI) DetachChildsRecurse() { - if i.child != nil { - i.child.DetachChildsRecurse() - } - i.child = nil - i.parent = nil -} - -func (i *TokenAPI) FlushReader() bool { - if i.result.offset > 0 { - i.reader.Flush(i.result.offset) - i.result.offset = 0 - return true - } - return false -} - -// Result returns the TokenHandlerResult data for the TokenAPI. The returned struct -// can be used to retrieve and to modify result data. -func (i *TokenAPI) Result() *TokenHandlerResult { - return i.result -} diff --git a/tokenize/tokenhandlerresult.go b/tokenize/tokenhandlerresult.go deleted file mode 100644 index 62ed1ca..0000000 --- a/tokenize/tokenhandlerresult.go +++ /dev/null @@ -1,174 +0,0 @@ -package tokenize - -import ( - "fmt" - "strings" - - "git.makaay.nl/mauricem/go-parsekit/common" -) - -// TokenHandlerResult is a struct that is used for holding tokenizing results -// as produced by a TokenHandler. It also provides the API that TokenHandlers -// and Parsers can use to respectively store and access the results.
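Since Result (the new name for the TokenHandlerResult struct below) is the main touchpoint for handler authors, here is a minimal sketch of its rune-level API; it only uses calls that result_test.go above exercises:

	package main

	import (
		"fmt"
		"strings"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		api := tokenize.NewAPI(strings.NewReader("Testing"))
		result := api.Result()
		result.SetRunes("abc")             // replace the result runes, starting from a string
		result.AddRunes([]rune("de"), 'f') // append a rune slice and a single rune
		fmt.Println(result.String())       // abcdef
	}

Passing anything other than a string, a rune slice, or a single rune makes AddRunes panic, as the PanicsOnUnhandledInput test above asserts.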
-type TokenHandlerResult struct { - lastRune *runeInfo // Information about the last rune read using NextRune() - runes []rune - tokens []*Token - cursor *common.Cursor // current read cursor position, relative to the start of the file - offset int // current rune offset relative to the Reader's sliding window - err *common.Error // can be used by a TokenHandler to report a specific issue with the input -} - -type runeInfo struct { - r rune - err error -} - -// Token defines a lexical token as produced by TokenHandlers. -// -// The only mandatory data in a Token are the Runes. The Type and Value fields -// are optional fields that can be filled with data at will. -// -// The use of the Type field is to let a tokenizer communicate to -// the parser what type of token it's handling. -// -// The use of the Value field is to store any kind of data along with the token. -// One use of this can be found in the built-in token maker functions like -// MakeInt8Token(), which store an interpreted version of the input string -// in the Value field. -type Token struct { - Runes []rune // the runes that make up the token - Type interface{} // optional token type, can be any type that a parser author sees fit - Value interface{} // optional token value, of any type as well -} - -func (t Token) String() string { - tokenType := "" - if t.Type != nil { - tokenType = fmt.Sprintf("%v", t.Type) - } - - value := "" - if t.Value != nil { - value = fmt.Sprintf(", value = (%T)%v", t.Value, t.Value) - } - - return fmt.Sprintf("%v(%q%s)", tokenType, string(t.Runes), value) -} - -// newTokenHandlerResult initializes an empty TokenHandlerResult struct. -func newTokenHandlerResult() *TokenHandlerResult { - return &TokenHandlerResult{ - runes: []rune{}, - tokens: []*Token{}, - cursor: &common.Cursor{}, - } -} - -// ClearRunes clears the runes in the TokenHandlerResult. -func (r *TokenHandlerResult) ClearRunes() { - r.runes = []rune{} -} - -// SetRunes replaces the Runes from the TokenHandlerResult with the provided input. -func (r *TokenHandlerResult) SetRunes(s interface{}) { - r.ClearRunes() - r.addRunes(s) -} - -// AddRunes is used to add runes to the TokenHandlerResult. -func (r *TokenHandlerResult) AddRunes(set ...interface{}) { - r.addRunes(set...) -} - -// addRunes does the actual work for SetRunes and AddRunes. -func (r *TokenHandlerResult) addRunes(set ...interface{}) { - for _, s := range set { - switch s := s.(type) { - case string: - r.runes = append(r.runes, []rune(s)...) - case []rune: - r.runes = append(r.runes, s...) - case rune: - r.runes = append(r.runes, s) - default: - common.CallerPanic(2, "tokenize.TokenHandlerResult.AddRunes(): unsupported type '%T' used at {caller}", s) - } - } -} - -// Runes retrieves the Runes from the TokenHandlerResult. -func (r *TokenHandlerResult) Runes() []rune { - return r.runes -} - -// Rune retrieves a single rune from the TokenHandlerResult at the specified index. -func (r *TokenHandlerResult) Rune(idx int) rune { - return r.runes[idx] -} - -// String returns the Runes from the TokenHandlerResult as a string. -func (r *TokenHandlerResult) String() string { - return string(r.runes) -} - -// ClearTokens clears the tokens in the TokenHandlerResult. -func (r *TokenHandlerResult) ClearTokens() { - r.tokens = []*Token{} -} - -// SetTokens replaces the Tokens from the TokenHandlerResult with the provided input.
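Given the Token.String() implementation above, a token renders as type("runes", value = (T)v), the exact shape asserted in the tokenizer test output further down. A small sketch with made-up field values:

	package main

	import (
		"fmt"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		t := tokenize.Token{
			Type:  "mask",
			Runes: []rune("24"),
			Value: int8(24),
		}
		fmt.Println(t) // mask("24", value = (int8)24)
	}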
-func (r *TokenHandlerResult) SetTokens(tokens []*Token) { - r.ClearTokens() - for _, t := range tokens { - r.AddToken(t) - } -} - -// AddToken is used to add a Token to the TokenHandlerResult. -func (r *TokenHandlerResult) AddToken(t *Token) { - r.tokens = append(r.tokens, t) -} - -// SliceOfTokens is an alias for the []*Token type. The method Tokens() returns -// this type. A String() method is defined for it, to make it easy to -// format the tokens as a string for testing / debugging purposes. -type SliceOfTokens []*Token - -func (ts SliceOfTokens) String() string { - parts := make([]string, len(ts)) - for i, t := range ts { - parts[i] = t.String() - } - return strings.Join(parts, " ") -} - -// Tokens retrieves the Tokens from the TokenHandlerResult. -func (r *TokenHandlerResult) Tokens() SliceOfTokens { - return r.tokens -} - -// Token retrieves a single Token from the TokenHandlerResult at the specified index. -func (r *TokenHandlerResult) Token(idx int) *Token { - return r.tokens[idx] -} - -// Values retrieves a slice containing only the Values for the TokenHandlerResult Tokens. -func (r *TokenHandlerResult) Values() []interface{} { - values := make([]interface{}, len(r.tokens)) - for i, tok := range r.tokens { - values[i] = tok.Value - } - return values -} - -// Value retrieves a single Value from the TokenHandlerResult Token at the specified index. -func (r *TokenHandlerResult) Value(idx int) interface{} { - return r.tokens[idx].Value -} - -// Cursor retrieves the read cursor from the TokenHandlerResult. This is the -// first cursor position after the runes that were read by the TokenHandler. -func (r *TokenHandlerResult) Cursor() *common.Cursor { - return r.cursor -} diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go new file mode 100644 index 0000000..70bf722 --- /dev/null +++ b/tokenize/tokenize.go @@ -0,0 +1,25 @@ +package tokenize + +import ( + "fmt" +) + +// Func is the function signature as returned by New: a function that runs a +// tokenizer against some input and returns the tokenizer Result, along with +// an error (nil on success). +type Func func(input interface{}) (*Result, error) + +// New instantiates a new tokenizer, wrapped around the provided Handler, +// and returns it as a Func that can be applied directly to an input. +func New(tokenHandler Handler) Func { + return func(input interface{}) (*Result, error) { + api := NewAPI(input) + ok := tokenHandler(api) + + if !ok { + err := fmt.Errorf("mismatch at %s", Cursor{}) + return nil, err + } + return api.Result(), nil + } +} diff --git a/tokenize/tokenizer.go b/tokenize/tokenizer.go deleted file mode 100644 index 1ae1a1d..0000000 --- a/tokenize/tokenizer.go +++ /dev/null @@ -1,34 +0,0 @@ -package tokenize - -import ( - "git.makaay.nl/mauricem/go-parsekit/common" -) - -// Tokenizer is the top-level struct that holds the configuration for -// a parser that is based solely on a TokenHandler function. -// The Tokenizer can be instantiated using the parsekit.NewTokenizer() -// method. -type Tokenizer struct { - handler TokenHandler -} - -// NewTokenizer instantiates a new Tokenizer, based on the provided TokenHandler. -func NewTokenizer(tokenHandler TokenHandler) *Tokenizer { - return &Tokenizer{tokenHandler} -} - -// Execute feeds the input to the wrapped TokenHandler function. -// For an overview of allowed inputs, take a look at the documentation for parsekit.reader.New(). -// -// It returns the TokenHandler's TokenHandlerResult. When an error occurred -// during parsing, the error will be set, nil otherwise.
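The new tokenize.go above is what replaces the Tokenizer struct being deleted here: New returns a plain function value, so the Execute method below collapses into a direct call. A minimal sketch, assuming the predefined tokenize.A.Integer handler from the examples:

	package main

	import (
		"fmt"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		tokenizer := tokenize.New(tokenize.A.Integer)
		result, err := tokenizer("12345")
		if err != nil {
			fmt.Println("Error:", err)
			return
		}
		fmt.Println("Matched:", result.String()) // Matched: 12345
	}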
-func (t *Tokenizer) Execute(input interface{}) (*TokenHandlerResult, *common.Error) { - api := NewTokenAPI(input) - ok := t.handler(api) - - if !ok { - err := &common.Error{Message: "mismatch", Cursor: common.Cursor{}} - return nil, err - } - return api.Result(), nil -} diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index b24fba7..aace292 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -27,7 +27,7 @@ func ExampleTokenizer_Execute() { ip := t.Str("ip", a.IPv4) mask := t.Int8("mask", a.IPv4CIDRMask) cidr := c.Seq(ip, a.Slash, mask) - tokenizer := tokenize.NewTokenizer(cidr) + tokenizer := tokenize.New(cidr) for _, input := range []string{ "000.000.000.000/000", @@ -36,8 +36,8 @@ "10.0.300.1/24", "not an IPv4 CIDR", } { - // Execute returns a TokenHandlerResult and an error, which is nil on success. - result, err := tokenizer.Execute(input) + // Calling the tokenizer Func returns a Result and an error, which is nil on success. + result, err := tokenizer(input) if err == nil { fmt.Printf("Result: %s\n", result.Tokens()) @@ -46,9 +46,9 @@ } } // Output: - // Result: ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0) - // Result: ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24) - // Result: ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32) + // Result: [ip("0.0.0.0", value = (string)0.0.0.0) mask("0", value = (int8)0)] + // Result: [ip("192.168.0.1", value = (string)192.168.0.1) mask("24", value = (int8)24)] + // Result: [ip("255.255.255.255", value = (string)255.255.255.255) mask("32", value = (int8)32)] // Error: mismatch at start of file // Error: mismatch at start of file } @@ -77,7 +77,7 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) { i.NextRune() }, Regexp: true, - Expect: `tokenize\.TokenAPI\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, + Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`, }) } @@ -85,7 +85,7 @@ func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: mkInput().Accept, Regexp: true, - Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`, + Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`, }) } @@ -96,7 +96,7 @@ func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) { i.Merge() }, Regexp: true, - Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) } func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { @@ -108,7 +108,7 @@ func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) { f.Merge() }, Regexp: true, - Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) } func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { @@ -120,7 +120,7 @@ func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) { f.Merge() }, Regexp: true, - Expect: `tokenize\.TokenAPI\.Merge\(\): Merge\(\) called at
/.*/tokenizer_test\.go:\d+ on a non-forked TokenAPI`}) + Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*/tokenizer_test\.go:\d+ on a non-forked API`}) } func TestForkingInput_ClearsLastRune(t *testing.T) { @@ -132,12 +132,12 @@ func TestForkingInput_ClearsLastRune(t *testing.T) { i.Accept() }, Regexp: true, - Expect: `tokenize\.TokenAPI\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, + Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`, }) } func TestAccept_UpdatesCursor(t *testing.T) { - i := tokenize.NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines")) + i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1") for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" i.NextRune() @@ -155,7 +155,7 @@ func TestAccept_UpdatesCursor(t *testing.T) { } func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { - i := tokenize.NewTokenAPI(strings.NewReader("X")) + i := tokenize.NewAPI(strings.NewReader("X")) i.NextRune() i.Accept() r, err := i.NextRune() @@ -163,7 +163,7 @@ func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) { AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") } func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) { - i := tokenize.NewTokenAPI(strings.NewReader("X")) + i := tokenize.NewAPI(strings.NewReader("X")) f := i.Fork() f.NextRune() f.Accept() @@ -174,6 +174,6 @@ func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()") } -func mkInput() *tokenize.TokenAPI { - return tokenize.NewTokenAPI("Testing") +func mkInput() *tokenize.API { + return tokenize.NewAPI("Testing") } diff --git a/tokenize/tokenizer_unexported_test.go b/tokenize/tokenizer_whitebox_test.go similarity index 92% rename from tokenize/tokenizer_unexported_test.go rename to tokenize/tokenizer_whitebox_test.go index df69d0b..5898895 100644 --- a/tokenize/tokenizer_unexported_test.go +++ b/tokenize/tokenizer_whitebox_test.go @@ -6,7 +6,7 @@ import ( func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { // Create input, accept the first rune. 
- i := NewTokenAPI("Testing") + i := NewAPI("Testing") i.NextRune() i.Accept() // T AssertEqual(t, "T", i.Result().String(), "accepted rune in input") @@ -31,7 +31,7 @@ func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { } func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { - i := NewTokenAPI("Testing") + i := NewAPI("Testing") i.NextRune() i.Accept() f1 := i.Fork() @@ -63,7 +63,7 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult } func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) { - i := NewTokenAPI("Testing") + i := NewAPI("Testing") f1 := i.Fork() f2 := f1.Fork() f3 := f2.Fork() @@ -99,13 +99,13 @@ func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t } func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) { - i := NewTokenAPI("Testing") + i := NewAPI("Testing") r, _ := i.NextRune() AssertEqual(t, 'T', r, "result from 1st call to NextRune()") - AssertTrue(t, i.result.lastRune != nil, "TokenAPI.result.lastRune after NextRune() is not nil") + AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil") i.Accept() - AssertTrue(t, i.result.lastRune == nil, "TokenAPI.result.lastRune after Accept() is nil") - AssertEqual(t, 1, i.result.offset, "TokenAPI.result.offset") + AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil") + AssertEqual(t, 1, i.result.offset, "API.result.offset") r, _ = i.NextRune() AssertEqual(t, 'e', r, "result from 2nd call to NextRune()") }
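As TestAccept_UpdatesCursor above shows, it is Accept() that advances the cursor, and the cursor is newline-aware. A condensed sketch of the same behaviour, using only calls that appear in the tests in this diff:

	package main

	import (
		"fmt"

		"git.makaay.nl/mauricem/go-parsekit/tokenize"
	)

	func main() {
		api := tokenize.NewAPI("ab\ncd")
		fmt.Println(api.Result().Cursor()) // start of file
		for i := 0; i < 4; i++ {           // read and accept 'a', 'b', '\n', 'c'
			api.NextRune()
			api.Accept()
		}
		fmt.Println(api.Result().Cursor()) // line 2, column 2
	}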