diff --git a/parse/api.go b/parse/api.go index 1a8d758..ad61e0b 100644 --- a/parse/api.go +++ b/parse/api.go @@ -22,64 +22,99 @@ type API struct { stopped bool // a boolean set to true by Stop() } -// Peek checks if the upcoming input data matches the provided tokenize.Handler. +// PeekWithResult checks if the upcoming input data matches the provided tokenize.Handler. // If it does, then true will be returned, false otherwise. The read cursor // will be kept at the same position, so the next call to Peek() or Accept() // will start from the same cursor position. -func (p *API) PeekWithResult(tokenHandler tokenize.Handler) bool { - forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) - t := p.tokenAPI +// +// On a successful peek, the results (data + tokens) are returned by the peek. +// They are available (as with Accept()) through parse.API.Result. +func (parseAPI *API) PeekWithResult(tokenHandler tokenize.Handler) bool { + forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler) + t := parseAPI.tokenAPI if ok { - p.Result.Tokens = t.Output.Tokens() - p.Result.Runes = t.Output.Runes() + parseAPI.Result.Tokens = t.Output.Tokens() + parseAPI.Result.Runes = t.Output.Runes() } t.Dispose(forkedAPI) return ok } -func (p *API) Peek(tokenHandler tokenize.Handler) bool { - forkedAPI, ok := p.invokeHandler("Peek", tokenHandler) - t := p.tokenAPI - p.Result.Tokens = nil - p.Result.Runes = nil +// Peek checks if the upcoming input data matches the provided tokenize.Handler. +// If it does, then true will be returned, false otherwise. The read cursor +// will be kept at the same position, so the next call to Peek() or Accept() +// will start from the same cursor position. +// +// No results (data + tokens) are returned by Peek(). If you want access to the data +// through parse.API.Result, make use of PeekWithResult() instead.
+func (parseAPI *API) Peek(tokenHandler tokenize.Handler) bool { + forkedAPI, ok := parseAPI.invokeHandler("Peek", tokenHandler) + t := parseAPI.tokenAPI + parseAPI.Result.Tokens = nil + parseAPI.Result.Runes = nil t.Dispose(forkedAPI) return ok } +// PeekChars is a very lightweight peek command, which takes a look at one or +// more upcoming characters on the input data. +// +// If you need more complex logic for checking the upcoming input data, then +// make use of the Peek() method with a tokenize.Handler function instead. +func (parseAPI *API) PeekChars(chars ...rune) bool { + offset := 0 + for _, r := range chars { + if r <= 0x1F { + b, err := parseAPI.tokenAPI.Byte.Peek(offset) + if err != nil || b != byte(r) { + return false + } + offset++ + } else { + rRead, w, err := parseAPI.tokenAPI.Rune.Peek(offset) + if err != nil || rRead != r { + return false + } + offset += w + } + } + return true +} + // Accept checks if the upcoming input data matches the provided tokenize.Handler. // If it does, then true will be returned and the read cursor will be moved // forward to beyond the match that was found. Otherwise false will be // and the read cursor will stay at the same position. // // After calling this method, you can retrieve the results using the Result() method. -func (p *API) Accept(tokenHandler tokenize.Handler) bool { - t := p.tokenAPI - forkedAPI, ok := p.invokeHandler("Accept", tokenHandler) +func (parseAPI *API) Accept(tokenHandler tokenize.Handler) bool { + t := parseAPI.tokenAPI + forkedAPI, ok := parseAPI.invokeHandler("Accept", tokenHandler) if ok { // Keep track of the results as produced by this child. - p.Result.Tokens = t.Output.Tokens() - p.Result.Runes = t.Output.Runes() + parseAPI.Result.Tokens = t.Output.Tokens() + parseAPI.Result.Runes = t.Output.Runes() // Merge to the parent level. t.Merge(forkedAPI) t.Dispose(forkedAPI) // And flush the input reader buffer. 
- t.FlushInput() + t.Input.Flush() } else { t.Dispose(forkedAPI) } return ok } -func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) { - p.panicWhenStoppedOrInError(name) +func (parseAPI *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) { + parseAPI.panicWhenStoppedOrInError(name) if tokenHandler == nil { callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil tokenHandler argument at {caller}") } - child := p.tokenAPI.Fork() - ok := tokenHandler(p.tokenAPI) + child := parseAPI.tokenAPI.Fork() + ok := tokenHandler(parseAPI.tokenAPI) return child, ok } @@ -91,13 +126,13 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bo // Basically, this guard helps with proper coding of parsers, making sure // that clean routes are followed. You can consider this check a runtime // unit test. -func (p *API) panicWhenStoppedOrInError(name string) { - if !p.IsStoppedOrInError() { +func (parseAPI *API) panicWhenStoppedOrInError(name string) { + if !parseAPI.IsStoppedOrInError() { return } after := "Error()" - if p.stopped { + if parseAPI.stopped { after = "Stop()" } callerPanic(name, "parsekit.parse.API.{name}(): Illegal call to {name}() at {caller}: "+ @@ -107,8 +142,8 @@ func (p *API) panicWhenStoppedOrInError(name string) { // IsStoppedOrInError checks if the parser has stopped or if an error was set. // When true, then the parser can no longer continue. If your parser tries to // call parse.API methods when true is returned, this will result in a panic. -func (p *API) IsStoppedOrInError() bool { - return p.stopped || p.err != nil +func (parseAPI *API) IsStoppedOrInError() bool { + return parseAPI.stopped || parseAPI.err != nil } // Handle executes other parse.Handler functions from within the active @@ -118,27 +153,21 @@ func (p *API) IsStoppedOrInError() bool { // It will be false when either an error was set using Error(), or the // parser was stopped using Stop(). 
// -// When multiple parse.Handler functions are provided as arguments, they -// will be executed in the provided order. When one of those handlers stops -// the parser or sets an error, then the following handlers will not be called. -// // Instead of calling another handler using this method, you can also call // that other handler directly. However, it is generally advised to make use // of this method, because it performs some sanity checks and it will return // an easy to use boolean indicating whether the parser can continue or not. -func (p *API) Handle(parseHandler ...Handler) bool { - p.panicWhenStoppedOrInError("Handle") - for _, handler := range parseHandler { - p.panicWhenHandlerNil("Handle", handler) - handler(p) - if p.IsStoppedOrInError() { - return false - } +func (parseAPI *API) Handle(handler Handler) bool { + parseAPI.panicWhenStoppedOrInError("Handle") + parseAPI.panicWhenHandlerNil("Handle", handler) + handler(parseAPI) + if parseAPI.IsStoppedOrInError() { + return false } return true } -func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) { +func (parseAPI *API) panicWhenHandlerNil(name string, parseHandler Handler) { if parseHandler == nil { callerPanic(name, "parsekit.parse.API.{name}(): {name}() called with nil input at {caller}") } @@ -157,8 +186,8 @@ func (p *API) panicWhenHandlerNil(name string, parseHandler Handler) { // // After stopping, no more calls to API methods are allowed. // Calling a method in this state will result in a panic. -func (p *API) Stop() { - p.stopped = true +func (parseAPI *API) Stop() { + parseAPI.stopped = true } // Error sets the error message in the API. @@ -166,11 +195,11 @@ func (p *API) Stop() { // After setting an error, no more calls to API methods are allowed. // Calling a method in this state will result in a panic. // TODO ... wait how do I read the error? I don't I guess, I just return it. Is Error() a good name or SetError() better for example? 
-func (p *API) Error(format string, data ...interface{}) { +func (parseAPI *API) Error(format string, data ...interface{}) { // No call to p.panicWhenStoppedOrInError(), to allow a parser to // set a different error message when needed. message := fmt.Sprintf(format, data...) - p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor()) + parseAPI.err = fmt.Errorf("%s at %s", message, parseAPI.tokenAPI.Input.Cursor()) } // ExpectEndOfFile can be used to check if the input is at end of file. @@ -178,12 +207,12 @@ func (p *API) Error(format string, data ...interface{}) { // When it finds that the end of the file was indeed reached, then the parser // will be stopped through Stop(). Otherwise, the unexpected input is reported // using Expected("end of file"). -func (p *API) ExpectEndOfFile() { - p.panicWhenStoppedOrInError("ExpectEndofFile") - if p.Peek(tokenize.A.EndOfFile) { - p.Stop() +func (parseAPI *API) ExpectEndOfFile() { + parseAPI.panicWhenStoppedOrInError("ExpectEndofFile") + if parseAPI.Peek(tokenize.A.EndOfFile) { + parseAPI.Stop() } else { - p.Expected("end of file") + parseAPI.Expected("end of file") } } @@ -200,16 +229,16 @@ func (p *API) ExpectEndOfFile() { // • the end of the input was reached // // • there was an error while reading the input. 
-func (p *API) Expected(expected string) { - p.panicWhenStoppedOrInError("Expected") - _, err := p.tokenAPI.Byte.Peek(0) +func (parseAPI *API) Expected(expected string) { + parseAPI.panicWhenStoppedOrInError("Expected") + _, err := parseAPI.tokenAPI.Byte.Peek(0) switch { case err == nil: - p.Error("unexpected input%s", fmtExpects(expected)) + parseAPI.Error("unexpected input%s", fmtExpects(expected)) case err == io.EOF: - p.Error("unexpected end of file%s", fmtExpects(expected)) + parseAPI.Error("unexpected end of file%s", fmtExpects(expected)) default: - p.Error("unexpected error '%s'%s", err, fmtExpects(expected)) + parseAPI.Error("unexpected error '%s'%s", err, fmtExpects(expected)) } } diff --git a/tokenize/api.go b/tokenize/api.go index 0509dc6..ba08046 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -1,8 +1,6 @@ package tokenize import ( - "fmt" - "git.makaay.nl/mauricem/go-parsekit/read" ) @@ -73,15 +71,18 @@ import ( // can lead to hard to track bugs. I much prefer this forking method, since // no bookkeeping has to be implemented when implementing a parser. 
type API struct { - stackFrames []stackFrame // the stack frames, containing stack level-specific dat - stackLevel int // the current stack level - stackFrame *stackFrame // the current stack frame - reader *read.Buffer // the buffered input reader - Byte ByteMode // access to a set of byte-based input methods - Rune RuneMode // access to a set of rune-based input methods - Output Output // provides output-related functionality - outputTokens []Token // accepted tokens - outputData []byte // accepted data + stackFrames []stackFrame // the stack frames, containing stack level-specific dat + stackLevel int // the current stack level + stackFrame *stackFrame // the current stack frame + + reader *read.Buffer // the buffered input reader + Input Input // provides input-related functionality + Byte InputByteMode // access to a set of byte-based input methods + Rune InputRuneMode // access to a set of rune-based input methods + + Output Output // provides output-related functionality + outputTokens []Token // accepted tokens + outputBytes []byte // accepted bytes } type stackFrame struct { @@ -106,16 +107,19 @@ const initialByteStoreLength = 1024 // for parsekit.read.New(). 
func NewAPI(input interface{}) *API { reader := read.New(input) - api := &API{ - stackFrames: make([]stackFrame, initialStackDepth), - reader: reader, + tokenAPI := &API{ + stackFrames: make([]stackFrame, initialStackDepth), + outputBytes: make([]byte, initialByteStoreLength), + outputTokens: make([]Token, initialTokenStoreLength), + reader: reader, } - api.Byte = ByteMode{api: api, reader: reader} - api.Rune = RuneMode{api: api, reader: reader} - api.Output = Output{api: api} - api.stackFrame = &api.stackFrames[0] + tokenAPI.Input = Input{api: tokenAPI, reader: reader} + tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader} + tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader} + tokenAPI.Output = Output{api: tokenAPI} + tokenAPI.stackFrame = &tokenAPI.stackFrames[0] - return api + return tokenAPI } // Fork forks off a child of the API struct. It will reuse the same @@ -214,14 +218,14 @@ func (tokenAPI *API) Merge(stackLevel int) { // Reset moves the read cursor back to the beginning for the currently active API child. // Aditionally, all output (bytes and tokens) that was emitted from the API child is // cleared as well. -func (api *API) Reset() { - f := api.stackFrame - if api.stackLevel == 0 { +func (tokenAPI *API) Reset() { + f := tokenAPI.stackFrame + if tokenAPI.stackLevel == 0 { f.column = 0 f.line = 0 f.offset = 0 } else { - parent := api.stackFrames[api.stackLevel-1] + parent := tokenAPI.stackFrames[tokenAPI.stackLevel-1] f.column = parent.column f.line = parent.line f.offset = parent.offset @@ -245,25 +249,3 @@ func (tokenAPI *API) Dispose(stackLevel int) { tokenAPI.stackLevel = stackLevel - 1 tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1] } - -// FlushInput flushes input data from the read.Buffer up to the current -// read offset of the parser. -// -// Note: -// When writing your own TokenHandler, you normally won't have to call this -// method yourself. It is automatically called by parsekit when possible. 
-func (api *API) FlushInput() bool { - if api.stackFrame.offset > 0 { - api.reader.Flush(api.stackFrame.offset) - api.stackFrame.offset = 0 - return true - } - return false -} - -func (api *API) Cursor() string { - if api.stackFrame.line == 0 && api.stackFrame.column == 0 { - return fmt.Sprintf("start of file") - } - return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1) -} diff --git a/tokenize/api_bytemode.go b/tokenize/api_bytemode.go index 3eb5fcf..75baafc 100644 --- a/tokenize/api_bytemode.go +++ b/tokenize/api_bytemode.go @@ -2,8 +2,8 @@ package tokenize import "git.makaay.nl/mauricem/go-parsekit/read" -// ByteMode provides byte-driven input/output functionality for the tokenize API. -type ByteMode struct { +// InputByteMode provides byte-driven input/output functionality for the tokenize API. +type InputByteMode struct { api *API reader *read.Buffer // the buffered input reader } @@ -13,11 +13,11 @@ type ByteMode struct { // When an error occurs during reading the input, an error will be returned. // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. -func (byteMode ByteMode) Peek(offset int) (byte, error) { +func (byteMode InputByteMode) Peek(offset int) (byte, error) { return byteMode.reader.ByteAt(byteMode.api.stackFrame.offset + offset) } -func (byteMode ByteMode) Accept(b byte) { +func (byteMode InputByteMode) Accept(b byte) { byteMode.api.Output.AddByte(b) byteMode.MoveCursor(b) } @@ -33,7 +33,7 @@ func (byteMode ByteMode) Accept(b byte) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted bytes. -func (byteMode ByteMode) AcceptMulti(bytes ...byte) { +func (byteMode InputByteMode) AcceptMulti(bytes ...byte) { byteMode.api.Output.AddBytes(bytes...) byteMode.MoveCursorMulti(bytes...) 
} @@ -44,7 +44,7 @@ func (byteMode ByteMode) AcceptMulti(bytes ...byte) { // // After the call, byte offset 0 for Peek() and PeekMulti() will point at // the first byte at the new cursor position. -func (byteMode ByteMode) MoveCursor(b byte) { +func (byteMode InputByteMode) MoveCursor(b byte) { f := byteMode.api.stackFrame if b == '\n' { f.column = 0 @@ -62,7 +62,7 @@ func (byteMode ByteMode) MoveCursor(b byte) { // // After the call, byte offset 0 for Peek() and PeekMulti() will point at // the first byte at the new cursor position. -func (byteMode ByteMode) MoveCursorMulti(bytes ...byte) { +func (byteMode InputByteMode) MoveCursorMulti(bytes ...byte) { for _, b := range bytes { byteMode.MoveCursor(b) } diff --git a/tokenize/api_input.go b/tokenize/api_input.go new file mode 100644 index 0000000..469ec0d --- /dev/null +++ b/tokenize/api_input.go @@ -0,0 +1,39 @@ +package tokenize + +import ( + "fmt" + + "git.makaay.nl/mauricem/go-parsekit/read" +) + +// Input provides input-related functionality for the tokenize API, +// which is not specifically bound to a specific read mode (byte, rune). +type Input struct { + api *API + reader *read.Buffer // the buffered input reader +} + +// Cursor returns a string that describes the current read cursor position. +func (i Input) Cursor() string { + f := i.api.stackFrame + if f.line == 0 && f.column == 0 { + return fmt.Sprintf("start of file") + } + return fmt.Sprintf("line %d, column %d", f.line+1, f.column+1) +} + +// Flush flushes input data from the read buffer up to the current +// read cursor position of the tokenizer. +// +// Note: in most cases, you won't have to call this method yourself. +// Parsekit will call this method at points where it knows it is a +// safe thing to do. 
+func (i Input) Flush() bool { + f := i.api.stackFrame + if f.offset > 0 { + i.reader.Flush(f.offset) + f.offset = 0 + return true + } + return false +} diff --git a/tokenize/api_input_test.go b/tokenize/api_input_test.go new file mode 100644 index 0000000..98313a6 --- /dev/null +++ b/tokenize/api_input_test.go @@ -0,0 +1,65 @@ +package tokenize + +import ( + "testing" +) + +func TestMoveCursorByBytes(t *testing.T) { + tokenAPI := NewAPI("") + tokenAPI.Byte.MoveCursor('a') + tokenAPI.Byte.MoveCursor('b') + tokenAPI.Byte.MoveCursor('c') + tokenAPI.Byte.MoveCursor('\r') + tokenAPI.Byte.MoveCursor('\n') + tokenAPI.Byte.MoveCursor('a') + tokenAPI.Byte.MoveCursor('b') + + AssertEqual(t, "line 2, column 3", tokenAPI.Input.Cursor(), "Cursor position after moving by byte") + AssertEqual(t, 7, tokenAPI.stackFrame.offset, "Offset after moving by byte") +} + +func TestMoveCursorByRunes(t *testing.T) { + tokenAPI := NewAPI("") + tokenAPI.Rune.MoveCursor('ɹ') + tokenAPI.Rune.MoveCursor('n') + tokenAPI.Rune.MoveCursor('u') + tokenAPI.Rune.MoveCursor('\r') + tokenAPI.Rune.MoveCursor('\n') + tokenAPI.Rune.MoveCursor('ǝ') + + AssertEqual(t, "line 2, column 2", tokenAPI.Input.Cursor(), "Cursor position after moving by rune") + AssertEqual(t, 8, tokenAPI.stackFrame.offset, "Offset after moving by rune") +} + +func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) { + for _, test := range []struct { + name string + input []string + byte int + rune int + line int + column int + }{ + {"No input at all", []string{""}, 0, 0, 0, 0}, + {"One ASCII char", []string{"a"}, 1, 1, 0, 1}, + {"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3}, + {"One newline", []string{"\n"}, 1, 1, 1, 0}, + {"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3}, + {"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1}, + {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, + {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, + } { + tokenAPI := NewAPI("") + for _, s := range 
test.input { + for _, r := range s { + tokenAPI.Rune.MoveCursor(r) + } + } + if tokenAPI.stackFrame.line != test.line { + t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, tokenAPI.stackFrame.line, test.line) + } + if tokenAPI.stackFrame.column != test.column { + t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, tokenAPI.stackFrame.column, test.column) + } + } +} diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 1684bdb..e2c2d27 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -12,7 +12,7 @@ type Output struct { func (o Output) String() string { a := o.api f := a.stackFrame - bytes := a.outputData[f.bytesStart:f.bytesEnd] + bytes := a.outputBytes[f.bytesStart:f.bytesEnd] return string(bytes) } @@ -22,7 +22,7 @@ func (o Output) Runes() []rune { func (o Output) Rune(offset int) rune { a := o.api - r, _ := utf8.DecodeRune(a.outputData[a.stackFrame.bytesStart+offset:]) + r, _ := utf8.DecodeRune(a.outputBytes[a.stackFrame.bytesStart+offset:]) return r } @@ -41,7 +41,7 @@ func (o Output) AddByte(b byte) { f := a.stackFrame curBytesEnd := f.bytesEnd a.growOutputData(curBytesEnd + 1) - a.outputData[curBytesEnd] = b + a.outputBytes[curBytesEnd] = b f.bytesEnd++ } @@ -56,7 +56,7 @@ func (o Output) AddBytes(bytes ...byte) { curBytesEnd := f.bytesEnd newBytesEnd := curBytesEnd + len(bytes) a.growOutputData(newBytesEnd) - copy(a.outputData[curBytesEnd:], bytes) + copy(a.outputBytes[curBytesEnd:], bytes) f.bytesEnd = newBytesEnd } @@ -66,7 +66,7 @@ func (o Output) AddRunes(runes ...rune) { runesAsString := string(runes) newBytesEnd := f.bytesEnd + len(runesAsString) a.growOutputData(newBytesEnd) - copy(a.outputData[f.bytesEnd:], runesAsString) + copy(a.outputBytes[f.bytesEnd:], runesAsString) f.bytesEnd = newBytesEnd } @@ -133,9 +133,9 @@ func (api *API) growOutputTokens(requiredTokens int) { } func (api *API) growOutputData(requiredBytes int) { - if cap(api.outputData) < requiredBytes { + if 
cap(api.outputBytes) < requiredBytes { newBytes := make([]byte, requiredBytes*2) - copy(newBytes, api.outputData) - api.outputData = newBytes + copy(newBytes, api.outputBytes) + api.outputBytes = newBytes } } diff --git a/tokenize/api_runemode.go b/tokenize/api_runemode.go index 868603d..188fb92 100644 --- a/tokenize/api_runemode.go +++ b/tokenize/api_runemode.go @@ -6,8 +6,8 @@ import ( "git.makaay.nl/mauricem/go-parsekit/read" ) -// RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API. -type RuneMode struct { +// InputRuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API. +type InputRuneMode struct { api *API reader *read.Buffer // the buffered input reader } @@ -25,7 +25,7 @@ type RuneMode struct { // When an error occurs during reading the input, an error will be returned. // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. -func (runeMode RuneMode) Peek(offset int) (rune, int, error) { +func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) { return runeMode.reader.RuneAt(runeMode.api.stackFrame.offset + offset) } @@ -40,14 +40,14 @@ func (runeMode RuneMode) Peek(offset int) (rune, int, error) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted rune. -func (runeMode RuneMode) Accept(r rune) { +func (runeMode InputRuneMode) Accept(r rune) { a := runeMode.api f := a.stackFrame curBytesEnd := f.bytesEnd maxRequiredBytes := curBytesEnd + utf8.UTFMax a.growOutputData(maxRequiredBytes) - w := utf8.EncodeRune(a.outputData[curBytesEnd:], r) + w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r) f.bytesEnd += w runeMode.MoveCursor(r) @@ -64,7 +64,7 @@ func (runeMode RuneMode) Accept(r rune) { // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted runes. 
-func (runeMode RuneMode) AcceptMulti(runes ...rune) { +func (runeMode InputRuneMode) AcceptMulti(runes ...rune) { a := runeMode.api f := a.stackFrame @@ -73,7 +73,7 @@ func (runeMode RuneMode) AcceptMulti(runes ...rune) { a.growOutputData(maxBytes) for _, r := range runes { - w := utf8.EncodeRune(a.outputData[curBytesEnd:], r) + w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r) curBytesEnd += w runeMode.MoveCursor(r) } @@ -86,7 +86,7 @@ func (runeMode RuneMode) AcceptMulti(runes ...rune) { // // After the call, byte offset 0 for Peek() and PeekMulti() will point at // the first rune at the new cursor position. -func (runeMode RuneMode) MoveCursor(r rune) int { +func (runeMode InputRuneMode) MoveCursor(r rune) int { f := runeMode.api.stackFrame if r == '\n' { f.column = 0 @@ -106,7 +106,7 @@ func (runeMode RuneMode) MoveCursor(r rune) int { // // After the call, byte offset 0 for Peek() and PeekMulti() will point at // the first rune at the new cursor position. -func (runeMode RuneMode) MoveCursorMulti(runes ...rune) { +func (runeMode InputRuneMode) MoveCursorMulti(runes ...rune) { for _, r := range runes { runeMode.MoveCursor(r) } diff --git a/tokenize/api_test.go b/tokenize/api_test.go index 5f95163..72f6aaf 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -13,12 +13,12 @@ func ExampleNewAPI() { } func ExampleAPI_PeekByte() { - api := tokenize.NewAPI("The input that the API will handle") + tokenAPI := tokenize.NewAPI("The input that the API will handle") - r1, _, err := api.Rune.Peek(19) // 'A', - r2, _, err := api.Rune.Peek(20) // 'P' - r3, _, err := api.Rune.Peek(21) // 'I' - _, _, err = api.Rune.Peek(100) // EOF + r1, _, err := tokenAPI.Rune.Peek(19) // 'A', + r2, _, err := tokenAPI.Rune.Peek(20) // 'P' + r3, _, err := tokenAPI.Rune.Peek(21) // 'I' + _, _, err = tokenAPI.Rune.Peek(100) // EOF fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) @@ -27,12 +27,12 @@ func ExampleAPI_PeekByte() { } func ExampleAPI_PeekRune() { - api := tokenize.NewAPI("The 
input that the ДPI will handle") + tokenAPI := tokenize.NewAPI("The input that the ДPI will handle") - r1, _, err := api.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21 - r2, _, err := api.Rune.Peek(21) // 'P' - r3, _, err := api.Rune.Peek(22) // 'I' - _, _, err = api.Rune.Peek(100) // EOF + r1, _, err := tokenAPI.Rune.Peek(19) // 'Д', 2 bytes so next rune starts at 21 + r2, _, err := tokenAPI.Rune.Peek(21) // 'P' + r3, _, err := tokenAPI.Rune.Peek(22) // 'I' + _, _, err = tokenAPI.Rune.Peek(100) // EOF fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) @@ -41,49 +41,49 @@ func ExampleAPI_PeekRune() { } func ExampleAPI_AcceptRune() { - api := tokenize.NewAPI("The input that the ДPI will handle") + tokenAPI := tokenize.NewAPI("The input that the ДPI will handle") // Reads 'T' and accepts it to the API output data. - r, _, _ := api.Rune.Peek(0) - api.Rune.Accept(r) + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // Reads 'h' and accepts it to the API output data. - r, _, _ = api.Rune.Peek(0) - api.Rune.Accept(r) + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // Reads 'e', but does not accept it to the API output data. - r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) - fmt.Printf("API results: %q\n", api.Output.String()) + fmt.Printf("API results: %q\n", tokenAPI.Output.String()) // Output: // API results: "Th" } func ExampleAPI_AcceptRunes() { - api := tokenize.NewAPI("The input that the API will handle") + tokenAPI := tokenize.NewAPI("The input that the API will handle") // Peeks at the first two runes 'T' and 'h'. - r0, _, _ := api.Rune.Peek(0) - r1, _, _ := api.Rune.Peek(1) + r0, _, _ := tokenAPI.Rune.Peek(0) + r1, _, _ := tokenAPI.Rune.Peek(1) // Peeks at the third rune 'e'. - api.Rune.Peek(2) + tokenAPI.Rune.Peek(2) // Accepts only 'T' and 'h' into the API results. 
- api.Rune.AcceptMulti(r0, r1) + tokenAPI.Rune.AcceptMulti(r0, r1) - fmt.Printf("API results: %q\n", api.Output.String()) + fmt.Printf("API results: %q\n", tokenAPI.Output.String()) // Output: // API results: "Th" } func ExampleAPI_SkipRune() { - api := tokenize.NewAPI("The input that the API will handle") + tokenAPI := tokenize.NewAPI("The input that the API will handle") for { - r, _, err := api.Rune.Peek(0) + r, _, err := tokenAPI.Rune.Peek(0) // EOF reached. if err != nil { @@ -92,44 +92,44 @@ func ExampleAPI_SkipRune() { // Only accept runes that are vowels. if strings.ContainsRune("aeiouAEIOU", r) { - api.Rune.Accept(r) + tokenAPI.Rune.Accept(r) } else { - api.Rune.MoveCursor(r) + tokenAPI.Rune.MoveCursor(r) } } - fmt.Printf("API results: %q\n", api.Output.String()) + fmt.Printf("API results: %q\n", tokenAPI.Output.String()) // Output: // API results: "eiuaeAIiae" } func ExampleAPI_modifyingResults() { - api := tokenize.NewAPI("") + tokenAPI := tokenize.NewAPI("") - api.Output.AddString("Some runes") - api.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd') - api.Output.AddRunes(' ', 'i', 'n', ' ') - api.Output.AddString("various ways") - fmt.Printf("API result first 10 runes: %q\n", api.Output.Runes()[0:10]) - fmt.Printf("API result runes as string: %q\n", api.Output.String()) + tokenAPI.Output.AddString("Some runes") + tokenAPI.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd') + tokenAPI.Output.AddRunes(' ', 'i', 'n', ' ') + tokenAPI.Output.AddString("various ways") + fmt.Printf("API result first 10 runes: %q\n", tokenAPI.Output.Runes()[0:10]) + fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String()) - api.Output.SetString("new ") - api.Output.AddString("set ") - api.Output.AddString("of ") - api.Output.AddRunes('r', 'u', 'n', 'e', 's') - fmt.Printf("API result runes as string: %q\n", api.Output.String()) - fmt.Printf("API result runes: %q\n", api.Output.Runes()) - fmt.Printf("API third rune: %q\n", api.Output.Rune(2)) + 
tokenAPI.Output.SetString("new ") + tokenAPI.Output.AddString("set ") + tokenAPI.Output.AddString("of ") + tokenAPI.Output.AddRunes('r', 'u', 'n', 'e', 's') + fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String()) + fmt.Printf("API result runes: %q\n", tokenAPI.Output.Runes()) + fmt.Printf("API third rune: %q\n", tokenAPI.Output.Rune(2)) - api.Output.AddToken(tokenize.Token{ + tokenAPI.Output.AddToken(tokenize.Token{ Type: 42, Value: "towel"}) - api.Output.AddToken(tokenize.Token{ + tokenAPI.Output.AddToken(tokenize.Token{ Type: 73, Value: "Zaphod"}) - fmt.Printf("API result tokens: %v\n", api.Output.Tokens()) - fmt.Printf("API second result token: %v\n", api.Output.Token(1)) + fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens()) + fmt.Printf("API second result token: %v\n", tokenAPI.Output.Token(1)) // Output: // API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's'] @@ -142,24 +142,24 @@ func ExampleAPI_modifyingResults() { } func ExampleAPI_Reset() { - api := tokenize.NewAPI("Very important input!") + tokenAPI := tokenize.NewAPI("Very important input!") - r, _, _ := api.Rune.Peek(0) // read 'V' - api.Rune.Accept(r) - r, _, _ = api.Rune.Peek(0) // read 'e' - api.Rune.Accept(r) - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) + r, _, _ := tokenAPI.Rune.Peek(0) // read 'V' + tokenAPI.Rune.Accept(r) + r, _, _ = tokenAPI.Rune.Peek(0) // read 'e' + tokenAPI.Rune.Accept(r) + fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // Reset clears the results. - api.Reset() - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) + tokenAPI.Reset() + fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // So then doing the same read operations, the same data are read. 
- r, _, _ = api.Rune.Peek(0) // read 'V' - api.Rune.Accept(r) - r, _, _ = api.Rune.Peek(0) // read 'e' - api.Rune.Accept(r) - fmt.Printf("API results: %q at %s\n", api.Output.String(), api.Cursor()) + r, _, _ = tokenAPI.Rune.Peek(0) // read 'V' + tokenAPI.Rune.Accept(r) + r, _, _ = tokenAPI.Rune.Peek(0) // read 'e' + tokenAPI.Rune.Accept(r) + fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor()) // Output: // API results: "Ve" at line 1, column 3 @@ -233,155 +233,155 @@ func ExampleAPI_Merge() { } func TestMultipleLevelsOfForksAndMerges(t *testing.T) { - api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz") + tokenAPI := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz") // Fork a few levels. - child1 := api.Fork() - child2 := api.Fork() - child3 := api.Fork() - child4 := api.Fork() + child1 := tokenAPI.Fork() + child2 := tokenAPI.Fork() + child3 := tokenAPI.Fork() + child4 := tokenAPI.Fork() // Read a rune 'a' from child4. - r, _, _ := api.Rune.Peek(0) + r, _, _ := tokenAPI.Rune.Peek(0) AssertEqual(t, 'a', r, "child4 rune 1") - api.Rune.Accept(r) - AssertEqual(t, "a", api.Output.String(), "child4 runes after rune 1") + tokenAPI.Rune.Accept(r) + AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1") // Read another rune 'b' from child4. - r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) AssertEqual(t, 'b', r, "child4 rune 2") - api.Rune.Accept(r) - AssertEqual(t, "ab", api.Output.String(), "child4 runes after rune 2") + tokenAPI.Rune.Accept(r) + AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2") // Merge "ab" from child4 to child3. - api.Merge(child4) - AssertEqual(t, "", api.Output.String(), "child4 runes after first merge") + tokenAPI.Merge(child4) + AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge") // Read some more from child4. 
- r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) AssertEqual(t, 'c', r, "child4 rune 3") - api.Rune.Accept(r) - AssertEqual(t, "c", api.Output.String(), "child4 runes after rune 1") - AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child4 rune 3") + tokenAPI.Rune.Accept(r) + AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 1") + AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3") // Merge "c" from child4 to child3. - api.Merge(child4) + tokenAPI.Merge(child4) // And dispose of child4, making child3 the active stack level. - api.Dispose(child4) + tokenAPI.Dispose(child4) // Child3 should now have the compbined results "abc" from child4's work. - AssertEqual(t, "abc", api.Output.String(), "child3 after merge of child4") - AssertEqual(t, "line 1, column 4", api.Cursor(), "cursor child3 rune 3, after merge of child4") + AssertEqual(t, "abc", tokenAPI.Output.String(), "child3 after merge of child4") + AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4") // Now read some data from child3. - r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) AssertEqual(t, 'd', r, "child3 rune 5") - api.Rune.Accept(r) + tokenAPI.Rune.Accept(r) - r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) AssertEqual(t, 'e', r, "child3 rune 5") - api.Rune.Accept(r) + tokenAPI.Rune.Accept(r) - r, _, _ = api.Rune.Peek(0) + r, _, _ = tokenAPI.Rune.Peek(0) AssertEqual(t, 'f', r, "child3 rune 5") - api.Rune.Accept(r) + tokenAPI.Rune.Accept(r) - AssertEqual(t, "abcdef", api.Output.String(), "child3 total result after rune 6") + AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6") // Temporarily go some new forks from here, but don't use their outcome. 
- child3sub1 := api.Fork() - r, _, _ = api.Rune.Peek(0) - api.Rune.Accept(r) - r, _, _ = api.Rune.Peek(0) - api.Rune.Accept(r) - child3sub2 := api.Fork() - r, _, _ = api.Rune.Peek(0) - api.Rune.Accept(r) - api.Merge(child3sub2) // do merge sub2 down to sub1 - api.Dispose(child3sub2) // and dispose of sub2 - api.Dispose(child3sub1) // but dispose of sub1 without merging + child3sub1 := tokenAPI.Fork() + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) + child3sub2 := tokenAPI.Fork() + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) + tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1 + tokenAPI.Dispose(child3sub2) // and dispose of sub2 + tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging // Instead merge the results from before this forking segway from child3 to child2 // and dispose of it. - api.Merge(child3) - api.Dispose(child3) + tokenAPI.Merge(child3) + tokenAPI.Dispose(child3) - AssertEqual(t, "abcdef", api.Output.String(), "child2 total result after merge of child3") - AssertEqual(t, "line 1, column 7", api.Cursor(), "cursor child2 after merge child3") + AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child2 total result after merge of child3") + AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor child2 after merge child3") // Merge child2 to child1 and dispose of it. - api.Merge(child2) - api.Dispose(child2) + tokenAPI.Merge(child2) + tokenAPI.Dispose(child2) // Merge child1 a few times to the top level api. - api.Merge(child1) - api.Merge(child1) - api.Merge(child1) - api.Merge(child1) + tokenAPI.Merge(child1) + tokenAPI.Merge(child1) + tokenAPI.Merge(child1) + tokenAPI.Merge(child1) // And dispose of it. - api.Dispose(child1) + tokenAPI.Dispose(child1) // Read some data from the top level api. 
- r, _, _ = api.Rune.Peek(0) - api.Rune.Accept(r) + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) - AssertEqual(t, "abcdefg", api.Output.String(), "api string end result") - AssertEqual(t, "line 1, column 8", api.Cursor(), "api cursor end result") + AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result") + AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result") } func TestClearData(t *testing.T) { - api := tokenize.NewAPI("Laphroaig") - r, _, _ := api.Rune.Peek(0) // Read 'L' - api.Rune.Accept(r) // Add to runes - r, _, _ = api.Rune.Peek(0) // Read 'a' - api.Rune.Accept(r) // Add to runes - api.Output.ClearData() // Clear the runes, giving us a fresh start. - r, _, _ = api.Rune.Peek(0) // Read 'p' - api.Rune.Accept(r) // Add to runes - r, _, _ = api.Rune.Peek(0) // Read 'r' - api.Rune.Accept(r) // Add to runes + tokenAPI := tokenize.NewAPI("Laphroaig") + r, _, _ := tokenAPI.Rune.Peek(0) // Read 'L' + tokenAPI.Rune.Accept(r) // Add to runes + r, _, _ = tokenAPI.Rune.Peek(0) // Read 'a' + tokenAPI.Rune.Accept(r) // Add to runes + tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start. 
+ r, _, _ = tokenAPI.Rune.Peek(0) // Read 'p' + tokenAPI.Rune.Accept(r) // Add to runes + r, _, _ = tokenAPI.Rune.Peek(0) // Read 'r' + tokenAPI.Rune.Accept(r) // Add to runes - AssertEqual(t, "ph", api.Output.String(), "api string end result") + AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result") } func TestMergeScenariosForTokens(t *testing.T) { - api := tokenize.NewAPI("") + tokenAPI := tokenize.NewAPI("") token1 := tokenize.Token{Value: 1} token2 := tokenize.Token{Value: 2} token3 := tokenize.Token{Value: 3} token4 := tokenize.Token{Value: 4} - api.Output.SetTokens(token1) - tokens := api.Output.Tokens() + tokenAPI.Output.SetTokens(token1) + tokens := tokenAPI.Output.Tokens() AssertEqual(t, 1, len(tokens), "Tokens 1") - child := api.Fork() + child := tokenAPI.Fork() - tokens = api.Output.Tokens() + tokens = tokenAPI.Output.Tokens() AssertEqual(t, 0, len(tokens), "Tokens 2") - api.Output.AddToken(token2) + tokenAPI.Output.AddToken(token2) - api.Merge(child) - api.Dispose(child) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) - tokens = api.Output.Tokens() + tokens = tokenAPI.Output.Tokens() AssertEqual(t, 2, len(tokens), "Tokens 3") - child = api.Fork() - api.Output.AddToken(token3) - api.Reset() - api.Output.AddToken(token4) + child = tokenAPI.Fork() + tokenAPI.Output.AddToken(token3) + tokenAPI.Reset() + tokenAPI.Output.AddToken(token4) - api.Merge(child) - api.Dispose(child) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) - tokens = api.Output.Tokens() + tokens = tokenAPI.Output.Tokens() AssertEqual(t, 3, len(tokens), "Tokens 4") - AssertEqual(t, 1, api.Output.TokenValue(0).(int), "Tokens 4, value 0") - AssertEqual(t, 2, api.Output.TokenValue(1).(int), "Tokens 4, value 1") - AssertEqual(t, 4, api.Output.TokenValue(2).(int), "Tokens 4, value 2") + AssertEqual(t, 1, tokenAPI.Output.TokenValue(0).(int), "Tokens 4, value 0") + AssertEqual(t, 2, tokenAPI.Output.TokenValue(1).(int), "Tokens 4, value 1") + AssertEqual(t, 4, 
tokenAPI.Output.TokenValue(2).(int), "Tokens 4, value 2") } diff --git a/tokenize/cursor_test.go b/tokenize/cursor_test.go deleted file mode 100644 index 257c181..0000000 --- a/tokenize/cursor_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package tokenize - -import ( - "testing" -) - -func TestMoveCursorByBytes(t *testing.T) { - api := NewAPI("") - api.Byte.MoveCursor('a') - api.Byte.MoveCursor('b') - api.Byte.MoveCursor('c') - api.Byte.MoveCursor('\r') - api.Byte.MoveCursor('\n') - api.Byte.MoveCursor('a') - api.Byte.MoveCursor('b') - - AssertEqual(t, "line 2, column 3", api.Cursor(), "Cursor position after moving by byte") - AssertEqual(t, 7, api.stackFrame.offset, "Offset after moving by byte") -} - -func TestMoveCursorByRunes(t *testing.T) { - api := NewAPI("") - api.Rune.MoveCursor('ɹ') - api.Rune.MoveCursor('n') - api.Rune.MoveCursor('u') - api.Rune.MoveCursor('\r') - api.Rune.MoveCursor('\n') - api.Rune.MoveCursor('ǝ') - - AssertEqual(t, "line 2, column 2", api.Cursor(), "Cursor position after moving by rune") - AssertEqual(t, 8, api.stackFrame.offset, "Offset after moving by rune") -} - -func TestWhenMovingCursor_CursorPositionIsUpdated(t *testing.T) { - for _, test := range []struct { - name string - input []string - byte int - rune int - line int - column int - }{ - {"No input at all", []string{""}, 0, 0, 0, 0}, - {"One ASCII char", []string{"a"}, 1, 1, 0, 1}, - {"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3}, - {"One newline", []string{"\n"}, 1, 1, 1, 0}, - {"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3}, - {"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1}, - {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9}, - {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10}, - } { - api := NewAPI("") - for _, s := range test.input { - for _, r := range s { - api.Rune.MoveCursor(r) - } - } - if api.stackFrame.line != test.line { - t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, api.stackFrame.line, test.line) - } - 
if api.stackFrame.column != test.column { - t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, api.stackFrame.column, test.column) - } - } -} diff --git a/tokenize/handler.go b/tokenize/handler.go index ff3a4e5..e43890d 100644 --- a/tokenize/handler.go +++ b/tokenize/handler.go @@ -7,7 +7,7 @@ package tokenize // A Handler function gets an API as its input and returns a boolean to // indicate whether or not it found a match on the input. The API is used // for retrieving input data to match against and for reporting back results. -type Handler func(t *API) bool +type Handler func(tokenAPI *API) bool // Match is syntactic sugar that allows you to write a construction like // NewTokenizer(handler).Execute(input) as handler.Match(input). diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index cb99bb4..335e416 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -320,7 +320,7 @@ var T = struct { Float64 func(interface{}, Handler) Handler Boolean func(interface{}, Handler) Handler ByValue func(toktype interface{}, handler Handler, value interface{}) Handler - ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler + ByCallback func(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler Group func(interface{}, Handler) Handler }{ Str: MakeStrLiteralToken, @@ -349,10 +349,10 @@ var T = struct { // MatchByte creates a Handler function that matches against the provided byte. 
func MatchByte(expected byte) Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil && b == expected { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -364,10 +364,10 @@ func MatchRune(expected rune) Handler { if expected <= '\x7F' { return MatchByte(byte(expected)) } - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil && r == expected { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -377,14 +377,14 @@ func MatchRune(expected rune) Handler { // MatchBytes creates a Handler function that checks if the input matches // one of the provided bytes. The first match counts. func MatchBytes(expected ...byte) Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err != nil { return false } for _, e := range expected { if b == e { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } } @@ -407,14 +407,14 @@ func MatchRunes(expected ...rune) Handler { if onlyBytes { return MatchBytes(expectedBytes...) 
} - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err != nil { return false } for _, e := range expected { if r == e { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } } @@ -433,10 +433,10 @@ func MatchByteRange(start byte, end byte) Handler { if end < start { callerPanic("MatchByteRange", "Handler: {name} definition error at {caller}: start %q must not be < end %q", start, end) } - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil && b >= start && b <= end { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -457,10 +457,10 @@ func MatchRuneRange(start rune, end rune) Handler { if end <= '\x7F' { return MatchByteRange(byte(start), byte(end)) } - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil && r >= start && r <= end { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -470,19 +470,19 @@ func MatchRuneRange(start rune, end rune) Handler { // MatchNewline creates a handler that matches a newline, which is either // a DOS-style newline (CRLF, \r\n) or a UNIX-style newline (just a LF, \n). func MatchNewline() Handler { - return func(t *API) bool { - b1, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b1, err := tokenAPI.Byte.Peek(0) if err != nil { return false } if b1 == '\n' { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } if b1 == '\r' { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) if err == nil && b2 == '\n' { - t.Byte.AcceptMulti(b1, b2) + tokenAPI.Byte.AcceptMulti(b1, b2) return true } } @@ -496,10 +496,10 @@ func MatchNewline() Handler { // When you need whitespace matching, which also includes characters like // newlines, then take a look at MatchWhitespace(). 
func MatchBlank() Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil && (b == ' ' || b == '\t') { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -514,22 +514,22 @@ func MatchBlank() Handler { // When you need unicode whitespace matching, which also includes characters // like a vertical tab, then make use of MatchUnicodeSpace(). func MatchBlanks() Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Match the first blank. - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || (b != ' ' && b != '\t') { return false } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) // Now match any number of followup blanks. We've already got // a successful match at this point, so we'll always return true at the end. for { - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || (b != ' ' && b != '\t') { return true } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } } } @@ -538,37 +538,37 @@ func MatchBlanks() Handler { // whitespace characters, defined as space ' ', tab, ' ', newline '\n' (LF) and // carriage return '\r' followed by a newline '\n' (CRLF). func MatchWhitespace() Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Match the first whitespace. - b1, err := t.Byte.Peek(0) + b1, err := tokenAPI.Byte.Peek(0) if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { return false } if b1 == '\r' { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) if err != nil || b2 != '\n' { return false } - t.Byte.AcceptMulti(b1, b2) + tokenAPI.Byte.AcceptMulti(b1, b2) } else { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) } // Now match any number of followup whitespace. We've already got // a successful match at this point, so we'll always return true at the end. 
for { - b1, err := t.Byte.Peek(0) + b1, err := tokenAPI.Byte.Peek(0) if err != nil || (b1 != ' ' && b1 != '\t' && b1 != '\n' && b1 != '\r') { return true } if b1 == '\r' { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) if err != nil || b2 != '\n' { return true } - t.Byte.AcceptMulti(b1, b2) + tokenAPI.Byte.AcceptMulti(b1, b2) } else { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) } } } @@ -587,10 +587,10 @@ func MatchUnicodeSpace() Handler { // Note that the callback function matches the signature of the unicode.Is* functions, // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). func MatchByteByCallback(callback func(byte) bool) Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil && callback(b) { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -604,10 +604,10 @@ func MatchByteByCallback(callback func(byte) bool) Handler { // Note that the callback function matches the signature of the unicode.Is* functions, // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower). func MatchRuneByCallback(callback func(rune) bool) Handler { - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil && callback(r) { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -616,19 +616,19 @@ func MatchRuneByCallback(callback func(rune) bool) Handler { // MatchEndOfLine creates a Handler that matches a newline ("\r\n" or "\n") or EOF. 
func MatchEndOfLine() Handler { - return func(t *API) bool { - b1, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b1, err := tokenAPI.Byte.Peek(0) if err != nil { return err == io.EOF } if b1 == '\n' { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } if b1 == '\r' { - b2, _ := t.Byte.Peek(1) + b2, _ := tokenAPI.Byte.Peek(1) if b2 == '\n' { - t.Byte.AcceptMulti(b1, b2) + tokenAPI.Byte.AcceptMulti(b1, b2) return true } } @@ -640,24 +640,24 @@ func MatchEndOfLine() Handler { func MatchStr(expected string) Handler { expectedRunes := []rune(expected) - return func(t *API) bool { + return func(tokenAPI *API) bool { offset := 0 for _, e := range expectedRunes { if e <= '\x7F' { - b, err := t.Byte.Peek(offset) + b, err := tokenAPI.Byte.Peek(offset) if err != nil || b != byte(e) { return false } offset++ } else { - r, w, err := t.Rune.Peek(offset) + r, w, err := tokenAPI.Rune.Peek(offset) if err != nil || e != r { return false } offset += w } } - t.Rune.AcceptMulti(expectedRunes...) + tokenAPI.Rune.AcceptMulti(expectedRunes...) return true } } @@ -667,20 +667,20 @@ func MatchStr(expected string) Handler { func MatchStrNoCase(expected string) Handler { l := utf8.RuneCountInString(expected) - return func(t *API) bool { + return func(tokenAPI *API) bool { matches := make([]rune, l) width := 0 i := 0 for _, e := range expected { if e <= '\x7F' { - b, err := t.Byte.Peek(width) + b, err := tokenAPI.Byte.Peek(width) if err != nil || (b != byte(e) && unicode.ToUpper(rune(b)) != unicode.ToUpper(e)) { return false } matches[i] = rune(b) width++ } else { - r, w, err := t.Rune.Peek(width) + r, w, err := tokenAPI.Rune.Peek(width) if err != nil || (r != e && unicode.ToUpper(r) != unicode.ToUpper(e)) { return false } @@ -689,7 +689,7 @@ func MatchStrNoCase(expected string) Handler { } i++ } - t.Rune.AcceptMulti(matches...) + tokenAPI.Rune.AcceptMulti(matches...) 
return true } } @@ -699,12 +699,12 @@ func MatchStrNoCase(expected string) Handler { // no output is generated but still a successful match is reported (but the // result will be empty). func MatchOptional(handler Handler) Handler { - return func(t *API) bool { - child := t.Fork() - if handler(t) { - t.Merge(child) + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if handler(tokenAPI) { + tokenAPI.Merge(child) } - t.Dispose(child) + tokenAPI.Dispose(child) return true } } @@ -713,20 +713,20 @@ func MatchOptional(handler Handler) Handler { // applied in their exact order. Only if all Handlers apply, the sequence // reports successful match. func MatchSeq(handlers ...Handler) Handler { - return func(t *API) bool { - child := t.Fork() + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() for _, handler := range handlers { - subchild := t.Fork() - if !handler(t) { - t.Dispose(subchild) - t.Dispose(child) + subchild := tokenAPI.Fork() + if !handler(tokenAPI) { + tokenAPI.Dispose(subchild) + tokenAPI.Dispose(child) return false } - t.Merge(subchild) - t.Dispose(subchild) + tokenAPI.Merge(subchild) + tokenAPI.Dispose(subchild) } - t.Merge(child) - t.Dispose(child) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } } @@ -735,17 +735,17 @@ func MatchSeq(handlers ...Handler) Handler { // can be applied. They are applied in their provided order. The first Handler // that applies is used for reporting back a match. func MatchAny(handlers ...Handler) Handler { - return func(t *API) bool { - child := t.Fork() + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() for _, handler := range handlers { - if handler(t) { - t.Merge(child) - t.Dispose(child) + if handler(tokenAPI) { + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } - t.Reset() + tokenAPI.Reset() } - t.Dispose(child) + tokenAPI.Dispose(child) return false } @@ -755,16 +755,16 @@ func MatchAny(handlers ...Handler) Handler { // the current input. 
If it does, then a failed match will be reported. If it // does not, then the next rune from the input will be reported as a match. func MatchNot(handler Handler) Handler { - return func(t *API) bool { - child := t.Fork() - if handler(t) { - t.Dispose(child) + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if handler(tokenAPI) { + tokenAPI.Dispose(child) return false } - t.Dispose(child) - r, _, err := t.Rune.Peek(0) + tokenAPI.Dispose(child) + r, _, err := tokenAPI.Rune.Peek(0) if err == nil { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -839,15 +839,15 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { if max >= 0 && min > max { callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min) } - return func(t *API) bool { + return func(tokenAPI *API) bool { total := 0 // Check for the minimum required amount of matches. - child := t.Fork() + child := tokenAPI.Fork() for total < min { total++ - if !handler(t) { - t.Dispose(child) + if !handler(tokenAPI) { + tokenAPI.Dispose(child) return false } } @@ -857,12 +857,12 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { //child.Merge() for max < 0 || total < max { total++ - if !handler(t) { + if !handler(tokenAPI) { break } } - t.Merge(child) - t.Dispose(child) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } } @@ -880,14 +880,14 @@ func MatchSeparated(separator Handler, separated Handler) Handler { // applied. If the handler applies, but the except Handler as well, then the match // as a whole will be treated as a mismatch. 
func MatchExcept(handler Handler, except Handler) Handler { - return func(t *API) bool { - child := t.Fork() - if except(t) { - t.Dispose(child) + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if except(tokenAPI) { + tokenAPI.Dispose(child) return false } - t.Dispose(child) - return handler(t) + tokenAPI.Dispose(child) + return handler(tokenAPI) } } @@ -896,11 +896,11 @@ func MatchExcept(handler Handler, except Handler) Handler { // When both handlers match, the match for the handler is accepted and the match // for the lookAhead handler is ignored. func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { - return func(t *API) bool { - if handler(t) { - child := t.Fork() - result := lookAhead(t) - t.Dispose(child) + return func(tokenAPI *API) bool { + if handler(tokenAPI) { + child := tokenAPI.Fork() + result := lookAhead(tokenAPI) + tokenAPI.Dispose(child) return result } return false @@ -912,11 +912,11 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler { // If the handler matches and the lookAhead handler doesn't, then the match for // the handler is accepted. func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { - return func(t *API) bool { - if handler(t) { - child := t.Fork() - result := !lookAhead(t) - t.Dispose(child) + return func(tokenAPI *API) bool { + if handler(tokenAPI) { + child := tokenAPI.Fork() + result := !lookAhead(tokenAPI) + tokenAPI.Dispose(child) return result } return false @@ -939,9 +939,9 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler { // Rule of thumb is: only use it when you have to actually fix a memory // hogging issue for your use case. 
func MakeInputFlusher(handler Handler) Handler { - return func(t *API) bool { - if handler(t) { - t.FlushInput() + return func(tokenAPI *API) bool { + if handler(tokenAPI) { + tokenAPI.Input.Flush() return true } return false @@ -954,22 +954,22 @@ func MakeInputFlusher(handler Handler) Handler { // // C.Signed(A.Integer) func MatchSigned(handler Handler) Handler { - return func(t *API) bool { - child := t.Fork() - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + b, err := tokenAPI.Byte.Peek(0) if err != nil { - t.Dispose(child) + tokenAPI.Dispose(child) return false } if b == '-' || b == '+' { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } - if handler(t) { - t.Merge(child) - t.Dispose(child) + if handler(tokenAPI) { + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } - t.Dispose(child) + tokenAPI.Dispose(child) return false } } @@ -984,11 +984,11 @@ func MatchIntegerBetween(min int64, max int64) Handler { } digits := MatchSigned(MatchDigits()) - return func(t *API) bool { - if !digits(t) { + return func(tokenAPI *API) bool { + if !digits(tokenAPI) { return false } - value, _ := strconv.ParseInt(t.Output.String(), 10, 64) + value, _ := strconv.ParseInt(tokenAPI.Output.String(), 10, 64) if value < min || value > max { return false } @@ -1000,10 +1000,10 @@ func MatchIntegerBetween(min int64, max int64) Handler { // has been reached. This Handler will never produce output. It only reports // a successful or a failing match through its boolean return value. func MatchEndOfFile() Handler { - return func(t *API) bool { - child := t.Fork() - _, err := t.Byte.Peek(0) - t.Dispose(child) + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + _, err := tokenAPI.Byte.Peek(0) + tokenAPI.Dispose(child) return err == io.EOF } } @@ -1017,10 +1017,10 @@ func MatchUntilEndOfLine() Handler { // MatchAnyByte creates a Handler function that accepts any byte from the input. 
func MatchAnyByte() Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -1031,10 +1031,10 @@ func MatchAnyByte() Handler { // read from the input. Invalid runes on the input are replaced with the UTF8 // replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �. func MatchAnyRune() Handler { - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -1044,10 +1044,10 @@ func MatchAnyRune() Handler { // MatchValidRune creates a Handler function that checks if a valid // UTF8 rune can be read from the input. func MatchValidRune() Handler { - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil && r != utf8.RuneError { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -1057,10 +1057,10 @@ func MatchValidRune() Handler { // MatchInvalidRune creates a Handler function that checks if an invalid // UTF8 rune can be read from the input. func MatchInvalidRune() Handler { - return func(t *API) bool { - r, _, err := t.Rune.Peek(0) + return func(tokenAPI *API) bool { + r, _, err := tokenAPI.Rune.Peek(0) if err == nil && r == utf8.RuneError { - t.Rune.Accept(r) + tokenAPI.Rune.Accept(r) return true } return false @@ -1076,21 +1076,21 @@ func MatchDigit() Handler { // MatchDigits creates a Handler that checks if one or more digits can be read // from the input. func MatchDigits() Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Check if the first character is a digit. 
- b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) // Continue accepting bytes as long as they are digits. for { - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return true } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } } } @@ -1107,9 +1107,9 @@ func MatchDigitNotZero() Handler { // Leading zeroes are allowed. When the normalize parameter is true, these // will be stripped from the input. func MatchInteger(normalize bool) Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Check if the first character is a digit. - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } @@ -1117,33 +1117,33 @@ func MatchInteger(normalize bool) Handler { // When normalization is requested, drop leading zeroes. if normalize && b == '0' { for { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.Byte.MoveCursor('0') + tokenAPI.Byte.MoveCursor('0') continue } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.Byte.Accept('0') + tokenAPI.Byte.Accept('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.Byte.MoveCursor('0') - t.Byte.Accept(b2) + tokenAPI.Byte.MoveCursor('0') + tokenAPI.Byte.Accept(b2) break } } // Continue accepting bytes as long as they are digits. for { - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return true } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } } } @@ -1156,9 +1156,9 @@ func MatchInteger(normalize bool) Handler { // Leading zeroes are allowed. 
When the normalize parameter is true, these // will be stripped from the input. func MatchDecimal(normalize bool) Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Check if the first character is a digit. - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { return false } @@ -1166,58 +1166,58 @@ func MatchDecimal(normalize bool) Handler { // When normalization is requested, drop leading zeroes. if normalize && b == '0' { for { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) // The next character is a zero, skip the leading zero and check again. if err == nil && b2 == b { - t.Byte.MoveCursor('0') + tokenAPI.Byte.MoveCursor('0') continue } // The next character is a dot, go with the zero before the dot and // let the upcoming code handle the dot. if err == nil && b2 == '.' { - t.Byte.Accept('0') + tokenAPI.Byte.Accept('0') break } // The next character is not a zero, nor a digit at all. // We're looking at a zero on its own here. if err != nil || b2 < '1' || b2 > '9' { - t.Byte.Accept('0') + tokenAPI.Byte.Accept('0') return true } // The next character is a digit. SKip the leading zero and go with the digit. - t.Byte.MoveCursor('0') - t.Byte.Accept(b2) + tokenAPI.Byte.MoveCursor('0') + tokenAPI.Byte.Accept(b2) break } } // Continue accepting bytes as long as they are digits. for { - b, err = t.Byte.Peek(0) + b, err = tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { break } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } // No dot or no digit after a dot? Then we're done. if b != '.' { return true } - b, err = t.Byte.Peek(1) + b, err = tokenAPI.Byte.Peek(1) if err != nil || b < '0' || b > '9' { return true } // Continue accepting bytes as long as they are digits. 
- t.Byte.AcceptMulti('.', b) + tokenAPI.Byte.AcceptMulti('.', b) for { - b, err = t.Byte.Peek(0) + b, err = tokenAPI.Byte.Peek(0) if err != nil || b < '0' || b > '9' { break } - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) } return true } @@ -1231,54 +1231,54 @@ func MatchDecimal(normalize bool) Handler { // // False falues: false, FALSE, False, 0, f, F func MatchBoolean() Handler { - return func(t *API) bool { - b1, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b1, err := tokenAPI.Byte.Peek(0) if err != nil { return false } if b1 == '1' || b1 == '0' { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } if b1 == 't' || b1 == 'T' { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) if err != nil || (b2 != 'R' && b2 != 'r') { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } - b3, _ := t.Byte.Peek(2) - b4, err := t.Byte.Peek(3) + b3, _ := tokenAPI.Byte.Peek(2) + b4, err := tokenAPI.Byte.Peek(3) if err == nil && b2 == 'r' && b3 == 'u' && b4 == 'e' { - t.Byte.AcceptMulti(b1, b2, b3, b4) + tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4) return true } if err == nil && b1 == 'T' && b2 == 'R' && b3 == 'U' && b4 == 'E' { - t.Byte.AcceptMulti(b1, b2, b3, b4) + tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4) return true } - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } if b1 == 'f' || b1 == 'F' { - b2, err := t.Byte.Peek(1) + b2, err := tokenAPI.Byte.Peek(1) if err != nil || (b2 != 'A' && b2 != 'a') { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } // TODO Multibyte peeks (also useful for strings) - b3, _ := t.Byte.Peek(2) - b4, _ := t.Byte.Peek(3) - b5, err := t.Byte.Peek(4) + b3, _ := tokenAPI.Byte.Peek(2) + b4, _ := tokenAPI.Byte.Peek(3) + b5, err := tokenAPI.Byte.Peek(4) if err == nil && b2 == 'a' && b3 == 'l' && b4 == 's' && b5 == 'e' { - t.Byte.AcceptMulti(b1, b2, b3, b4, b5) + tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5) return true } if err == nil && b1 == 'F' && b2 == 'A' && b3 == 'L' && b4 == 'S' && b5 == 
'E' { - t.Byte.AcceptMulti(b1, b2, b3, b4, b5) + tokenAPI.Byte.AcceptMulti(b1, b2, b3, b4, b5) return true } - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) return true } return false @@ -1324,10 +1324,10 @@ func MatchUnicodeLower() Handler { // MatchHexDigit creates a Handler function that check if a single hexadecimal // digit can be read from the input. func MatchHexDigit() Handler { - return func(t *API) bool { - b, err := t.Byte.Peek(0) + return func(tokenAPI *API) bool { + b, err := tokenAPI.Byte.Peek(0) if err == nil && ((b >= '0' && b <= '9') || (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F')) { - t.Byte.Accept(b) + tokenAPI.Byte.Accept(b) return true } return false @@ -1343,30 +1343,30 @@ func MatchHexDigit() Handler { // When the normalize parameter is set to true, then leading zeroes will be // stripped from the octet. func MatchOctet(normalize bool) Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { // Digit 1 - b0, err := t.Byte.Peek(0) + b0, err := tokenAPI.Byte.Peek(0) if err != nil || b0 < '0' || b0 > '9' { return false } // Digit 2 - b1, err := t.Byte.Peek(1) + b1, err := tokenAPI.Byte.Peek(1) if err != nil || b1 < '0' || b1 > '9' { // Output 1-digit octet. - t.Byte.Accept(b0) + tokenAPI.Byte.Accept(b0) return true } // Digit 3 - b2, err := t.Byte.Peek(2) + b2, err := tokenAPI.Byte.Peek(2) if err != nil || b2 < '0' || b2 > '9' { // Output 2-digit octet. if normalize && b0 == '0' { - t.Byte.MoveCursor(b0) - t.Byte.Accept(b1) + tokenAPI.Byte.MoveCursor(b0) + tokenAPI.Byte.Accept(b1) } else { - t.Byte.AcceptMulti(b0, b1) + tokenAPI.Byte.AcceptMulti(b0, b1) } return true } @@ -1378,15 +1378,15 @@ func MatchOctet(normalize bool) Handler { // Output 3-digit octet. 
if normalize && b0 == '0' { - t.Byte.MoveCursor(b0) + tokenAPI.Byte.MoveCursor(b0) if b1 == '0' { - t.Byte.MoveCursor(b1) + tokenAPI.Byte.MoveCursor(b1) } else { - t.Byte.Accept(b1) + tokenAPI.Byte.Accept(b1) } - t.Byte.Accept(b2) + tokenAPI.Byte.Accept(b2) } else { - t.Byte.AcceptMulti(b0, b1, b2) + tokenAPI.Byte.AcceptMulti(b0, b1, b2) } return true } @@ -1422,20 +1422,20 @@ func MatchIPv4Netmask(normalize bool) Handler { dot := MatchRune('.') netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet) - return func(t *API) bool { - if !netmask(t) { + return func(tokenAPI *API) bool { + if !netmask(tokenAPI) { return false } // Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes). - val := t.Output.TokenValue + val := tokenAPI.Output.TokenValue mask := net.IPv4Mask(val(0).(byte), val(1).(byte), val(2).(byte), val(3).(byte)) ones, bits := mask.Size() if ones == 0 && bits == 0 { return false } - t.Output.ClearTokens() + tokenAPI.Output.ClearTokens() return true } } @@ -1455,8 +1455,8 @@ func MatchIPv4Net(normalize bool) Handler { MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize))) ipnet := MatchSeq(ip, slash, mask) - return func(t *API) bool { - if !ipnet(t) { + return func(tokenAPI *API) bool { + if !ipnet(tokenAPI) { return false } @@ -1464,19 +1464,19 @@ func MatchIPv4Net(normalize bool) Handler { return true } - maskToken := t.Output.Token(1) - val := t.Output.TokenValue + maskToken := tokenAPI.Output.Token(1) + val := tokenAPI.Output.TokenValue if maskToken.Type == "cidr" { - t.Output.SetString(fmt.Sprintf("%s/%d", val(0), val(1).(uint8))) + tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), val(1).(uint8))) } else { o := strings.Split(val(1).(string), ".") b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) } mask := net.IPv4Mask(b(0), b(1), b(2), b(3)) bits, _ := mask.Size() - t.Output.SetString(fmt.Sprintf("%s/%d", val(0), bits)) + tokenAPI.Output.SetString(fmt.Sprintf("%s/%d", val(0), 
bits)) } - t.Output.ClearTokens() + tokenAPI.Output.ClearTokens() return true } } @@ -1488,14 +1488,14 @@ func MatchIPv6(normalize bool) Handler { colon := MatchRune(':') empty := MatchSeq(colon, colon) - return func(t *API) bool { + return func(tokenAPI *API) bool { nrOfHextets := 0 for nrOfHextets < 8 { - if hextet(t) { + if hextet(tokenAPI) { nrOfHextets++ - } else if empty(t) { + } else if empty(tokenAPI) { nrOfHextets += 2 - } else if !colon(t) { + } else if !colon(tokenAPI) { break } } @@ -1505,13 +1505,13 @@ func MatchIPv6(normalize bool) Handler { } // Invalid IPv6, when net.ParseIP() cannot handle it. - parsed := net.ParseIP(t.Output.String()) + parsed := net.ParseIP(tokenAPI.Output.String()) if parsed == nil { return false } if normalize { - t.Output.SetString(parsed.String()) + tokenAPI.Output.SetString(parsed.String()) } return true } @@ -1530,12 +1530,12 @@ func matchCIDRMask(bits int64, normalize bool) Handler { return mask } - return func(t *API) bool { - if !mask(t) { + return func(tokenAPI *API) bool { + if !mask(tokenAPI) { return false } - bits, _ := strconv.Atoi(t.Output.String()) - t.Output.SetString(fmt.Sprintf("%d", bits)) + bits, _ := strconv.Atoi(tokenAPI.Output.String()) + tokenAPI.Output.SetString(fmt.Sprintf("%d", bits)) return true } } @@ -1569,14 +1569,14 @@ func MatchIPv6Net(normalize bool) Handler { // string "bork" would not match against the second form, but " bork" would. // In both cases, it would match the first form. func ModifyDrop(handler Handler) Handler { - return func(t *API) bool { - runeEnd := t.stackFrame.bytesEnd - tokenEnd := t.stackFrame.tokenEnd - if handler(t) { + return func(tokenAPI *API) bool { + runeEnd := tokenAPI.stackFrame.bytesEnd + tokenEnd := tokenAPI.stackFrame.tokenEnd + if handler(tokenAPI) { // We keep offset and cursor updates, but rollback any runes / tokens // that were added by the handler. 
- t.stackFrame.bytesEnd = runeEnd - t.stackFrame.tokenEnd = tokenEnd + tokenAPI.stackFrame.bytesEnd = runeEnd + tokenAPI.stackFrame.tokenEnd = tokenEnd return true } return false @@ -1587,9 +1587,9 @@ func ModifyDrop(handler Handler) Handler { // (or end of file). This handler is typically used when ignoring any input data after // a comment start like '#' or '//' when parsing code or configuration data. func ModifyDropUntilEndOfLine() Handler { - return func(t *API) bool { + return func(tokenAPI *API) bool { for { - b, err := t.Byte.Peek(0) + b, err := tokenAPI.Byte.Peek(0) if err != nil { if err == io.EOF { return true @@ -1599,7 +1599,7 @@ func ModifyDropUntilEndOfLine() Handler { if b == '\n' { return true } - t.Byte.MoveCursor(b) + tokenAPI.Byte.MoveCursor(b) } } } @@ -1673,19 +1673,19 @@ func ModifyReplace(handler Handler, replaceWith string) Handler { // modified string on output. The return value of the modfunc will replace the // resulting output. func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { - return func(t *API) bool { - child := t.Fork() - if handler(t) { - origS := t.Output.String() + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if handler(tokenAPI) { + origS := tokenAPI.Output.String() s := modfunc(origS) if s != origS { - t.Output.SetString(s) + tokenAPI.Output.SetString(s) } - t.Merge(child) - t.Dispose(child) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } - t.Dispose(child) + tokenAPI.Dispose(child) return false } } @@ -1696,8 +1696,8 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { // escape sequence like "\n" is kept as-is (a backslash character, followed by // an 'n'-character). 
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { - literal := t.Output.String() + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { + literal := tokenAPI.Output.String() return literal }) } @@ -1707,9 +1707,9 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler { // representation of the read Runes. This string is interpreted, meaning that an // escape sequence like "\n" is translated to an actual newline control character func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING - interpreted, _ := interpretString(t.Output.String()) + interpreted, _ := interpretString(tokenAPI.Output.String()) return interpreted }) } @@ -1731,9 +1731,9 @@ func interpretString(str string) (string, error) { // Result, for which the Token.Value is set to a Rune-representation // of the read Rune. func MakeRuneToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING --- not a 1 rune input - return t.Output.Rune(0) + return tokenAPI.Output.Rune(0) }) } @@ -1741,9 +1741,9 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler { // Result, for which the Token.Value is set to a Byte-representation // of the read Rune. 
func MakeByteToken(toktype interface{}, handler Handler) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { // TODO ERROR HANDLING --- not a 1 byte input - return byte(t.Output.Rune(0)) + return byte(tokenAPI.Output.Rune(0)) }) } @@ -1947,8 +1947,8 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler { } func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { - value, err := convert(t.Output.String()) + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { + value, err := convert(tokenAPI.Output.String()) if err != nil { // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"? panic(fmt.Sprintf("%s token invalid (%s)", name, err)) @@ -1960,17 +1960,17 @@ func makeStrconvToken(name string, toktype interface{}, handler Handler, convert // MakeTokenByValue creates a Handler that will add a static Token value // to the Result. func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler { - return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value }) + return MakeTokenByCallback(toktype, handler, func(tokenAPI *API) interface{} { return value }) } // MakeTokenByCallback creates a Handler that will add a Token to the // Result, for which the Token.Value is to be generated by the provided // makeValue() callback function. The function gets the current API as // its input and must return the token value. 
-func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler { - return func(t *API) bool { - child := t.Fork() - if handler(t) { +func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(tokenAPI *API) interface{}) Handler { + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if handler(tokenAPI) { // The token is not added to the child here. The child might have produced its own // tokens and we want those to come after the token for the current parsing level. // By adding the token to the input API and then merging the child tokens, the order @@ -1978,14 +1978,14 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t // e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the // tokens will end up in the order "date", "year", "month", "day". When we'd add the // token to the child here, the order would have been "year", "month", "day", "date". - token := Token{Type: toktype, Value: makeValue(t)} - t.Output.AddToken(token) - t.Merge(child) - t.Dispose(child) + token := Token{Type: toktype, Value: makeValue(tokenAPI)} + tokenAPI.Output.AddToken(token) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } - t.Dispose(child) + tokenAPI.Dispose(child) return false } } @@ -1993,18 +1993,18 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t // MakeTokenGroup checks if the provided handler matches the input. If yes, then it will // take the tokens as produced by the handler and group them together in a single token. 
func MakeTokenGroup(toktype interface{}, handler Handler) Handler { - return func(t *API) bool { - child := t.Fork() - if handler(t) { - tokens := t.Output.Tokens() + return func(tokenAPI *API) bool { + child := tokenAPI.Fork() + if handler(tokenAPI) { + tokens := tokenAPI.Output.Tokens() tokensCopy := make([]Token, len(tokens)) copy(tokensCopy, tokens) - t.Output.SetTokens(Token{Type: toktype, Value: tokensCopy}) - t.Merge(child) - t.Dispose(child) + tokenAPI.Output.SetTokens(Token{Type: toktype, Value: tokensCopy}) + tokenAPI.Merge(child) + tokenAPI.Dispose(child) return true } - t.Dispose(child) + tokenAPI.Dispose(child) return false } } diff --git a/tokenize/tokenize.go b/tokenize/tokenize.go index 35c9064..df24951 100644 --- a/tokenize/tokenize.go +++ b/tokenize/tokenize.go @@ -39,16 +39,16 @@ func (result *Result) String() string { // look at the documentation for parsekit.read.New(). func New(tokenHandler Handler) Func { return func(input interface{}) (*Result, error) { - api := NewAPI(input) - ok := tokenHandler(api) + tokenAPI := NewAPI(input) + ok := tokenHandler(tokenAPI) if !ok { - err := fmt.Errorf("mismatch at %s", api.Cursor()) + err := fmt.Errorf("mismatch at %s", tokenAPI.Input.Cursor()) return nil, err } result := &Result{ - Runes: api.Output.Runes(), - Tokens: api.Output.Tokens(), + Runes: tokenAPI.Output.Runes(), + Tokens: tokenAPI.Output.Tokens(), } return result, nil } diff --git a/tokenize/tokenizer_test.go b/tokenize/tokenizer_test.go index 1b211cb..dc7e361 100644 --- a/tokenize/tokenizer_test.go +++ b/tokenize/tokenizer_test.go @@ -54,29 +54,29 @@ func ExampleNew() { } func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) { - api := makeTokenizeAPI() - r, _, _ := api.Rune.Peek(0) + tokenizeAPI := makeTokenizeAPI() + r, _, _ := tokenizeAPI.Rune.Peek(0) AssertEqual(t, 'T', r, "first rune") } func TestInputCanAcceptRunesFromReader(t *testing.T) { - i := makeTokenizeAPI() + tokenAPI := makeTokenizeAPI() - r0, _, _ := i.Rune.Peek(0) - 
i.Rune.Accept(r0) + r0, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r0) - r1, _, _ := i.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls. - r2, _, _ := i.Rune.Peek(1) - i.Rune.AcceptMulti(r1, r2) + r1, _, _ := tokenAPI.Rune.Peek(0) // 0, because read offset resets to 0 after Accept* calls. + r2, _, _ := tokenAPI.Rune.Peek(1) + tokenAPI.Rune.AcceptMulti(r1, r2) - AssertEqual(t, "Tes", i.Output.String(), "i.String()") + AssertEqual(t, "Tes", tokenAPI.Output.String(), "i.String()") } func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := makeTokenizeAPI() - i.Merge(0) + tokenAPI := makeTokenizeAPI() + tokenAPI.Merge(0) }, Regexp: true, Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`}) @@ -85,10 +85,10 @@ func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) { func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := makeTokenizeAPI() - child := i.Fork() - i.Fork() - i.Merge(child) + tokenAPI := makeTokenizeAPI() + child := tokenAPI.Fork() + tokenAPI.Fork() + tokenAPI.Merge(child) }, Regexp: true, Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` + @@ -98,8 +98,8 @@ func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) { func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := makeTokenizeAPI() - i.Dispose(0) + tokenAPI := makeTokenizeAPI() + tokenAPI.Dispose(0) }, Regexp: true, Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`}) @@ -108,10 +108,10 @@ func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) { func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := makeTokenizeAPI() - child := i.Fork() - i.Fork() - i.Dispose(child) + tokenAPI := makeTokenizeAPI() + child := tokenAPI.Fork() + tokenAPI.Fork() + 
tokenAPI.Dispose(child) }, Regexp: true, Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` + @@ -121,11 +121,11 @@ func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) { func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) { AssertPanic(t, PanicT{ Function: func() { - i := makeTokenizeAPI() - i.Fork() - g := i.Fork() - i.Fork() - i.Merge(g) + tokenAPI := makeTokenizeAPI() + tokenAPI.Fork() + g := tokenAPI.Fork() + tokenAPI.Fork() + tokenAPI.Merge(g) }, Regexp: true, Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` + @@ -133,30 +133,30 @@ func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) { } func TestAccept_UpdatesCursor(t *testing.T) { - i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) - AssertEqual(t, "start of file", i.Cursor(), "cursor 1") + tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines")) + AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1") for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n" - r, _, _ := i.Rune.Peek(0) - i.Rune.Accept(r) + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) } - AssertEqual(t, "line 1, column 7", i.Cursor(), "cursor 2") + AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2") - r, _, _ := i.Rune.Peek(0) // read "\n", cursor ends up at start of new line - i.Rune.Accept(r) - AssertEqual(t, "line 2, column 1", i.Cursor(), "cursor 3") + r, _, _ := tokenAPI.Rune.Peek(0) // read "\n", cursor ends up at start of new line + tokenAPI.Rune.Accept(r) + AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3") for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i" - b, _ := i.Byte.Peek(0) - i.Byte.Accept(b) + b, _ := tokenAPI.Byte.Peek(0) + tokenAPI.Byte.Accept(b) } - AssertEqual(t, "line 3, column 5", i.Cursor(), "cursor 4") + AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4") } func 
TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) { - i := tokenize.NewAPI(strings.NewReader("X")) - r, _, _ := i.Rune.Peek(0) - i.Rune.Accept(r) - r, _, err := i.Rune.Peek(0) + tokenAPI := tokenize.NewAPI(strings.NewReader("X")) + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) + r, _, err := tokenAPI.Rune.Peek(0) AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()") AssertEqual(t, true, err == io.EOF, "returned error from NextRune()") diff --git a/tokenize/tokenizer_whitebox_test.go b/tokenize/tokenizer_whitebox_test.go index e988396..79ef26b 100644 --- a/tokenize/tokenizer_whitebox_test.go +++ b/tokenize/tokenizer_whitebox_test.go @@ -6,84 +6,84 @@ import ( func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) { // Create input, accept the first rune. - i := NewAPI("Testing") - r, _, _ := i.Rune.Peek(0) - i.Rune.Accept(r) // T - AssertEqual(t, "T", i.Output.String(), "accepted rune in input") + tokenAPI := NewAPI("Testing") + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // T + AssertEqual(t, "T", tokenAPI.Output.String(), "accepted rune in input") // Fork - child := i.Fork() - AssertEqual(t, 1, i.stackFrame.offset, "parent offset") - AssertEqual(t, 1, i.stackFrame.offset, "child offset") + child := tokenAPI.Fork() + AssertEqual(t, 1, tokenAPI.stackFrame.offset, "parent offset") + AssertEqual(t, 1, tokenAPI.stackFrame.offset, "child offset") // Accept two runes via fork. 
- r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // e - r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // s - AssertEqual(t, "es", i.Output.String(), "result runes in fork") - AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset") - AssertEqual(t, 3, i.stackFrame.offset, "child offset") + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // e + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // s + AssertEqual(t, "es", tokenAPI.Output.String(), "result runes in fork") + AssertEqual(t, 1, tokenAPI.stackFrames[tokenAPI.stackLevel-1].offset, "parent offset") + AssertEqual(t, 3, tokenAPI.stackFrame.offset, "child offset") // Merge fork back into parent - i.Merge(child) - i.Dispose(child) - AssertEqual(t, "Tes", i.Output.String(), "result runes in parent Input after Merge()") - AssertEqual(t, 3, i.stackFrame.offset, "parent offset") + tokenAPI.Merge(child) + tokenAPI.Dispose(child) + AssertEqual(t, "Tes", tokenAPI.Output.String(), "result runes in parent Input after Merge()") + AssertEqual(t, 3, tokenAPI.stackFrame.offset, "parent offset") } func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) { - i := NewAPI("Testing") - r, _, _ := i.Rune.Peek(0) - i.Rune.Accept(r) // T + tokenAPI := NewAPI("Testing") + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // T - f1 := i.Fork() - r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // e + f1 := tokenAPI.Fork() + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // e - f2 := i.Fork() - r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // s - AssertEqual(t, "s", i.Output.String(), "f2 String()") - AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A") + f2 := tokenAPI.Fork() + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // s + AssertEqual(t, "s", tokenAPI.Output.String(), "f2 String()") + AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f2.offset A") - i.Merge(f2) - i.Dispose(f2) - AssertEqual(t, "es", i.Output.String(), "f1 String()") - 
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") + tokenAPI.Merge(f2) + tokenAPI.Dispose(f2) + AssertEqual(t, "es", tokenAPI.Output.String(), "f1 String()") + AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f1.offset A") - i.Merge(f1) - i.Dispose(f1) - AssertEqual(t, "Tes", i.Output.String(), "top-level API String()") - AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A") + tokenAPI.Merge(f1) + tokenAPI.Dispose(f1) + AssertEqual(t, "Tes", tokenAPI.Output.String(), "top-level API String()") + AssertEqual(t, 3, tokenAPI.stackFrame.offset, "f1.offset A") } func TestFlushInput(t *testing.T) { - i := NewAPI("cool") + tokenAPI := NewAPI("cool") // Flushing without any read data is okay. FlushInput() will return // false in this case, and nothing else happens. - AssertTrue(t, i.FlushInput() == false, "flush input at start") + AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input at start") - r, _, _ := i.Rune.Peek(0) - i.Rune.Accept(r) // c - r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // o + r, _, _ := tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // c + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // o - AssertTrue(t, i.FlushInput() == true, "flush input after reading some data") - AssertEqual(t, 0, i.stackFrame.offset, "offset after flush input") + AssertTrue(t, tokenAPI.Input.Flush() == true, "flush input after reading some data") + AssertEqual(t, 0, tokenAPI.stackFrame.offset, "offset after flush input") - AssertTrue(t, i.FlushInput() == false, "flush input after flush input") + AssertTrue(t, tokenAPI.Input.Flush() == false, "flush input after flush input") // Read offset is now zero, but reading should continue after "co". // The output so far isn't modified, so the following accept calls // will add their runes to the already accepted string "co". 
- r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // o - r, _, _ = i.Rune.Peek(0) - i.Rune.Accept(r) // o + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // o + r, _, _ = tokenAPI.Rune.Peek(0) + tokenAPI.Rune.Accept(r) // o - AssertEqual(t, "cool", i.Output.String(), "end result") + AssertEqual(t, "cool", tokenAPI.Output.String(), "end result") } func TestInputFlusherWrapper(t *testing.T) {