From dd1159e309849d96aab3280fb60f7da6ad991387 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Mon, 22 Jul 2019 07:57:05 +0000 Subject: [PATCH] Committing a bit of code cleanup before trying something bigger. --- tokenize/api_input.go | 37 ++++++--------------------------- tokenize/api_output.go | 47 ++++++++++++++++++++++++++++++++++++------ tokenize/api_test.go | 43 +++++++++++++++++++------------------- 3 files changed, 68 insertions(+), 59 deletions(-) diff --git a/tokenize/api_input.go b/tokenize/api_input.go index 9cbe21d..cec1c09 100644 --- a/tokenize/api_input.go +++ b/tokenize/api_input.go @@ -34,7 +34,8 @@ func (api *API) peekByte(offset int) (byte, error) { // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the skipped byte. func (i *Input) SkipByte(b byte) { - i.api.skipByte(b) + i.api.stackFrame.moveCursorByByte(b) + i.api.stackFrame.offset++ } func (api *API) skipByte(b byte) { @@ -79,20 +80,8 @@ func (i *Input) AcceptByte(b byte) { } func (api *API) acceptByte(b byte) { - curBytesEnd := api.stackFrame.bytesEnd - maxRequiredBytes := curBytesEnd + 1 - - // Grow the bytes capacity when needed. - if cap(api.outputData) < maxRequiredBytes { - newBytes := make([]byte, maxRequiredBytes*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - - api.outputData[curBytesEnd] = b - api.stackFrame.moveCursorByByte(b) - api.stackFrame.bytesEnd++ - api.stackFrame.offset++ + api.dataAddByte(b) + api.skipByte(b) } // AcceptBytes is used to accept one or more bytes that were read from the input. @@ -111,22 +100,8 @@ func (i *Input) AcceptBytes(bytes ...byte) { } func (api *API) acceptBytes(bytes ...byte) { - curBytesEnd := api.stackFrame.bytesEnd - newBytesEnd := curBytesEnd + len(bytes) - - // Grow the bytes capacity when needed. - if cap(api.outputData) < newBytesEnd { - newBytes := make([]byte, newBytesEnd*2) - copy(newBytes, api.outputData) - api.outputData = newBytes - } - - copy(api.outputData[curBytesEnd:], bytes) - for _, b := range bytes { - api.stackFrame.moveCursorByByte(b) - api.stackFrame.offset++ - } - api.stackFrame.bytesEnd = newBytesEnd + api.dataAddBytes(bytes...) + api.skipBytes(bytes...) } // PeekRune returns the UTF8 rune at the provided byte offset, including its byte width. diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 39752d1..38cb471 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -52,20 +52,41 @@ func (api *API) dataSetBytes(bytes ...byte) { api.dataAddBytes(bytes...) } -func (o *Output) AddBytes(bytes ...byte) { - o.api.dataAddBytes(bytes...) +func (o *Output) AddByte(b byte) { + o.api.dataAddByte(b) } -func (api *API) dataAddBytes(bytes ...byte) { - // Grow the runes capacity when needed. - newBytesEnd := api.stackFrame.bytesEnd + len(bytes) +func (api *API) dataAddByte(b byte) { + curBytesEnd := api.stackFrame.bytesEnd + newBytesEnd := curBytesEnd + 1 + + // Grow the bytes capacity when needed. if cap(api.outputData) < newBytesEnd { newBytes := make([]byte, newBytesEnd*2) copy(newBytes, api.outputData) api.outputData = newBytes } - copy(api.outputData[api.stackFrame.bytesEnd:], bytes) + api.stackFrame.bytesEnd++ + api.outputData[curBytesEnd] = b +} + +func (o *Output) AddBytes(bytes ...byte) { + o.api.dataAddBytes(bytes...) +} + +func (api *API) dataAddBytes(bytes ...byte) { + curBytesEnd := api.stackFrame.bytesEnd + newBytesEnd := curBytesEnd + len(bytes) + + // Grow the runes capacity when needed. + if cap(api.outputData) < newBytesEnd { + newBytes := make([]byte, newBytesEnd*2) + copy(newBytes, api.outputData) + api.outputData = newBytes + } + + copy(api.outputData[curBytesEnd:], bytes) api.stackFrame.bytesEnd = newBytesEnd } @@ -152,6 +173,13 @@ func (o *Output) SetTokens(tokens ...Token) { func (api *API) tokensSet(tokens ...Token) { api.tokensClear() api.tokensAdd(tokens...) + type Func func(input interface{}) (*Result, error) + + // Result holds the runes and tokens as produced by the tokenizer. + type Result struct { + Tokens []Token + Runes []rune + } } func (o *Output) AddTokens(tokens ...Token) { @@ -162,6 +190,13 @@ func (api *API) tokensAdd(tokens ...Token) { // Grow the tokens capacity when needed. newTokenEnd := api.stackFrame.tokenEnd + len(tokens) if cap(api.outputTokens) < newTokenEnd { + type Func func(input interface{}) (*Result, error) + + // Result holds the runes and tokens as produced by the tokenizer. + type Result struct { + Tokens []Token + Runes []rune + } newTokens := make([]Token, newTokenEnd*2) copy(newTokens, api.outputTokens) api.outputTokens = newTokens diff --git a/tokenize/api_test.go b/tokenize/api_test.go index fec74f3..d8830f4 100644 --- a/tokenize/api_test.go +++ b/tokenize/api_test.go @@ -10,27 +10,12 @@ import ( func ExampleNewAPI() { tokenize.NewAPI("The input that the API will handle") - - // Output: } -// func ExampleAPI_NextRune() { -// api := tokenize.NewAPI("The input that the API will handle") -// r, err := api.NextRune() -// fmt.Printf("Rune read from input; %c\n", r) -// fmt.Printf("The error: %v\n", err) -// fmt.Printf("API results: %q\n", api.dataAsString()) - -// // Output: -// // Rune read from input; T -// // The error: -// // API results: "" -// } - -func ExampleAPI_PeekRune() { +func ExampleAPI_PeekByte() { api := tokenize.NewAPI("The input that the API will handle") - r1, _, err := api.Input.PeekRune(19) // 'A' + r1, _, err := api.Input.PeekRune(19) // 'A', r2, _, err := api.Input.PeekRune(20) // 'P' r3, _, err := api.Input.PeekRune(21) // 'I' _, _, err = api.Input.PeekRune(100) // EOF @@ -41,18 +26,32 @@ func ExampleAPI_PeekRune() { // API EOF } -func ExampleAPI_AcceptRune() { - api := tokenize.NewAPI("The input that the API will handle") +func ExampleAPI_PeekRune() { + api := tokenize.NewAPI("The input that the ДPI will handle") - // Reads 'T' and accepts it to the API results. + r1, _, err := api.Input.PeekRune(19) // 'Д', 2 bytes so next rune starts at 21 + r2, _, err := api.Input.PeekRune(21) // 'P' + r3, _, err := api.Input.PeekRune(22) // 'I' + _, _, err = api.Input.PeekRune(100) // EOF + + fmt.Printf("%c%c%c %s\n", r1, r2, r3, err) + + // Output: + // ДPI EOF +} + +func ExampleAPI_AcceptRune() { + api := tokenize.NewAPI("The input that the ДPI will handle") + + // Reads 'T' and accepts it to the API output data. r, _, _ := api.Input.PeekRune(0) api.Input.AcceptRune(r) - // Reads 'h' and accepts it to the API results. + // Reads 'h' and accepts it to the API output data. r, _, _ = api.Input.PeekRune(0) api.Input.AcceptRune(r) - // Reads 'e', but does not accept it to the API results. + // Reads 'e', but does not accept it to the API output data. r, _, _ = api.Input.PeekRune(0) fmt.Printf("API results: %q\n", api.Output.String())