diff --git a/read/read.go b/read/read.go index d48ce6f..21d4b95 100644 --- a/read/read.go +++ b/read/read.go @@ -63,8 +63,8 @@ import ( // • a type implementing io.Reader // // • bufio.Reader -func New(input interface{}) *Buffer { - return &Buffer{ +func New(input interface{}) Buffer { + return Buffer{ bufio: makeBufioReader(input), } } diff --git a/tokenize/api.go b/tokenize/api.go index 50bb6fe..59fde3b 100644 --- a/tokenize/api.go +++ b/tokenize/api.go @@ -71,25 +71,24 @@ import ( // can lead to hard to track bugs. I much prefer this forking method, since // no bookkeeping has to be implemented when implementing a parser. type API struct { - reader *read.Buffer // the buffered input reader - pointers stackFrame // various pointers for keeping track of input, output, cursor. + reader read.Buffer // the buffered input reader + pointers stackFrame // various values for keeping track of input, output, cursor. Input Input // access to a set of general input-related methods Byte InputByteMode // access to a set of byte-based input methods - Rune InputRuneMode // access to a set of rune-based input methods + Rune InputRuneMode // access to a set of UTF8 rune-based input methods Output Output // access to a set of output-related functionality outputTokens []Token // storage for accepted tokens outputBytes []byte // storage for accepted bytes } type stackFrame struct { - offsetLocal int // the read offset, relative to the start if this stack frame - offset int // the read offset, relative to the start of the reader buffer - column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame) - line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame) - bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level - bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level - tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level - tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level + offset int // the read offset, relative to the start of the reader buffer + column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame) + line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame) + bytesStart int // the starting point in the API.bytes slice for runes produced by this stack level + bytesEnd int // the end point in the API.bytes slice for runes produced by this stack level + tokenStart int // the starting point in the API.tokens slice for tokens produced by this stack level + tokenEnd int // the end point in the API.tokens slice for tokens produced by this stack level } const initialTokenStoreLength = 64 @@ -99,25 +98,38 @@ const initialByteStoreLength = 128 // For an overview of allowed inputs, take a look at the documentation // for parsekit.read.New(). func NewAPI(input interface{}) *API { - reader := read.New(input) tokenAPI := &API{ - outputBytes: make([]byte, initialByteStoreLength), - outputTokens: make([]Token, initialTokenStoreLength), - reader: reader, + // outputBytes: make([]byte, initialByteStoreLength), + // outputTokens: make([]Token, initialTokenStoreLength), + reader: read.New(input), } - tokenAPI.Input = Input{api: tokenAPI, reader: reader} - tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader} - tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader} + tokenAPI.Input = Input{api: tokenAPI} + tokenAPI.Byte = InputByteMode{api: tokenAPI} + tokenAPI.Rune = InputRuneMode{api: tokenAPI} tokenAPI.Output = Output{api: tokenAPI} return tokenAPI } type Snapshot stackFrame -func (tokenAPI *API) MakeSnapshot() Snapshot { - return Snapshot(tokenAPI.pointers) +func (tokenAPI *API) MakeSnapshot() stackFrame { + return tokenAPI.pointers } -func (tokenAPI *API) RestoreSnapshot(snap Snapshot) { - tokenAPI.pointers = stackFrame(snap) +func (tokenAPI *API) RestoreSnapshot(snap stackFrame) { + tokenAPI.pointers = snap +} + +type Split [2]int + +func (tokenAPI *API) SplitOutput() Split { + split := Split{tokenAPI.pointers.bytesStart, tokenAPI.pointers.tokenStart} + tokenAPI.pointers.bytesStart = tokenAPI.pointers.bytesEnd + tokenAPI.pointers.tokenStart = tokenAPI.pointers.tokenEnd + return split +} + +func (tokenAPI *API) MergeSplitOutput(split Split) { + tokenAPI.pointers.bytesStart = split[0] + tokenAPI.pointers.tokenStart = split[1] } diff --git a/tokenize/api_bytemode.go b/tokenize/api_bytemode.go index 6decf46..196c13f 100644 --- a/tokenize/api_bytemode.go +++ b/tokenize/api_bytemode.go @@ -1,11 +1,8 @@ package tokenize -import "git.makaay.nl/mauricem/go-parsekit/read" - // InputByteMode provides byte-driven input/output functionality for the tokenize API. type InputByteMode struct { - api *API - reader *read.Buffer // the buffered input reader + api *API } // Peek returns the byte at the provided byte offset. @@ -14,7 +11,8 @@ type InputByteMode struct { // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. func (byteMode InputByteMode) Peek(offset int) (byte, error) { - return byteMode.reader.ByteAt(byteMode.api.pointers.offset + offset) + a := byteMode.api + return a.reader.ByteAt(a.pointers.offset + offset) } // PeekMulti returns at max the provided maximum number of bytes at the provided @@ -22,7 +20,8 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) { // error as such. The returned error can in such case be set to io.EOF to indicate // that the end of the input was reached though. func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) { - return byteMode.reader.BytesAt(byteMode.api.pointers.offset+offset, count) + a := byteMode.api + return a.reader.BytesAt(a.pointers.offset+offset, count) } func (byteMode InputByteMode) Accept(b byte) { @@ -62,7 +61,6 @@ func (byteMode InputByteMode) MoveCursor(b byte) { } a.pointers.offset++ - a.pointers.offsetLocal++ } // MoveCursorMulti updates the position of the read cursor, based on the provided bytes. diff --git a/tokenize/api_input.go b/tokenize/api_input.go index 839dfd9..97eb7ab 100644 --- a/tokenize/api_input.go +++ b/tokenize/api_input.go @@ -2,15 +2,12 @@ package tokenize import ( "fmt" - - "git.makaay.nl/mauricem/go-parsekit/read" ) // Input provides input-related functionality for the tokenize API, // which is not specifically bound to a specific read mode (byte, rune). type Input struct { - api *API - reader *read.Buffer // the buffered input reader + api *API } // Cursor returns a string that describes the current read cursor position. @@ -30,9 +27,8 @@ func (i Input) Cursor() string { func (i Input) Flush() bool { a := i.api if a.pointers.offset > 0 { - i.reader.Flush(a.pointers.offset) + a.reader.Flush(a.pointers.offset) a.pointers.offset = 0 - a.pointers.offsetLocal = 0 return true } return false diff --git a/tokenize/api_output.go b/tokenize/api_output.go index 595f097..5906d0f 100644 --- a/tokenize/api_output.go +++ b/tokenize/api_output.go @@ -25,22 +25,6 @@ func (o Output) Rune(offset int) rune { return r } -type Split [2]int - -func (o Output) Split() Split { - a := o.api - split := Split{a.pointers.bytesStart, a.pointers.tokenStart} - a.pointers.bytesStart = a.pointers.bytesEnd - a.pointers.tokenStart = a.pointers.tokenEnd - return split -} - -func (o Output) MergeSplit(split Split) { - a := o.api - a.pointers.bytesStart = split[0] - a.pointers.tokenStart = split[1] -} - func (o Output) Reset() { a := o.api a.pointers.bytesEnd = a.pointers.bytesStart diff --git a/tokenize/api_runemode.go b/tokenize/api_runemode.go index 8d444c8..6d2b7f6 100644 --- a/tokenize/api_runemode.go +++ b/tokenize/api_runemode.go @@ -26,7 +26,8 @@ type InputRuneMode struct { // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) { - return runeMode.reader.RuneAt(runeMode.api.pointers.offset + offset) + a := runeMode.api + return a.reader.RuneAt(a.pointers.offset + offset) } // Accept is used to accept a single rune that was read from the input. @@ -92,7 +93,6 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int { width := utf8.RuneLen(r) a.pointers.offset += width - a.pointers.offsetLocal += width return width } diff --git a/tokenize/handlers_builtin.go b/tokenize/handlers_builtin.go index ade50fe..c1c1ce2 100644 --- a/tokenize/handlers_builtin.go +++ b/tokenize/handlers_builtin.go @@ -709,12 +709,12 @@ func MatchSeq(handlers ...Handler) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() for _, handler := range handlers { - split := tokenAPI.Output.Split() + split := tokenAPI.SplitOutput() if !handler(tokenAPI) { tokenAPI.RestoreSnapshot(snap) return false } - tokenAPI.Output.MergeSplit(split) + tokenAPI.MergeSplitOutput(split) } return true } @@ -830,9 +830,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { snap := tokenAPI.MakeSnapshot() for total < min { total++ - split := tokenAPI.Output.Split() + split := tokenAPI.SplitOutput() ok := handler(tokenAPI) - tokenAPI.Output.MergeSplit(split) + tokenAPI.MergeSplitOutput(split) if !ok { tokenAPI.RestoreSnapshot(snap) return false @@ -844,9 +844,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler { //child.Merge() for max < 0 || total < max { total++ - split := tokenAPI.Output.Split() + split := tokenAPI.SplitOutput() ok := handler(tokenAPI) - tokenAPI.Output.MergeSplit(split) + tokenAPI.MergeSplitOutput(split) if !ok { break } @@ -1621,14 +1621,14 @@ func ModifyReplace(handler Handler, replaceWith string) Handler { func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { return func(tokenAPI *API) bool { snap := tokenAPI.MakeSnapshot() - split := tokenAPI.Output.Split() + split := tokenAPI.SplitOutput() if handler(tokenAPI) { origS := tokenAPI.Output.String() s := modfunc(origS) if s != origS { tokenAPI.Output.SetString(s) } - tokenAPI.Output.MergeSplit(split) + tokenAPI.MergeSplitOutput(split) return true } tokenAPI.RestoreSnapshot(snap)