Backup work on dropping forking support.

This commit is contained in:
Maurice Makaay 2019-07-26 14:51:40 +00:00
parent 4c94374107
commit daf3b9838f
7 changed files with 53 additions and 63 deletions

View File

@ -63,8 +63,8 @@ import (
// • a type implementing io.Reader // • a type implementing io.Reader
// //
// • bufio.Reader // • bufio.Reader
func New(input interface{}) *Buffer { func New(input interface{}) Buffer {
return &Buffer{ return Buffer{
bufio: makeBufioReader(input), bufio: makeBufioReader(input),
} }
} }

View File

@ -71,18 +71,17 @@ import (
// can lead to hard to track bugs. I much prefer this forking method, since // can lead to hard to track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when implementing a parser. // no bookkeeping has to be implemented when implementing a parser.
type API struct { type API struct {
reader *read.Buffer // the buffered input reader reader read.Buffer // the buffered input reader
pointers stackFrame // various pointers for keeping track of input, output, cursor. pointers stackFrame // various values for keeping track of input, output, cursor.
Input Input // access to a set of general input-related methods Input Input // access to a set of general input-related methods
Byte InputByteMode // access to a set of byte-based input methods Byte InputByteMode // access to a set of byte-based input methods
Rune InputRuneMode // access to a set of rune-based input methods Rune InputRuneMode // access to a set of UTF8 rune-based input methods
Output Output // access to a set of output-related functionality Output Output // access to a set of output-related functionality
outputTokens []Token // storage for accepted tokens outputTokens []Token // storage for accepted tokens
outputBytes []byte // storage for accepted bytes outputBytes []byte // storage for accepted bytes
} }
type stackFrame struct { type stackFrame struct {
offsetLocal int // the read offset, relative to the start of this stack frame
offset int // the read offset, relative to the start of the reader buffer offset int // the read offset, relative to the start of the reader buffer
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame) column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame) line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
@ -99,25 +98,38 @@ const initialByteStoreLength = 128
// For an overview of allowed inputs, take a look at the documentation // For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New(). // for parsekit.read.New().
func NewAPI(input interface{}) *API { func NewAPI(input interface{}) *API {
reader := read.New(input)
tokenAPI := &API{ tokenAPI := &API{
outputBytes: make([]byte, initialByteStoreLength), // outputBytes: make([]byte, initialByteStoreLength),
outputTokens: make([]Token, initialTokenStoreLength), // outputTokens: make([]Token, initialTokenStoreLength),
reader: reader, reader: read.New(input),
} }
tokenAPI.Input = Input{api: tokenAPI, reader: reader} tokenAPI.Input = Input{api: tokenAPI}
tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader} tokenAPI.Byte = InputByteMode{api: tokenAPI}
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader} tokenAPI.Rune = InputRuneMode{api: tokenAPI}
tokenAPI.Output = Output{api: tokenAPI} tokenAPI.Output = Output{api: tokenAPI}
return tokenAPI return tokenAPI
} }
type Snapshot stackFrame type Snapshot stackFrame
func (tokenAPI *API) MakeSnapshot() Snapshot { func (tokenAPI *API) MakeSnapshot() stackFrame {
return Snapshot(tokenAPI.pointers) return tokenAPI.pointers
} }
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) { func (tokenAPI *API) RestoreSnapshot(snap stackFrame) {
tokenAPI.pointers = stackFrame(snap) tokenAPI.pointers = snap
}
type Split [2]int
func (tokenAPI *API) SplitOutput() Split {
split := Split{tokenAPI.pointers.bytesStart, tokenAPI.pointers.tokenStart}
tokenAPI.pointers.bytesStart = tokenAPI.pointers.bytesEnd
tokenAPI.pointers.tokenStart = tokenAPI.pointers.tokenEnd
return split
}
func (tokenAPI *API) MergeSplitOutput(split Split) {
tokenAPI.pointers.bytesStart = split[0]
tokenAPI.pointers.tokenStart = split[1]
} }

View File

@ -1,11 +1,8 @@
package tokenize package tokenize
import "git.makaay.nl/mauricem/go-parsekit/read"
// InputByteMode provides byte-driven input/output functionality for the tokenize API. // InputByteMode provides byte-driven input/output functionality for the tokenize API.
type InputByteMode struct { type InputByteMode struct {
api *API api *API
reader *read.Buffer // the buffered input reader
} }
// Peek returns the byte at the provided byte offset. // Peek returns the byte at the provided byte offset.
@ -14,7 +11,8 @@ type InputByteMode struct {
// When an offset is requested that is beyond the length of the available input // When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF. // data, then the error will be io.EOF.
func (byteMode InputByteMode) Peek(offset int) (byte, error) { func (byteMode InputByteMode) Peek(offset int) (byte, error) {
return byteMode.reader.ByteAt(byteMode.api.pointers.offset + offset) a := byteMode.api
return a.reader.ByteAt(a.pointers.offset + offset)
} }
// PeekMulti returns at max the provided maximum number of bytes at the provided // PeekMulti returns at max the provided maximum number of bytes at the provided
@ -22,7 +20,8 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
// error as such. The returned error can in such case be set to io.EOF to indicate // error as such. The returned error can in such case be set to io.EOF to indicate
// that the end of the input was reached though. // that the end of the input was reached though.
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) { func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
return byteMode.reader.BytesAt(byteMode.api.pointers.offset+offset, count) a := byteMode.api
return a.reader.BytesAt(a.pointers.offset+offset, count)
} }
func (byteMode InputByteMode) Accept(b byte) { func (byteMode InputByteMode) Accept(b byte) {
@ -62,7 +61,6 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
} }
a.pointers.offset++ a.pointers.offset++
a.pointers.offsetLocal++
} }
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes. // MoveCursorMulti updates the position of the read cursor, based on the provided bytes.

View File

@ -2,15 +2,12 @@ package tokenize
import ( import (
"fmt" "fmt"
"git.makaay.nl/mauricem/go-parsekit/read"
) )
// Input provides input-related functionality for the tokenize API, // Input provides input-related functionality for the tokenize API,
// which is not specifically bound to a specific read mode (byte, rune). // which is not specifically bound to a specific read mode (byte, rune).
type Input struct { type Input struct {
api *API api *API
reader *read.Buffer // the buffered input reader
} }
// Cursor returns a string that describes the current read cursor position. // Cursor returns a string that describes the current read cursor position.
@ -30,9 +27,8 @@ func (i Input) Cursor() string {
func (i Input) Flush() bool { func (i Input) Flush() bool {
a := i.api a := i.api
if a.pointers.offset > 0 { if a.pointers.offset > 0 {
i.reader.Flush(a.pointers.offset) a.reader.Flush(a.pointers.offset)
a.pointers.offset = 0 a.pointers.offset = 0
a.pointers.offsetLocal = 0
return true return true
} }
return false return false

View File

@ -25,22 +25,6 @@ func (o Output) Rune(offset int) rune {
return r return r
} }
type Split [2]int
func (o Output) Split() Split {
a := o.api
split := Split{a.pointers.bytesStart, a.pointers.tokenStart}
a.pointers.bytesStart = a.pointers.bytesEnd
a.pointers.tokenStart = a.pointers.tokenEnd
return split
}
func (o Output) MergeSplit(split Split) {
a := o.api
a.pointers.bytesStart = split[0]
a.pointers.tokenStart = split[1]
}
func (o Output) Reset() { func (o Output) Reset() {
a := o.api a := o.api
a.pointers.bytesEnd = a.pointers.bytesStart a.pointers.bytesEnd = a.pointers.bytesStart

View File

@ -26,7 +26,8 @@ type InputRuneMode struct {
// When an offset is requested that is beyond the length of the available input // When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF. // data, then the error will be io.EOF.
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) { func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
return runeMode.reader.RuneAt(runeMode.api.pointers.offset + offset) a := runeMode.api
return a.reader.RuneAt(a.pointers.offset + offset)
} }
// Accept is used to accept a single rune that was read from the input. // Accept is used to accept a single rune that was read from the input.
@ -92,7 +93,6 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
width := utf8.RuneLen(r) width := utf8.RuneLen(r)
a.pointers.offset += width a.pointers.offset += width
a.pointers.offsetLocal += width
return width return width
} }

View File

@ -709,12 +709,12 @@ func MatchSeq(handlers ...Handler) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
snap := tokenAPI.MakeSnapshot() snap := tokenAPI.MakeSnapshot()
for _, handler := range handlers { for _, handler := range handlers {
split := tokenAPI.Output.Split() split := tokenAPI.SplitOutput()
if !handler(tokenAPI) { if !handler(tokenAPI) {
tokenAPI.RestoreSnapshot(snap) tokenAPI.RestoreSnapshot(snap)
return false return false
} }
tokenAPI.Output.MergeSplit(split) tokenAPI.MergeSplitOutput(split)
} }
return true return true
} }
@ -830,9 +830,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
snap := tokenAPI.MakeSnapshot() snap := tokenAPI.MakeSnapshot()
for total < min { for total < min {
total++ total++
split := tokenAPI.Output.Split() split := tokenAPI.SplitOutput()
ok := handler(tokenAPI) ok := handler(tokenAPI)
tokenAPI.Output.MergeSplit(split) tokenAPI.MergeSplitOutput(split)
if !ok { if !ok {
tokenAPI.RestoreSnapshot(snap) tokenAPI.RestoreSnapshot(snap)
return false return false
@ -844,9 +844,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
//child.Merge() //child.Merge()
for max < 0 || total < max { for max < 0 || total < max {
total++ total++
split := tokenAPI.Output.Split() split := tokenAPI.SplitOutput()
ok := handler(tokenAPI) ok := handler(tokenAPI)
tokenAPI.Output.MergeSplit(split) tokenAPI.MergeSplitOutput(split)
if !ok { if !ok {
break break
} }
@ -1621,14 +1621,14 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler { func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
return func(tokenAPI *API) bool { return func(tokenAPI *API) bool {
snap := tokenAPI.MakeSnapshot() snap := tokenAPI.MakeSnapshot()
split := tokenAPI.Output.Split() split := tokenAPI.SplitOutput()
if handler(tokenAPI) { if handler(tokenAPI) {
origS := tokenAPI.Output.String() origS := tokenAPI.Output.String()
s := modfunc(origS) s := modfunc(origS)
if s != origS { if s != origS {
tokenAPI.Output.SetString(s) tokenAPI.Output.SetString(s)
} }
tokenAPI.Output.MergeSplit(split) tokenAPI.MergeSplitOutput(split)
return true return true
} }
tokenAPI.RestoreSnapshot(snap) tokenAPI.RestoreSnapshot(snap)