Backup work on dropping forking support.
This commit is contained in:
parent
4c94374107
commit
daf3b9838f
|
@ -63,8 +63,8 @@ import (
|
||||||
// • a type implementing io.Reader
|
// • a type implementing io.Reader
|
||||||
//
|
//
|
||||||
// • bufio.Reader
|
// • bufio.Reader
|
||||||
func New(input interface{}) *Buffer {
|
func New(input interface{}) Buffer {
|
||||||
return &Buffer{
|
return Buffer{
|
||||||
bufio: makeBufioReader(input),
|
bufio: makeBufioReader(input),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,18 +71,17 @@ import (
|
||||||
// can lead to hard to track bugs. I much prefer this forking method, since
|
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||||
// no bookkeeping has to be implemented when implementing a parser.
|
// no bookkeeping has to be implemented when implementing a parser.
|
||||||
type API struct {
|
type API struct {
|
||||||
reader *read.Buffer // the buffered input reader
|
reader read.Buffer // the buffered input reader
|
||||||
pointers stackFrame // various pointers for keeping track of input, output, cursor.
|
pointers stackFrame // various values for keeping track of input, output, cursor.
|
||||||
Input Input // access to a set of general input-related methods
|
Input Input // access to a set of general input-related methods
|
||||||
Byte InputByteMode // access to a set of byte-based input methods
|
Byte InputByteMode // access to a set of byte-based input methods
|
||||||
Rune InputRuneMode // access to a set of rune-based input methods
|
Rune InputRuneMode // access to a set of UTF8 rune-based input methods
|
||||||
Output Output // access to a set of output-related functionality
|
Output Output // access to a set of output-related functionality
|
||||||
outputTokens []Token // storage for accepted tokens
|
outputTokens []Token // storage for accepted tokens
|
||||||
outputBytes []byte // storage for accepted bytes
|
outputBytes []byte // storage for accepted bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
type stackFrame struct {
|
type stackFrame struct {
|
||||||
offsetLocal int // the read offset, relative to the start of this stack frame
|
|
||||||
offset int // the read offset, relative to the start of the reader buffer
|
offset int // the read offset, relative to the start of the reader buffer
|
||||||
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
|
column int // the column at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||||
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
|
line int // the line at which the cursor is (0-indexed, relative to the start of the stack frame)
|
||||||
|
@ -99,25 +98,38 @@ const initialByteStoreLength = 128
|
||||||
// For an overview of allowed inputs, take a look at the documentation
|
// For an overview of allowed inputs, take a look at the documentation
|
||||||
// for parsekit.read.New().
|
// for parsekit.read.New().
|
||||||
func NewAPI(input interface{}) *API {
|
func NewAPI(input interface{}) *API {
|
||||||
reader := read.New(input)
|
|
||||||
tokenAPI := &API{
|
tokenAPI := &API{
|
||||||
outputBytes: make([]byte, initialByteStoreLength),
|
// outputBytes: make([]byte, initialByteStoreLength),
|
||||||
outputTokens: make([]Token, initialTokenStoreLength),
|
// outputTokens: make([]Token, initialTokenStoreLength),
|
||||||
reader: reader,
|
reader: read.New(input),
|
||||||
}
|
}
|
||||||
tokenAPI.Input = Input{api: tokenAPI, reader: reader}
|
tokenAPI.Input = Input{api: tokenAPI}
|
||||||
tokenAPI.Byte = InputByteMode{api: tokenAPI, reader: reader}
|
tokenAPI.Byte = InputByteMode{api: tokenAPI}
|
||||||
tokenAPI.Rune = InputRuneMode{api: tokenAPI, reader: reader}
|
tokenAPI.Rune = InputRuneMode{api: tokenAPI}
|
||||||
tokenAPI.Output = Output{api: tokenAPI}
|
tokenAPI.Output = Output{api: tokenAPI}
|
||||||
return tokenAPI
|
return tokenAPI
|
||||||
}
|
}
|
||||||
|
|
||||||
type Snapshot stackFrame
|
type Snapshot stackFrame
|
||||||
|
|
||||||
func (tokenAPI *API) MakeSnapshot() Snapshot {
|
func (tokenAPI *API) MakeSnapshot() stackFrame {
|
||||||
return Snapshot(tokenAPI.pointers)
|
return tokenAPI.pointers
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tokenAPI *API) RestoreSnapshot(snap Snapshot) {
|
func (tokenAPI *API) RestoreSnapshot(snap stackFrame) {
|
||||||
tokenAPI.pointers = stackFrame(snap)
|
tokenAPI.pointers = snap
|
||||||
|
}
|
||||||
|
|
||||||
|
type Split [2]int
|
||||||
|
|
||||||
|
func (tokenAPI *API) SplitOutput() Split {
|
||||||
|
split := Split{tokenAPI.pointers.bytesStart, tokenAPI.pointers.tokenStart}
|
||||||
|
tokenAPI.pointers.bytesStart = tokenAPI.pointers.bytesEnd
|
||||||
|
tokenAPI.pointers.tokenStart = tokenAPI.pointers.tokenEnd
|
||||||
|
return split
|
||||||
|
}
|
||||||
|
|
||||||
|
func (tokenAPI *API) MergeSplitOutput(split Split) {
|
||||||
|
tokenAPI.pointers.bytesStart = split[0]
|
||||||
|
tokenAPI.pointers.tokenStart = split[1]
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,8 @@
|
||||||
package tokenize
|
package tokenize
|
||||||
|
|
||||||
import "git.makaay.nl/mauricem/go-parsekit/read"
|
|
||||||
|
|
||||||
// InputByteMode provides byte-driven input/output functionality for the tokenize API.
|
// InputByteMode provides byte-driven input/output functionality for the tokenize API.
|
||||||
type InputByteMode struct {
|
type InputByteMode struct {
|
||||||
api *API
|
api *API
|
||||||
reader *read.Buffer // the buffered input reader
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Peek returns the byte at the provided byte offset.
|
// Peek returns the byte at the provided byte offset.
|
||||||
|
@ -14,7 +11,8 @@ type InputByteMode struct {
|
||||||
// When an offset is requested that is beyond the length of the available input
|
// When an offset is requested that is beyond the length of the available input
|
||||||
// data, then the error will be io.EOF.
|
// data, then the error will be io.EOF.
|
||||||
func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
||||||
return byteMode.reader.ByteAt(byteMode.api.pointers.offset + offset)
|
a := byteMode.api
|
||||||
|
return a.reader.ByteAt(a.pointers.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
// PeekMulti returns at most the provided maximum number of bytes at the provided
|
// PeekMulti returns at most the provided maximum number of bytes at the provided
|
||||||
|
@ -22,7 +20,8 @@ func (byteMode InputByteMode) Peek(offset int) (byte, error) {
|
||||||
// error as such. The returned error can in such case be set to io.EOF to indicate
|
// error as such. The returned error can in such case be set to io.EOF to indicate
|
||||||
// that the end of the input was reached though.
|
// that the end of the input was reached though.
|
||||||
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
func (byteMode InputByteMode) PeekMulti(offset int, count int) ([]byte, error) {
|
||||||
return byteMode.reader.BytesAt(byteMode.api.pointers.offset+offset, count)
|
a := byteMode.api
|
||||||
|
return a.reader.BytesAt(a.pointers.offset+offset, count)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (byteMode InputByteMode) Accept(b byte) {
|
func (byteMode InputByteMode) Accept(b byte) {
|
||||||
|
@ -62,7 +61,6 @@ func (byteMode InputByteMode) MoveCursor(b byte) {
|
||||||
}
|
}
|
||||||
|
|
||||||
a.pointers.offset++
|
a.pointers.offset++
|
||||||
a.pointers.offsetLocal++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
// MoveCursorMulti updates the position of the read cursor, based on the provided bytes.
|
||||||
|
|
|
@ -2,15 +2,12 @@ package tokenize
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"git.makaay.nl/mauricem/go-parsekit/read"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Input provides input-related functionality for the tokenize API,
|
// Input provides input-related functionality for the tokenize API,
|
||||||
// which is not specifically bound to a specific read mode (byte, rune).
|
// which is not specifically bound to a specific read mode (byte, rune).
|
||||||
type Input struct {
|
type Input struct {
|
||||||
api *API
|
api *API
|
||||||
reader *read.Buffer // the buffered input reader
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cursor returns a string that describes the current read cursor position.
|
// Cursor returns a string that describes the current read cursor position.
|
||||||
|
@ -30,9 +27,8 @@ func (i Input) Cursor() string {
|
||||||
func (i Input) Flush() bool {
|
func (i Input) Flush() bool {
|
||||||
a := i.api
|
a := i.api
|
||||||
if a.pointers.offset > 0 {
|
if a.pointers.offset > 0 {
|
||||||
i.reader.Flush(a.pointers.offset)
|
a.reader.Flush(a.pointers.offset)
|
||||||
a.pointers.offset = 0
|
a.pointers.offset = 0
|
||||||
a.pointers.offsetLocal = 0
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|
|
@ -25,22 +25,6 @@ func (o Output) Rune(offset int) rune {
|
||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
type Split [2]int
|
|
||||||
|
|
||||||
func (o Output) Split() Split {
|
|
||||||
a := o.api
|
|
||||||
split := Split{a.pointers.bytesStart, a.pointers.tokenStart}
|
|
||||||
a.pointers.bytesStart = a.pointers.bytesEnd
|
|
||||||
a.pointers.tokenStart = a.pointers.tokenEnd
|
|
||||||
return split
|
|
||||||
}
|
|
||||||
|
|
||||||
func (o Output) MergeSplit(split Split) {
|
|
||||||
a := o.api
|
|
||||||
a.pointers.bytesStart = split[0]
|
|
||||||
a.pointers.tokenStart = split[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (o Output) Reset() {
|
func (o Output) Reset() {
|
||||||
a := o.api
|
a := o.api
|
||||||
a.pointers.bytesEnd = a.pointers.bytesStart
|
a.pointers.bytesEnd = a.pointers.bytesStart
|
||||||
|
|
|
@ -26,7 +26,8 @@ type InputRuneMode struct {
|
||||||
// When an offset is requested that is beyond the length of the available input
|
// When an offset is requested that is beyond the length of the available input
|
||||||
// data, then the error will be io.EOF.
|
// data, then the error will be io.EOF.
|
||||||
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
|
||||||
return runeMode.reader.RuneAt(runeMode.api.pointers.offset + offset)
|
a := runeMode.api
|
||||||
|
return a.reader.RuneAt(a.pointers.offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accept is used to accept a single rune that was read from the input.
|
// Accept is used to accept a single rune that was read from the input.
|
||||||
|
@ -92,7 +93,6 @@ func (runeMode InputRuneMode) MoveCursor(r rune) int {
|
||||||
|
|
||||||
width := utf8.RuneLen(r)
|
width := utf8.RuneLen(r)
|
||||||
a.pointers.offset += width
|
a.pointers.offset += width
|
||||||
a.pointers.offsetLocal += width
|
|
||||||
return width
|
return width
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -709,12 +709,12 @@ func MatchSeq(handlers ...Handler) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
snap := tokenAPI.MakeSnapshot()
|
snap := tokenAPI.MakeSnapshot()
|
||||||
for _, handler := range handlers {
|
for _, handler := range handlers {
|
||||||
split := tokenAPI.Output.Split()
|
split := tokenAPI.SplitOutput()
|
||||||
if !handler(tokenAPI) {
|
if !handler(tokenAPI) {
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
tokenAPI.RestoreSnapshot(snap)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
tokenAPI.Output.MergeSplit(split)
|
tokenAPI.MergeSplitOutput(split)
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
@ -830,9 +830,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
||||||
snap := tokenAPI.MakeSnapshot()
|
snap := tokenAPI.MakeSnapshot()
|
||||||
for total < min {
|
for total < min {
|
||||||
total++
|
total++
|
||||||
split := tokenAPI.Output.Split()
|
split := tokenAPI.SplitOutput()
|
||||||
ok := handler(tokenAPI)
|
ok := handler(tokenAPI)
|
||||||
tokenAPI.Output.MergeSplit(split)
|
tokenAPI.MergeSplitOutput(split)
|
||||||
if !ok {
|
if !ok {
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
tokenAPI.RestoreSnapshot(snap)
|
||||||
return false
|
return false
|
||||||
|
@ -844,9 +844,9 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
|
||||||
//child.Merge()
|
//child.Merge()
|
||||||
for max < 0 || total < max {
|
for max < 0 || total < max {
|
||||||
total++
|
total++
|
||||||
split := tokenAPI.Output.Split()
|
split := tokenAPI.SplitOutput()
|
||||||
ok := handler(tokenAPI)
|
ok := handler(tokenAPI)
|
||||||
tokenAPI.Output.MergeSplit(split)
|
tokenAPI.MergeSplitOutput(split)
|
||||||
if !ok {
|
if !ok {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -1621,14 +1621,14 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
|
||||||
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
||||||
return func(tokenAPI *API) bool {
|
return func(tokenAPI *API) bool {
|
||||||
snap := tokenAPI.MakeSnapshot()
|
snap := tokenAPI.MakeSnapshot()
|
||||||
split := tokenAPI.Output.Split()
|
split := tokenAPI.SplitOutput()
|
||||||
if handler(tokenAPI) {
|
if handler(tokenAPI) {
|
||||||
origS := tokenAPI.Output.String()
|
origS := tokenAPI.Output.String()
|
||||||
s := modfunc(origS)
|
s := modfunc(origS)
|
||||||
if s != origS {
|
if s != origS {
|
||||||
tokenAPI.Output.SetString(s)
|
tokenAPI.Output.SetString(s)
|
||||||
}
|
}
|
||||||
tokenAPI.Output.MergeSplit(split)
|
tokenAPI.MergeSplitOutput(split)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
tokenAPI.RestoreSnapshot(snap)
|
tokenAPI.RestoreSnapshot(snap)
|
||||||
|
|
Loading…
Reference in New Issue