go-parsekit/tokenize/api_runemode.go

package tokenize

import (
	"unicode/utf8"

	"git.makaay.nl/mauricem/go-parsekit/read"
)

// InputRuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
//
// Its methods peek at runes on the input, append runes to the tokenizer
// output and keep the read cursor (byte offset, line and column) up-to-date.
// All of that state is reached through the api field.
type InputRuneMode struct {
	api    *API         // the tokenize API whose reader, output and cursor state the methods operate on
	reader *read.Buffer // the buffered input reader
}

// Peek looks ahead in the input and returns the UTF8 rune found at the
// provided byte offset, together with the byte width of that rune. The offset
// is relative to the current read cursor position.
//
// The width tells the caller what offset to use for peeking at whatever comes
// after this rune. A UTF8 rune can take up to 4 bytes, so when one rune
// starts at offset = 0, the next one might start at offset = 4.
//
// Invalid UTF8 data on the input is reported as the utf8.RuneError rune.
// Whether or not that must be treated as an error, is up to the caller.
//
// An error is returned when reading the input fails. Requesting an offset
// beyond the end of the available input data results in io.EOF.
func (runeMode InputRuneMode) Peek(offset int) (rune, int, error) {
	api := runeMode.api
	// Translate the cursor-relative offset into an offset for the reader.
	absolute := api.pointers.offset + offset
	return api.reader.RuneAt(absolute)
}

// Accept is used to accept a single rune that was read from the input.
// This tells the tokenizer: "I've seen this rune. I want to make use of it
// for the final output, so please remember it for me. I will now continue
// reading after this rune."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the rune to the tokenizer
// output. When the output is suspended, the rune is not added to the output,
// but the cursor is still moved.
//
// The cursor is moved exactly once per accepted rune.
//
// After the call, byte offset 0 for Peek() will point at the first byte
// after the accepted rune.
func (runeMode InputRuneMode) Accept(r rune) {
	// With suspended output there is nothing to write. Only advance the
	// cursor here, so it cannot be advanced a second time further down.
	if runeMode.api.Output.suspended > 0 {
		runeMode.MoveCursor(r)
		return
	}
	runeMode.AddRuneToOutput(r)
	runeMode.MoveCursor(r)
}

// AddRuneToOutput appends the UTF8 encoding of the provided rune to the
// tokenizer output. The read cursor is never touched by this method; use
// MoveCursor() or Accept() when the cursor must be moved as well.
//
// When the output is suspended, the call does nothing.
//
// A rune that is not valid for UTF8 encoding is written as utf8.RuneError,
// since that is what utf8.EncodeRune does for such runes.
func (runeMode InputRuneMode) AddRuneToOutput(r rune) {
	a := runeMode.api
	if a.Output.suspended > 0 {
		return
	}
	curBytesEnd := a.pointers.bytesEnd
	// The encoded width is not known up front, so request room for the
	// widest possible UTF8 encoding before writing into the output buffer.
	a.growOutputData(curBytesEnd + utf8.UTFMax)
	w := utf8.EncodeRune(a.outputBytes[curBytesEnd:], r)
	a.pointers.bytesEnd += w
}

// AcceptMulti accepts one or more runes that were read from the input.
// It is the multi-rune counterpart of Accept(): the tokenizer is told that
// the runes have been seen, that they must be remembered for the final
// output, and that reading will continue after them.
//
// For every rune, the cursor position (the line and column that we are on in
// the input data) is updated and the rune is added to the tokenizer output.
// When the output is suspended, only the cursor is moved.
//
// After the call, byte offset 0 for Peek() will point at the first byte
// after the accepted runes.
func (runeMode InputRuneMode) AcceptMulti(runes ...rune) {
	api := runeMode.api

	// Suspended output: skip the output buffer, only move the cursor.
	if api.Output.suspended > 0 {
		runeMode.MoveCursorMulti(runes...)
		return
	}

	// Request room for the worst case, in which every rune takes the
	// maximum number of bytes that a UTF8 encoding can use.
	end := api.pointers.bytesEnd
	api.growOutputData(end + len(runes)*utf8.UTFMax)

	for i := range runes {
		end += utf8.EncodeRune(api.outputBytes[end:], runes[i])
		runeMode.MoveCursor(runes[i])
	}
	api.pointers.bytesEnd = end
}

// AddRunesToOutput appends the UTF8 encoding of each of the provided runes
// to the tokenizer output, in the order in which they are provided.
// The read cursor is not moved by this method.
//
// When the output is suspended, the call does nothing.
func (runeMode InputRuneMode) AddRunesToOutput(runes ...rune) {
	api := runeMode.api
	if api.Output.suspended > 0 {
		return
	}

	// Request room for the worst case, in which every rune takes the
	// maximum number of bytes that a UTF8 encoding can use.
	end := api.pointers.bytesEnd
	api.growOutputData(end + len(runes)*utf8.UTFMax)

	for i := range runes {
		end += utf8.EncodeRune(api.outputBytes[end:], runes[i])
	}
	api.pointers.bytesEnd = end
}

// MoveCursor updates the position of the read cursor, based on the provided rune.
// This method takes newlines into account to keep track of line numbers and
// column positions for the input cursor: a newline moves the cursor to
// column 0 of the next line, any other rune moves it one column ahead.
//
// The byte offset is moved forward by the UTF8 width of the rune, and that
// width is returned. A rune that cannot be encoded in UTF8 (a surrogate half,
// or a value outside the Unicode range) is counted with the width of
// utf8.RuneError, so the offset never moves backwards.
//
// After the call, byte offset 0 for Peek() will point at the first rune at
// the new cursor position.
func (runeMode InputRuneMode) MoveCursor(r rune) int {
	a := runeMode.api
	if r == '\n' {
		a.pointers.column = 0
		a.pointers.line++
	} else {
		a.pointers.column++
	}

	width := utf8.RuneLen(r)
	if width < 0 {
		// utf8.RuneLen reports -1 for runes that are not valid for encoding.
		// Using that as-is would rewind the offset. utf8.EncodeRune writes
		// such runes as utf8.RuneError, so count them with that width.
		width = utf8.RuneLen(utf8.RuneError)
	}
	a.pointers.offset += width
	return width
}

// MoveCursorMulti moves the read cursor over all of the provided runes, by
// calling MoveCursor() for each one of them, in the order in which they are
// provided. Through MoveCursor(), newlines are taken into account to keep
// track of line numbers and column positions for the input cursor.
//
// After the call, byte offset 0 for Peek() will point at the first rune at
// the new cursor position.
func (runeMode InputRuneMode) MoveCursorMulti(runes ...rune) {
	for i := 0; i < len(runes); i++ {
		runeMode.MoveCursor(runes[i])
	}
}