// go-parsekit/tokenize/api_input.go

package tokenize

import (
	"fmt"
	"unicode/utf8"
)

// Input provides input-related functionality for the tokenize API.
//
// All of its methods operate on the API that is referenced by the api field.
type Input struct {
	api *API // the tokenizer API whose reader and stack frame are used
}

// PeekByte returns the byte at the provided byte offset.
//
// The offset is relative to the current read offset of the tokenizer, so
// offset 0 refers to the first byte that has not been skipped or accepted yet.
//
// When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i *Input) PeekByte(offset int) (byte, error) {
	return i.api.peekByte(offset)
}

// peekByte is the implementation behind Input.PeekByte. It asks the reader
// for the byte at the stack frame's current offset plus the provided
// relative offset.
func (api *API) peekByte(offset int) (byte, error) {
	return api.reader.ByteAt(api.stackFrame.offset + offset)
}

// SkipByte is used to skip over a single byte that was read from the input.
// This tells the tokenizer: "I've seen this byte. It is of no interest.
// I will now continue reading after this byte."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The byte is not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped byte.
func (i *Input) SkipByte(b byte) {
	// Delegate to the API, like the other Input methods do, so that the
	// skip logic lives in a single place.
	i.api.skipByte(b)
}

// skipByte moves the cursor of the current stack frame past the byte b and
// advances the read offset by one byte. Nothing is added to the output.
func (api *API) skipByte(b byte) {
	api.stackFrame.moveCursorByByte(b)
	api.stackFrame.offset++
}

// SkipBytes is used to skip over one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. They are of no interest.
// I will now continue reading after these bytes."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The bytes are not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped bytes.
//
// Calling SkipBytes without any bytes is a no-op.
func (i *Input) SkipBytes(bytes ...byte) {
	i.api.skipBytes(bytes...)
}

// skipBytes is the implementation behind Input.SkipBytes. For every provided
// byte, in order, the cursor of the current stack frame is moved past that
// byte and the read offset is advanced by one.
func (api *API) skipBytes(bytes ...byte) {
	for idx := 0; idx < len(bytes); idx++ {
		api.stackFrame.moveCursorByByte(bytes[idx])
		api.stackFrame.offset++
	}
}

// AcceptByte is used to accept a single byte that was read from the input.
// This tells the tokenizer: "I've seen this byte. I want to make use of it
// for the final output, so please remember it for me. I will now continue
// reading after this byte."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the byte to the tokenizer
// output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted byte.
func (i *Input) AcceptByte(b byte) {
	i.api.acceptByte(b)
}

// acceptByte is the implementation behind Input.AcceptByte. It first hands
// the byte to dataAddByte and then advances the cursor and the read offset
// through skipByte.
func (api *API) acceptByte(b byte) {
	api.dataAddByte(b)
	api.skipByte(b)
}

// AcceptBytes is used to accept one or more bytes that were read from the input.
// This tells the tokenizer: "I've seen these bytes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these bytes."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the bytes to the tokenizer
// output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted bytes.
func (i *Input) AcceptBytes(bytes ...byte) {
	i.api.acceptBytes(bytes...)
}

// acceptBytes is the implementation behind Input.AcceptBytes. It first hands
// all bytes to dataAddBytes and then advances the cursor and the read offset
// through skipBytes.
func (api *API) acceptBytes(bytes ...byte) {
	api.dataAddBytes(bytes...)
	api.skipBytes(bytes...)
}

// PeekRune returns the UTF8 rune at the provided byte offset, including its byte width.
//
// The offset is relative to the current read offset of the tokenizer, so
// offset 0 refers to the first byte that has not been skipped or accepted yet.
//
// The byte width is useful to know what byte offset you'll have to use to peek
// the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the
// first rune starts at offset = 0, the second rune might start at offset = 4.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// When an error occurs during reading the input, an error will be returned.
// When an offset is requested that is beyond the length of the available input
// data, then the error will be io.EOF.
func (i *Input) PeekRune(offset int) (rune, int, error) {
	return i.api.peekRune(offset)
}

// peekRune is the implementation behind Input.PeekRune. It asks the reader
// for the rune at the stack frame's current offset plus the provided
// relative offset and passes the reader's results through unchanged.
func (api *API) peekRune(offset int) (rune, int, error) {
	return api.reader.RuneAt(api.stackFrame.offset + offset)
}

// SkipRune is used to skip over a single rune that was read from the input.
// This tells the tokenizer: "I've seen this rune. It is of no interest.
// I will now continue reading after this rune."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The rune is not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped rune.
func (i *Input) SkipRune(r rune) {
	i.api.skipRune(r)
}

// skipRune is the implementation behind Input.SkipRune. It moves the cursor
// of the current stack frame past the rune r and advances the read offset by
// the number of bytes in the UTF8 encoding of r. Nothing is added to the
// output.
//
// A rune that cannot be encoded as UTF8 (a surrogate half, a negative value
// or a value beyond the Unicode range) is counted as utf8.RuneError. This is
// the same number of bytes that acceptRune and acceptRunes advance for such
// a rune, and it keeps the read offset from moving backwards.
func (api *API) skipRune(r rune) {
	api.stackFrame.moveCursorByRune(r)

	// utf8.RuneLen reports -1 for a rune that is not valid for encoding.
	width := utf8.RuneLen(r)
	if width < 0 {
		width = utf8.RuneLen(utf8.RuneError)
	}
	api.stackFrame.offset += width
}

// SkipRunes is used to skip over one or more runes that were read from the input.
// This tells the tokenizer: "I've seen these runes. They are of no interest.
// I will now continue reading after these runes."
//
// This will merely update the position of the cursor (which keeps track of what
// line and column we are on in the input data). The runes are not added to
// the output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the skipped runes.
//
// Calling SkipRunes without any runes is a no-op.
func (i *Input) SkipRunes(runes ...rune) {
	i.api.skipRunes(runes...)
}

// skipRunes is the implementation behind Input.SkipRunes. For every provided
// rune, in order, the cursor of the current stack frame is moved past that
// rune and the read offset is advanced by the number of bytes in the rune's
// UTF8 encoding. Nothing is added to the output.
//
// A rune that cannot be encoded as UTF8 (a surrogate half, a negative value
// or a value beyond the Unicode range) is counted as utf8.RuneError. This is
// the same number of bytes that acceptRune and acceptRunes advance for such
// a rune, and it keeps the read offset from moving backwards.
func (api *API) skipRunes(runes ...rune) {
	for _, r := range runes {
		api.stackFrame.moveCursorByRune(r)

		// utf8.RuneLen reports -1 for a rune that is not valid for encoding.
		width := utf8.RuneLen(r)
		if width < 0 {
			width = utf8.RuneLen(utf8.RuneError)
		}
		api.stackFrame.offset += width
	}
}

// AcceptRune is used to accept a single rune that was read from the input.
// This tells the tokenizer: "I've seen this rune. I want to make use of it
// for the final output, so please remember it for me. I will now continue
// reading after this rune."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the rune, in its UTF8
// encoded form, to the tokenizer output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted rune.
func (i *Input) AcceptRune(r rune) {
	i.api.acceptRune(r)
}

// acceptRune is the implementation behind Input.AcceptRune. The rune is
// UTF8-encoded into the output data at the stack frame's current bytesEnd
// position. After that, bytesEnd and the read offset both move forward by
// the number of bytes that were written.
func (api *API) acceptRune(r rune) {
	start := api.stackFrame.bytesEnd

	// Make sure the output buffer has room for the widest possible UTF8
	// encoding. When it has not, replace it by a buffer of twice the
	// required size that holds a copy of the existing data.
	if needed := start + utf8.UTFMax; needed > cap(api.outputData) {
		grown := make([]byte, needed*2)
		copy(grown, api.outputData)
		api.outputData = grown
	}

	api.stackFrame.moveCursorByRune(r)

	written := utf8.EncodeRune(api.outputData[start:], r)
	api.stackFrame.offset += written
	api.stackFrame.bytesEnd += written
}

// AcceptRunes is used to accept one or more runes that were read from the input.
// This tells the tokenizer: "I've seen these runes. I want to make use of them
// for the final output, so please remember them for me. I will now continue
// reading after these runes."
//
// This will update the position of the cursor (which keeps track of what line
// and column we are on in the input data) and add the runes, in their UTF8
// encoded form, to the tokenizer output.
//
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
// the first byte after the accepted runes.
func (i *Input) AcceptRunes(runes ...rune) {
	i.api.acceptRunes(runes...)
}

// acceptRunes is the implementation behind Input.AcceptRunes. The runes are
// converted to their UTF8 encoding and copied into the output data at the
// stack frame's current bytesEnd position. After that, bytesEnd and the read
// offset both move forward by the length of the encoded data.
func (api *API) acceptRunes(runes ...rune) {
	encoded := string(runes)
	start := api.stackFrame.bytesEnd
	end := start + len(encoded)

	// When the output buffer cannot hold the encoded runes, replace it by a
	// buffer of twice the required size that holds a copy of the existing
	// data.
	if end > cap(api.outputData) {
		grown := make([]byte, end*2)
		copy(grown, api.outputData)
		api.outputData = grown
	}

	for idx := range runes {
		api.stackFrame.moveCursorByRune(runes[idx])
	}
	copy(api.outputData[start:], encoded)

	api.stackFrame.offset += len(encoded)
	api.stackFrame.bytesEnd = end
}

// Flush flushes input data from the read.Buffer up to the current
// read offset of the parser.
//
// The return value is true when input data was flushed, and false when
// there was nothing to flush because the read offset was not beyond zero.
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when possible.
func (i *Input) Flush() bool {
	return i.api.flushInput()
}

// flushInput is the implementation behind Input.Flush. When the read offset
// of the current stack frame is beyond zero, the reader is told to flush that
// many bytes, the offset is reset to zero and true is returned. Otherwise
// nothing happens and false is returned.
func (api *API) flushInput() bool {
	if api.stackFrame.offset <= 0 {
		return false
	}

	api.reader.Flush(api.stackFrame.offset)
	api.stackFrame.offset = 0
	return true
}

// Cursor returns a human-readable description of the current cursor position
// in the input data. This is "start of file" as long as both the line and the
// column of the cursor are zero, and "line <n>, column <m>" otherwise, where
// n and m are the stored line and column values incremented by one.
func (i *Input) Cursor() string {
	return i.api.cursor()
}

// cursor is the implementation behind Input.Cursor. It describes the cursor
// position of the current stack frame as "start of file" when both line and
// column are zero, or as "line <n>, column <m>" otherwise. The line and
// column are incremented by one for display.
func (api *API) cursor() string {
	if api.stackFrame.line == 0 && api.stackFrame.column == 0 {
		// A constant string needs no formatting.
		return "start of file"
	}
	return fmt.Sprintf("line %d, column %d", api.stackFrame.line+1, api.stackFrame.column+1)
}