117 lines
4.0 KiB
Go
117 lines
4.0 KiB
Go
package tokenize
|
|
|
|
import (
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API.
|
|
type RuneMode struct {
|
|
api *API
|
|
}
|
|
|
|
// Peek returns the UTF8 rune at the provided byte offset, including its byte width.
|
|
//
|
|
// The byte width is useful to know what byte offset you'll have to use to peek
|
|
// the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the
|
|
// first rune starts at offset = 0, the second rune might start at offset = 4.
|
|
//
|
|
// When an invalid UTF8 rune is encountered on the input, it is replaced with
|
|
// the utf.RuneError rune. It's up to the caller to handle this as an error
|
|
// when needed.
|
|
//
|
|
// When an error occurs during reading the input, an error will be returned.
|
|
// When an offset is requested that is beyond the length of the available input
|
|
// data, then the error will be io.EOF.
|
|
func (runeMode RuneMode) Peek(offset int) (rune, int, error) {
|
|
a := runeMode.api
|
|
return a.reader.RuneAt(a.stackFrame.offset + offset)
|
|
}
|
|
|
|
// Skip is used to skip over a single rune that was read from the input.
|
|
// This tells the tokenizer: "I've seen this rune. It is of no interest.
|
|
// I will now continue reading after this rune."
|
|
//
|
|
// This will merely update the position of the cursor (which keeps track of what
|
|
// line and column we are on in APIthe input data). The rune is not added to
|
|
// the output.
|
|
//
|
|
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
|
// the first byte after the skipped rune.
|
|
func (runeMode *RuneMode) Skip(r rune) {
|
|
f := runeMode.api.stackFrame
|
|
f.moveCursorByRune(r)
|
|
f.offset += utf8.RuneLen(r)
|
|
}
|
|
|
|
// SkipMulti is used to skip over one or more runes that were read from the input.
|
|
// This tells the tokenizer: "I've seen these runes. They are of no interest.
|
|
// I will now continue reading after these runes."
|
|
//
|
|
// This will merely update the position of the cursor (which keeps track of what
|
|
// line and column we are on in the input data). The runes are not added to
|
|
// the output.
|
|
//
|
|
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
|
// the first byte after the skipped runes.
|
|
func (runeMode *RuneMode) SkipMulti(runes ...rune) {
|
|
f := runeMode.api.stackFrame
|
|
for _, r := range runes {
|
|
f.moveCursorByRune(r)
|
|
f.offset += utf8.RuneLen(r)
|
|
}
|
|
}
|
|
|
|
// Accept is used to accept a single rune that was read from the input.
|
|
// This tells the tokenizer: "I've seen this rune. I want to make use of it
|
|
// for the final output, so please remember it for me. I will now continue
|
|
// reading after this rune."
|
|
//
|
|
// This will update the position of the cursor (which keeps track of what line
|
|
// and column we are on in the input data) and add the rune to the tokenizer
|
|
// output.
|
|
//
|
|
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
|
// the first byte after the accepted rune.
|
|
func (runeMode *RuneMode) Accept(r rune) {
|
|
a := runeMode.api
|
|
f := a.stackFrame
|
|
curBytesEnd := f.bytesEnd
|
|
maxRequiredBytes := curBytesEnd + utf8.UTFMax
|
|
|
|
a.growOutputData(maxRequiredBytes)
|
|
w := utf8.EncodeRune(a.outputData[curBytesEnd:], r)
|
|
f.bytesEnd += w
|
|
f.offset += w
|
|
|
|
f.moveCursorByRune(r)
|
|
}
|
|
|
|
// AcceptMulti is used to accept one or more runes that were read from the input.
|
|
// This tells the tokenizer: "I've seen these runes. I want to make use of them
|
|
// for the final output, so please remember them for me. I will now continue
|
|
// reading after these runes."
|
|
//
|
|
// This will update the position of the cursor (which keeps track of what line
|
|
// and column we are on in the input data) and add the runes to the tokenizer
|
|
// output.
|
|
//
|
|
// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
|
|
// the first byte after the accepted runes.
|
|
func (runeMode *RuneMode) AcceptMulti(runes ...rune) {
|
|
a := runeMode.api
|
|
f := a.stackFrame
|
|
runesAsString := string(runes)
|
|
byteLen := len(runesAsString)
|
|
curBytesEnd := f.bytesEnd
|
|
newBytesEnd := curBytesEnd + byteLen
|
|
|
|
a.growOutputData(newBytesEnd)
|
|
copy(a.outputData[curBytesEnd:], runesAsString)
|
|
f.bytesEnd = newBytesEnd
|
|
f.offset += byteLen
|
|
|
|
for _, r := range runes {
|
|
f.moveCursorByRune(r)
|
|
}
|
|
}
|