Split up api.go into three files: api.go, api_input.go, and api_output.go. This makes the individual code sets easier to manage.
parent 93c75af87f
commit 0c057e4a9a

tokenize/api.go (367 lines changed)
@@ -1,9 +1,6 @@
 package tokenize
 
 import (
-    "fmt"
-    "unicode/utf8"
-
    "git.makaay.nl/mauricem/go-parsekit/read"
 )
 
@@ -92,19 +89,6 @@ type stackFrame struct {
    err error // can be used by a Handler to report a specific issue with the input
 }
 
-// Input provides input-related functionality for the tokenize API.
-type Input struct {
-    api    *API
-    reader *read.Buffer // the input data reader
-}
-
-// Output provides output-related functionality for the tokenize API.
-type Output struct {
-    api    *API
-    tokens []Token // accepted tokens
-    data   []byte  // accepted data
-}
-
 const initialStackDepth = 64
 const initialTokenStoreLength = 64
 const initialByteStoreLength = 1024
@@ -237,354 +221,3 @@ func (tokenAPI *API) Dispose(stackLevel int) {
    tokenAPI.stackLevel = stackLevel - 1
    tokenAPI.stackFrame = &tokenAPI.stackFrames[stackLevel-1]
 }
-
-// Reset moves the input cursor back to the beginning for the currently active API child.
-// Additionally, any output (bytes and tokens) that was emitted from the API child is
-// cleared as well.
-func (i Input) Reset() {
-    if i.api.stackLevel == 0 {
-        i.api.stackFrame.column = 0
-        i.api.stackFrame.line = 0
-        i.api.stackFrame.offset = 0
-    } else {
-        parent := i.api.stackFrames[i.api.stackLevel-1]
-        i.api.stackFrame.column = parent.column
-        i.api.stackFrame.line = parent.line
-        i.api.stackFrame.offset = parent.offset
-    }
-    i.api.stackFrame.bytesEnd = i.api.stackFrame.bytesStart
-    i.api.stackFrame.tokenEnd = i.api.stackFrame.tokenStart
-    i.api.stackFrame.err = nil
-}
-
-func (i Input) Cursor() string {
-    if i.api.stackFrame.line == 0 && i.api.stackFrame.column == 0 {
-        return fmt.Sprintf("start of file")
-    }
-    return fmt.Sprintf("line %d, column %d", i.api.stackFrame.line+1, i.api.stackFrame.column+1)
-}
-
-// PeekByte returns the byte at the provided byte offset.
-//
-// When an error occurs during reading the input, an error will be returned.
-// When an offset is requested that is beyond the length of the available input
-// data, then the error will be io.EOF.
-func (i Input) PeekByte(offset int) (byte, error) {
-    return i.reader.ByteAt(i.api.stackFrame.offset + offset)
-}
-
-// SkipByte is used to skip over a single byte that was read from the input.
-// This tells the tokenizer: "I've seen this byte. It is of no interest.
-// I will now continue reading after this byte."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The byte is not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped byte.
-func (i Input) SkipByte(b byte) {
-    i.api.stackFrame.moveCursorByByte(b)
-    i.api.stackFrame.offset++
-}
-
-// SkipBytes is used to skip over one or more bytes that were read from the input.
-// This tells the tokenizer: "I've seen these bytes. They are of no interest.
-// I will now continue reading after these bytes."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The bytes are not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped bytes.
-func (i Input) SkipBytes(bytes ...byte) {
-    for _, b := range bytes {
-        i.api.stackFrame.moveCursorByByte(b)
-        i.api.stackFrame.offset++
-    }
-}
-
-// AcceptByte is used to accept a single byte that was read from the input.
-// This tells the tokenizer: "I've seen this byte. I want to make use of it
-// for the final output, so please remember it for me. I will now continue
-// reading after this byte."
-//
-// This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the byte to the tokenizer
-// output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the accepted byte.
-func (i Input) AcceptByte(b byte) {
-    curBytesEnd := i.api.stackFrame.bytesEnd
-    maxRequiredBytes := curBytesEnd + 1
-
-    // Grow the bytes capacity when needed.
-    if cap(i.api.Output.data) < maxRequiredBytes {
-        newBytes := make([]byte, maxRequiredBytes*2)
-        copy(newBytes, i.api.Output.data)
-        i.api.Output.data = newBytes
-    }
-
-    i.api.Output.data[curBytesEnd] = b
-    i.api.stackFrame.moveCursorByByte(b)
-    i.api.stackFrame.bytesEnd++
-    i.api.stackFrame.offset++
-}
-
-// AcceptBytes is used to accept one or more bytes that were read from the input.
-// This tells the tokenizer: "I've seen these bytes. I want to make use of them
-// for the final output, so please remember them for me. I will now continue
-// reading after these bytes."
-//
-// This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the bytes to the tokenizer
-// output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the accepted bytes.
-func (i Input) AcceptBytes(bytes ...byte) {
-    curBytesEnd := i.api.stackFrame.bytesEnd
-    newBytesEnd := curBytesEnd + len(bytes)
-
-    // Grow the bytes capacity when needed.
-    if cap(i.api.Output.data) < newBytesEnd {
-        newBytes := make([]byte, newBytesEnd*2)
-        copy(newBytes, i.api.Output.data)
-        i.api.Output.data = newBytes
-    }
-
-    copy(i.api.Output.data[curBytesEnd:], bytes)
-    for _, b := range bytes {
-        i.api.stackFrame.moveCursorByByte(b)
-        i.api.stackFrame.offset++
-    }
-    i.api.stackFrame.bytesEnd = newBytesEnd
-}
-
-// PeekRune returns the UTF8 rune at the provided byte offset, including its byte width.
-//
-// The byte width is useful to know what byte offset you'll have to use to peek
-// the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the
-// first rune starts at offset = 0, the second rune might start at offset = 4.
-//
-// When an invalid UTF8 rune is encountered on the input, it is replaced with
-// the utf8.RuneError rune. It's up to the caller to handle this as an error
-// when needed.
-//
-// When an error occurs during reading the input, an error will be returned.
-// When an offset is requested that is beyond the length of the available input
-// data, then the error will be io.EOF.
-func (i Input) PeekRune(offset int) (rune, int, error) {
-    return i.reader.RuneAt(i.api.stackFrame.offset + offset)
-}
-
-// SkipRune is used to skip over a single rune that was read from the input.
-// This tells the tokenizer: "I've seen this rune. It is of no interest.
-// I will now continue reading after this rune."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The rune is not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped rune.
-func (i Input) SkipRune(r rune) {
-    i.api.stackFrame.moveCursorByRune(r)
-    i.api.stackFrame.offset += utf8.RuneLen(r)
-}
-
-// SkipRunes is used to skip over one or more runes that were read from the input.
-// This tells the tokenizer: "I've seen these runes. They are of no interest.
-// I will now continue reading after these runes."
-//
-// This will merely update the position of the cursor (which keeps track of what
-// line and column we are on in the input data). The runes are not added to
-// the output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the skipped runes.
-func (i Input) SkipRunes(runes ...rune) {
-    for _, r := range runes {
-        i.api.stackFrame.moveCursorByRune(r)
-        i.api.stackFrame.offset += utf8.RuneLen(r)
-    }
-}
-
-// AcceptRune is used to accept a single rune that was read from the input.
-// This tells the tokenizer: "I've seen this rune. I want to make use of it
-// for the final output, so please remember it for me. I will now continue
-// reading after this rune."
-//
-// This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the rune to the tokenizer
-// output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the accepted rune.
-func (i Input) AcceptRune(r rune) {
-    curBytesEnd := i.api.stackFrame.bytesEnd
-    maxRequiredBytes := curBytesEnd + utf8.UTFMax
-
-    // Grow the bytes capacity when needed.
-    if cap(i.api.Output.data) < maxRequiredBytes {
-        newBytes := make([]byte, maxRequiredBytes*2)
-        copy(newBytes, i.api.Output.data)
-        i.api.Output.data = newBytes
-    }
-
-    i.api.stackFrame.moveCursorByRune(r)
-    w := utf8.EncodeRune(i.api.Output.data[curBytesEnd:], r)
-    i.api.stackFrame.bytesEnd += w
-    i.api.stackFrame.offset += w
-}
-
-// AcceptRunes is used to accept one or more runes that were read from the input.
-// This tells the tokenizer: "I've seen these runes. I want to make use of them
-// for the final output, so please remember them for me. I will now continue
-// reading after these runes."
-//
-// This will update the position of the cursor (which keeps track of what line
-// and column we are on in the input data) and add the runes to the tokenizer
-// output.
-//
-// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
-// the first byte after the accepted runes.
-func (i Input) AcceptRunes(runes ...rune) {
-    runesAsString := string(runes)
-    byteLen := len(runesAsString)
-    curBytesEnd := i.api.stackFrame.bytesEnd
-    newBytesEnd := curBytesEnd + byteLen
-
-    // Grow the bytes capacity when needed.
-    if cap(i.api.Output.data) < newBytesEnd {
-        newBytes := make([]byte, newBytesEnd*2)
-        copy(newBytes, i.api.Output.data)
-        i.api.Output.data = newBytes
-    }
-
-    for _, r := range runes {
-        i.api.stackFrame.moveCursorByRune(r)
-    }
-    copy(i.api.Output.data[curBytesEnd:], runesAsString)
-
-    i.api.stackFrame.bytesEnd = newBytesEnd
-    i.api.stackFrame.offset += byteLen
-}
-
-// Flush flushes input data from the read.Buffer up to the current
-// read offset of the parser.
-//
-// Note:
-// When writing your own TokenHandler, you normally won't have to call this
-// method yourself. It is automatically called by parsekit when possible.
-func (i Input) Flush() bool {
-    if i.api.stackFrame.offset > 0 {
-        i.reader.Flush(i.api.stackFrame.offset)
-        i.api.stackFrame.offset = 0
-        return true
-    }
-    return false
-}
-
-func (o Output) String() string {
-    bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
-    return string(bytes)
-}
-
-func (o Output) Runes() []rune {
-    bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
-    return []rune(string(bytes))
-}
-
-func (o Output) Rune(offset int) rune {
-    r, _ := utf8.DecodeRune(o.data[o.api.stackFrame.bytesStart+offset:])
-    return r
-}
-
-func (o Output) ClearData() {
-    o.api.stackFrame.bytesEnd = o.api.stackFrame.bytesStart
-}
-
-func (o Output) SetBytes(bytes ...byte) {
-    o.ClearData()
-    o.AddBytes(bytes...)
-}
-
-func (o Output) AddBytes(bytes ...byte) {
-    // Grow the bytes capacity when needed.
-    newBytesEnd := o.api.stackFrame.bytesEnd + len(bytes)
-    if cap(o.data) < newBytesEnd {
-        newBytes := make([]byte, newBytesEnd*2)
-        copy(newBytes, o.data)
-        o.data = newBytes
-    }
-
-    copy(o.data[o.api.stackFrame.bytesEnd:], bytes)
-    o.api.stackFrame.bytesEnd = newBytesEnd
-}
-
-func (o Output) SetRunes(runes ...rune) {
-    o.ClearData()
-    o.AddRunes(runes...)
-}
-
-func (o Output) AddRunes(runes ...rune) {
-    // Grow the bytes capacity when needed.
-    runesAsString := string(runes)
-    newBytesEnd := o.api.stackFrame.bytesEnd + len(runesAsString)
-    if cap(o.data) < newBytesEnd {
-        newBytes := make([]byte, newBytesEnd*2)
-        copy(newBytes, o.data)
-        o.data = newBytes
-    }
-
-    copy(o.data[o.api.stackFrame.bytesEnd:], runesAsString)
-    o.api.stackFrame.bytesEnd = newBytesEnd
-}
-
-func (o Output) AddString(s string) {
-    o.AddBytes([]byte(s)...)
-}
-
-func (o Output) SetString(s string) {
-    o.ClearData()
-    o.SetBytes([]byte(s)...)
-}
-
-func (o Output) Tokens() []Token {
-    return o.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
-}
-
-func (o Output) Token(offset int) Token {
-    return o.tokens[o.api.stackFrame.tokenStart+offset]
-}
-
-func (o Output) TokenValue(offset int) interface{} {
-    return o.tokens[o.api.stackFrame.tokenStart+offset].Value
-}
-
-func (o Output) ClearTokens() {
-    o.api.stackFrame.tokenEnd = o.api.stackFrame.tokenStart
-}
-
-func (o Output) SetTokens(tokens ...Token) {
-    o.ClearTokens()
-    o.AddTokens(tokens...)
-}
-
-func (o Output) AddTokens(tokens ...Token) {
-    // Grow the tokens capacity when needed.
-    newTokenEnd := o.api.stackFrame.tokenEnd + len(tokens)
-    if cap(o.tokens) < newTokenEnd {
-        newTokens := make([]Token, newTokenEnd*2)
-        copy(newTokens, o.tokens)
-        o.tokens = newTokens
-    }
-
-    for offset, t := range tokens {
-        o.tokens[o.api.stackFrame.tokenEnd+offset] = t
-    }
-    o.api.stackFrame.tokenEnd = newTokenEnd
-}
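For orientation, here is a minimal sketch (not part of this commit) of how the three files now relate: api.go keeps the API struct and its stack frame bookkeeping, while the Input and Output types, moved into the two new files below, each hold a pointer back to the API. The API struct definition itself is not shown in this diff, so the field list below is an assumption inferred from references such as i.api.Output.data and tokenAPI.stackFrames in the code above.

// Sketch only; field names inferred from this commit, not its actual definition.
type API struct {
    stackFrames []stackFrame // the frame stack for nested API children
    stackLevel  int          // the current depth in the frame stack
    stackFrame  *stackFrame  // shortcut to the currently active frame
    Input       Input        // input-related methods (now in api_input.go)
    Output      Output       // output-related methods (now in api_output.go)
}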
tokenize/api_input.go (new file, 264 lines)

@@ -0,0 +1,264 @@
+package tokenize
+
+import (
+    "fmt"
+    "unicode/utf8"
+
+    "git.makaay.nl/mauricem/go-parsekit/read"
+)
+
+// Input provides input-related functionality for the tokenize API.
+type Input struct {
+    api    *API
+    reader *read.Buffer // the input data reader
+}
+
+// Reset moves the input cursor back to the beginning for the currently active API child.
+// Additionally, any output (bytes and tokens) that was emitted from the API child is
+// cleared as well.
+func (i Input) Reset() {
+    if i.api.stackLevel == 0 {
+        i.api.stackFrame.column = 0
+        i.api.stackFrame.line = 0
+        i.api.stackFrame.offset = 0
+    } else {
+        parent := i.api.stackFrames[i.api.stackLevel-1]
+        i.api.stackFrame.column = parent.column
+        i.api.stackFrame.line = parent.line
+        i.api.stackFrame.offset = parent.offset
+    }
+    i.api.stackFrame.bytesEnd = i.api.stackFrame.bytesStart
+    i.api.stackFrame.tokenEnd = i.api.stackFrame.tokenStart
+    i.api.stackFrame.err = nil
+}
+
+func (i Input) Cursor() string {
+    if i.api.stackFrame.line == 0 && i.api.stackFrame.column == 0 {
+        return fmt.Sprintf("start of file")
+    }
+    return fmt.Sprintf("line %d, column %d", i.api.stackFrame.line+1, i.api.stackFrame.column+1)
+}
+
+// PeekByte returns the byte at the provided byte offset.
+//
+// When an error occurs during reading the input, an error will be returned.
+// When an offset is requested that is beyond the length of the available input
+// data, then the error will be io.EOF.
+func (i Input) PeekByte(offset int) (byte, error) {
+    return i.reader.ByteAt(i.api.stackFrame.offset + offset)
+}
+
+// SkipByte is used to skip over a single byte that was read from the input.
+// This tells the tokenizer: "I've seen this byte. It is of no interest.
+// I will now continue reading after this byte."
+//
+// This will merely update the position of the cursor (which keeps track of what
+// line and column we are on in the input data). The byte is not added to
+// the output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the skipped byte.
+func (i Input) SkipByte(b byte) {
+    i.api.stackFrame.moveCursorByByte(b)
+    i.api.stackFrame.offset++
+}
+
+// SkipBytes is used to skip over one or more bytes that were read from the input.
+// This tells the tokenizer: "I've seen these bytes. They are of no interest.
+// I will now continue reading after these bytes."
+//
+// This will merely update the position of the cursor (which keeps track of what
+// line and column we are on in the input data). The bytes are not added to
+// the output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the skipped bytes.
+func (i Input) SkipBytes(bytes ...byte) {
+    for _, b := range bytes {
+        i.api.stackFrame.moveCursorByByte(b)
+        i.api.stackFrame.offset++
+    }
+}
+
+// AcceptByte is used to accept a single byte that was read from the input.
+// This tells the tokenizer: "I've seen this byte. I want to make use of it
+// for the final output, so please remember it for me. I will now continue
+// reading after this byte."
+//
+// This will update the position of the cursor (which keeps track of what line
+// and column we are on in the input data) and add the byte to the tokenizer
+// output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the accepted byte.
+func (i Input) AcceptByte(b byte) {
+    curBytesEnd := i.api.stackFrame.bytesEnd
+    maxRequiredBytes := curBytesEnd + 1
+
+    // Grow the bytes capacity when needed.
+    if cap(i.api.Output.data) < maxRequiredBytes {
+        newBytes := make([]byte, maxRequiredBytes*2)
+        copy(newBytes, i.api.Output.data)
+        i.api.Output.data = newBytes
+    }
+
+    i.api.Output.data[curBytesEnd] = b
+    i.api.stackFrame.moveCursorByByte(b)
+    i.api.stackFrame.bytesEnd++
+    i.api.stackFrame.offset++
+}
+
+// AcceptBytes is used to accept one or more bytes that were read from the input.
+// This tells the tokenizer: "I've seen these bytes. I want to make use of them
+// for the final output, so please remember them for me. I will now continue
+// reading after these bytes."
+//
+// This will update the position of the cursor (which keeps track of what line
+// and column we are on in the input data) and add the bytes to the tokenizer
+// output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the accepted bytes.
+func (i Input) AcceptBytes(bytes ...byte) {
+    curBytesEnd := i.api.stackFrame.bytesEnd
+    newBytesEnd := curBytesEnd + len(bytes)
+
+    // Grow the bytes capacity when needed.
+    if cap(i.api.Output.data) < newBytesEnd {
+        newBytes := make([]byte, newBytesEnd*2)
+        copy(newBytes, i.api.Output.data)
+        i.api.Output.data = newBytes
+    }
+
+    copy(i.api.Output.data[curBytesEnd:], bytes)
+    for _, b := range bytes {
+        i.api.stackFrame.moveCursorByByte(b)
+        i.api.stackFrame.offset++
+    }
+    i.api.stackFrame.bytesEnd = newBytesEnd
+}
+
+// PeekRune returns the UTF8 rune at the provided byte offset, including its byte width.
+//
+// The byte width is useful to know what byte offset you'll have to use to peek
+// the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the
+// first rune starts at offset = 0, the second rune might start at offset = 4.
+//
+// When an invalid UTF8 rune is encountered on the input, it is replaced with
+// the utf8.RuneError rune. It's up to the caller to handle this as an error
+// when needed.
+//
+// When an error occurs during reading the input, an error will be returned.
+// When an offset is requested that is beyond the length of the available input
+// data, then the error will be io.EOF.
+func (i Input) PeekRune(offset int) (rune, int, error) {
+    return i.reader.RuneAt(i.api.stackFrame.offset + offset)
+}
+
+// SkipRune is used to skip over a single rune that was read from the input.
+// This tells the tokenizer: "I've seen this rune. It is of no interest.
+// I will now continue reading after this rune."
+//
+// This will merely update the position of the cursor (which keeps track of what
+// line and column we are on in the input data). The rune is not added to
+// the output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the skipped rune.
+func (i Input) SkipRune(r rune) {
+    i.api.stackFrame.moveCursorByRune(r)
+    i.api.stackFrame.offset += utf8.RuneLen(r)
+}
+
+// SkipRunes is used to skip over one or more runes that were read from the input.
+// This tells the tokenizer: "I've seen these runes. They are of no interest.
+// I will now continue reading after these runes."
+//
+// This will merely update the position of the cursor (which keeps track of what
+// line and column we are on in the input data). The runes are not added to
+// the output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the skipped runes.
+func (i Input) SkipRunes(runes ...rune) {
+    for _, r := range runes {
+        i.api.stackFrame.moveCursorByRune(r)
+        i.api.stackFrame.offset += utf8.RuneLen(r)
+    }
+}
+
+// AcceptRune is used to accept a single rune that was read from the input.
+// This tells the tokenizer: "I've seen this rune. I want to make use of it
+// for the final output, so please remember it for me. I will now continue
+// reading after this rune."
+//
+// This will update the position of the cursor (which keeps track of what line
+// and column we are on in the input data) and add the rune to the tokenizer
+// output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the accepted rune.
+func (i Input) AcceptRune(r rune) {
+    curBytesEnd := i.api.stackFrame.bytesEnd
+    maxRequiredBytes := curBytesEnd + utf8.UTFMax
+
+    // Grow the bytes capacity when needed.
+    if cap(i.api.Output.data) < maxRequiredBytes {
+        newBytes := make([]byte, maxRequiredBytes*2)
+        copy(newBytes, i.api.Output.data)
+        i.api.Output.data = newBytes
+    }
+
+    i.api.stackFrame.moveCursorByRune(r)
+    w := utf8.EncodeRune(i.api.Output.data[curBytesEnd:], r)
+    i.api.stackFrame.bytesEnd += w
+    i.api.stackFrame.offset += w
+}
+
+// AcceptRunes is used to accept one or more runes that were read from the input.
+// This tells the tokenizer: "I've seen these runes. I want to make use of them
+// for the final output, so please remember them for me. I will now continue
+// reading after these runes."
+//
+// This will update the position of the cursor (which keeps track of what line
+// and column we are on in the input data) and add the runes to the tokenizer
+// output.
+//
+// After the call, byte offset 0 for PeekByte() and PeekRune() will point at
+// the first byte after the accepted runes.
+func (i Input) AcceptRunes(runes ...rune) {
+    runesAsString := string(runes)
+    byteLen := len(runesAsString)
+    curBytesEnd := i.api.stackFrame.bytesEnd
+    newBytesEnd := curBytesEnd + byteLen
+
+    // Grow the bytes capacity when needed.
+    if cap(i.api.Output.data) < newBytesEnd {
+        newBytes := make([]byte, newBytesEnd*2)
+        copy(newBytes, i.api.Output.data)
+        i.api.Output.data = newBytes
+    }
+
+    for _, r := range runes {
+        i.api.stackFrame.moveCursorByRune(r)
+    }
+    copy(i.api.Output.data[curBytesEnd:], runesAsString)
+
+    i.api.stackFrame.bytesEnd = newBytesEnd
+    i.api.stackFrame.offset += byteLen
+}
+
+// Flush flushes input data from the read.Buffer up to the current
+// read offset of the parser.
+//
+// Note:
+// When writing your own TokenHandler, you normally won't have to call this
+// method yourself. It is automatically called by parsekit when possible.
+func (i Input) Flush() bool {
+    if i.api.stackFrame.offset > 0 {
+        i.reader.Flush(i.api.stackFrame.offset)
+        i.api.stackFrame.offset = 0
+        return true
+    }
+    return false
+}
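The peek/skip/accept doc comments above describe a read loop pattern. As a rough illustration (hypothetical code, not part of this commit; handleDigits is an invented name), a handler that accepts a run of ASCII digits could look like:

// Hypothetical sketch of the peek/accept flow; it assumes the API struct
// exposes its Input instance as tokenAPI.Input, as this commit's field
// references suggest.
func handleDigits(tokenAPI *API) bool {
    accepted := false
    for {
        b, err := tokenAPI.Input.PeekByte(0)
        if err != nil || b < '0' || b > '9' {
            break // io.EOF or a non-digit byte ends the run
        }
        tokenAPI.Input.AcceptByte(b) // moves the cursor and stores the byte
        accepted = true
    }
    return accepted
}

Because each AcceptByte() advances the frame's offset, PeekByte(0) always refers to the next unread byte, so the loop never has to track its own position.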
tokenize/api_output.go (new file, 113 lines)

@@ -0,0 +1,113 @@
+package tokenize
+
+import (
+    "unicode/utf8"
+)
+
+// Output provides output-related functionality for the tokenize API.
+type Output struct {
+    api    *API
+    tokens []Token // accepted tokens
+    data   []byte  // accepted data
+}
+
+func (o Output) String() string {
+    bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
+    return string(bytes)
+}
+
+func (o Output) Runes() []rune {
+    bytes := o.data[o.api.stackFrame.bytesStart:o.api.stackFrame.bytesEnd]
+    return []rune(string(bytes))
+}
+
+func (o Output) Rune(offset int) rune {
+    r, _ := utf8.DecodeRune(o.data[o.api.stackFrame.bytesStart+offset:])
+    return r
+}
+
+func (o Output) ClearData() {
+    o.api.stackFrame.bytesEnd = o.api.stackFrame.bytesStart
+}
+
+func (o Output) SetBytes(bytes ...byte) {
+    o.ClearData()
+    o.AddBytes(bytes...)
+}
+
+func (o Output) AddBytes(bytes ...byte) {
+    // Grow the bytes capacity when needed.
+    newBytesEnd := o.api.stackFrame.bytesEnd + len(bytes)
+    if cap(o.data) < newBytesEnd {
+        newBytes := make([]byte, newBytesEnd*2)
+        copy(newBytes, o.data)
+        o.data = newBytes
+    }
+
+    copy(o.data[o.api.stackFrame.bytesEnd:], bytes)
+    o.api.stackFrame.bytesEnd = newBytesEnd
+}
+
+func (o Output) SetRunes(runes ...rune) {
+    o.ClearData()
+    o.AddRunes(runes...)
+}
+
+func (o Output) AddRunes(runes ...rune) {
+    // Grow the bytes capacity when needed.
+    runesAsString := string(runes)
+    newBytesEnd := o.api.stackFrame.bytesEnd + len(runesAsString)
+    if cap(o.data) < newBytesEnd {
+        newBytes := make([]byte, newBytesEnd*2)
+        copy(newBytes, o.data)
+        o.data = newBytes
+    }
+
+    copy(o.data[o.api.stackFrame.bytesEnd:], runesAsString)
+    o.api.stackFrame.bytesEnd = newBytesEnd
+}
+
+func (o Output) AddString(s string) {
+    o.AddBytes([]byte(s)...)
+}
+
+func (o Output) SetString(s string) {
+    o.ClearData()
+    o.SetBytes([]byte(s)...)
+}
+
+func (o Output) Tokens() []Token {
+    return o.tokens[o.api.stackFrame.tokenStart:o.api.stackFrame.tokenEnd]
+}
+
+func (o Output) Token(offset int) Token {
+    return o.tokens[o.api.stackFrame.tokenStart+offset]
+}
+
+func (o Output) TokenValue(offset int) interface{} {
+    return o.tokens[o.api.stackFrame.tokenStart+offset].Value
+}
+
+func (o Output) ClearTokens() {
+    o.api.stackFrame.tokenEnd = o.api.stackFrame.tokenStart
+}
+
+func (o Output) SetTokens(tokens ...Token) {
+    o.ClearTokens()
+    o.AddTokens(tokens...)
+}
+
+func (o Output) AddTokens(tokens ...Token) {
+    // Grow the tokens capacity when needed.
+    newTokenEnd := o.api.stackFrame.tokenEnd + len(tokens)
+    if cap(o.tokens) < newTokenEnd {
+        newTokens := make([]Token, newTokenEnd*2)
+        copy(newTokens, o.tokens)
+        o.tokens = newTokens
+    }
+
+    for offset, t := range tokens {
+        o.tokens[o.api.stackFrame.tokenEnd+offset] = t
+    }
+    o.api.stackFrame.tokenEnd = newTokenEnd
+}
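To show how the Output side is meant to be used together with the Input side, here is a hypothetical sketch (not part of this commit; makeIntegerToken and the strconv import are assumptions) that turns the bytes accepted so far into a single token. It relies only on methods in the diff above and on Token having a Value field, as TokenValue() implies:

// Hypothetical sketch: convert the accepted data into one Token.
// Assumes `import "strconv"` in the enclosing file.
func makeIntegerToken(tokenAPI *API) {
    digits := tokenAPI.Output.String() // bytes accepted via the Input methods
    value, err := strconv.Atoi(digits)
    if err != nil {
        return // not a valid integer; leave the token store untouched
    }
    tokenAPI.Output.SetTokens(Token{Value: value})
}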