package tokenize import ( "unicode/utf8" ) // RuneMode provides (UTF8) rune-driven input/output functionality for the tokenize API. type RuneMode struct { api *API } // Peek returns the UTF8 rune at the provided byte offset, including its byte width. // // The byte width is useful to know what byte offset you'll have to use to peek // the next byte or rune. Some UTF8 runes take up 4 bytes of data, so when the // first rune starts at offset = 0, the second rune might start at offset = 4. // // When an invalid UTF8 rune is encountered on the input, it is replaced with // the utf.RuneError rune. It's up to the caller to handle this as an error // when needed. // // When an error occurs during reading the input, an error will be returned. // When an offset is requested that is beyond the length of the available input // data, then the error will be io.EOF. func (runeMode RuneMode) Peek(offset int) (rune, int, error) { a := runeMode.api return a.reader.RuneAt(a.stackFrame.offset + offset) } // Skip is used to skip over a single rune that was read from the input. // This tells the tokenizer: "I've seen this rune. It is of no interest. // I will now continue reading after this rune." // // This will merely update the position of the cursor (which keeps track of what // line and column we are on in APIthe input data). The rune is not added to // the output. // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the skipped rune. func (runeMode RuneMode) Skip(r rune) { f := runeMode.api.stackFrame f.moveCursorByRune(r) f.offset += utf8.RuneLen(r) } // SkipMulti is used to skip over one or more runes that were read from the input. // This tells the tokenizer: "I've seen these runes. They are of no interest. // I will now continue reading after these runes." // // This will merely update the position of the cursor (which keeps track of what // line and column we are on in the input data). The runes are not added to // the output. // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the skipped runes. func (runeMode RuneMode) SkipMulti(runes ...rune) { f := runeMode.api.stackFrame for _, r := range runes { f.moveCursorByRune(r) f.offset += utf8.RuneLen(r) } } // Accept is used to accept a single rune that was read from the input. // This tells the tokenizer: "I've seen this rune. I want to make use of it // for the final output, so please remember it for me. I will now continue // reading after this rune." // // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the rune to the tokenizer // output. // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted rune. func (runeMode RuneMode) Accept(r rune) { a := runeMode.api f := a.stackFrame curBytesEnd := f.bytesEnd maxRequiredBytes := curBytesEnd + utf8.UTFMax a.growOutputData(maxRequiredBytes) w := utf8.EncodeRune(a.outputData[curBytesEnd:], r) f.bytesEnd += w f.offset += w f.moveCursorByRune(r) } // AcceptMulti is used to accept one or more runes that were read from the input. // This tells the tokenizer: "I've seen these runes. I want to make use of them // for the final output, so please remember them for me. I will now continue // reading after these runes." // // This will update the position of the cursor (which keeps track of what line // and column we are on in the input data) and add the runes to the tokenizer // output. // // After the call, byte offset 0 for PeekByte() and PeekRune() will point at // the first byte after the accepted runes. func (runeMode RuneMode) AcceptMulti(runes ...rune) { a := runeMode.api f := a.stackFrame runesAsString := string(runes) byteLen := len(runesAsString) curBytesEnd := f.bytesEnd newBytesEnd := curBytesEnd + byteLen a.growOutputData(newBytesEnd) copy(a.outputData[curBytesEnd:], runesAsString) f.bytesEnd = newBytesEnd f.offset += byteLen for _, r := range runes { f.moveCursorByRune(r) } }