package parsekit

import (
	"bufio"
	"fmt"
	"io"
	"unicode/utf8"
)

// Reader wraps an io.Reader and buffers the runes that were read from it,
// which allows us to read the same runes over and over again. This is useful
// for implementing a parser that must be able to do lookahead on the input,
// returning to the original input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the buffer when there
// is no more need to go back to previously read runes.
type Reader struct {
	bufio        *bufio.Reader // Used for ReadRune()
	buffer       []rune        // Input buffer, holding runes that were read from input
	bufferOffset int           // The offset of the buffer, relative to the start of the input
	bufferLen    int           // Input size, the number of runes in the buffer
}

// NewReader initializes a new Reader struct, wrapped around the provided
// io.Reader.
func NewReader(r io.Reader) *Reader {
	return &Reader{
		bufio:  bufio.NewReader(r),
		buffer: []rune{},
	}
}

// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done. The offset must not be negative; a negative offset
// results in an index out of range panic.
//
// A byte order mark (U+FEFF) is skipped, but only when it is the very first
// rune of the input. A U+FEFF at any other position is returned as a regular
// rune.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading failed, the rune will be utf8.RuneError. One special read
// failure is actually a normal situation: end of file reached. In that case,
// the returned error will be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) {
	// The rune at the provided offset might not yet be available in the
	// input buffer. Read runes until we have enough to satisfy the offset.
	for r.bufferLen <= offset {
		readRune, _, err := r.bufio.ReadRune()
		if err != nil {
			return utf8.RuneError, err
		}

		// Skip the BOM. Checking bufferOffset alone is not enough: it stays
		// zero until the first flush, so the buffer must be empty as well to
		// be sure that this is the first rune of the input.
		if readRune == '\uFEFF' && r.bufferOffset == 0 && r.bufferLen == 0 {
			r.bufferOffset++
			continue
		}

		r.buffer = append(r.buffer, readRune)
		r.bufferLen++
	}
	return r.buffer[offset], nil
}

// RunesAt reads a slice of runes of length 'len', starting from offset 'start'.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the input up to where
// the flush was done. The start offset must not be negative.
//
// When 'len' is zero or negative, an empty slice is returned without reading
// from the input.
//
// When an error is encountered during reading (EOF or other error), then the
// error return value will be set. In case of an error, any runes that could be
// successfully read are returned along with the error. This is an empty slice
// when 'start' lies beyond the runes that are available.
//
// The returned slice shares its memory with the reader buffer. Its capacity
// is limited to its length, so appending to it will not overwrite runes that
// are held by the reader.
// TODO Do I actually use this interface?
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
	if len <= 0 {
		return r.buffer[0:0:0], nil
	}

	// The end offset is exclusive, so the last rune that must be available
	// in the buffer is the one at offset end-1.
	end := start + len
	if _, err := r.RuneAt(end - 1); err != nil {
		// Reading failed before offset end-1 was reached, so fewer than
		// 'end' runes are buffered. Clamp both bounds to what is available.
		end = r.bufferLen
		if start > end {
			start = end
		}
		return r.buffer[start:end:end], err
	}
	return r.buffer[start:end:end], nil
}

// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
//
// Flush panics when the number of runes is negative or when it exceeds the
// number of runes that are currently held in the buffer. The reader state is
// left untouched in that case.
func (r *Reader) Flush(numberOfRunes int) {
	if numberOfRunes < 0 {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"must not be negative", numberOfRunes))
	}
	if numberOfRunes > r.bufferLen {
		panic(fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
	}
	r.bufferOffset += numberOfRunes
	r.bufferLen -= numberOfRunes
	r.buffer = r.buffer[numberOfRunes:]
}