// Package reader provides a buffered Reader that wraps around an io.Reader.
//
// Functionally, it provides an input buffer in the form of a sliding window.
// Let's say we've got the following input coming up in the io.Reader that is
// wrapped by the Reader:
//
//     |H|e|l|l|o|,| |w|o|r|l|d|!|  <-- runes
//      0           6           12  <-- rune offset
//
// The Reader can now be used to retrieve runes from the input, based on their
// offset, using RuneAt(offset). Normally these runes will be retrieved in
// sequence, but that is not a requirement. Let's say we retrieve the rune with
// offset 6 from the input (the space before the 'w'), then the Reader buffer
// will be filled with runes from the io.Reader until there are enough runes
// available to return the rune for offset 6:
//
//     |H|e|l|l|o|,| |
//      0           6
//
// Using RuneAt, you can retrieve arbitrary runes. If you request one that is
// in the Reader buffer, then the buffered rune is returned. If you request one
// that is not in the buffer, then the buffer will be expanded.
//
// To make this into a sliding window, the Reader provides the method
// Flush(numberOfRunes). This method will drop the provided number of runes from
// the Reader buffer. So when we'd do a Flush(3) on the example buffer from above,
// then the Reader buffer would become:
//
//     |l|o|,| |
//      0     3
//
// Note that the offset for the first rune 'l' in the buffer is now 0.
// You can consider the input to be changed in a similar way:
//
//     |l|o|,| |w|o|r|l|d|!|
//      0           6     9
//
// So after a flush, the first upcoming rune after the flushed runes
// will always have index 0.
package reader

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

// Reader wraps around a bufio.Reader and provides an additional layer of
// buffering that allows us to read the same runes over and over again.
// This is useful for implementing a parser that must be able to do lookahead
// on the input, returning to the original input position after finishing
// that lookahead.
//
// To minimize memory use, it is also possible to flush the read buffer when
// there is no more need to go back to previously read runes.
//
// The parsekit.reader.Reader is used internally by parsekit.TokenAPI.
type Reader struct {
	bufio         *bufio.Reader // Source of the input; runes are pulled from it using ReadRune()
	buffer        []rune        // Input buffer, holding runes that were read from input; index 0 is offset 0 for RuneAt()
	err           error         // The first read error that occurred (nil when none); re-issued by RuneAt()
	errOffset     int           // The offset in the buffer at which the read error was encountered; shifted by Flush()
	firstReadDone bool          // Whether or not the first read was done; used to skip a BOM on the first read only
}

// New creates a Reader that takes its input from the provided source.
//
// Supported input types are:
//   - string
//   - any type implementing io.Reader
//   - bufio.Reader or *bufio.Reader
//
// For any other input type, New panics.
func New(input interface{}) *Reader {
	r := new(Reader)
	r.bufio = makeBufioReader(input)
	return r
}

// makeBufioReader turns the provided input into a *bufio.Reader.
//
// A string is wrapped in a strings.Reader first, a *bufio.Reader is returned
// as-is, a bufio.Reader value is copied and a pointer to that copy is
// returned, and any other io.Reader is wrapped in a new bufio.Reader.
// Input of any other type results in a panic.
func makeBufioReader(input interface{}) *bufio.Reader {
	if text, isString := input.(string); isString {
		return bufio.NewReader(strings.NewReader(text))
	}

	// The *bufio.Reader check must come before the generic io.Reader check,
	// because a *bufio.Reader is an io.Reader as well and must not be
	// handled by that more general case.
	if ptr, isPtr := input.(*bufio.Reader); isPtr {
		return ptr
	}
	if val, isVal := input.(bufio.Reader); isVal {
		return &val
	}
	if src, isReader := input.(io.Reader); isReader {
		return bufio.NewReader(src)
	}

	panic(fmt.Sprintf("parsekit.reader.New(): no support for input of type %T", input))
}

// RuneAt returns the rune found at the provided rune offset.
//
// The offset is relative to the current start of the reader buffer. Before
// any flush, offset 0 is the start of the input. After a flush, offset 0 is
// the first rune following the flushed runes.
//
// When the rune is not yet buffered, runes are read from the input until
// the buffer reaches up to and including the requested offset. When the
// very first rune read from the input is a byte order mark (U+FEFF), that
// rune is dropped and not stored in the buffer.
//
// On success, the error is nil. An invalid rune on the input is not treated
// as an error: the returned rune is utf8.RuneError and the error is nil.
//
// On a read failure, the returned rune is utf8.RuneError and the error is
// not nil. Reaching the end of the input is reported in this way too, using
// io.EOF as the error. A read error is remembered, together with the offset
// at which it occurred, and the same error is returned for every later
// call with an offset at or beyond that point.
func (r *Reader) RuneAt(offset int) (rune, error) {
	// A stored read error applies to every offset from errOffset onwards.
	if r.err != nil && offset >= r.errOffset {
		return utf8.RuneError, r.err
	}

	have := len(r.buffer)
	if missing := offset + 1 - have; missing > 0 {
		// Make room for all missing runes up front, then fill the slots
		// one at a time. The position only advances when a rune is stored.
		r.grow(missing)
		for pos := have; pos <= offset; {
			ch, _, readErr := r.bufio.ReadRune()

			// Only the very first read gets the BOM check. A BOM is
			// dropped by reading again for the same buffer position.
			if !r.firstReadDone {
				r.firstReadDone = true
				if ch == '\uFEFF' {
					continue
				}
			}

			// Remember the error and where it happened, so it can be
			// re-issued on later calls. The buffer keeps its grown length.
			if readErr != nil {
				r.err = readErr
				r.errOffset = pos
				return utf8.RuneError, readErr
			}

			r.buffer[pos] = ch
			pos++
		}
	}

	return r.buffer[offset], nil
}

// The upcoming code was inspired heavily by the Go built-in 'bytes' package.

// smallBufferSize is the minimum capacity, in runes, that grow uses when it
// allocates the very first input buffer.
const smallBufferSize = 64

// ErrTooLarge is passed to panic if memory cannot be allocated to store data
// in a buffer (see makeSlice, which panics with this value when its
// allocation fails).
var ErrTooLarge = errors.New("parsekit.reader: too large")

// grow extends the length of the reader buffer by n runes, so the caller
// can store n more runes directly after the runes already in the buffer.
// It does not return a value; the new slots are the last n elements of
// r.buffer.
//
// When there is no buffer yet, one is allocated with length n and a
// capacity of at least smallBufferSize. When the existing buffer has enough
// spare capacity, it is resliced in place. Otherwise a new buffer of size
// 2*cap+n is allocated through makeSlice (which panics with ErrTooLarge when
// the allocation fails) and the existing runes are copied over.
func (r *Reader) grow(n int) {
	// First use: allocate the initial buffer.
	if r.buffer == nil {
		capacity := n
		if capacity < smallBufferSize {
			capacity = smallBufferSize
		}
		r.buffer = make([]rune, n, capacity)
		return
	}

	used := len(r.buffer)
	spare := cap(r.buffer) - used

	// Not enough room left: move the data over to a bigger backing array.
	if n > spare {
		bigger := makeSlice(2*cap(r.buffer) + n)
		copy(bigger, r.buffer)
		r.buffer = bigger
	}

	// Expose the n new slots, in either the old or the new backing array.
	r.buffer = r.buffer[:used+n]
}

// makeSlice returns a newly allocated rune slice with length n.
// When the allocation panics, that panic is replaced by a panic
// with ErrTooLarge.
func makeSlice(n int) []rune {
	defer func() {
		// Translate any panic raised by make into the known error value.
		if cause := recover(); cause == nil {
			return
		}
		panic(ErrTooLarge)
	}()

	runes := make([]rune, n)
	return runes
}

// Flush drops the provided number of runes from the start of the reader
// buffer. After the flush, offset 0 as used by RuneAt() refers to the first
// rune that comes after the flushed runes, which effectively turns the
// Reader into a sliding window over the input.
//
// When a read error was stored, its offset is shifted by the same amount,
// so it keeps referring to the same position in the input.
//
// Flush panics when numberOfRunes exceeds the number of runes in the buffer.
func (r *Reader) Flush(numberOfRunes int) {
	if buffered := len(r.buffer); numberOfRunes > buffered {
		message := fmt.Sprintf(
			"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
				"exceeds size of the buffer (%d)", numberOfRunes, buffered)
		panic(message)
	}

	r.buffer = r.buffer[numberOfRunes:]

	if r.err == nil {
		return
	}
	r.errOffset -= numberOfRunes
}