// go-toml/parsekit/internals.go

package parsekit

import (
	"unicode/utf8"
)

// P is the internal state of the parser. It bundles the state function
// bookkeeping, the input with its scanning position, and the results
// (items or an error) of the parsing process.
type P struct {
	// The function that handles the current state.
	state StateFn

	// The function that will handle the next state.
	nextState StateFn

	// State function stack, for nested parsing.
	stack []StateFn

	// The scanned input.
	input string

	// The total length of the input in bytes.
	len int

	// Current byte scanning position in the input.
	pos int

	// Keeps track of when a newline has been scanned.
	newline bool

	// Current row number and current column position in the input.
	cursorRow, cursorColumn int

	// A description of what the current state expects to find.
	expecting string

	// An efficient buffer, used to build string values.
	buffer stringBuffer

	// Channel of resulting Parser items.
	items chan Item

	// The current item as reached by Next() and retrieved by Get().
	item Item

	// An error when lexing failed, retrieved by Error().
	err *Error
}

// peek returns the rune that starts offsetInBytes bytes past the current
// scanning position, without advancing the cursor.
//
// It returns the rune, its width in bytes and a boolean. The boolean is
// false when no upcoming rune can be peeked: either the end of the data
// is reached or the input holds an invalid UTF8 character at that spot.
//
// An offset that points beyond the end of the input is reported as end of
// data, instead of triggering a slice bounds panic.
func (p *P) peek(offsetInBytes int) (rune, int, bool) {
	start := p.pos + offsetInBytes
	if start > len(p.input) {
		// (utf8.RuneError, 0) is what decoding an empty string yields, so
		// this takes the exact same path as a peek at the end of the input.
		return handleRuneError(utf8.RuneError, 0)
	}
	r, w := utf8.DecodeRuneInString(p.input[start:])
	return handleRuneError(r, w)
}

// handleRuneError is used to normalize the rune value in case of errors.
//
// The arguments are a rune and its width in bytes, as returned by a UTF8
// decoding function. When decoding failed, the rune is utf8.RuneError and
// the width tells which of two situations occurred:
//
//   - w == 0: end of file is reached
//   - w == 1: invalid UTF8 character on input
//
// This function lets these two cases return respectively the package's
// own EOF or INVALID runes, together with a false boolean, to make it
// easy for client code to distinguish between these two cases.
//
// A utf8.RuneError with a width larger than 1 is not a decoding error: it
// is the replacement character U+FFFD that was correctly encoded in the
// input. It is returned as a regular rune with a true boolean, just like
// any other successfully decoded rune.
func handleRuneError(r rune, w int) (rune, int, bool) {
	if r != utf8.RuneError {
		return r, w, true
	}
	switch {
	case w == 0:
		return EOF, 0, false
	case w > 1:
		// Properly encoded U+FFFD, which is valid input.
		return r, w, true
	default:
		return INVALID, w, false
	}
}

// EOF is a special rune, which is used to indicate an end of file when
// reading a character from the input.
// It can be treated as a rune when writing parsing rules, so a valid way to
// say 'I now expect the end of the file' is using something like:
// if (p.On(c.Rune(EOF)).Skip()) { ... }
const EOF rune = -1

// INVALID is a special rune, which is used to indicate an invalid UTF8
// rune on the input.
// Its value equals utf8.RuneError (U+FFFD), which can also appear as a
// correctly encoded character in the input. The boolean that is returned
// alongside the rune tells the two apart.
const INVALID rune = utf8.RuneError

// StateFn is the signature of a function that can be used to handle a
// parser state. The function receives the parser state p to work on.
type StateFn func(p *P)

// ItemType is the type of a parser Item.
type ItemType int

// Built-in parser item types.
const (
	// ItemEOF is used for flagging that the end of the input was reached.
	ItemEOF ItemType = -1

	// ItemError is used for flagging that an error has occurred during
	// parsing.
	ItemError ItemType = -2
)

// Item is a single item as returned from the parser.
type Item struct {
	// Type is the type of the item.
	Type ItemType

	// Value is the string value that goes with the item.
	Value string
}

// Error is the error type that is used when a parsing error occurs.
// Next to the message, it holds some extra meta information to allow for
// useful error messages to the user.
type Error struct {
	// Message is the text that describes the error.
	Message string

	// Row and Column are the extra meta information for the error
	// (presumably the input position at which it occurred).
	Row, Column int
}

// Error returns the error message, which makes *Error satisfy the built-in
// error interface. Row and Column are not included in the returned string.
func (e *Error) Error() string {
	return e.Message
}