// go-toml/lexer/lexer.go
package lexer
import (
"errors"
"fmt"
"strings"
"unicode/utf8"
)
// Lexer holds the state of the scanner.
type Lexer struct {
	input    string          // the scanned input string
	state    stateFn         // the current state function
	stack    []stateFn       // state stack, for nested parsing
	start    int             // start position of the currently scanned item
	pos      int             // current scanning position in the input
	width    int             // width of the last rune read, used by backup()
	strValue strings.Builder // used to build interpreted string values
	items    chan Item       // channel of scanned items
	nextItem Item            // the current item as reached by Next() and retrieved by Get()
	err      error           // an error when lexing failed, retrieved by Error()
}
// Lex takes an input string and initializes the TOML lexer for it.
// Usage:
//
//	l := lexer.Lex("...inputstring...")
//	for l.Next() {
//		item := l.Get()
//		... handle item ...
//	}
//	if e := l.Error(); e != nil {
//		... handle error message ...
//	}
func Lex(input string) *Lexer {
	l := &Lexer{
		input: input,
		state: stateKeyValuePair,
		// Buffered so a state function can emit without blocking
		// before Next() drains the channel.
		items: make(chan Item, 2),
	}
	return l
}
// Next advances to the next lexer item in the input string.
// When a next item was found, then true is returned.
// On error or reaching the end of the input, false is returned.
func (l *Lexer) Next() bool {
	for {
		select {
		case i := <-l.items:
			if i.Type == ItemEOF {
				return false
			}
			if i.Type == ItemError {
				l.err = errors.New(i.Value)
				return false
			}
			l.nextItem = i
			return true
		default:
			// Check for a nil state here, not on entry to Next():
			// a state function legitimately returns nil after emitting
			// ItemEOF or ItemError, and those pending items must still
			// be drained by the case above. Reaching a nil state with
			// an empty item channel means a state function returned nil
			// without emitting a terminating item — a programming error.
			if l.state == nil {
				panic("This should not happen: nil state reached, but no pending items in Next()")
			}
			l.state = l.state(l)
		}
	}
}
// Error returns the error that made the lexer stop, or nil when no
// error occurred. It is only set after Next() has returned false
// because of an ItemError.
func (l *Lexer) Error() error {
	return l.err
}
// ToArray returns the lexer items as a slice.
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func (l *Lexer) ToArray() ([]Item, error) {
	var collected []Item
	for l.Next() {
		collected = append(collected, l.Get())
	}
	return collected, l.Error()
}
// Get returns the next lexer item, as reached by Next().
func (l *Lexer) Get() Item {
	return l.nextItem
}
// pushState adds the state function to its stack.
// This is used for implementing nested parsing; the pushed state is
// later restored by popState().
func (l *Lexer) pushState(state stateFn) {
	l.stack = append(l.stack, state)
}
// popState pops the last pushed state from its stack.
// Note: panics when the stack is empty (callers must balance their
// pushState/popState calls).
func (l *Lexer) popState() stateFn {
	n := len(l.stack) - 1
	state := l.stack[n]
	l.stack = l.stack[:n]
	return state
}
// getAcceptedString returns the input as accepted by the
// accept* methods so far (the input between start and pos).
func (l *Lexer) getAcceptedString() string {
	return l.input[l.start:l.pos]
}
// emit passes a scanned item back to the client and marks the
// current position as the start of the next item.
func (l *Lexer) emit(t itemType, v string) {
	l.items <- Item{t, v}
	l.start = l.pos
}
// ignore skips over the pending input before the current position,
// without emitting an item for it.
func (l *Lexer) ignore() {
	l.start = l.pos
}
// atEndOfFile reports whether the scanning position has reached
// the end of the input.
func (l *Lexer) atEndOfFile() bool {
	return l.pos >= len(l.input)
}
// backup steps back one rune.
// Can be called only once per call of next(); a second call would
// rewind by the width of the same rune again.
func (l *Lexer) backup() {
	l.pos -= l.width
}
// peek returns but does not advance to the next rune in the input.
// Returns endOfFile when the input is exhausted.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
// accept consumes the next rune if it's from the valid set of runes.
// Reports whether a rune was consumed.
func (l *Lexer) accept(runes string) bool {
	if !strings.ContainsRune(runes, l.next()) {
		l.backup()
		return false
	}
	return true
}
// upcoming reports whether the next rune is from the valid set of
// runes, without consuming it.
func (l *Lexer) upcoming(runes string) bool {
	ok := l.accept(runes)
	if ok {
		l.backup()
	}
	return ok
}
// acceptNot consumes the next rune if it's not from the set of runes
// and not the end of the input. Reports whether a rune was consumed.
func (l *Lexer) acceptNot(runes string) bool {
	r := l.next()
	switch {
	case r == endOfFile:
		l.backup()
		return false
	case strings.ContainsRune(runes, r):
		l.backup()
		return false
	default:
		return true
	}
}
// acceptUntil consumes a run of runes until one from the valid set
// (or the end of the input) is encountered. Reports whether at least
// one rune was consumed.
func (l *Lexer) acceptUntil(runes string) bool {
	consumed := 0
	for l.acceptNot(runes) {
		consumed++
	}
	return consumed > 0
}
// acceptWhile consumes a run of runes from the set of accepted runes.
// Reports whether at least one rune was consumed.
func (l *Lexer) acceptWhile(runes string) bool {
	consumed := 0
	for l.accept(runes) {
		consumed++
	}
	return consumed > 0
}
// skip consumes and discards a run of runes from the set of
// accepted runes.
func (l *Lexer) skip(runes string) {
	if skipped := l.acceptWhile(runes); skipped {
		l.ignore()
	}
}
// skipUntil consumes and discards a run of runes, until a rune from
// the set of runes or the end of the input is reached.
func (l *Lexer) skipUntil(runes string) {
	if skipped := l.acceptUntil(runes); skipped {
		l.ignore()
	}
}
// resetStringBuilder initializes a new string builder, used for building
// a string by interpreting input data, e.g. for translating
// double quoted strings with escape codes into an actual
// Go string value.
func (l *Lexer) resetStringBuilder() {
	l.strValue.Reset()
}
// addToString adds a rune to the string builder.
// The WriteRune return values are deliberately ignored:
// strings.Builder is documented to never return an error.
func (l *Lexer) addToString(r rune) {
	l.strValue.WriteRune(r)
}
// getString returns the runes in the string builder as a string value.
func (l *Lexer) getString() string {
	return l.strValue.String()
}
// endOfFile is a sentinel rune value, returned by next() when the
// end of the input has been reached.
var endOfFile rune = -1
// next returns the next rune in the input and advances the scanning
// position, or returns endOfFile when the input is exhausted.
func (l *Lexer) next() rune {
	if l.atEndOfFile() {
		// Width 0 makes a subsequent backup() a no-op.
		l.width = 0
		return endOfFile // TODO phase out this bizarro rune?
	}
	rn, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return rn
}
// errorf emits an error item and terminates the scan by returning
// nil as the next state function.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	message := fmt.Sprintf(format, args...)
	l.items <- Item{ItemError, message}
	return nil
}
// unexpectedTokenError emits an error item describing what was found
// at the current position, where the expected input was something else.
func (l *Lexer) unexpectedTokenError(expected string) stateFn {
	// peek() leaves the scanning position unchanged, so a single
	// lookup serves both the comparison and the error message.
	r := l.peek()
	var actual string
	switch {
	case r == endOfFile:
		actual = "end of file"
	case !utf8.ValidString(l.input[l.start:]):
		actual = "non-UTF8 data"
	default:
		actual = fmt.Sprintf("token '%c'", r)
	}
	return l.errorf("Unexpected %s (expected %s)", actual, expected)
}
// unexpectedEndOfFile emits an error item telling that the end of
// the input was reached, where the expected input was something else.
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
	return l.errorf("Unexpected end of file (expected %s)", expected)
}