// go-toml/lexer/lexer.go
package lexer
import (
"errors"
"fmt"
"strings"
"unicode/utf8"
)
// Lexer holds the state of the scanner.
type Lexer struct {
	input    string          // the scanned input string
	state    stateFn         // the current state function
	stack    []stateFn       // state stack, for nested parsing
	start    int             // start position of the currently scanned item
	pos      int             // current scanning position in the input
	width    int             // width of the last rune read, used by backup()
	strValue strings.Builder // used to build interpreted string values
	items    chan Item       // channel of scanned items
	nextItem Item            // the current item as reached by Next() and retrieved by Get()
	err      error           // an error when lexing failed, retrieved by Error()
}
// Lex takes an input string and initializes the TOML lexer for it.
// Usage:
//
//	l := lexer.Lex("...inputstring...")
//	for l.Next() {
//		item := l.Get()
//		... handle item ...
//	}
//	if e := l.Error(); e != nil {
//		... handle error message ...
//	}
func Lex(input string) *Lexer {
	l := &Lexer{
		input: input,
		state: stateKeyValuePair,
		// Buffered so a state function can emit without blocking
		// before Next() drains the channel.
		items: make(chan Item, 2),
	}
	return l
}
// Next advances to the next lexer item in the input string.
// When a next item was found, then true is returned.
// On error or reaching the end of the input, false is returned.
func (l *Lexer) Next() bool {
	for {
		select {
		case i := <-l.items:
			if i.Type == ItemEOF {
				return false
			}
			if i.Type == ItemError {
				l.err = errors.New(i.Value)
				return false
			}
			l.nextItem = i
			return true
		default:
			// Check for a nil state here, not on entry to Next():
			// a state function legitimately returns nil after emitting
			// ItemEOF or ItemError, and those pending items must still
			// be drained by the case above. Reaching a nil state with
			// an empty item channel means a state function returned nil
			// without emitting a terminating item — a programming error.
			if l.state == nil {
				panic("This should not happen: nil state reached, but no pending items in Next()")
			}
			l.state = l.state(l)
		}
	}
}
// Error returns the error that made the lexer stop, or nil when no
// error occurred. It is only set after Next() has returned false
// because of an ItemError.
func (l *Lexer) Error() error {
	return l.err
}
// ToArray returns the lexer items as a slice.
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func (l *Lexer) ToArray() ([]Item, error) {
	var collected []Item
	for l.Next() {
		collected = append(collected, l.Get())
	}
	return collected, l.Error()
}
// Get returns the next lexer item, as reached by Next().
func (l *Lexer) Get() Item {
	return l.nextItem
}
// pushState adds the state function to its stack.
// This is used for implementing nested parsing; the pushed state is
// later restored by popState().
func (l *Lexer) pushState(state stateFn) {
	l.stack = append(l.stack, state)
}
// popState pops the last pushed state from its stack.
// Note: panics when the stack is empty (callers must balance their
// pushState/popState calls).
func (l *Lexer) popState() stateFn {
	n := len(l.stack) - 1
	state := l.stack[n]
	l.stack = l.stack[:n]
	return state
}
// getAcceptedString returns the input as accepted by the
// accept* methods so far (the input between start and pos).
func (l *Lexer) getAcceptedString() string {
	return l.input[l.start:l.pos]
}
// emit passes a scanned item back to the client and marks the
// current position as the start of the next item.
func (l *Lexer) emit(t itemType, v string) {
	l.items <- Item{t, v}
	l.start = l.pos
}
// ignore skips over the pending input before the current position,
// without emitting an item for it.
func (l *Lexer) ignore() {
	l.start = l.pos
}
// atEndOfFile reports whether the scanning position has reached
// the end of the input.
func (l *Lexer) atEndOfFile() bool {
	return l.pos >= len(l.input)
}
// backup steps back one rune.
// Can be called only once per call of next(); a second call would
// rewind by the width of the same rune again.
func (l *Lexer) backup() {
	l.pos -= l.width
}
// peek returns but does not advance to the next rune in the input.
// Returns endOfFile when the input is exhausted.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
// accept consumes the next rune if it's from the valid set of runes.
// Reports whether a rune was consumed.
func (l *Lexer) accept(runes string) bool {
	if !strings.ContainsRune(runes, l.next()) {
		l.backup()
		return false
	}
	return true
}
// upcoming reports whether the next rune is from the valid set of
// runes, without consuming it.
func (l *Lexer) upcoming(runes string) bool {
	ok := l.accept(runes)
	if ok {
		l.backup()
	}
	return ok
}
// acceptNot consumes the next rune if it's not from the set of runes
// and not the end of the input. Reports whether a rune was consumed.
func (l *Lexer) acceptNot(runes string) bool {
	r := l.next()
	switch {
	case r == endOfFile:
		l.backup()
		return false
	case strings.ContainsRune(runes, r):
		l.backup()
		return false
	default:
		return true
	}
}
// acceptUntil consumes a run of runes until one from the valid set
// (or the end of the input) is encountered. Reports whether at least
// one rune was consumed.
func (l *Lexer) acceptUntil(runes string) bool {
	consumed := 0
	for l.acceptNot(runes) {
		consumed++
	}
	return consumed > 0
}
// acceptWhile consumes a run of runes from the set of accepted runes.
// Reports whether at least one rune was consumed.
func (l *Lexer) acceptWhile(runes string) bool {
	consumed := 0
	for l.accept(runes) {
		consumed++
	}
	return consumed > 0
}
// skip consumes and discards a run of runes from the set of
// accepted runes.
func (l *Lexer) skip(runes string) {
	if skipped := l.acceptWhile(runes); skipped {
		l.ignore()
	}
}
// skipUntil consumes and discards a run of runes, until a rune from
// the set of runes or the end of the input is reached.
func (l *Lexer) skipUntil(runes string) {
	if skipped := l.acceptUntil(runes); skipped {
		l.ignore()
	}
}
// resetStringBuilder initializes a new string builder, used for building
// a string by interpreting input data, e.g. for translating
// double quoted strings with escape codes into an actual
// Go string value.
func (l *Lexer) resetStringBuilder() {
	l.strValue.Reset()
}
// addToString adds a rune to the string builder.
// The WriteRune return values are deliberately ignored:
// strings.Builder is documented to never return an error.
func (l *Lexer) addToString(r rune) {
	l.strValue.WriteRune(r)
}
// getString returns the runes in the string builder as a string value.
func (l *Lexer) getString() string {
	return l.strValue.String()
}
// endOfFile is a sentinel rune value, returned by next() when the
// end of the input has been reached.
var endOfFile rune = -1
// next returns the next rune in the input and advances the scanning
// position, or returns endOfFile when the input is exhausted.
func (l *Lexer) next() rune {
	if l.atEndOfFile() {
		// Width 0 makes a subsequent backup() a no-op.
		l.width = 0
		return endOfFile // TODO phase out this bizarro rune?
	}
	rn, size := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = size
	l.pos += size
	return rn
}
// errorf emits an error item and terminates the scan by returning
// nil as the next state function.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
	message := fmt.Sprintf(format, args...)
	l.items <- Item{ItemError, message}
	return nil
}
// unexpectedTokenError emits an error item describing what was found
// at the current position, where the expected input was something else.
func (l *Lexer) unexpectedTokenError(expected string) stateFn {
	// peek() leaves the scanning position unchanged, so a single
	// lookup serves both the comparison and the error message.
	r := l.peek()
	var actual string
	switch {
	case r == endOfFile:
		actual = "end of file"
	case !utf8.ValidString(l.input[l.start:]):
		actual = "non-UTF8 data"
	default:
		actual = fmt.Sprintf("token '%c'", r)
	}
	return l.errorf("Unexpected %s (expected %s)", actual, expected)
}
// unexpectedEndOfFile emits an error item telling that the end of
// the input was reached, where the expected input was something else.
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
	return l.errorf("Unexpected end of file (expected %s)", expected)
}