306 lines
9.1 KiB
Plaintext
306 lines
9.1 KiB
Plaintext
package lexer
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"github.com/mmakaay/toml/parser"
|
|
)
|
|
|
|
// Lexer holds the state of the lexer.
// Create one with New() and drive it with Next() or ToArray().
type Lexer struct {
	input        string           // the scanned input
	state        parser.StateFn   // a function that handles the current state
	stack        []parser.StateFn // state function stack, for nested parsing
	len          int              // the total length of the input in bytes
	pos          int              // current byte scanning position in the input
	newline      bool             // keep track of when we have scanned a newline
	cursorRow    int              // current row number in the input, for error reporting
	cursorColumn int              // current column position in the input, for error reporting
	buffer       StringBuffer     // an efficient buffer, used to build string values
	items        chan parser.Item // channel of resulting lexer items
	item         parser.Item      // the current item as reached by Next() and retrieved by Get()
	err          *Error           // an error when lexing failed, retrieved by Error()
}
|
|
|
|
// Error is the error type that is used when lexing fails.
// Besides the bare message, it records the row and column at which
// the failure occurred, so callers can build useful error output.
type Error struct {
	Message string
	Row     int
	Column  int
}

// Error implements the standard error interface.
// Only the message is returned; Row and Column stay available as fields.
func (e *Error) Error() string {
	return e.Message
}
|
|
|
|
// New takes an input string and initializes the lexer for it.
|
|
func New(input string) *Lexer {
|
|
return &Lexer{
|
|
input: input,
|
|
len: len(input),
|
|
state: stateKeyValuePair,
|
|
items: make(chan parser.Item, 2),
|
|
}
|
|
}
|
|
|
|
// Next advances to the next lexer item in the input string.
|
|
// When a valid item was found, then the boolean return parameter will be true.
|
|
// On error or when reaching the end of the input, false is returned.
|
|
// When an error occurred, it will be set in the error return value, nil otherwise.
|
|
func (l *Lexer) Next() (parser.Item, *Error, bool) {
|
|
for {
|
|
select {
|
|
case i := <-l.items:
|
|
switch {
|
|
case i.Type == ItemEOF:
|
|
return i, nil, false
|
|
case i.Type == ItemError:
|
|
l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
|
|
return i, l.err, false
|
|
default:
|
|
l.item = i
|
|
return i, nil, true
|
|
}
|
|
default:
|
|
l.state = l.state(l)
|
|
}
|
|
}
|
|
}
|
|
|
|
// ToArray returns lexer items as an array (mainly intended for testing purposes)
|
|
// When an error occurs during scanning, a partial result will be
|
|
// returned, accompanied by the error that occurred.
|
|
func (l *Lexer) ToArray() ([]parser.Item, *Error) {
|
|
var items []parser.Item
|
|
for {
|
|
item, err, more := l.Next()
|
|
if !more {
|
|
return items, err
|
|
}
|
|
items = append(items, item)
|
|
}
|
|
}
|
|
|
|
// pushState adds the state function to its stack.
|
|
// This is used for implementing nested parsing.
|
|
func (l *Lexer) pushState(state stateFn) {
|
|
l.stack = append(l.stack, state)
|
|
}
|
|
|
|
// popState pops the last pushed state from its stack.
|
|
func (l *Lexer) popState() stateFn {
|
|
last := len(l.stack) - 1
|
|
head, tail := l.stack[:last], l.stack[last]
|
|
l.stack = head
|
|
return tail
|
|
}
|
|
|
|
// atEndOfFile returns true when there is no more data available in the input.
|
|
func (l *Lexer) atEndOfFile() bool {
|
|
return l.pos >= l.len
|
|
}
|
|
|
|
// emit passes a lexer item back to the client, including the provided string.
|
|
func (l *Lexer) emit(t parser.ItemType, s string) {
|
|
l.items <- parser.Item{Type: t, Value: s}
|
|
l.buffer.Reset()
|
|
}
|
|
|
|
// emitLiteral passes a lexer item back to the client, including the accumulated
|
|
// string buffer data as a literal string.
|
|
func (l *Lexer) emitLiteral(t parser.ItemType) {
|
|
l.emit(t, l.buffer.AsLiteralString())
|
|
}
|
|
|
|
// emitTrimmedLiteral passes a lexer item back to the client, including the
|
|
// accumulated string buffer data as a literal string with whitespace
|
|
// trimmed from it.
|
|
func (l *Lexer) emitTrimmedLiteral(t parser.ItemType) {
|
|
l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
|
|
}
|
|
|
|
// emitInterpreted passes a lexer item back to the client, including the
|
|
// accumulated string buffer data an interpreted string (handling escape
|
|
// codes like \n, \t, \uXXXX, etc.)
|
|
// This method might return an error, in case there is data in the
|
|
// string buffer that is not valid for string interpretation.
|
|
func (l *Lexer) emitInterpreted(t parser.ItemType) error {
|
|
s, err := l.buffer.AsInterpretedString()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
l.emit(t, s)
|
|
return nil
|
|
}
|
|
|
|
// emitError emits a lexer error item back to the client.
|
|
func (l *Lexer) emitError(format string, args ...interface{}) stateFn {
|
|
message := fmt.Sprintf(format, args...)
|
|
l.emit(ItemError, message)
|
|
return nil
|
|
}
|
|
|
|
// peek returns but does not advance to the next rune(s) in the input.
|
|
// Returns the rune, its width and a boolean. The boolean will be false in case
|
|
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
|
func (l *Lexer) peek() (rune, int, bool) {
|
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
return r, w, r != utf8.RuneError
|
|
}
|
|
|
|
// peekMulti takes a peek at multiple upcoming runes in the input.
|
|
// Returns a slice of runes and a boolean. The boolean will be false in case
|
|
// less upcoming runes can be peeked than the requested amount
|
|
// (end of data or invalid UTF8 character).
|
|
func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
|
|
width := 0
|
|
var peeked []rune
|
|
for i := 0; i < amount; i++ {
|
|
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
|
switch {
|
|
case r == utf8.RuneError:
|
|
return peeked, width, false
|
|
default:
|
|
width += w
|
|
peeked = append(peeked, r)
|
|
}
|
|
}
|
|
return peeked, width, true
|
|
}
|
|
|
|
// acceptAny adds the next rune from the input to the string buffer.
|
|
// If no rune could be read (end of file or invalid UTF8 data), then
|
|
// false is returned.
|
|
func (l *Lexer) acceptAny() bool {
|
|
if r, ok := l.next(); ok {
|
|
l.buffer.WriteRune(r)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// accept adds the next rune to the string buffer and returns true if it's
// from the valid set of runes. Otherwise false is returned.
// It is a thin alias for acceptPattern.
func (l *Lexer) accept(matches ...string) bool {
	return l.acceptPattern(matches...)
}
|
|
|
|
// acceptPattern adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided pattern.
// When runes were added then true is returned, false otherwise.
func (l *Lexer) acceptPattern(pattern ...string) bool {
	return l.progress(func(r rune) { l.buffer.WriteRune(r) }, pattern...)
}
|
|
|
|
func (l *Lexer) progress(callback func(rune), matches ...string) bool {
|
|
if runes, w, ok := l.match(matches...); ok {
|
|
l.pos += w
|
|
for _, r := range runes {
|
|
callback(r)
|
|
l.advanceCursor(r)
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// acceptConsecutive adds consecutive runes from the input to the string
|
|
// buffer when they match the rune match.
|
|
// If any runes were added then true is returned, false otherwise.
|
|
func (l *Lexer) acceptConsecutive(match string) bool {
|
|
accepted := false
|
|
for l.accept(match) {
|
|
accepted = true
|
|
}
|
|
return accepted
|
|
}
|
|
|
|
// advanceCursor advances the rune cursor one position in the
|
|
// input data. While doing so, it keeps tracks of newlines,
|
|
// so we can report on row + column positions on error.
|
|
func (l *Lexer) advanceCursor(r rune) {
|
|
if l.newline {
|
|
l.cursorColumn = 0
|
|
l.cursorRow++
|
|
} else {
|
|
l.cursorColumn++
|
|
}
|
|
l.newline = r == '\n'
|
|
}
|
|
|
|
// skipMatching skips runes, but only when all provided matches are satisfied.
// Returns true when one or more runes were skipped.
func (l *Lexer) skipMatching(pattern ...string) bool {
	return l.progress(func(r rune) {}, pattern...)
}
|
|
|
|
// skipConsecutive skips consecutive runes from the provided match.
|
|
// Returns true when one or more runes were skipped.
|
|
func (l *Lexer) skipConsecutive(pattern string) bool {
|
|
didSkip := false
|
|
for l.skipMatching(pattern) {
|
|
didSkip = true
|
|
}
|
|
return didSkip
|
|
}
|
|
|
|
// upcoming checks if the upcoming runes satisfy the provided rune matches.
|
|
// This is a lot like the match method, with the difference that
|
|
// this one only returns the boolean value.
|
|
func (l *Lexer) upcoming(matches ...string) bool {
|
|
_, _, ok := l.match(matches...)
|
|
return ok
|
|
}
|
|
|
|
// next returns the next rune from the input and a boolean indicating if
|
|
// reading the input was successful.
|
|
// When the end of input is reached, or an invalid UTF8 character is
|
|
// read, then false is returned.
|
|
func (l *Lexer) next() (rune, bool) {
|
|
r, w, ok := l.peek()
|
|
if ok {
|
|
l.pos += w
|
|
l.advanceCursor(r)
|
|
return r, true
|
|
}
|
|
if r == utf8.RuneError && w == 0 {
|
|
l.emitError("unexpected end of file")
|
|
} else {
|
|
l.emitError("invalid UTF8 character")
|
|
}
|
|
return r, false
|
|
}
|
|
|
|
// match checks if the upcoming runes satisfy the provided rune matches.
|
|
// It returns a slice of runes that were found, their total byte width
|
|
// and a boolean indicating whether or not all provided matches matched
|
|
// the input data.
|
|
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
|
|
peeked, width, ok := l.peekMulti(len(matches))
|
|
if ok {
|
|
for i, r := range matches {
|
|
if strings.IndexRune(r, peeked[i]) < 0 {
|
|
return peeked, width, false
|
|
}
|
|
}
|
|
return peeked, width, true
|
|
}
|
|
return peeked, width, false
|
|
}
|
|
|
|
func (l *Lexer) unexpectedInputError(expected string) stateFn {
|
|
// next() takes care of emitting errors for ok == false.
|
|
if r, ok := l.next(); ok {
|
|
return l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
|
|
return l.emitError("Unexpected end of file (expected %s)", expected)
|
|
}
|