go-toml/parser/parser.go

262 lines
8.1 KiB
Go

package parser
import (
"fmt"
"strings"
"unicode/utf8"
)
// New creates a Parser for the given input string.
// The parser starts out in startState, records the byte length of the
// input, and gets an items channel with room for two buffered items.
func New(input string, startState StateFn) *Parser {
	p := &Parser{}
	p.input = input
	p.len = len(input)
	p.state = startState
	p.items = make(chan Item, 2)
	return p
}
// PushState puts a state function on top of the parser's state stack.
// Together with PopState, this is the mechanism for nested parsing.
func (l *Parser) PushState(state StateFn) {
	grown := append(l.stack, state)
	l.stack = grown
}
// PopState removes the most recently pushed state function from the
// state stack and returns it.
// The stack must not be empty: slicing an empty stack here results in a
// runtime panic.
func (l *Parser) PopState() StateFn {
	top := len(l.stack) - 1
	remaining := l.stack[:top]
	popped := l.stack[top]
	l.stack = remaining
	return popped
}
// AtEndOfFile reports whether the read position has reached (or passed)
// the end of the input, i.e. no more data is available.
func (l *Parser) AtEndOfFile() bool {
	return l.len <= l.pos
}
// Emit sends an item of type t, carrying the string s, to the client over
// the items channel. After the item has been sent, the string buffer is
// reset, so the next item starts with an empty buffer.
func (l *Parser) Emit(t ItemType, s string) {
	item := Item{t, s}
	l.items <- item
	l.buffer.Reset()
}
// EmitLiteral sends an item of type t to the client, using the data
// accumulated in the string buffer, taken as a literal string, as the
// item's value.
func (l *Parser) EmitLiteral(t ItemType) {
	literal := l.buffer.AsLiteralString()
	l.Emit(t, literal)
}
// EmitLiteralTrim sends an item of type t to the client, using the data
// accumulated in the string buffer, taken as a literal string, as the
// item's value. Leading and trailing whitespace is stripped from the
// value before it is emitted.
func (l *Parser) EmitLiteralTrim(t ItemType) {
	literal := l.buffer.AsLiteralString()
	trimmed := strings.TrimSpace(literal)
	l.Emit(t, trimmed)
}
// EmitInterpreted sends an item of type t to the client, using the data
// accumulated in the string buffer as the item's value. The data is
// treated as a Go double quoted interpreted string (handling escape
// codes like \n, \t, \uXXXX, etc.)
// When the buffer data cannot be interpreted, the error is returned and
// no item is emitted.
func (l *Parser) EmitInterpreted(t ItemType) error {
	interpreted, err := l.buffer.AsInterpretedString()
	if err == nil {
		l.Emit(t, interpreted)
		// Return a literal nil instead of err, so the result is a nil
		// error regardless of the static type of err.
		return nil
	}
	return err
}
// EmitError formats a message (fmt.Sprintf style) and sends it to the
// client as an ItemError item.
// The returned StateFn is always nil, which allows callers to write
// `return l.EmitError(...)`.
func (l *Parser) EmitError(format string, args ...interface{}) StateFn {
	l.Emit(ItemError, fmt.Sprintf(format, args...))
	return nil
}
// Match checks the upcoming runes against the provided patterns, without
// advancing the read position. Every pattern is a set of acceptable
// runes: the first upcoming rune must occur in the first pattern, the
// second rune in the second pattern, and so on.
// It returns the runes that were peeked, their total byte width and a
// boolean that is true only when every pattern was satisfied.
func (l *Parser) Match(patterns ...string) ([]rune, int, bool) {
	runes, width, ok := l.peekMulti(len(patterns))
	if !ok {
		// Not enough (valid) runes available to check all patterns.
		return runes, width, false
	}
	for i, pattern := range patterns {
		if !strings.ContainsRune(pattern, runes[i]) {
			return runes, width, false
		}
	}
	return runes, width, true
}
// Upcoming reports whether the upcoming runes satisfy all provided
// patterns. It is Match, reduced to its boolean result.
func (l *Parser) Upcoming(patterns ...string) bool {
	_, _, satisfied := l.Match(patterns...)
	return satisfied
}
// AcceptAny reads the next rune from the input, whatever it is, and
// appends it to the string buffer.
// It returns false when no rune could be read (end of file or invalid
// UTF8 data); in that case nothing is added to the buffer.
func (l *Parser) AcceptAny() bool {
	r, ok := l.next()
	if !ok {
		return false
	}
	l.buffer.WriteRune(r)
	return true
}
// AcceptMatching appends the upcoming runes to the string buffer and
// moves past them, but only when those runes satisfy all provided
// patterns.
// It returns true when the runes were accepted, false otherwise.
func (l *Parser) AcceptMatching(patterns ...string) bool {
	appendToBuffer := func(r rune) {
		l.buffer.WriteRune(r)
	}
	return l.progress(appendToBuffer, patterns...)
}
// AcceptConsecutive keeps appending runes from the input to the string
// buffer for as long as the upcoming rune occurs in the pattern.
// It returns true when at least one rune was added, false otherwise.
func (l *Parser) AcceptConsecutive(pattern string) bool {
	count := 0
	for l.AcceptMatching(pattern) {
		count++
	}
	return count > 0
}
// SkipMatching moves past the upcoming runes without storing them, but
// only when those runes satisfy all provided patterns.
// It returns true when the patterns were satisfied and the runes were
// skipped, false otherwise.
func (l *Parser) SkipMatching(patterns ...string) bool {
	matched, width, ok := l.Match(patterns...)
	if !ok {
		return false
	}
	l.pos += width
	// Keep the row/column bookkeeping in sync with the skipped runes.
	for i := range matched {
		l.advanceCursor(matched[i])
	}
	return true
}
// SkipConsecutive keeps skipping runes from the input for as long as the
// upcoming rune occurs in the pattern.
// It returns true when at least one rune was skipped, false otherwise.
func (l *Parser) SkipConsecutive(pattern string) bool {
	skipped := 0
	for l.SkipMatching(pattern) {
		skipped++
	}
	return skipped > 0
}
// ============================================================================
// EMIT DATA AND ERRORS
// ============================================================================
// UnexpectedInputError is used by a parser implementation to emit an
// error item that tells the client that an unexpected rune was
// encountered in the input.
// The parameter 'expected' is used to provide some context to the error.
//
// The offending rune is read from the input using next(), which moves
// the read position past it. When no rune can be read (end of file or
// invalid UTF8 data), next() has already emitted an error item and no
// further item is emitted here. The returned StateFn is always nil.
func (l *Parser) UnexpectedInputError(expected string) StateFn {
	// next() takes care of error messages for ok == false.
	if r, ok := l.next(); ok {
		// Hand the format string and its arguments to EmitError as-is.
		// Pre-formatting the message and passing the result as the format
		// string would run it through Sprintf twice, mangling any '%'
		// that occurs in the rune or in the expected description.
		return l.EmitError("unexpected character %q (expected %s)", r, expected)
	}
	return nil
}
// UnexpectedEndOfFile is for parser implementations that ran out of
// input while more data was required. It emits an error item that
// reports this to the client.
// The parameter 'expected' describes what was expected instead, to give
// the error some context. The returned StateFn is the one returned by
// EmitError.
func (l *Parser) UnexpectedEndOfFile(expected string) StateFn {
	const format = "Unexpected end of file (expected %s)"
	return l.EmitError(format, expected)
}
// ============================================================================
// LEXER : our lexer is quite low level, it only returns UTF8 runes
// ============================================================================
// peek returns the next rune in the input, without advancing the read
// position.
// Returns the rune, its width in bytes and a boolean. The boolean is
// false when no upcoming rune can be peeked: at the end of the data
// (rune utf8.RuneError, width 0) or on an invalid UTF8 encoding
// (rune utf8.RuneError, width 1).
func (l *Parser) peek() (rune, int, bool) {
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	// A decoding failure is always reported with a width of 0 or 1.
	// A utf8.RuneError with a larger width is a correctly encoded U+FFFD
	// (replacement character) in the input, which is valid data and must
	// not be reported as an error.
	ok := r != utf8.RuneError || w > 1
	return r, w, ok
}
// peekMulti takes a peek at multiple upcoming runes in the input,
// without advancing the read position.
// Returns a slice of runes, their total width in bytes and a boolean.
// The boolean is false when fewer runes can be peeked than the requested
// amount (end of data or invalid UTF8 encoding). In that case, the runes
// that could be peeked before the failure, and their width, are returned.
func (l *Parser) peekMulti(amount int) ([]rune, int, bool) {
	width := 0
	var peeked []rune
	for i := 0; i < amount; i++ {
		r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
		// A decoding failure is always reported with a width of 0 (no
		// data left) or 1 (invalid encoding). A utf8.RuneError with a
		// larger width is a correctly encoded U+FFFD (replacement
		// character) in the input, which is valid data.
		if r == utf8.RuneError && w <= 1 {
			return peeked, width, false
		}
		width += w
		peeked = append(peeked, r)
	}
	return peeked, width, true
}
// progress checks the upcoming runes against the provided patterns (one
// rune per pattern) and, only when all patterns are satisfied, moves the
// read position past those runes.
// The callback is invoked once for every rune that is passed, in input
// order, so the caller can decide what to do with it. The cursor
// bookkeeping is updated after each callback invocation.
// Returns true when all patterns were satisfied and the position was
// moved forward, false otherwise.
func (l *Parser) progress(callback func(rune), patterns ...string) bool {
	matched, width, ok := l.Match(patterns...)
	if !ok {
		return false
	}
	l.pos += width
	for i := range matched {
		callback(matched[i])
		l.advanceCursor(matched[i])
	}
	return true
}
// next reads the next rune from the input and moves the read position
// past it. It returns the rune and a boolean telling whether or not the
// read was successful.
// Reaching the end of the input and reading an invalid UTF8 character
// both make the read fail. These are considered error cases, for which
// an error item is emitted to the client automatically, before false is
// returned.
func (l *Parser) next() (rune, bool) {
	r, width, ok := l.peek()
	switch {
	case ok:
		l.pos += width
		l.advanceCursor(r)
		return r, true
	case r == utf8.RuneError && width == 0:
		// A zero width means that there was nothing left to decode.
		l.EmitError("unexpected end of file")
	default:
		l.EmitError("invalid UTF8 character")
	}
	return r, false
}
// advanceCursor moves the rune cursor one position forward in the input
// data. It keeps track of newlines while doing so, which makes it
// possible to report row + column positions on error.
//
// A newline rune does not move the cursor to the next row right away.
// It is only remembered, and the row change is applied when the rune
// that follows it is processed.
//
// NOTE(review): starting from a zero cursor, the first rune of the input
// leaves cursorColumn at 1, whereas the first rune after a newline leaves
// it at 0 - confirm that this asymmetry is intended.
func (l *Parser) advanceCursor(r rune) {
	afterNewline := l.newline
	l.newline = r == '\n'
	if !afterNewline {
		l.cursorColumn++
		return
	}
	l.cursorColumn = 0
	l.cursorRow++
}