go-toml/parser/parser.go

275 lines
8.3 KiB
Go

package parser
import (
"fmt"
"strings"
"unicode/utf8"
)
// New creates a Parser for the provided input string.
// The parser records the input and its length in bytes, starts out in the
// provided start state and gets an item channel that can buffer two items.
func New(input string, startState StateFn) *Parser {
	p := &Parser{
		items: make(chan Item, 2),
		state: startState,
	}
	p.input = input
	p.len = len(input)
	return p
}
// AtEndOfFile reports whether the read position has reached (or passed)
// the end of the input, meaning that no more data is available.
func (p *Parser) AtEndOfFile() bool {
	remaining := p.len - p.pos
	return remaining <= 0
}
// AtEndOfLine returns true when the parser is at the end of the file or
// when the upcoming input is a line ending (either "\r\n" or "\n").
// It only looks ahead; the checks are performed in that order and stop
// at the first one that succeeds.
func (p *Parser) AtEndOfLine() bool {
	if p.AtEndOfFile() {
		return true
	}
	if p.Upcoming("\r", "\n") {
		return true
	}
	return p.Upcoming("\n")
}
// SkipEndOfLine skips an upcoming line ending (either "\r\n" or "\n").
// Returns true when the parser is at the end of the file (in which case
// nothing is skipped) or when a line ending was skipped, false otherwise.
func (p *Parser) SkipEndOfLine() bool {
	if p.AtEndOfFile() {
		return true
	}
	if p.SkipMatching("\r", "\n") {
		return true
	}
	return p.SkipMatching("\n")
}
// AcceptEndOfLine accepts an upcoming end of line.
// At the end of the file it returns true without adding anything to the
// string buffer. When a line ending ("\r\n" or "\n") is skipped, a single
// normalized '\n' is added to the string buffer and true is returned.
// In all other cases, false is returned.
func (p *Parser) AcceptEndOfLine() bool {
	switch {
	case p.AtEndOfFile():
		// There is no newline to accept, but the line definitely ends here.
		return true
	case p.SkipEndOfLine():
		// Whatever kind of line ending was skipped, store it as a plain
		// '\n', so that '\r\n' gets normalized into '\n'.
		p.buffer.writeRune('\n')
		return true
	default:
		return false
	}
}
// Emit sends a Parser item, made up of the provided item type and string,
// to the client over the items channel. After the item has been sent,
// the string buffer is reset.
func (p *Parser) Emit(t ItemType, s string) {
	item := Item{t, s}
	p.items <- item
	p.buffer.reset()
}
// EmitLiteral emits a Parser item of the provided type to the client,
// using the data accumulated in the string buffer, taken as a literal
// string, as the item's string.
func (p *Parser) EmitLiteral(t ItemType) {
	literal := p.buffer.asLiteralString()
	p.Emit(t, literal)
}
// EmitLiteralTrim emits a Parser item of the provided type to the client,
// using the data accumulated in the string buffer, taken as a literal
// string with leading and trailing whitespace stripped from it.
func (p *Parser) EmitLiteralTrim(t ItemType) {
	literal := p.buffer.asLiteralString()
	p.Emit(t, strings.TrimSpace(literal))
}
// EmitInterpreted emits a Parser item of the provided type to the client,
// using the data accumulated in the string buffer, interpreted as a Go
// double quoted string (handling escape codes like \n, \t, \uXXXX, etc.)
// When the string buffer holds data that cannot be interpreted this way,
// the error is returned and no item is emitted.
func (p *Parser) EmitInterpreted(t ItemType) error {
	interpreted, err := p.buffer.asInterpretedString()
	if err == nil {
		p.Emit(t, interpreted)
		return nil
	}
	return err
}
// EmitError formats an error message from the provided format string and
// arguments (fmt.Sprintf style) and emits it to the client as an item
// of type ItemError. It always returns a nil StateFn.
func (p *Parser) EmitError(format string, args ...interface{}) StateFn {
	p.Emit(ItemError, fmt.Sprintf(format, args...))
	return nil
}
// Match checks the upcoming runes against the provided patterns, without
// moving forward in the input. One rune is peeked per pattern, and a
// pattern is satisfied when the rune at its position occurs in the
// pattern string.
// It returns the runes that were peeked, their total width in bytes and
// a boolean that tells whether all patterns were satisfied.
func (p *Parser) Match(patterns ...string) ([]rune, int, bool) {
	peeked, width, ok := p.peekMulti(len(patterns))
	if !ok {
		// Not enough runes could be peeked to fill all the patterns.
		return peeked, width, false
	}
	for i, pattern := range patterns {
		if !strings.ContainsRune(pattern, peeked[i]) {
			return peeked, width, false
		}
	}
	return peeked, width, true
}
// Upcoming reports whether the upcoming runes satisfy all provided
// patterns. It is Match, reduced to only its boolean result.
func (p *Parser) Upcoming(patterns ...string) bool {
	_, _, matched := p.Match(patterns...)
	return matched
}
// AcceptAny reads the next rune from the input and adds it to the
// string buffer. When no rune could be read (end of file or invalid
// UTF8 data), nothing is added and false is returned.
func (p *Parser) AcceptAny() bool {
	r, ok := p.next()
	if !ok {
		return false
	}
	p.buffer.writeRune(r)
	return true
}
// AcceptMatching adds the upcoming runes to the string buffer, but only
// when those runes satisfy all provided patterns.
// Returns true when runes were added, false otherwise.
func (p *Parser) AcceptMatching(patterns ...string) bool {
	store := func(r rune) {
		p.buffer.writeRune(r)
	}
	return p.progress(store, patterns...)
}
// AcceptConsecutive keeps adding runes from the input to the string
// buffer, for as long as the upcoming rune exists in the pattern.
// Returns true when at least one rune was added, false otherwise.
func (p *Parser) AcceptConsecutive(pattern string) bool {
	count := 0
	for p.AcceptMatching(pattern) {
		count++
	}
	return count > 0
}
// SkipMatching moves past the upcoming runes without storing them, but
// only when those runes satisfy all provided patterns.
// Returns true when the patterns were satisfied and the runes were
// skipped, false otherwise.
func (p *Parser) SkipMatching(patterns ...string) bool {
	// Skipping is progressing through the input, while doing nothing
	// with the runes that are encountered.
	discard := func(rune) {}
	return p.progress(discard, patterns...)
}
// SkipConsecutive keeps skipping runes from the input, for as long as
// the upcoming rune exists in the provided pattern.
// Returns true when at least one rune was skipped, false otherwise.
func (p *Parser) SkipConsecutive(pattern string) bool {
	skipped := 0
	for p.SkipMatching(pattern) {
		skipped++
	}
	return skipped > 0
}
// ============================================================================
// EMIT DATA AND ERRORS
// ============================================================================
// UnexpectedInputError is used by a parser implementation to emit an
// error item that tells the client that an unexpected rune was
// encountered in the input.
// The parameter 'expected' is used to provide some context to the error.
// The offending rune is read from the input using next(). When no rune
// could be read, next() has already emitted its own error, and no
// additional error is emitted here. A nil StateFn is returned when no
// rune could be read; otherwise the result of EmitError is returned.
func (p *Parser) UnexpectedInputError(expected string) StateFn {
	r, ok := p.next()
	if !ok {
		// next() takes care of error messages for ok == false.
		return nil
	}
	// The rune and the expectation are passed as arguments, not as a
	// pre-formatted message: EmitError treats its first parameter as a
	// format string, so a '%' in either of them must not end up in there.
	return p.EmitError("unexpected character %q (expected %s)", r, expected)
}
// UnexpectedEndOfFile is used by a parser implementation to emit an
// error item that tells the client that the input ended while more data
// was expected.
// The parameter 'expected' describes what was expected and is included
// in the error message. The result of EmitError is returned.
func (p *Parser) UnexpectedEndOfFile(expected string) StateFn {
	const format = "Unexpected end of file (expected %s)"
	return p.EmitError(format, expected)
}
// ============================================================================
// LEXER : our lexer is quite low level, it only returns UTF8 runes
// ============================================================================
// peek returns but does not advance to the next rune in the input.
// Returns the rune, its width in bytes and a boolean. The boolean will be
// false in case no upcoming rune can be peeked (end of data or invalid
// UTF8 character). In those cases, the rune is utf8.RuneError and the
// width is respectively 0 or 1.
func (p *Parser) peek() (rune, int, bool) {
	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
	// A decoding failure is reported as utf8.RuneError with a width of
	// 0 (no data) or 1 (invalid encoding). A properly encoded U+FFFD
	// character yields the same rune value, but with a width of 3, and
	// that is valid input which must not be rejected.
	ok := r != utf8.RuneError || w > 1
	return r, w, ok
}
// peekMulti takes a peek at multiple upcoming runes in the input,
// without advancing the read position.
// Returns a slice of runes, their total width in bytes and a boolean.
// The boolean will be false in case less runes can be peeked than
// the requested amount (end of data or invalid UTF8 character). In that
// case, the returned slice and width cover the runes that could be
// peeked before running into the problem.
func (p *Parser) peekMulti(amount int) ([]rune, int, bool) {
	// The number of runes is known up front, so allocate the slice once.
	peeked := make([]rune, 0, amount)
	width := 0
	for len(peeked) < amount {
		r, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
		// A decoding failure is reported as utf8.RuneError with a width
		// of 0 (no data) or 1 (invalid encoding). A properly encoded
		// U+FFFD character has a width of 3 and is valid input.
		if r == utf8.RuneError && w <= 1 {
			return peeked, width, false
		}
		peeked = append(peeked, r)
		width += w
	}
	return peeked, width, true
}
// progress moves the read position forward in the input, consuming one
// rune for every provided pattern. The position is only moved when all
// patterns are satisfied by the upcoming runes.
// The callback function is called for every consumed rune, in input
// order and right before the cursor is advanced for that rune. It
// determines what is done with the runes that are encountered.
// Returns true when all patterns were satisfied and the position was
// moved forward, false otherwise.
func (p *Parser) progress(callback func(rune), patterns ...string) bool {
	runes, width, matched := p.Match(patterns...)
	if !matched {
		return false
	}
	p.pos += width
	for _, r := range runes {
		callback(r)
		p.advanceCursor(r)
	}
	return true
}
// next reads the next rune from the input, moving the read position and
// the cursor past it. It returns the rune and a boolean that tells
// whether reading was successful.
// Reading fails at the end of the input and on an invalid UTF8
// character. Both are considered error cases, for which an error is
// automatically emitted to the client before returning false.
func (p *Parser) next() (rune, bool) {
	r, w, ok := p.peek()
	switch {
	case ok:
		p.pos += w
		p.advanceCursor(r)
		return r, true
	case r == utf8.RuneError && w == 0:
		// An error rune without any width means there was no data left.
		p.EmitError("unexpected end of file")
	default:
		p.EmitError("invalid UTF8 character")
	}
	return r, false
}
// advanceCursor updates the row + column cursor for a rune that was
// consumed from the input, so positions can be reported on error.
// A newline does not move the cursor to the next row right away: it is
// only remembered, and the first rune that is consumed after it moves
// the cursor to column 0 of the next row. Any other rune moves the
// cursor one column forward.
// NOTE(review): assuming the cursor fields start at zero, the first rune
// of the input moves the column to 1, whereas the first rune after a
// newline leaves the column at 0 — confirm that the code that reports
// cursorRow / cursorColumn expects this.
func (p *Parser) advanceCursor(r rune) {
	afterNewline := p.newline
	p.newline = r == '\n'
	if !afterNewline {
		p.cursorColumn++
		return
	}
	p.cursorRow++
	p.cursorColumn = 0
}