299 lines
8.9 KiB
Go
299 lines
8.9 KiB
Go
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// New takes an input string and a start state,
|
|
// and initializes the parser for it.
|
|
func New(input string, startState StateFn) *Parser {
|
|
return &Parser{
|
|
input: input,
|
|
len: len(input),
|
|
state: startState,
|
|
items: make(chan Item, 2),
|
|
}
|
|
}
|
|
|
|
func (p *Parser) ToChildState(state StateFn) StateFn {
|
|
p.PushState(p.state)
|
|
return state
|
|
}
|
|
|
|
func (p *Parser) ToParentState() StateFn {
|
|
state := p.PopState()
|
|
return state
|
|
}
|
|
|
|
// PushState adds the state function to the state stack.
|
|
// This is used for implementing nested parsing.
|
|
func (l *Parser) PushState(state StateFn) {
|
|
l.stack = append(l.stack, state)
|
|
}
|
|
|
|
// PopState pops the last pushed state from the state stack.
|
|
func (l *Parser) PopState() StateFn {
|
|
last := len(l.stack) - 1
|
|
head, tail := l.stack[:last], l.stack[last]
|
|
l.stack = head
|
|
return tail
|
|
}
|
|
|
|
// AtEndOfFile returns true when there is no more data available in the input.
|
|
func (l *Parser) AtEndOfFile() bool {
|
|
return l.pos >= l.len
|
|
}
|
|
|
|
func (p *Parser) AtEndOfLine() bool {
|
|
return p.AtEndOfFile() ||
|
|
p.Upcoming("\r", "\n") ||
|
|
p.Upcoming("\n")
|
|
}
|
|
|
|
func (p *Parser) SkipEndOfLine() bool {
|
|
return p.AtEndOfFile() ||
|
|
p.SkipMatching("\r", "\n") ||
|
|
p.SkipMatching("\n")
|
|
}
|
|
|
|
func (p *Parser) AcceptEndOfLine() bool {
|
|
// No newline, but we're defintely at the end of the line here.
|
|
if p.AtEndOfFile() {
|
|
return true
|
|
}
|
|
// If we see some kind of end of line, then we accept a
|
|
// normalized newline, which is just a '\n'. This will normalize
|
|
// '\r\n' into '\n'.
|
|
if p.SkipEndOfLine() {
|
|
p.buffer.WriteRune('\n')
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Emit passes a Parser item to the client, including the provided string.
|
|
func (l *Parser) Emit(t ItemType, s string) {
|
|
l.items <- Item{t, s}
|
|
l.buffer.Reset()
|
|
}
|
|
|
|
// EmitLiteral passes a Parser item to the client, including the accumulated
|
|
// string buffer data as a literal string.
|
|
func (l *Parser) EmitLiteral(t ItemType) {
|
|
l.Emit(t, l.buffer.AsLiteralString())
|
|
}
|
|
|
|
// EmitLiteralTrim passes a Parser item to the client, including the
|
|
// accumulated string buffer data as a literal string with whitespace
|
|
// trimmed from it.
|
|
func (l *Parser) EmitLiteralTrim(t ItemType) {
|
|
l.Emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
|
|
}
|
|
|
|
// EmitInterpreted passes a Parser item to the client, including the
|
|
// accumulated string buffer data a Go doubled quoted interpreted string
|
|
// (handling escape codes like \n, \t, \uXXXX, etc.)
|
|
// This method might return an error, in case there is data in the
|
|
// string buffer that is not valid for string interpretation.
|
|
func (l *Parser) EmitInterpreted(t ItemType) error {
|
|
s, err := l.buffer.AsInterpretedString()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
l.Emit(t, s)
|
|
return nil
|
|
}
|
|
|
|
// EmitError emits a Parser error item to the client.
|
|
func (l *Parser) EmitError(format string, args ...interface{}) StateFn {
|
|
message := fmt.Sprintf(format, args...)
|
|
l.Emit(ItemError, message)
|
|
return nil
|
|
}
|
|
|
|
// Match checks if the upcoming runes satisfy all provided patterns.
|
|
// It returns a slice of runes that were found, their total byte width
|
|
// and a boolean indicating whether or not all provided patterns were
|
|
// satisfied by the input data.
|
|
func (l *Parser) Match(patterns ...string) ([]rune, int, bool) {
|
|
peeked, width, ok := l.peekMulti(len(patterns))
|
|
if ok {
|
|
for i, r := range patterns {
|
|
if strings.IndexRune(r, peeked[i]) < 0 {
|
|
return peeked, width, false
|
|
}
|
|
}
|
|
return peeked, width, true
|
|
}
|
|
return peeked, width, false
|
|
}
|
|
|
|
// Upcoming checks if the upcoming runes satisfy all provided patterns.
|
|
// Returns true if all provided patterns are satisfied.
|
|
func (l *Parser) Upcoming(patterns ...string) bool {
|
|
_, _, ok := l.Match(patterns...)
|
|
return ok
|
|
}
|
|
|
|
// AcceptAny adds the next rune from the input to the string buffer.
|
|
// If no rune could be read (end of file or invalid UTF8 data),
|
|
// then false is returned.
|
|
func (l *Parser) AcceptAny() bool {
|
|
if r, ok := l.next(); ok {
|
|
l.buffer.WriteRune(r)
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// AcceptMatching adds the next runes to the string buffer, but only
|
|
// if the upcoming runes satisfy the provided patterns.
|
|
// When runes were added then true is returned, false otherwise.
|
|
func (l *Parser) AcceptMatching(patterns ...string) bool {
|
|
return l.progress(func(r rune) { l.buffer.WriteRune(r) }, patterns...)
|
|
}
|
|
|
|
// AcceptConsecutive adds consecutive runes from the input to the string
|
|
// buffer, as long as they exist in the pattern.
|
|
// If any runes were added then true is returned, false otherwise.
|
|
func (l *Parser) AcceptConsecutive(pattern string) bool {
|
|
accepted := false
|
|
for l.AcceptMatching(pattern) {
|
|
accepted = true
|
|
}
|
|
return accepted
|
|
}
|
|
|
|
// SkipMatching skips runes, but only when all provided patterns are satisfied.
|
|
// Returns true when one or more runes were skipped.
|
|
func (l *Parser) SkipMatching(patterns ...string) bool {
|
|
if runes, w, ok := l.Match(patterns...); ok {
|
|
l.pos += w
|
|
for _, r := range runes {
|
|
l.advanceCursor(r)
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SkipConsecutive skips consecutive runes from the provided pattern.
|
|
// Returns true when one or more runes were skipped.
|
|
func (l *Parser) SkipConsecutive(pattern string) bool {
|
|
didSkip := false
|
|
for l.SkipMatching(pattern) {
|
|
didSkip = true
|
|
}
|
|
return didSkip
|
|
}
|
|
|
|
// ============================================================================
|
|
// EMIT DATA AND ERRORS
|
|
// ============================================================================
|
|
|
|
// UnexpectedInputError is used by a parser implementation to emit an
|
|
// error item that tells the client that an unexpected rune was
|
|
// encountered in the input.
|
|
// The parameter 'expected' is used to provide some context to the error.
|
|
func (l *Parser) UnexpectedInputError(expected string) StateFn {
|
|
// next() takes care of error messages for ok == false.
|
|
if r, ok := l.next(); ok {
|
|
return l.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// UnexpectedEndOfFile is used by a parser implementation to emit an
|
|
// error item that tells the client that more data was expected from
|
|
// the input.
|
|
// The parameter 'expected' is used to provide some context to the error.
|
|
func (l *Parser) UnexpectedEndOfFile(expected string) StateFn {
|
|
return l.EmitError("Unexpected end of file (expected %s)", expected)
|
|
}
|
|
|
|
// ============================================================================
|
|
// LEXER: our lexer is quite low level; it only returns UTF-8 runes
|
|
// ============================================================================
|
|
|
|
// peek returns but does not advance to the next rune(s) in the input.
|
|
// Returns the rune, its width and a boolean. The boolean will be false in case
|
|
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
|
func (l *Parser) peek() (rune, int, bool) {
|
|
peeked, width := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
return peeked, width, peeked != utf8.RuneError
|
|
}
|
|
|
|
// peekMulti takes a peek at multiple upcoming runes in the input.
|
|
// Returns a slice of runes, their total width in bytes and a boolean.
|
|
// The boolean will be false in case less runes can be peeked than
|
|
// the requested amount (end of data or invalid UTF8 character).
|
|
func (l *Parser) peekMulti(amount int) ([]rune, int, bool) {
|
|
width := 0
|
|
var peeked []rune
|
|
for i := 0; i < amount; i++ {
|
|
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
|
switch {
|
|
case r == utf8.RuneError:
|
|
return peeked, width, false
|
|
default:
|
|
width += w
|
|
peeked = append(peeked, r)
|
|
}
|
|
}
|
|
return peeked, width, true
|
|
}
|
|
|
|
// progress moves the cursor forward in the input, returning one rune
|
|
// for every specified pattern. The cursor is only moved forward when
|
|
// all patterns are satisfied.
|
|
// Returns true when all patterns were satisfied and the cursor was
|
|
// moved forward, false otherwise.
|
|
// A callback function can be provided to specify what to do with
|
|
// the runes that are encountered in the input.
|
|
func (l *Parser) progress(callback func(rune), patterns ...string) bool {
|
|
if runes, w, ok := l.Match(patterns...); ok {
|
|
l.pos += w
|
|
for _, r := range runes {
|
|
callback(r)
|
|
l.advanceCursor(r)
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// next returns the next rune from the input and a boolean indicating if
|
|
// reading the input was successful.
|
|
// When the end of input is reached, or an invalid UTF8 character is
|
|
// read, then false is returned. Both are considered error cases,
|
|
// and for that reason these automatically emit an error to the client.
|
|
func (l *Parser) next() (rune, bool) {
|
|
r, w, ok := l.peek()
|
|
if ok {
|
|
l.pos += w
|
|
l.advanceCursor(r)
|
|
return r, true
|
|
}
|
|
if r == utf8.RuneError && w == 0 {
|
|
l.EmitError("unexpected end of file")
|
|
} else {
|
|
l.EmitError("invalid UTF8 character")
|
|
}
|
|
return r, false
|
|
}
|
|
|
|
// advanceCursor advances the rune cursor one position in the
|
|
// input data. While doing so, it keeps tracks of newlines,
|
|
// so we can report on row + column positions on error.
|
|
func (l *Parser) advanceCursor(r rune) {
|
|
if l.newline {
|
|
l.cursorColumn = 0
|
|
l.cursorRow++
|
|
} else {
|
|
l.cursorColumn++
|
|
}
|
|
l.newline = r == '\n'
|
|
}
|