package parser import ( "fmt" "strings" "unicode/utf8" ) // New takes an input string and a start state, // and initializes the parser for it. func New(input string, startState StateFn) *Parser { return &Parser{ input: input, len: len(input), state: startState, items: make(chan Item, 2), } } // PushState adds the state function to the state stack. // This is used for implementing nested parsing. func (l *Parser) PushState(state StateFn) { l.stack = append(l.stack, state) } // PopState pops the last pushed state from the state stack. func (l *Parser) PopState() StateFn { last := len(l.stack) - 1 head, tail := l.stack[:last], l.stack[last] l.stack = head return tail } // AtEndOfFile returns true when there is no more data available in the input. func (l *Parser) AtEndOfFile() bool { return l.pos >= l.len } // Emit passes a Parser item to the client, including the provided string. func (l *Parser) Emit(t ItemType, s string) { l.items <- Item{t, s} l.buffer.Reset() } // EmitLiteral passes a Parser item to the client, including the accumulated // string buffer data as a literal string. func (l *Parser) EmitLiteral(t ItemType) { l.Emit(t, l.buffer.AsLiteralString()) } // EmitLiteralTrim passes a Parser item to the client, including the // accumulated string buffer data as a literal string with whitespace // trimmed from it. func (l *Parser) EmitLiteralTrim(t ItemType) { l.Emit(t, strings.TrimSpace(l.buffer.AsLiteralString())) } // EmitInterpreted passes a Parser item to the client, including the // accumulated string buffer data a Go doubled quoted interpreted string // (handling escape codes like \n, \t, \uXXXX, etc.) // This method might return an error, in case there is data in the // string buffer that is not valid for string interpretation. func (l *Parser) EmitInterpreted(t ItemType) error { s, err := l.buffer.AsInterpretedString() if err != nil { return err } l.Emit(t, s) return nil } // EmitError emits a Parser error item to the client. func (l *Parser) EmitError(format string, args ...interface{}) StateFn { message := fmt.Sprintf(format, args...) l.Emit(ItemError, message) return nil } // Match checks if the upcoming runes satisfy all provided patterns. // It returns a slice of runes that were found, their total byte width // and a boolean indicating whether or not all provided patterns were // satisfied by the input data. func (l *Parser) Match(patterns ...string) ([]rune, int, bool) { peeked, width, ok := l.peekMulti(len(patterns)) if ok { for i, r := range patterns { if strings.IndexRune(r, peeked[i]) < 0 { return peeked, width, false } } return peeked, width, true } return peeked, width, false } // Upcoming checks if the upcoming runes satisfy all provided patterns. // Returns true if all provided patterns are satisfied. func (l *Parser) Upcoming(patterns ...string) bool { _, _, ok := l.Match(patterns...) return ok } // AcceptAny adds the next rune from the input to the string buffer. // If no rune could be read (end of file or invalid UTF8 data), // then false is returned. func (l *Parser) AcceptAny() bool { if r, ok := l.next(); ok { l.buffer.WriteRune(r) return true } return false } // AcceptMatching adds the next runes to the string buffer, but only // if the upcoming runes satisfy the provided patterns. // When runes were added then true is returned, false otherwise. func (l *Parser) AcceptMatching(patterns ...string) bool { return l.progress(func(r rune) { l.buffer.WriteRune(r) }, patterns...) } // AcceptConsecutive adds consecutive runes from the input to the string // buffer, as long as they exist in the pattern. // If any runes were added then true is returned, false otherwise. func (l *Parser) AcceptConsecutive(pattern string) bool { accepted := false for l.AcceptMatching(pattern) { accepted = true } return accepted } // SkipMatching skips runes, but only when all provided patterns are satisfied. // Returns true when one or more runes were skipped. func (l *Parser) SkipMatching(patterns ...string) bool { if runes, w, ok := l.Match(patterns...); ok { l.pos += w for _, r := range runes { l.advanceCursor(r) } return true } return false } // SkipConsecutive skips consecutive runes from the provided pattern. // Returns true when one or more runes were skipped. func (l *Parser) SkipConsecutive(pattern string) bool { didSkip := false for l.SkipMatching(pattern) { didSkip = true } return didSkip } // ============================================================================ // EMIT DATA AND ERRORS // ============================================================================ // UnexpectedInputError is used by a parser implementation to emit an // error item that tells the client that an unexpected rune was // encountered in the input. // The parameter 'expected' is used to provide some context to the error. func (l *Parser) UnexpectedInputError(expected string) StateFn { // next() takes care of error messages for ok == false. if r, ok := l.next(); ok { return l.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) } return nil } // UnexpectedEndOfFile is used by a parser implementation to emit an // error item that tells the client that more data was expected from // the input. // The parameter 'expected' is used to provide some context to the error. func (l *Parser) UnexpectedEndOfFile(expected string) StateFn { return l.EmitError("Unexpected end of file (expected %s)", expected) } // ============================================================================ // LEXER : our lexer is quite low level, it only returns UTF8 runes // ============================================================================ // peek returns but does not advance to the next rune(s) in the input. // Returns the rune, its width and a boolean. The boolean will be false in case // no upcoming rune can be peeked (end of data or invalid UTF8 character). func (l *Parser) peek() (rune, int, bool) { peeked, width := utf8.DecodeRuneInString(l.input[l.pos:]) return peeked, width, peeked != utf8.RuneError } // peekMulti takes a peek at multiple upcoming runes in the input. // Returns a slice of runes, their total width in bytes and a boolean. // The boolean will be false in case less runes can be peeked than // the requested amount (end of data or invalid UTF8 character). func (l *Parser) peekMulti(amount int) ([]rune, int, bool) { width := 0 var peeked []rune for i := 0; i < amount; i++ { r, w := utf8.DecodeRuneInString(l.input[l.pos+width:]) switch { case r == utf8.RuneError: return peeked, width, false default: width += w peeked = append(peeked, r) } } return peeked, width, true } // progress moves the cursor forward in the input, returning one rune // for every specified pattern. The cursor is only moved forward when // all patterns are satisfied. // Returns true when all patterns were satisfied and the cursor was // moved forward, false otherwise. // A callback function can be provided to specify what to do with // the runes that are encountered in the input. func (l *Parser) progress(callback func(rune), patterns ...string) bool { if runes, w, ok := l.Match(patterns...); ok { l.pos += w for _, r := range runes { callback(r) l.advanceCursor(r) } return true } return false } // next returns the next rune from the input and a boolean indicating if // reading the input was successful. // When the end of input is reached, or an invalid UTF8 character is // read, then false is returned. Both are considered error cases, // and for that reason these automatically emit an error to the client. func (l *Parser) next() (rune, bool) { r, w, ok := l.peek() if ok { l.pos += w l.advanceCursor(r) return r, true } if r == utf8.RuneError && w == 0 { l.EmitError("unexpected end of file") } else { l.EmitError("invalid UTF8 character") } return r, false } // advanceCursor advances the rune cursor one position in the // input data. While doing so, it keeps tracks of newlines, // so we can report on row + column positions on error. func (l *Parser) advanceCursor(r rune) { if l.newline { l.cursorColumn = 0 l.cursorRow++ } else { l.cursorColumn++ } l.newline = r == '\n' }