package parser import ( "fmt" "strings" "unicode/utf8" ) // New takes an input string and a start state, // and initializes the parser for it. func New(input string, startState StateFn) *Parser { return &Parser{ input: input, len: len(input), state: startState, items: make(chan Item, 2), } } // AtEndOfFile returns true when there is no more data available in the input. func (p *Parser) AtEndOfFile() bool { return p.pos >= p.len } func (p *Parser) AtEndOfLine() bool { return p.AtEndOfFile() || p.Upcoming("\r", "\n") || p.Upcoming("\n") } func (p *Parser) SkipEndOfLine() bool { return p.AtEndOfFile() || p.SkipMatching("\r", "\n") || p.SkipMatching("\n") } func (p *Parser) AcceptEndOfLine() bool { // No newline, but we're defintely at the end of the line here. if p.AtEndOfFile() { return true } // If we see some kind of end of line, then we accept a // normalized newline, which is just a '\n'. This will normalize // '\r\n' into '\n'. if p.SkipEndOfLine() { p.buffer.writeRune('\n') return true } return false } // Emit passes a Parser item to the client, including the provided string. func (p *Parser) Emit(t ItemType, s string) { p.items <- Item{t, s} p.buffer.reset() } // EmitLiteral passes a Parser item to the client, including the accumulated // string buffer data as a literal string. func (p *Parser) EmitLiteral(t ItemType) { p.Emit(t, p.buffer.asLiteralString()) } // EmitLiteralTrim passes a Parser item to the client, including the // accumulated string buffer data as a literal string with whitespace // trimmed from it. func (p *Parser) EmitLiteralTrim(t ItemType) { p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString())) } // EmitInterpreted passes a Parser item to the client, including the // accumulated string buffer data a Go doubled quoted interpreted string // (handling escape codes like \n, \t, \uXXXX, etc.) // This method might return an error, in case there is data in the // string buffer that is not valid for string interpretation. 
func (p *Parser) EmitInterpreted(t ItemType) error { s, err := p.buffer.asInterpretedString() if err != nil { return err } p.Emit(t, s) return nil } // EmitError emits a Parser error item to the client. func (p *Parser) EmitError(format string, args ...interface{}) StateFn { message := fmt.Sprintf(format, args...) p.Emit(ItemError, message) return nil } // Match checks if the upcoming runes satisfy all provided patterns. // It returns a slice of runes that were found, their total byte width // and a boolean indicating whether or not all provided patterns were // satisfied by the input data. func (p *Parser) Match(patterns ...string) ([]rune, int, bool) { peeked, width, ok := p.peekMulti(len(patterns)) if ok { for i, r := range patterns { if strings.IndexRune(r, peeked[i]) < 0 { return peeked, width, false } } return peeked, width, true } return peeked, width, false } // Upcoming checks if the upcoming runes satisfy all provided patterns. // Returns true if all provided patterns are satisfied. func (p *Parser) Upcoming(patterns ...string) bool { _, _, ok := p.Match(patterns...) return ok } // AcceptAny adds the next rune from the input to the string buffer. // If no rune could be read (end of file or invalid UTF8 data), // then false is returned. func (p *Parser) AcceptAny() bool { if r, ok := p.next(); ok { p.buffer.writeRune(r) return true } return false } // AcceptMatching adds the next runes to the string buffer, but only // if the upcoming runes satisfy the provided patterns. // When runes were added then true is returned, false otherwise. func (p *Parser) AcceptMatching(patterns ...string) bool { return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...) } // AcceptConsecutive adds consecutive runes from the input to the string // buffer, as long as they exist in the pattern. // If any runes were added then true is returned, false otherwise. 
func (p *Parser) AcceptConsecutive(pattern string) bool { accepted := false for p.AcceptMatching(pattern) { accepted = true } return accepted } // SkipMatching skips runes, but only when all provided patterns are satisfied. // Returns true when one or more runes were skipped. func (p *Parser) SkipMatching(patterns ...string) bool { if runes, w, ok := p.Match(patterns...); ok { p.pos += w for _, r := range runes { p.advanceCursor(r) } return true } return false } // SkipConsecutive skips consecutive runes from the provided pattern. // Returns true when one or more runes were skipped. func (p *Parser) SkipConsecutive(pattern string) bool { didSkip := false for p.SkipMatching(pattern) { didSkip = true } return didSkip } // ============================================================================ // EMIT DATA AND ERRORS // ============================================================================ // UnexpectedInputError is used by a parser implementation to emit an // error item that tells the client that an unexpected rune was // encountered in the input. // The parameter 'expected' is used to provide some context to the error. func (p *Parser) UnexpectedInputError(expected string) StateFn { // next() takes care of error messages for ok == false. if r, ok := p.next(); ok { return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) } return nil } // UnexpectedEndOfFile is used by a parser implementation to emit an // error item that tells the client that more data was expected from // the input. // The parameter 'expected' is used to provide some context to the error. 
func (p *Parser) UnexpectedEndOfFile(expected string) StateFn {
	return p.EmitError("Unexpected end of file (expected %s)", expected)
}

// ============================================================================
// LEXER : our lexer is quite low level, it only returns UTF8 runes
// ============================================================================

// peek returns but does not advance to the next rune in the input.
// Returns the rune, its width and a boolean. The boolean will be false in case
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
// A correctly encoded U+FFFD replacement character in the input is
// valid data and is returned like any other rune.
func (p *Parser) peek() (rune, int, bool) {
	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
	// Decoding failures are reported as RuneError with width 0 (no data
	// left) or width 1 (invalid encoding). A RuneError with a larger
	// width is a real U+FFFD character that is present in the input.
	return r, w, r != utf8.RuneError || w > 1
}

// peekMulti takes a peek at multiple upcoming runes in the input.
// Returns a slice of runes, their total width in bytes and a boolean.
// The boolean will be false in case less runes can be peeked than
// the requested amount (end of data or invalid UTF8 character).
// In that case, the runes that could be peeked and their total width
// are still returned.
func (p *Parser) peekMulti(amount int) ([]rune, int, bool) {
	var peeked []rune
	width := 0
	for i := 0; i < amount; i++ {
		r, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
		// Only width 0 (end of data) and width 1 (invalid encoding)
		// indicate a failure; see peek() for the same distinction.
		if r == utf8.RuneError && w <= 1 {
			return peeked, width, false
		}
		width += w
		peeked = append(peeked, r)
	}
	return peeked, width, true
}

// progress moves the cursor forward in the input, returning one rune
// for every specified pattern. The cursor is only moved forward when
// all patterns are satisfied.
// Returns true when all patterns were satisfied and the cursor was
// moved forward, false otherwise.
// A callback function can be provided to specify what to do with
// the runes that are encountered in the input.
func (p *Parser) progress(callback func(rune), patterns ...string) bool { if runes, w, ok := p.Match(patterns...); ok { p.pos += w for _, r := range runes { callback(r) p.advanceCursor(r) } return true } return false } // next returns the next rune from the input and a boolean indicating if // reading the input was successful. // When the end of input is reached, or an invalid UTF8 character is // read, then false is returned. Both are considered error cases, // and for that reason these automatically emit an error to the client. func (p *Parser) next() (rune, bool) { r, w, ok := p.peek() if ok { p.pos += w p.advanceCursor(r) return r, true } if r == utf8.RuneError && w == 0 { p.EmitError("unexpected end of file") } else { p.EmitError("invalid UTF8 character") } return r, false } // advanceCursor advances the rune cursor one position in the // input data. While doing so, it keeps tracks of newlines, // so we can report on row + column positions on error. func (p *Parser) advanceCursor(r rune) { if p.newline { p.cursorColumn = 0 p.cursorRow++ } else { p.cursorColumn++ } p.newline = r == '\n' }