Splitting off a more generic parser (it's fun getting to know a language, but you keep refactoring with all new stuff that you learn :-)

This commit is contained in:
Maurice Makaay 2019-05-17 12:44:24 +00:00
parent aeb48edc44
commit f86ef2b918
11 changed files with 720 additions and 234 deletions

View File

@ -1,48 +1,35 @@
package lexer package lexer
import "fmt" import (
"fmt"
// itemType represents the type of lexer items. "github.com/mmakaay/toml/parser"
type itemType int )
// Definition of all the lexer item types for the TOML lexer. // Definition of all the lexer item types for the TOML lexer.
const ( const (
ItemError itemType = iota // An error occurred ItemComment parser.ItemType = iota // An error occurred
ItemEOF // End of input reached ItemKey // Key of a key/value pair
ItemComment // Comment string, starts with # till end of line ItemKeyDot // Dot for a dotted key
ItemKey // Key of a key/value pair ItemAssignment // Value assignment coming up (=)
ItemKeyDot // Dot for a dotted key ItemString // A value of type string
ItemAssignment // Value assignment coming up (=)
ItemString // A value of type string
) )
// Item represents a lexer item returned from the scanner. // ParserItemToString returns a string representation of the
type Item struct { // parser.Item. This is used for unit testing purposes.
Type itemType //Type, e.g. ItemComment, ItemString func ParserItemToString(i parser.Item) string {
Value string // Value, e.g. "10.42", "["
}
// String returns a string representation of the lexer item.
func (i Item) String() string {
switch i.Type { switch i.Type {
case ItemComment:
return fmt.Sprintf("#(%s)", i.Value)
case ItemKey: case ItemKey:
return fmt.Sprintf("[%s]", i.Value) return fmt.Sprintf("[%s]", i.Value)
case ItemString:
return fmt.Sprintf("STR(%s)", i.Value)
case ItemKeyDot: case ItemKeyDot:
return "." return "."
case ItemAssignment: case ItemAssignment:
return "=" return "="
}
return fmt.Sprintf("%s(%s)", i.Type, i.Value)
}
// String returns a string representation of the lexer item type.
func (i itemType) String() string {
switch i {
case ItemComment:
return "#"
case ItemString:
return "STR"
default: default:
panic(fmt.Sprintf("No translation available for type id %d", i)) panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
} }
} }

View File

@ -4,22 +4,24 @@ import (
"fmt" "fmt"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
"github.com/mmakaay/toml/parser"
) )
// Lexer holds the state of the lexer. // Lexer holds the state of the lexer.
type Lexer struct { type Lexer struct {
input string // the scanned input string input string // the scanned input
state stateFn // a function that handles the current state state parser.StateFn // a function that handles the current state
stack []stateFn // state function stack, for nested parsing stack []parser.StateFn // state function stack, for nested parsing
pos int // current byte scanning position in the input len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline pos int // current byte scanning position in the input
cursorRow int // current row number in the input newline bool // keep track of when we have scanned a newline
cursorColumn int // current column position in the input cursorRow int // current row number in the input
width int // width of the last rune read, for supporting backup() cursorColumn int // current column position in the input
buffer StringBuffer // an efficient buffer, used to build string values buffer StringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting lexer items items chan parser.Item // channel of resulting lexer items
item Item // the current item as reached by Next() and retrieved by Get() item parser.Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error() err *Error // an error when lexing failed, retrieved by Error()
} }
// Error is used as the error type when lexing errors occur. // Error is used as the error type when lexing errors occur.
@ -35,46 +37,45 @@ func (err *Error) Error() string {
return err.Message return err.Message
} }
// Lex takes an input string and initializes the TOML lexer for it. // New takes an input string and initializes the lexer for it.
func Lex(input string) *Lexer { func New(input string) *Lexer {
return &Lexer{ return &Lexer{
input: input, input: input,
len: len(input),
state: stateKeyValuePair, state: stateKeyValuePair,
items: make(chan Item, 2), items: make(chan parser.Item, 2),
} }
} }
// Next advances to the next lexer item in the input string. // Next advances to the next lexer item in the input string.
// When a valid item was found, then the boolean return parameter is returned. // When a valid item was found, then the boolean return parameter will be true.
// On error or when reaching the end of the input, false is returned. // On error or when reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value. // When an error occurred, it will be set in the error return value, nil otherwise.
func (l *Lexer) Next() (Item, *Error, bool) { func (l *Lexer) Next() (parser.Item, *Error, bool) {
if l.state == nil {
panic("This should not happen: nil state reached, but entering Next()")
}
for { for {
select { select {
case i := <-l.items: case i := <-l.items:
if i.Type == ItemEOF { switch {
case i.Type == ItemEOF:
return i, nil, false return i, nil, false
} case i.Type == ItemError:
if i.Type == ItemError {
l.err = &Error{i.Value, l.cursorRow, l.cursorColumn} l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
return i, l.err, false return i, l.err, false
default:
l.item = i
return i, nil, true
} }
l.item = i
return i, nil, true
default: default:
l.state = l.state(l) l.state = l.state(l)
} }
} }
} }
// ToArray returns lexer items as an array. // ToArray returns lexer items as an array (mainly intended for testing purposes)
// When an error occurs during scanning, a partial result will be // When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred. // returned, accompanied by the error that occurred.
func (l *Lexer) ToArray() ([]Item, *Error) { func (l *Lexer) ToArray() ([]parser.Item, *Error) {
var items []Item var items []parser.Item
for { for {
item, err, more := l.Next() item, err, more := l.Next()
if !more { if !more {
@ -100,25 +101,25 @@ func (l *Lexer) popState() stateFn {
// atEndOfFile returns true when there is no more data available in the input. // atEndOfFile returns true when there is no more data available in the input.
func (l *Lexer) atEndOfFile() bool { func (l *Lexer) atEndOfFile() bool {
return l.pos >= len(l.input) return l.pos >= l.len
} }
// emit passes a lexer item back to the client, including the provided string. // emit passes a lexer item back to the client, including the provided string.
func (l *Lexer) emit(t itemType, s string) { func (l *Lexer) emit(t parser.ItemType, s string) {
l.items <- Item{t, s} l.items <- parser.Item{Type: t, Value: s}
l.buffer.Reset() l.buffer.Reset()
} }
// emitLiteral passes a lexer item back to the client, including the accumulated // emitLiteral passes a lexer item back to the client, including the accumulated
// string buffer data as a literal string. // string buffer data as a literal string.
func (l *Lexer) emitLiteral(t itemType) { func (l *Lexer) emitLiteral(t parser.ItemType) {
l.emit(t, l.buffer.AsLiteralString()) l.emit(t, l.buffer.AsLiteralString())
} }
// emitTrimmedLiteral passes a lexer item back to the client, including the // emitTrimmedLiteral passes a lexer item back to the client, including the
// accumulated string buffer data as a literal string with whitespace // accumulated string buffer data as a literal string with whitespace
// trimmed from it. // trimmed from it.
func (l *Lexer) emitTrimmedLiteral(t itemType) { func (l *Lexer) emitTrimmedLiteral(t parser.ItemType) {
l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString())) l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
} }
@ -127,7 +128,7 @@ func (l *Lexer) emitTrimmedLiteral(t itemType) {
// codes like \n, \t, \uXXXX, etc.) // codes like \n, \t, \uXXXX, etc.)
// This method might return an error, in case there is data in the // This method might return an error, in case there is data in the
// string buffer that is not valid for string interpretation. // string buffer that is not valid for string interpretation.
func (l *Lexer) emitInterpreted(t itemType) error { func (l *Lexer) emitInterpreted(t parser.ItemType) error {
s, err := l.buffer.AsInterpretedString() s, err := l.buffer.AsInterpretedString()
if err != nil { if err != nil {
return err return err
@ -137,15 +138,10 @@ func (l *Lexer) emitInterpreted(t itemType) error {
} }
// emitError emits a lexer error item back to the client. // emitError emits a lexer error item back to the client.
func (l *Lexer) emitError(message string) { func (l *Lexer) emitError(format string, args ...interface{}) stateFn {
message := fmt.Sprintf(format, args...)
l.emit(ItemError, message) l.emit(ItemError, message)
} return nil
// backup steps back one rune
// Can be called only once per call of next.
func (l *Lexer) backup() {
l.pos -= l.width
l.cursorColumn--
} }
// peek returns but does not advance to the next rune(s) in the input. // peek returns but does not advance to the next rune(s) in the input.
@ -176,17 +172,40 @@ func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
return peeked, width, true return peeked, width, true
} }
// acceptNext adds the specified amount of runes from the input to the string buffer. // acceptAny adds the next rune from the input to the string buffer.
// If not enough runes could be read (end of file or invalid UTF8 data), then false is returned. // If no rune could be read (end of file or invalid UTF8 data), then
func (l *Lexer) acceptNext(count int) bool { // false is returned.
for i := 0; i < count; i++ { func (l *Lexer) acceptAny() bool {
if r, ok := l.next(); ok { if r, ok := l.next(); ok {
l.buffer.WriteRune(r) l.buffer.WriteRune(r)
} else { return true
return false
}
} }
return true return false
}
// accept adds the next rune to the string buffer and returns true if it's
// from the valid set of runes. Otherwise false is returned.
func (l *Lexer) accept(matches ...string) bool {
return l.acceptPattern(matches...)
}
// AcceptMatching adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided pattern.
// When runes were added then true is returned, false otherwise.
func (l *Lexer) acceptPattern(pattern ...string) bool {
return l.progress(func(r rune) { l.buffer.WriteRune(r) }, pattern...)
}
func (l *Lexer) progress(callback func(rune), matches ...string) bool {
if runes, w, ok := l.match(matches...); ok {
l.pos += w
for _, r := range runes {
callback(r)
l.advanceCursor(r)
}
return true
}
return false
} }
// acceptConsecutive adds consecutive runes from the input to the string // acceptConsecutive adds consecutive runes from the input to the string
@ -200,27 +219,9 @@ func (l *Lexer) acceptConsecutive(match string) bool {
return accepted return accepted
} }
// next returns the next rune from the input and a boolean indicating if // advanceCursor advances the rune cursor one position in the
// reading the input was successful. // input data. While doing so, it keeps tracks of newlines,
// When the end of input is reached, or an invalid UTF8 character is // so we can report on row + column positions on error.
// read, then false is returned.
func (l *Lexer) next() (rune, bool) {
r, w, ok := l.peek()
if ok {
l.width = w
l.pos += w
l.advanceCursor(r)
return r, true
}
l.width = 0
if r == utf8.RuneError && w == 0 {
l.emitError("unexpected end of file")
} else {
l.emitError("invalid UTF8 character")
}
return r, false
}
func (l *Lexer) advanceCursor(r rune) { func (l *Lexer) advanceCursor(r rune) {
if l.newline { if l.newline {
l.cursorColumn = 0 l.cursorColumn = 0
@ -233,40 +234,20 @@ func (l *Lexer) advanceCursor(r rune) {
// skip skips runes, but only when all provided matches are satisfied. // skip skips runes, but only when all provided matches are satisfied.
// Returns true when one or more runes were skipped. // Returns true when one or more runes were skipped.
func (l *Lexer) skipMatching(matches ...string) bool { func (l *Lexer) skipMatching(pattern ...string) bool {
if runes, w, ok := l.match(matches...); ok { return l.progress(func(r rune) {}, pattern...)
l.pos += w
for _, r := range runes {
l.advanceCursor(r)
}
return true
}
return false
} }
// skipConsecutive skips consecutive runes from the provided match. // skipConsecutive skips consecutive runes from the provided match.
// Returns true when one or more runes were skipped. // Returns true when one or more runes were skipped.
func (l *Lexer) skipConsecutive(match string) bool { func (l *Lexer) skipConsecutive(pattern string) bool {
didSkip := false didSkip := false
for l.skipMatching(match) { for l.skipMatching(pattern) {
didSkip = true didSkip = true
} }
return didSkip return didSkip
} }
// accept adds the next rune to the string buffer and returns true if it's
// from the valid set of runes. Otherwise false is returned.
func (l *Lexer) accept(match string) bool {
if r, ok := l.next(); ok {
if strings.IndexRune(match, r) >= 0 {
l.buffer.WriteRune(r)
return true
}
}
l.backup()
return false
}
// upcoming checks if the upcoming runes satisfy the provided rune matches. // upcoming checks if the upcoming runes satisfy the provided rune matches.
// This is a lot like the match method, with the difference that // This is a lot like the match method, with the difference that
// this one only returns the boolean value. // this one only returns the boolean value.
@ -275,6 +256,25 @@ func (l *Lexer) upcoming(matches ...string) bool {
return ok return ok
} }
// next returns the next rune from the input and a boolean indicating if
// reading the input was successful.
// When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned.
func (l *Lexer) next() (rune, bool) {
r, w, ok := l.peek()
if ok {
l.pos += w
l.advanceCursor(r)
return r, true
}
if r == utf8.RuneError && w == 0 {
l.emitError("unexpected end of file")
} else {
l.emitError("invalid UTF8 character")
}
return r, false
}
// match checks if the upcoming runes satisfy the provided rune matches. // match checks if the upcoming runes satisfy the provided rune matches.
// It returns a slice of runes that were found, their total byte width // It returns a slice of runes that were found, their total byte width
// and a boolean indicating whether or not all provided matches matched // and a boolean indicating whether or not all provided matches matched
@ -292,24 +292,14 @@ func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
return peeked, width, false return peeked, width, false
} }
// error returns an error token and terminates the scan
// by returning nil to l.run.
func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
l.items <- Item{
ItemError,
fmt.Sprintf(format, args...),
}
return nil
}
func (l *Lexer) unexpectedInputError(expected string) stateFn { func (l *Lexer) unexpectedInputError(expected string) stateFn {
// next() takes care of error messages for ok == false. // next() takes care of emitting errors for ok == false.
if r, ok := l.next(); ok { if r, ok := l.next(); ok {
l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) return l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
} }
return nil return nil
} }
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn { func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
return l.errorf("Unexpected end of file (expected %s)", expected) return l.emitError("Unexpected end of file (expected %s)", expected)
} }

View File

@ -1,8 +1,6 @@
package lexer package lexer
// stateFn represents the state of the lexer as a function import "github.com/mmakaay/toml/parser"
// that returns the next state.
type stateFn func(*Lexer) stateFn
const ( const (
whitespace string = " \t" whitespace string = " \t"
@ -28,59 +26,65 @@ const (
longUtf8Escape string = "U" longUtf8Escape string = "U"
) )
func stateKeyValuePair(l *Lexer) stateFn { // NewParser creates a new parser, using the provided input string
l.skipConsecutive(whitespace + carriageReturn + newline) // as the data to parse.
if l.skipMatching(hash) { func NewParser(input string) *parser.Parser {
return parser.New(input, stateKeyValuePair)
}
func stateKeyValuePair(l *parser.Parser) parser.StateFn {
l.SkipConsecutive(whitespace + carriageReturn + newline)
if l.SkipMatching(hash) {
return stateComment return stateComment
} }
if l.upcoming(startOfKey) { if l.Upcoming(startOfKey) {
return stateKey return stateKey
} }
return stateEndOfFile return stateEndOfFile
} }
// A '#' hash symbol marks the rest of the line as a comment. // A '#' hash symbol marks the rest of the line as a comment.
func stateComment(l *Lexer) stateFn { func stateComment(l *parser.Parser) parser.StateFn {
for { for {
switch { switch {
case l.atEndOfFile() || l.skipMatching(newline): case l.AtEndOfFile() || l.SkipMatching(newline):
l.emitTrimmedLiteral(ItemComment) l.EmitLiteralTrim(ItemComment)
return stateKeyValuePair return stateKeyValuePair
default: default:
if !l.acceptNext(1) { if !l.AcceptAny() {
return l.unexpectedInputError("comment") return nil
} }
} }
} }
} }
// A key may be either bare, quoted or dotted. // A key may be either bare, quoted or dotted.
func stateKey(l *Lexer) stateFn { func stateKey(l *parser.Parser) parser.StateFn {
if l.accept(bareKeyChars) { if l.AcceptMatching(bareKeyChars) {
return statebareKeyChars return statebareKeyChars
} }
return l.unexpectedInputError("a valid key name") return l.UnexpectedInputError("a valid key name")
} }
// Bare keys may only contain ASCII letters, ASCII digits, // Bare keys may only contain ASCII letters, ASCII digits,
// underscores, and dashes (A-Za-z0-9_-). Note that bare // underscores, and dashes (A-Za-z0-9_-). Note that bare
// keys are allowed to be composed of only ASCII digits, // keys are allowed to be composed of only ASCII digits,
// e.g. 1234, but are always interpreted as strings. // e.g. 1234, but are always interpreted as strings.
func statebareKeyChars(l *Lexer) stateFn { func statebareKeyChars(l *parser.Parser) parser.StateFn {
l.acceptConsecutive(bareKeyChars) l.AcceptConsecutive(bareKeyChars)
l.emitLiteral(ItemKey) l.EmitLiteral(ItemKey)
return stateEndOfKeyOrKeyDot return stateEndOfKeyOrKeyDot
} }
// Dotted keys are a sequence of bare or quoted keys joined with a dot. // Dotted keys are a sequence of bare or quoted keys joined with a dot.
// This allows for grouping similar properties together: // This allows for grouping similar properties together:
func stateEndOfKeyOrKeyDot(l *Lexer) stateFn { func stateEndOfKeyOrKeyDot(l *parser.Parser) parser.StateFn {
// Whitespace around dot-separated parts is ignored, however, // Whitespace around dot-separated parts is ignored, however,
// best practice is to not use any extraneous whitespace. // best practice is to not use any extraneous whitespace.
l.skipConsecutive(whitespace) l.SkipConsecutive(whitespace)
if l.skipMatching(dot) { if l.SkipMatching(dot) {
l.emit(ItemKeyDot, "") l.Emit(ItemKeyDot, "")
l.skipConsecutive(whitespace) l.SkipConsecutive(whitespace)
return stateKey return stateKey
} }
return stateKeyAssignment return stateKeyAssignment
@ -90,62 +94,69 @@ func stateEndOfKeyOrKeyDot(l *Lexer) stateFn {
// Whitespace is ignored around key names and values. The key, equals // Whitespace is ignored around key names and values. The key, equals
// sign, and value must be on the same line (though some values can // sign, and value must be on the same line (though some values can
// be broken over multiple lines). // be broken over multiple lines).
func stateKeyAssignment(l *Lexer) stateFn { func stateKeyAssignment(l *parser.Parser) parser.StateFn {
l.skipConsecutive(whitespace) l.SkipConsecutive(whitespace)
if l.skipMatching(equal) { if l.SkipMatching(equal) {
l.emit(ItemAssignment, "") l.Emit(ItemAssignment, "")
l.skipConsecutive(whitespace) l.SkipConsecutive(whitespace)
return stateValue return stateValue
} }
return l.unexpectedInputError("a value assignment") return l.UnexpectedInputError("a value assignment")
} }
// Values must be of the following types: String, Integer, Float, Boolean, // Values must be of the following types: String, Integer, Float, Boolean,
// Datetime, Array, or Inline Table. Unspecified values are invalid. // Datetime, Array, or Inline Table. Unspecified values are invalid.
func stateValue(l *Lexer) stateFn { func stateValue(l *parser.Parser) parser.StateFn {
l.skipConsecutive(whitespace) l.SkipConsecutive(whitespace)
if l.upcoming(quoteChars) { if l.Upcoming(quoteChars) {
return stateStringValue return stateStringValue
} }
return l.unexpectedInputError("a value") return l.UnexpectedInputError("a value")
} }
// There are four ways to express strings: basic, multi-line basic, literal, // There are four ways to express strings: basic, multi-line basic, literal,
// and multi-line literal. All strings must contain only valid UTF-8 characters. // and multi-line literal. All strings must contain only valid UTF-8 characters.
func stateStringValue(l *Lexer) stateFn { func stateStringValue(l *parser.Parser) parser.StateFn {
switch { switch {
case l.skipMatching(doubleQuote, doubleQuote, doubleQuote): case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote):
// Multi-line basic strings are surrounded by three quotation marks on each side. // Multi-line basic strings are surrounded by three quotation marks on each side.
return stateMultiLineBasicString return stateMultiLineBasicString
case l.skipMatching(doubleQuote): case l.SkipMatching(doubleQuote):
// Basic strings are surrounded by quotation marks. // Basic strings are surrounded by quotation marks.
return stateBasicStringValue return stateSingleLineBasicString
} }
return l.unexpectedInputError("a string value") return l.UnexpectedInputError("a string value")
} }
func stateBasicStringValue(l *Lexer) stateFn { func stateSingleLineBasicString(l *parser.Parser) parser.StateFn {
if l.upcoming(doubleQuote, doubleQuote) { if l.Upcoming(doubleQuote, doubleQuote) {
return stateMultiLineBasicString return stateMultiLineBasicString
} }
return stateBasicString return stateBasicString
} }
const invalidBasicStringCharacters string = "" + func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
l.EmitError("Not yet implemented")
return nil
}
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
const invalidBasicStringCharacters string = "\"\\" +
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" + "\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" + "\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
"\u007F" "\u007F"
func stateParseBasicString(l *Lexer) stateFn { func stateParseBasicString(l *parser.Parser) parser.StateFn {
for { for {
switch { switch {
case l.atEndOfFile(): case l.AtEndOfFile():
return l.unexpectedEndOfFile("basic string token") return l.UnexpectedEndOfFile("basic string token")
case l.skipMatching(doubleQuote): case l.SkipMatching(doubleQuote):
return l.popState() return l.PopState()
case l.upcoming(backslash, escapeChars): case l.AcceptMatching(backslash, escapeChars):
// For convenience, some popular characters have a compact escape sequence. // For convenience, some popular characters have a compact escape sequence.
// \b - backspace (U+0008) // \b - backspace (U+0008)
// \t - tab (U+0009) // \t - tab (U+0009)
@ -154,50 +165,45 @@ func stateParseBasicString(l *Lexer) stateFn {
// \r - carriage return (U+000D) // \r - carriage return (U+000D)
// \" - quote (U+0022) // \" - quote (U+0022)
// \\ - backslash (U+005C) // \\ - backslash (U+005C)
l.acceptNext(2) case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex):
case l.upcoming(backslash, shortUtf8Escape, hex, hex, hex, hex):
// \uXXXX - unicode (U+XXXX) // \uXXXX - unicode (U+XXXX)
l.acceptNext(6) case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
case l.upcoming(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
// \UXXXXXXXX - unicode (U+XXXXXXXX) // \UXXXXXXXX - unicode (U+XXXXXXXX)
l.acceptNext(10) case l.Upcoming(backslash):
case l.upcoming(backslash):
// All other escape sequences not listed above are reserved and, // All other escape sequences not listed above are reserved and,
// if used, TOML should produce an error. // if used, TOML should produce an error.
return l.errorf("Invalid escape sequence in basic string") return l.EmitError("Invalid escape sequence in basic string")
case l.upcoming(invalidBasicStringCharacters): case l.Upcoming(invalidBasicStringCharacters):
// Any Unicode character may be used except those that must be escaped: // Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F). // quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
r, _ := l.next() r, _, _ := l.Match(invalidBasicStringCharacters)
return l.errorf("Invalid character in basic string: %q", r) l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
return nil
default: default:
if !l.acceptNext(1) { if !l.AcceptAny() {
return l.unexpectedInputError("string value") return nil
} }
} }
} }
} }
func stateBasicString(l *Lexer) stateFn { func stateBasicString(l *parser.Parser) parser.StateFn {
l.pushState(func(l *Lexer) stateFn { l.PushState(func(l *parser.Parser) parser.StateFn {
err := l.emitInterpreted(ItemString) err := l.EmitInterpreted(ItemString)
if err != nil { if err != nil {
return l.errorf("Invalid data in string: %s", err) l.EmitError("Invalid data in string: %s", err)
return nil
} }
return stateKeyValuePair return stateKeyValuePair
}) })
return stateParseBasicString return stateParseBasicString
} }
func stateMultiLineBasicString(l *Lexer) stateFn { func stateEndOfFile(l *parser.Parser) parser.StateFn {
return l.errorf("Not yet implemented") if l.AtEndOfFile() {
} l.Emit(parser.ItemEOF, "EOF") // todo Automate within parser?
func stateEndOfFile(l *Lexer) stateFn {
if l.atEndOfFile() {
l.emit(ItemEOF, "EOF")
} else { } else {
l.unexpectedInputError("end of file") l.UnexpectedInputError("end of file")
} }
return nil return nil
} }

View File

@ -9,7 +9,7 @@ import (
) )
func TestErrorsIncludeLineAndRowPosition(t *testing.T) { func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
_, err := lexer.Lex("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray() _, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
t.Logf("Got error: %s", err.Error()) t.Logf("Got error: %s", err.Error())
if err.Row != 4 { if err.Row != 4 {
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4) t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
@ -19,21 +19,20 @@ func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
} }
} }
func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""})
}
func TestInvalidUtf8Data(t *testing.T) { func TestInvalidUtf8Data(t *testing.T) {
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"inside comment", "# \xbc", "", "invalid UTF8 character"}, {"inside comment", "# \xbc", "", "invalid UTF8 character"},
{"bare key 1", "\xbc", "", "invalid UTF8 character"}, {"bare key 1", "\xbc", "", "invalid UTF8 character"},
{"bare key 2", "key\xbc", "", "invalid UTF8 character"}, {"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
{"assignment", "key \xbc", "[key]", "invalid UTF8 character"}, {"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"}, {"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"}, {"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
}) })
} }
func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""})
}
func TestWhiteSpaceAndNewlines(t *testing.T) { func TestWhiteSpaceAndNewlines(t *testing.T) {
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"space", " ", "", ""}, {"space", " ", "", ""},
@ -61,13 +60,13 @@ func TestKeyWithoutAssignment(t *testing.T) {
err := "unexpected end of file" err := "unexpected end of file"
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"bare with whitespace", " a ", "[a]", err}, {"bare with whitespace", " a ", "[a]", err},
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "", err}, {"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", err}, {"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
{"bare numbers", "0123456789", "", err}, {"bare numbers", "0123456789", "[0123456789]", err},
{"bare underscore", "_", "", err}, {"bare underscore", "_", "[_]", err},
{"bare dash", "-", "", err}, {"bare dash", "-", "[-]", err},
{"bare big mix", "-hey_good_Lookin123-", "", err}, {"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
{"bare dotted", "a._.c", "[a].[_].", err}, {"bare dotted", "a._.c", "[a].[_].[c]", err},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err}, {"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
}) })
} }
@ -90,9 +89,9 @@ func TestUnterminatedBasicString(t *testing.T) {
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) { func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
runStatesTs(t, []statesT{ runStatesTs(t, []statesT{
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00'`}, {"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n'`}, {"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f'`}, {"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
}) })
// No need to write all test cases for disallowed characters by hand. // No need to write all test cases for disallowed characters by hand.
@ -100,7 +99,7 @@ func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
name := fmt.Sprintf("control character %x", rune(i)) name := fmt.Sprintf("control character %x", rune(i))
runStatesT( runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=", t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`Invalid character in basic string: %q`, rune(i))}) fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
} }
} }
@ -163,7 +162,7 @@ func runStatesTs(t *testing.T, tests []statesT) {
} }
func runStatesT(t *testing.T, c statesT) { func runStatesT(t *testing.T, c statesT) {
l, err := lexer.Lex(c.in).ToArray() l, err := lexer.NewParser(c.in).ToArray()
if err == nil && c.err != "" { if err == nil && c.err != "" {
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err) t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
} }
@ -179,14 +178,15 @@ func runStatesT(t *testing.T, c statesT) {
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l)) t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
} }
for i, e := range expected { for i, e := range expected {
if l[i].String() != e { v := lexer.ParserItemToString(l[i])
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, l[i]) if v != e {
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
} }
} }
case string: case string:
a := make([]string, len(l)) a := make([]string, len(l))
for _, v := range l { for _, v := range l {
a = append(a, v.String()) a = append(a, lexer.ParserItemToString(v))
} }
actual := strings.Join(a, "") actual := strings.Join(a, "")
if actual != expected { if actual != expected {

261
parser/parser.go Normal file
View File

@ -0,0 +1,261 @@
package parser
import (
"fmt"
"strings"
"unicode/utf8"
)
// New takes an input string and a start state,
// and initializes the parser for it.
func New(input string, startState StateFn) *Parser {
	p := &Parser{}
	p.input = input
	p.len = len(input)
	p.state = startState
	// Buffer two items, so a state function can emit an item plus an
	// error without blocking before the client reads them.
	p.items = make(chan Item, 2)
	return p
}
// PushState adds the state function to the state stack.
// This is used for implementing nested parsing.
// The pushed state can later be restored using PopState().
func (l *Parser) PushState(state StateFn) {
	l.stack = append(l.stack, state)
}
// PopState pops the last pushed state from the state stack and
// returns it. Calling this on an empty stack is a programming
// error and will panic with an index-out-of-range error.
func (l *Parser) PopState() StateFn {
	top := l.stack[len(l.stack)-1]
	l.stack = l.stack[:len(l.stack)-1]
	return top
}
// AtEndOfFile returns true when there is no more data available in the input.
// The check is based on the byte position, not the rune position.
func (l *Parser) AtEndOfFile() bool {
	return l.pos >= l.len
}
// Emit passes a Parser item to the client, including the provided string.
// As a side effect, the accumulated string buffer is cleared, so the
// next item starts with an empty buffer.
func (l *Parser) Emit(t ItemType, s string) {
	item := Item{Type: t, Value: s}
	l.items <- item
	l.buffer.Reset()
}
// EmitLiteral passes a Parser item to the client, including the accumulated
// string buffer data as a literal string.
func (l *Parser) EmitLiteral(t ItemType) {
	// Emit() resets the string buffer after sending the item.
	l.Emit(t, l.buffer.AsLiteralString())
}
// EmitLiteralTrim passes a Parser item to the client, including the
// accumulated string buffer data as a literal string with whitespace
// trimmed from it.
func (l *Parser) EmitLiteralTrim(t ItemType) {
	// TrimSpace removes leading and trailing Unicode whitespace only;
	// whitespace inside the string is preserved.
	l.Emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
}
// EmitInterpreted passes a Parser item to the client, including the
// accumulated string buffer data as a Go double quoted interpreted string
// (handling escape codes like \n, \t, \uXXXX, etc.)
// This method might return an error, in case there is data in the
// string buffer that is not valid for string interpretation.
func (l *Parser) EmitInterpreted(t ItemType) error {
	value, interpretErr := l.buffer.AsInterpretedString()
	if interpretErr != nil {
		// Nothing is emitted on failure; the buffer is left as-is.
		return interpretErr
	}
	l.Emit(t, value)
	return nil
}
// EmitError emits a Parser error item to the client, formatted using
// the provided fmt-style format string and arguments.
// It returns nil, so a state function can use it as its return value
// to terminate the state machine after an error.
func (l *Parser) EmitError(format string, args ...interface{}) StateFn {
	l.Emit(ItemError, fmt.Sprintf(format, args...))
	return nil
}
// Match checks if the upcoming runes satisfy all provided patterns.
// It returns a slice of runes that were found, their total byte width
// and a boolean indicating whether or not all provided patterns were
// satisfied by the input data.
func (l *Parser) Match(patterns ...string) ([]rune, int, bool) {
	runes, width, ok := l.peekMulti(len(patterns))
	if !ok {
		// Not enough runes available to satisfy all patterns.
		return runes, width, false
	}
	for i, pattern := range patterns {
		// Each pattern is a set of acceptable runes for that position.
		if !strings.ContainsRune(pattern, runes[i]) {
			return runes, width, false
		}
	}
	return runes, width, true
}
// Upcoming checks if the upcoming runes satisfy all provided patterns.
// Returns true if all provided patterns are satisfied.
// This is a convenience wrapper around Match() that discards the
// matched runes and their width.
func (l *Parser) Upcoming(patterns ...string) bool {
	_, _, ok := l.Match(patterns...)
	return ok
}
// AcceptAny adds the next rune from the input to the string buffer.
// If no rune could be read (end of file or invalid UTF8 data),
// then false is returned.
func (l *Parser) AcceptAny() bool {
	r, ok := l.next()
	if !ok {
		// next() has already emitted an appropriate error item.
		return false
	}
	l.buffer.WriteRune(r)
	return true
}
// AcceptMatching adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided patterns.
// When runes were added then true is returned, false otherwise.
func (l *Parser) AcceptMatching(patterns ...string) bool {
	// progress() advances the cursor and invokes the callback for every
	// matched rune; here the callback stores each rune in the buffer.
	return l.progress(func(r rune) { l.buffer.WriteRune(r) }, patterns...)
}
// AcceptConsecutive adds consecutive runes from the input to the string
// buffer, as long as they exist in the pattern.
// If any runes were added then true is returned, false otherwise.
func (l *Parser) AcceptConsecutive(pattern string) bool {
	if !l.AcceptMatching(pattern) {
		return false
	}
	// At least one rune matched; keep going until the pattern no
	// longer matches.
	for l.AcceptMatching(pattern) {
	}
	return true
}
// SkipMatching skips runes, but only when all provided patterns are satisfied.
// Returns true when one or more runes were skipped.
func (l *Parser) SkipMatching(patterns ...string) bool {
	// Delegate to progress() with a no-op callback. The original body
	// duplicated progress()'s cursor-advancing loop verbatim; this keeps
	// the logic in one place and mirrors how AcceptMatching is built.
	return l.progress(func(r rune) {}, patterns...)
}
// SkipConsecutive skips consecutive runes from the provided pattern.
// Returns true when one or more runes were skipped.
func (l *Parser) SkipConsecutive(pattern string) bool {
	if !l.SkipMatching(pattern) {
		return false
	}
	// At least one rune was skipped; consume the rest of the run.
	for l.SkipMatching(pattern) {
	}
	return true
}
// ============================================================================
// EMIT DATA AND ERRORS
// ============================================================================
// UnexpectedInputError is used by a parser implementation to emit an
// error item that tells the client that an unexpected rune was
// encountered in the input.
// The parameter 'expected' is used to provide some context to the error.
func (l *Parser) UnexpectedInputError(expected string) StateFn {
	// next() takes care of error messages for ok == false.
	if r, ok := l.next(); ok {
		// Pass the format string and its arguments separately. The
		// original pre-formatted the message with Sprintf and handed the
		// result to EmitError, which runs Sprintf on it again; any '%'
		// coming from the input (or from 'expected') would then be
		// mangled into "%!..."-style noise.
		return l.EmitError("unexpected character %q (expected %s)", r, expected)
	}
	return nil
}
// UnexpectedEndOfFile is used by a parser implementation to emit an
// error item that tells the client that more data was expected from
// the input.
// The parameter 'expected' is used to provide some context to the error.
// NOTE(review): this message is capitalized ("Unexpected ...") while
// next() emits a lowercase "unexpected end of file" — consider
// unifying the two, but verify test expectations first.
func (l *Parser) UnexpectedEndOfFile(expected string) StateFn {
	return l.EmitError("Unexpected end of file (expected %s)", expected)
}
// ============================================================================
// LEXER : our lexer is quite low level, it only returns UTF8 runes
// ============================================================================
// peek returns but does not advance to the next rune(s) in the input.
// Returns the rune, its width and a boolean. The boolean will be false in case
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
func (l *Parser) peek() (rune, int, bool) {
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	// DecodeRuneInString signals trouble by returning RuneError with a
	// width of 0 (empty/end of input) or 1 (invalid UTF8 encoding).
	// A genuine U+FFFD character in the input also decodes to RuneError,
	// but with width 3; the previous check (r != RuneError) wrongly
	// rejected that valid rune.
	return r, w, !(r == utf8.RuneError && w <= 1)
}
// peekMulti takes a peek at multiple upcoming runes in the input.
// Returns a slice of runes, their total width in bytes and a boolean.
// The boolean will be false in case less runes can be peeked than
// the requested amount (end of data or invalid UTF8 character).
func (l *Parser) peekMulti(amount int) ([]rune, int, bool) {
	width := 0
	var peeked []rune
	for i := 0; i < amount; i++ {
		r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
		// RuneError with width 0 (end of data) or width 1 (invalid UTF8)
		// means no valid rune could be decoded. A genuine U+FFFD in the
		// input decodes with width 3 and must be accepted as a normal
		// rune; the previous check (r == RuneError alone) rejected it.
		if r == utf8.RuneError && w <= 1 {
			return peeked, width, false
		}
		width += w
		peeked = append(peeked, r)
	}
	return peeked, width, true
}
// progress moves the cursor forward in the input, returning one rune
// for every specified pattern. The cursor is only moved forward when
// all patterns are satisfied.
// Returns true when all patterns were satisfied and the cursor was
// moved forward, false otherwise.
// A callback function can be provided to specify what to do with
// the runes that are encountered in the input.
func (l *Parser) progress(callback func(rune), patterns ...string) bool {
	matched, width, ok := l.Match(patterns...)
	if !ok {
		return false
	}
	// Move the byte position past all matched runes at once, then let
	// advanceCursor() update the row/column bookkeeping per rune.
	l.pos += width
	for _, r := range matched {
		callback(r)
		l.advanceCursor(r)
	}
	return true
}
// next returns the next rune from the input and a boolean indicating if
// reading the input was successful.
// When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned. Both are considered error cases,
// and for that reason these automatically emit an error to the client.
func (l *Parser) next() (rune, bool) {
	r, w, ok := l.peek()
	if ok {
		l.pos += w
		l.advanceCursor(r)
		return r, true
	}
	// Distinguish end-of-input (RuneError with zero width) from a
	// broken UTF8 byte sequence.
	switch {
	case r == utf8.RuneError && w == 0:
		l.EmitError("unexpected end of file")
	default:
		l.EmitError("invalid UTF8 character")
	}
	return r, false
}
// advanceCursor advances the rune cursor one position in the
// input data. While doing so, it keeps tracks of newlines,
// so we can report on row + column positions on error.
func (l *Parser) advanceCursor(r rune) {
	// The newline flag was set by the previous rune, so a row change
	// takes effect on the rune that follows the '\n'.
	if !l.newline {
		l.cursorColumn++
	} else {
		l.cursorRow++
		l.cursorColumn = 0
	}
	l.newline = r == '\n'
}

62
parser/stringbuf.go Normal file
View File

@ -0,0 +1,62 @@
package parser
import (
"bytes"
"strconv"
"strings"
)
// StringBuffer is a string buffer implementation, which is used by the parser
// to efficiently accumulate runes from the input and eventually turn these
// into a string, either literal or interpreted.
// The zero value is ready for use.
type StringBuffer struct {
	buffer bytes.Buffer // accumulates the written runes
}
// Reset resets the string buffer, in order to build a new string.
// It returns the buffer itself, to allow for fluent-style chaining.
func (b *StringBuffer) Reset() *StringBuffer {
	b.buffer.Reset()
	return b
}
// WriteString adds the runes of the input string to the string buffer.
// It returns the buffer itself, to allow for fluent-style chaining.
func (b *StringBuffer) WriteString(s string) *StringBuffer {
	// Write rune by rune (not the raw bytes), so the behavior stays
	// rune-oriented, matching WriteRune.
	for _, r := range s {
		b.buffer.WriteRune(r)
	}
	return b
}
// WriteRune adds a single rune to the string buffer.
// It returns the buffer itself, to allow for fluent-style chaining
// like buf.WriteRune('a').WriteRune('b').
func (b *StringBuffer) WriteRune(r rune) *StringBuffer {
	b.buffer.WriteRune(r)
	return b
}
// AsLiteralString returns the string buffer as a literal string.
// Literal means that no escape sequences are processed.
// The buffer contents are not consumed by this call, so it can be
// called multiple times.
func (b *StringBuffer) AsLiteralString() string {
	return b.buffer.String()
}
// AsInterpretedString returns the string in its interpreted form.
// Interpreted means that escape sequences are handled in the way that Go would
// have, had it been inside double quotes. It translates for example escape
// sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string
// representations.
// Since the input might contain invalid escape sequences, this method
// also returns an error. When an error is returned, the returned string will
// contain the string as far as it could be interpreted.
func (b *StringBuffer) AsInterpretedString() (string, error) {
	var out strings.Builder
	remaining := b.buffer.String()
	for remaining != "" {
		// UnquoteChar decodes one (possibly escaped) character and
		// returns the rest of the string.
		r, _, rest, err := strconv.UnquoteChar(remaining, '"')
		if err != nil {
			// Return the partial result together with the error.
			return out.String(), err
		}
		out.WriteRune(r)
		remaining = rest
	}
	return out.String(), nil
}

90
parser/stringbuf_test.go Normal file
View File

@ -0,0 +1,90 @@
package parser_test
import (
"testing"
"github.com/mmakaay/toml/parser"
)
// TestGeneratingStringDoesNotResetBuffer verifies that producing a string
// (interpreted or literal) leaves the buffer contents intact, so both
// representations can be generated from the same buffered data.
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
	var b parser.StringBuffer
	s1, _ := b.WriteString(`hi\nthere`).AsInterpretedString()
	s2 := b.AsLiteralString()
	// The original failure messages were stale copy-paste text
	// (`Did not get expected string"X"`); they now state what was
	// actually expected.
	if s1 != "hi\nthere" {
		t.Fatalf("Did not get expected interpreted string \"hi\\nthere\" for try 1, but %q", s1)
	}
	if s2 != "hi\\nthere" {
		t.Fatalf("Did not get expected literal string \"hi\\\\nthere\" for try 2, but %q", s2)
	}
}
// TestResetResetsBuffer verifies that Reset() discards previously
// written data, yielding an empty literal string.
func TestResetResetsBuffer(t *testing.T) {
	var buf parser.StringBuffer
	result := buf.WriteRune('X').Reset().AsLiteralString()
	if result != "" {
		t.Fatalf("Did not get expected empty string, but %q", result)
	}
}
// TestAsLiteralString runs a table of inputs through the buffer and
// checks that AsLiteralString() returns them unprocessed (no escape
// sequence handling).
func TestAsLiteralString(t *testing.T) {
	var buf parser.StringBuffer
	for _, tc := range []stringbufT{
		{"empty string", ``, ``, OK},
		{"simple string", `Simple string!`, `Simple string!`, OK},
		{"single quote", `'`, `'`, OK},
		{"double quote", `"`, `"`, OK},
		{"escaped single quote", `\'`, `\'`, OK},
		{"escaped double quote", `\"`, `\"`, OK},
		{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
		{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
		{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
	} {
		got := buf.Reset().WriteString(tc.in).AsLiteralString()
		if got != tc.out {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", tc.name, tc.in, tc.out, got)
		}
	}
}
// TestAsInterpretedString runs a table of inputs through the buffer and
// checks that AsInterpretedString() handles escape sequences the way Go
// would inside double quotes, and that invalid sequences fail.
func TestAsInterpretedString(t *testing.T) {
	var buf parser.StringBuffer
	for _, tc := range []stringbufT{
		{"empty string", "", "", OK},
		{"one character", "Simple string!", "Simple string!", OK},
		{"escaped single quote", `\'`, "", FAIL},
		{"escaped double quote", `\"`, `"`, OK},
		{"bare single quote", `'`, "'", OK},
		{"string in single quotes", `'Hello'`, `'Hello'`, OK},
		{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
		{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
		{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
		{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
		{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
		{"example from spec",
			`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
			"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
	} {
		got, err := buf.Reset().WriteString(tc.in).AsInterpretedString()
		if tc.isSuccessCase && err != nil {
			t.Fatalf("[%s] unexpected error for input %q: %s", tc.name, tc.in, err)
		}
		if !tc.isSuccessCase && err == nil {
			t.Fatalf("[%s] expected a failure, but no failure occurred", tc.name)
		}
		if got != tc.out && tc.isSuccessCase {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", tc.name, tc.in, tc.out, got)
		}
	}
}
// stringbufT describes a single StringBuffer test case.
type stringbufT struct {
	name          string // descriptive name of the test case
	in            string // input that is written to the buffer
	out           string // expected resulting string
	isSuccessCase bool   // whether the conversion is expected to succeed
}
// Aliases that make the success/failure expectation in the test
// tables read naturally.
const (
	OK bool = true
	FAIL bool = false
)

51
parser/types.go Normal file
View File

@ -0,0 +1,51 @@
package parser
// Parser holds the internal state of the Parser.
// Instances must be created through New(), which initializes the
// items channel; the zero value is not usable.
type Parser struct {
	state        StateFn      // a function that handles the current state
	stack        []StateFn    // state function stack, for nested parsing
	input        string       // the scanned input
	len          int          // the total length of the input in bytes
	pos          int          // current byte scanning position in the input
	newline      bool         // keep track of when we have scanned a newline
	cursorRow    int          // current row number in the input
	cursorColumn int          // current column position in the input
	buffer       StringBuffer // an efficient buffer, used to build string values
	items        chan Item    // channel of resulting Parser items
	item         Item         // the current item as reached by Next() and retrieved by Get()
	err          *Error       // an error when lexing failed, retrieved by Error()
}
// StateFn represents the state of the parser as a function
// that returns the next state.
// A nil return value is used after emitting a terminal item
// (see for example EmitError).
type StateFn func(*Parser) StateFn
// ItemType represents the type of a parser Item.
// Concrete parsers define their own item types as non-negative values;
// negative values are reserved for built-in types (ItemEOF, ItemError).
type ItemType int
// Built-in parser item types, grouped in a single const block.
const (
	// ItemEOF is a built-in parser item type that is used for flagging
	// that the end of the input was reached.
	ItemEOF ItemType = -1

	// ItemError is a built-in parser item type that is used for flagging
	// that an error has occurred during parsing.
	ItemError ItemType = -2
)
// Item represents an item returned from the parser.
type Item struct {
	Type  ItemType // the kind of item, e.g. ItemError or a parser-specific type
	Value string   // the string payload that goes with the item type
}
// Error is used as the error type when parsing errors occur.
// The error includes some extra meta information to allow for useful
// error messages to the user.
type Error struct {
	Message string // human-readable description of what went wrong
	Row     int    // cursor row in the input at the time the error was seen
	Column  int    // cursor column in the input at the time the error was seen
}
// Error returns the error message, making *Error satisfy the standard
// error interface. The Row and Column meta data are not included in
// the returned message.
func (err *Error) Error() string {
	return err.Message
}

39
parser/user_api.go Normal file
View File

@ -0,0 +1,39 @@
package parser
// Next retrieves the next parsed item.
// When a valid item was found, then the boolean return parameter will be true.
// On error or when successfully reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise.
func (l *Parser) Next() (Item, *Error, bool) {
	for {
		select {
		case i := <-l.items:
			switch {
			case i.Type == ItemEOF:
				// End of input: not an error, but no item either.
				return i, nil, false
			case i.Type == ItemError:
				// Wrap the error message with the current cursor
				// position and remember it for later retrieval.
				l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
				return i, l.err, false
			default:
				l.item = i
				return i, nil, true
			}
		default:
			// No item buffered yet: run the state machine one step so
			// it can emit one.
			// NOTE(review): this assumes a state function always emits
			// ItemEOF or ItemError before returning nil; if l.state ever
			// becomes nil without a buffered item, this call panics —
			// verify against the state function implementations.
			l.state = l.state(l)
		}
	}
}
// ToArray returns Parser items as an array (mainly intended for testing purposes)
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func (l *Parser) ToArray() ([]Item, *Error) {
	var collected []Item
	for {
		item, parseErr, more := l.Next()
		if !more {
			// Either EOF (parseErr == nil) or a parse error; in both
			// cases the items collected so far are returned.
			return collected, parseErr
		}
		collected = append(collected, item)
	}
}