Removed some source files that are no longer used after the refactoring steps.
This commit is contained in:
parent
f86ef2b918
commit
db4a8f7942
305
lexer/lexer.go
305
lexer/lexer.go
|
@ -1,305 +0,0 @@
|
||||||
package lexer
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/mmakaay/toml/parser"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Lexer holds the state of the lexer while it scans an input string.
// Items are produced on the items channel and consumed through Next().
type Lexer struct {
	input        string           // the scanned input
	state        parser.StateFn   // a function that handles the current state
	stack        []parser.StateFn // state function stack, for nested parsing
	len          int              // the total length of the input in bytes
	pos          int              // current byte scanning position in the input
	newline      bool             // keep track of when we have scanned a newline
	cursorRow    int              // current row number in the input (for error reporting)
	cursorColumn int              // current column position in the input (for error reporting)
	buffer       StringBuffer     // an efficient buffer, used to build string values
	items        chan parser.Item // channel of resulting lexer items
	item         parser.Item      // the current item as reached by Next() and retrieved by Get()
	err          *Error           // an error when lexing failed, retrieved by Error()
}
|
|
||||||
|
|
||||||
// Error is used as the error type when lexing errors occur.
// Besides the message itself, it carries the row and column in the
// input at which the error occurred, so useful error messages can be
// presented to the user.
type Error struct {
	Message string
	Row     int
	Column  int
}

// Error implements the standard error interface by returning the
// plain message text.
func (e *Error) Error() string {
	return e.Message
}
|
|
||||||
|
|
||||||
// New takes an input string and initializes the lexer for it.
|
|
||||||
func New(input string) *Lexer {
|
|
||||||
return &Lexer{
|
|
||||||
input: input,
|
|
||||||
len: len(input),
|
|
||||||
state: stateKeyValuePair,
|
|
||||||
items: make(chan parser.Item, 2),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Next advances to the next lexer item in the input string.
// When a valid item was found, then the boolean return parameter will be true.
// On error or when reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise.
func (l *Lexer) Next() (parser.Item, *Error, bool) {
	for {
		select {
		case i := <-l.items:
			switch {
			case i.Type == ItemEOF:
				return i, nil, false
			case i.Type == ItemError:
				// Wrap the error value with the current cursor
				// position for a useful error message.
				l.err = &Error{i.Value, l.cursorRow, l.cursorColumn}
				return i, l.err, false
			default:
				l.item = i
				return i, nil, true
			}
		default:
			// No item is buffered yet: run the current state function,
			// which may emit items and returns the next state.
			l.state = l.state(l)
		}
	}
}
|
|
||||||
|
|
||||||
// ToArray returns lexer items as an array (mainly intended for testing purposes)
|
|
||||||
// When an error occurs during scanning, a partial result will be
|
|
||||||
// returned, accompanied by the error that occurred.
|
|
||||||
func (l *Lexer) ToArray() ([]parser.Item, *Error) {
|
|
||||||
var items []parser.Item
|
|
||||||
for {
|
|
||||||
item, err, more := l.Next()
|
|
||||||
if !more {
|
|
||||||
return items, err
|
|
||||||
}
|
|
||||||
items = append(items, item)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pushState adds the state function to its stack.
|
|
||||||
// This is used for implementing nested parsing.
|
|
||||||
func (l *Lexer) pushState(state stateFn) {
|
|
||||||
l.stack = append(l.stack, state)
|
|
||||||
}
|
|
||||||
|
|
||||||
// popState pops the last pushed state from its stack.
|
|
||||||
func (l *Lexer) popState() stateFn {
|
|
||||||
last := len(l.stack) - 1
|
|
||||||
head, tail := l.stack[:last], l.stack[last]
|
|
||||||
l.stack = head
|
|
||||||
return tail
|
|
||||||
}
|
|
||||||
|
|
||||||
// atEndOfFile returns true when there is no more data available in the input.
|
|
||||||
func (l *Lexer) atEndOfFile() bool {
|
|
||||||
return l.pos >= l.len
|
|
||||||
}
|
|
||||||
|
|
||||||
// emit passes a lexer item back to the client, including the provided string.
|
|
||||||
func (l *Lexer) emit(t parser.ItemType, s string) {
|
|
||||||
l.items <- parser.Item{Type: t, Value: s}
|
|
||||||
l.buffer.Reset()
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitLiteral passes a lexer item back to the client, including the accumulated
|
|
||||||
// string buffer data as a literal string.
|
|
||||||
func (l *Lexer) emitLiteral(t parser.ItemType) {
|
|
||||||
l.emit(t, l.buffer.AsLiteralString())
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitTrimmedLiteral passes a lexer item back to the client, including the
|
|
||||||
// accumulated string buffer data as a literal string with whitespace
|
|
||||||
// trimmed from it.
|
|
||||||
func (l *Lexer) emitTrimmedLiteral(t parser.ItemType) {
|
|
||||||
l.emit(t, strings.TrimSpace(l.buffer.AsLiteralString()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitInterpreted passes a lexer item back to the client, including the
|
|
||||||
// accumulated string buffer data an interpreted string (handling escape
|
|
||||||
// codes like \n, \t, \uXXXX, etc.)
|
|
||||||
// This method might return an error, in case there is data in the
|
|
||||||
// string buffer that is not valid for string interpretation.
|
|
||||||
func (l *Lexer) emitInterpreted(t parser.ItemType) error {
|
|
||||||
s, err := l.buffer.AsInterpretedString()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
l.emit(t, s)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// emitError emits a lexer error item back to the client.
|
|
||||||
func (l *Lexer) emitError(format string, args ...interface{}) stateFn {
|
|
||||||
message := fmt.Sprintf(format, args...)
|
|
||||||
l.emit(ItemError, message)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// peek returns but does not advance to the next rune(s) in the input.
|
|
||||||
// Returns the rune, its width and a boolean. The boolean will be false in case
|
|
||||||
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
|
|
||||||
func (l *Lexer) peek() (rune, int, bool) {
|
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
||||||
return r, w, r != utf8.RuneError
|
|
||||||
}
|
|
||||||
|
|
||||||
// peekMulti takes a peek at multiple upcoming runes in the input.
|
|
||||||
// Returns a slice of runes and a boolean. The boolean will be false in case
|
|
||||||
// less upcoming runes can be peeked than the requested amount
|
|
||||||
// (end of data or invalid UTF8 character).
|
|
||||||
func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
|
|
||||||
width := 0
|
|
||||||
var peeked []rune
|
|
||||||
for i := 0; i < amount; i++ {
|
|
||||||
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:])
|
|
||||||
switch {
|
|
||||||
case r == utf8.RuneError:
|
|
||||||
return peeked, width, false
|
|
||||||
default:
|
|
||||||
width += w
|
|
||||||
peeked = append(peeked, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return peeked, width, true
|
|
||||||
}
|
|
||||||
|
|
||||||
// acceptAny adds the next rune from the input to the string buffer.
|
|
||||||
// If no rune could be read (end of file or invalid UTF8 data), then
|
|
||||||
// false is returned.
|
|
||||||
func (l *Lexer) acceptAny() bool {
|
|
||||||
if r, ok := l.next(); ok {
|
|
||||||
l.buffer.WriteRune(r)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// accept adds the next rune to the string buffer and returns true if it's
// from the valid set of runes. Otherwise false is returned.
// It is a thin convenience alias for acceptPattern.
func (l *Lexer) accept(matches ...string) bool {
	return l.acceptPattern(matches...)
}
|
|
||||||
|
|
||||||
// acceptPattern adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided pattern (one match string
// per expected rune).
// When runes were added then true is returned, false otherwise.
func (l *Lexer) acceptPattern(pattern ...string) bool {
	return l.progress(func(r rune) { l.buffer.WriteRune(r) }, pattern...)
}
|
|
||||||
|
|
||||||
func (l *Lexer) progress(callback func(rune), matches ...string) bool {
|
|
||||||
if runes, w, ok := l.match(matches...); ok {
|
|
||||||
l.pos += w
|
|
||||||
for _, r := range runes {
|
|
||||||
callback(r)
|
|
||||||
l.advanceCursor(r)
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// acceptConsecutive adds consecutive runes from the input to the string
|
|
||||||
// buffer when they match the rune match.
|
|
||||||
// If any runes were added then true is returned, false otherwise.
|
|
||||||
func (l *Lexer) acceptConsecutive(match string) bool {
|
|
||||||
accepted := false
|
|
||||||
for l.accept(match) {
|
|
||||||
accepted = true
|
|
||||||
}
|
|
||||||
return accepted
|
|
||||||
}
|
|
||||||
|
|
||||||
// advanceCursor advances the rune cursor one position in the
|
|
||||||
// input data. While doing so, it keeps tracks of newlines,
|
|
||||||
// so we can report on row + column positions on error.
|
|
||||||
func (l *Lexer) advanceCursor(r rune) {
|
|
||||||
if l.newline {
|
|
||||||
l.cursorColumn = 0
|
|
||||||
l.cursorRow++
|
|
||||||
} else {
|
|
||||||
l.cursorColumn++
|
|
||||||
}
|
|
||||||
l.newline = r == '\n'
|
|
||||||
}
|
|
||||||
|
|
||||||
// skipMatching skips runes, but only when all provided matches are satisfied.
// Returns true when one or more runes were skipped.
func (l *Lexer) skipMatching(pattern ...string) bool {
	return l.progress(func(r rune) {}, pattern...)
}
|
|
||||||
|
|
||||||
// skipConsecutive skips consecutive runes from the provided match.
|
|
||||||
// Returns true when one or more runes were skipped.
|
|
||||||
func (l *Lexer) skipConsecutive(pattern string) bool {
|
|
||||||
didSkip := false
|
|
||||||
for l.skipMatching(pattern) {
|
|
||||||
didSkip = true
|
|
||||||
}
|
|
||||||
return didSkip
|
|
||||||
}
|
|
||||||
|
|
||||||
// upcoming checks if the upcoming runes satisfy the provided rune matches.
|
|
||||||
// This is a lot like the match method, with the difference that
|
|
||||||
// this one only returns the boolean value.
|
|
||||||
func (l *Lexer) upcoming(matches ...string) bool {
|
|
||||||
_, _, ok := l.match(matches...)
|
|
||||||
return ok
|
|
||||||
}
|
|
||||||
|
|
||||||
// next returns the next rune from the input and a boolean indicating if
// reading the input was successful.
// When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned and an error item is emitted.
func (l *Lexer) next() (rune, bool) {
	r, w, ok := l.peek()
	if ok {
		l.pos += w
		l.advanceCursor(r)
		return r, true
	}
	// peek failed: utf8.DecodeRuneInString yields (RuneError, 0) at the
	// end of input and (RuneError, 1) for invalid UTF8, so a width of 0
	// distinguishes end-of-file from a bad character.
	if r == utf8.RuneError && w == 0 {
		l.emitError("unexpected end of file")
	} else {
		l.emitError("invalid UTF8 character")
	}
	return r, false
}
|
|
||||||
|
|
||||||
// match checks if the upcoming runes satisfy the provided rune matches.
|
|
||||||
// It returns a slice of runes that were found, their total byte width
|
|
||||||
// and a boolean indicating whether or not all provided matches matched
|
|
||||||
// the input data.
|
|
||||||
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
|
|
||||||
peeked, width, ok := l.peekMulti(len(matches))
|
|
||||||
if ok {
|
|
||||||
for i, r := range matches {
|
|
||||||
if strings.IndexRune(r, peeked[i]) < 0 {
|
|
||||||
return peeked, width, false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return peeked, width, true
|
|
||||||
}
|
|
||||||
return peeked, width, false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Lexer) unexpectedInputError(expected string) stateFn {
|
|
||||||
// next() takes care of emitting errors for ok == false.
|
|
||||||
if r, ok := l.next(); ok {
|
|
||||||
return l.emitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
|
|
||||||
return l.emitError("Unexpected end of file (expected %s)", expected)
|
|
||||||
}
|
|
|
@ -1,62 +0,0 @@
|
||||||
package lexer
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// StringBuffer is a string buffer implementation, which is used by the
// lexer to efficiently accumulate runes from the input and eventually
// turn these into a string, either literal or interpreted.
type StringBuffer struct {
	buffer bytes.Buffer
}

// Reset resets the string buffer, in order to build a new string.
// It returns the buffer itself, so calls can be chained.
func (sb *StringBuffer) Reset() *StringBuffer {
	sb.buffer.Reset()
	return sb
}

// WriteString adds the runes of the input string to the string buffer.
// It returns the buffer itself, so calls can be chained.
func (sb *StringBuffer) WriteString(s string) *StringBuffer {
	for _, r := range s {
		sb.WriteRune(r)
	}
	return sb
}

// WriteRune adds a single rune to the string buffer.
// It returns the buffer itself, so calls can be chained.
func (sb *StringBuffer) WriteRune(r rune) *StringBuffer {
	sb.buffer.WriteRune(r)
	return sb
}

// AsLiteralString returns the string buffer as a literal string.
// Literal means that no escape sequences are processed.
func (sb *StringBuffer) AsLiteralString() string {
	return sb.buffer.String()
}

// AsInterpretedString returns the string in its interpreted form.
// Interpreted means that escape sequences are handled the way Go would
// have handled them inside double quotes, translating e.g. "\n", "\t",
// "\uXXXX" and "\UXXXXXXXX" into their string representations.
// Since the input might contain invalid escape sequences, this method
// also returns an error. When an error is returned, the returned string
// contains the part that could be interpreted so far.
func (sb *StringBuffer) AsInterpretedString() (string, error) {
	var out strings.Builder
	remaining := sb.buffer.String()
	for remaining != "" {
		r, _, rest, err := strconv.UnquoteChar(remaining, '"')
		if err != nil {
			return out.String(), err
		}
		out.WriteRune(r)
		remaining = rest
	}
	return out.String(), nil
}
|
|
|
@ -1,87 +0,0 @@
|
||||||
package lexer_test
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
import "github.com/mmakaay/toml/lexer"
|
|
||||||
|
|
||||||
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
|
|
||||||
var b lexer.StringBuffer
|
|
||||||
s1, _ := b.WriteString(`hi\nthere`).AsInterpretedString()
|
|
||||||
s2 := b.AsLiteralString()
|
|
||||||
if s1 != "hi\nthere" {
|
|
||||||
t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1)
|
|
||||||
}
|
|
||||||
if s2 != "hi\\nthere" {
|
|
||||||
t.Fatalf("Did not get expected string\"X\" for try 2, but %q", s2)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResetResetsBuffer(t *testing.T) {
|
|
||||||
var b lexer.StringBuffer
|
|
||||||
s := b.WriteRune('X').Reset().AsLiteralString()
|
|
||||||
if s != "" {
|
|
||||||
t.Fatalf("Did not get expected empty string, but %q", s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestAsLiteralString verifies that AsLiteralString returns the buffer
// contents verbatim: escape sequences are NOT interpreted, so each input
// is expected back unchanged.
func TestAsLiteralString(t *testing.T) {
	b := lexer.StringBuffer{}
	for _, c := range []stringbufT{
		{"empty string", ``, ``, OK},
		{"simple string", `Simple string!`, `Simple string!`, OK},
		{"single quote", `'`, `'`, OK},
		{"double quote", `"`, `"`, OK},
		{"escaped single quote", `\'`, `\'`, OK},
		{"escaped double quote", `\"`, `\"`, OK},
		{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
		{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
		{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
	} {
		// Reset before each case so cases don't leak into each other.
		s := b.Reset().WriteString(c.in).AsLiteralString()
		if s != c.out {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
		}
	}
}
|
|
||||||
|
|
||||||
// TestAsInterpretedString verifies that AsInterpretedString handles
// escape sequences (\t, \n, \uXXXX, \UXXXXXXXX, ...) like Go would for a
// double-quoted string, and that invalid sequences (e.g. \') fail.
func TestAsInterpretedString(t *testing.T) {
	b := lexer.StringBuffer{}
	for _, c := range []stringbufT{
		{"empty string", "", "", OK},
		{"one character", "Simple string!", "Simple string!", OK},
		{"escaped single quote", `\'`, "", FAIL},
		{"escaped double quote", `\"`, `"`, OK},
		{"bare single quote", `'`, "'", OK},
		{"string in single quotes", `'Hello'`, `'Hello'`, OK},
		{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
		{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
		{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
		{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
		{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
		{"example from spec",
			`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
			"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
	} {
		// Reset before each case so cases don't leak into each other.
		s, err := b.Reset().WriteString(c.in).AsInterpretedString()
		if c.isSuccessCase && err != nil {
			t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
		}
		if !c.isSuccessCase && err == nil {
			t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
		}
		// Only compare output for success cases; failure cases leave
		// the output unspecified beyond the partial result.
		if s != c.out && c.isSuccessCase {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
		}
	}
}
|
|
||||||
|
|
||||||
// stringbufT is a table-test case for the StringBuffer tests.
type stringbufT struct {
	name          string // descriptive case name, shown in failure output
	in            string // input written to the buffer
	out           string // expected resulting string
	isSuccessCase bool   // whether interpretation is expected to succeed
}

// Readable aliases for the isSuccessCase field in the test tables.
const (
	OK   bool = true
	FAIL bool = false
)
|
|
Loading…
Reference in New Issue