Removed some source files that are no longer used after the refactoring steps.

This commit is contained in:
Maurice Makaay 2019-05-17 12:46:09 +00:00
parent f86ef2b918
commit db4a8f7942
3 changed files with 0 additions and 454 deletions

View File

@ -1,305 +0,0 @@
package lexer
import (
"fmt"
"strings"
"unicode/utf8"
"github.com/mmakaay/toml/parser"
)
// Lexer holds the state of the lexer.
type Lexer struct {
	input        string           // the scanned input
	state        parser.StateFn   // a function that handles the current state
	stack        []parser.StateFn // state function stack, for nested parsing
	len          int              // the total length of the input in bytes
	pos          int              // current byte scanning position in the input
	newline      bool             // keep track of when we have scanned a newline
	cursorRow    int              // current row number in the input
	cursorColumn int              // current column position in the input
	buffer       StringBuffer     // an efficient buffer, used to build string values
	items        chan parser.Item // buffered channel of resulting lexer items, drained by Next()
	item         parser.Item      // the current item as reached by Next() and retrieved by Get()
	err          *Error           // an error when lexing failed, retrieved by Error()
}
// Error is used as the error type when lexing errors occur.
// Besides the message itself, it carries the row and column in the
// input at which the failure occurred, so useful error messages
// can be presented to the user.
type Error struct {
	Message string
	Row     int
	Column  int
}

// Error implements the standard error interface by returning
// the plain error message.
func (e *Error) Error() string {
	return e.Message
}
// New takes an input string and initializes the lexer for it.
// The lexer starts out in the key/value pair state, with a small
// buffered items channel so the state machine can run slightly
// ahead of the consumer.
func New(input string) *Lexer {
	l := &Lexer{
		input: input,
		len:   len(input),
		state: stateKeyValuePair,
		items: make(chan parser.Item, 2),
	}
	return l
}
// Next advances to the next lexer item in the input string.
// When a valid item was found, then the boolean return parameter will be true.
// On error or when reaching the end of the input, false is returned.
// When an error occurred, it will be set in the error return value, nil otherwise.
func (l *Lexer) Next() (parser.Item, *Error, bool) {
	for {
		select {
		case item := <-l.items:
			switch item.Type {
			case ItemEOF:
				// End of input: no item, no error, no more data.
				return item, nil, false
			case ItemError:
				// Remember the error so Error() can report it later.
				l.err = &Error{Message: item.Value, Row: l.cursorRow, Column: l.cursorColumn}
				return item, l.err, false
			default:
				l.item = item
				return item, nil, true
			}
		default:
			// No item available yet; run the state machine one step.
			l.state = l.state(l)
		}
	}
}
// ToArray returns lexer items as an array (mainly intended for testing purposes)
// When an error occurs during scanning, a partial result will be
// returned, accompanied by the error that occurred.
func (l *Lexer) ToArray() ([]parser.Item, *Error) {
	var collected []parser.Item
	for {
		item, err, ok := l.Next()
		if !ok {
			return collected, err
		}
		collected = append(collected, item)
	}
}
// pushState adds the state function to its stack.
// This is used for implementing nested parsing.
func (l *Lexer) pushState(s stateFn) {
	l.stack = append(l.stack, s)
}
// popState pops the last pushed state from its stack and returns it.
// The caller is responsible for never popping an empty stack.
func (l *Lexer) popState() stateFn {
	n := len(l.stack)
	state := l.stack[n-1]
	l.stack = l.stack[:n-1]
	return state
}
// atEndOfFile returns true when there is no more data available in the input.
func (l *Lexer) atEndOfFile() bool {
	return l.len <= l.pos
}
// emit passes a lexer item back to the client, including the provided string.
// The string buffer is always reset afterwards, so the next item starts
// accumulating from a clean slate.
func (l *Lexer) emit(t parser.ItemType, v string) {
	defer l.buffer.Reset()
	l.items <- parser.Item{Type: t, Value: v}
}
// emitLiteral passes a lexer item back to the client, including the accumulated
// string buffer data as a literal string.
func (l *Lexer) emitLiteral(t parser.ItemType) {
	literal := l.buffer.AsLiteralString()
	l.emit(t, literal)
}
// emitTrimmedLiteral passes a lexer item back to the client, including the
// accumulated string buffer data as a literal string with surrounding
// whitespace trimmed from it.
func (l *Lexer) emitTrimmedLiteral(t parser.ItemType) {
	trimmed := strings.TrimSpace(l.buffer.AsLiteralString())
	l.emit(t, trimmed)
}
// emitInterpreted passes a lexer item back to the client, including the
// accumulated string buffer data as an interpreted string (handling escape
// codes like \n, \t, \uXXXX, etc.)
// This method might return an error, in case there is data in the
// string buffer that is not valid for string interpretation.
// On error, no item is emitted.
func (l *Lexer) emitInterpreted(t parser.ItemType) error {
	interpreted, err := l.buffer.AsInterpretedString()
	if err != nil {
		return err
	}
	l.emit(t, interpreted)
	return nil
}
// emitError emits a lexer error item back to the client, formatted from
// the provided printf-style format string and arguments.
// It returns nil, so state functions can use it as their return value
// to stop the state machine.
func (l *Lexer) emitError(format string, args ...interface{}) stateFn {
	l.emit(ItemError, fmt.Sprintf(format, args...))
	return nil
}
// peek returns but does not advance to the next rune(s) in the input.
// Returns the rune, its width and a boolean. The boolean will be false in case
// no upcoming rune can be peeked (end of data or invalid UTF8 character).
func (l *Lexer) peek() (rune, int, bool) {
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	ok := r != utf8.RuneError
	return r, w, ok
}
// peekMulti takes a peek at multiple upcoming runes in the input.
// Returns a slice of runes, their total byte width and a boolean.
// The boolean will be false in case fewer upcoming runes can be peeked
// than the requested amount (end of data or invalid UTF8 character);
// in that case the runes collected so far are still returned.
func (l *Lexer) peekMulti(amount int) ([]rune, int, bool) {
	var runes []rune
	offset := 0
	for i := 0; i < amount; i++ {
		r, w := utf8.DecodeRuneInString(l.input[l.pos+offset:])
		if r == utf8.RuneError {
			return runes, offset, false
		}
		offset += w
		runes = append(runes, r)
	}
	return runes, offset, true
}
// acceptAny adds the next rune from the input to the string buffer.
// If no rune could be read (end of file or invalid UTF8 data), then
// false is returned.
func (l *Lexer) acceptAny() bool {
	r, ok := l.next()
	if !ok {
		return false
	}
	l.buffer.WriteRune(r)
	return true
}
// accept adds the next rune to the string buffer and returns true if it's
// from the valid set of runes. Otherwise false is returned.
func (l *Lexer) accept(matches ...string) bool {
	return l.progress(func(r rune) { l.buffer.WriteRune(r) }, matches...)
}
// acceptPattern adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided pattern.
// When runes were added then true is returned, false otherwise.
func (l *Lexer) acceptPattern(pattern ...string) bool {
	store := func(r rune) { l.buffer.WriteRune(r) }
	return l.progress(store, pattern...)
}
// progress advances the input position past the upcoming runes, but only
// when they satisfy all provided matches. Each consumed rune is handed to
// the callback and the cursor position is updated accordingly.
// Returns true when the input was advanced, false otherwise.
func (l *Lexer) progress(callback func(rune), matches ...string) bool {
	runes, width, ok := l.match(matches...)
	if !ok {
		return false
	}
	l.pos += width
	for _, r := range runes {
		callback(r)
		l.advanceCursor(r)
	}
	return true
}
// acceptConsecutive adds consecutive runes from the input to the string
// buffer when they match the rune match.
// If any runes were added then true is returned, false otherwise.
func (l *Lexer) acceptConsecutive(match string) bool {
	if !l.accept(match) {
		return false
	}
	// Keep consuming until the match no longer applies.
	for l.accept(match) {
	}
	return true
}
// advanceCursor advances the rune cursor one position in the
// input data. While doing so, it keeps track of newlines,
// so we can report on row + column positions on error.
func (l *Lexer) advanceCursor(r rune) {
	if l.newline {
		// The previous rune ended a line; start a fresh one.
		l.cursorRow++
		l.cursorColumn = 0
	} else {
		l.cursorColumn++
	}
	// Record whether the next advance must move to a new line.
	l.newline = r == '\n'
}
// skipMatching skips runes, but only when all provided matches are satisfied.
// Returns true when one or more runes were skipped.
func (l *Lexer) skipMatching(pattern ...string) bool {
	discard := func(r rune) {}
	return l.progress(discard, pattern...)
}
// skipConsecutive skips consecutive runes from the provided match.
// Returns true when one or more runes were skipped.
func (l *Lexer) skipConsecutive(pattern string) bool {
	if !l.skipMatching(pattern) {
		return false
	}
	// Keep skipping until the pattern no longer applies.
	for l.skipMatching(pattern) {
	}
	return true
}
// upcoming checks if the upcoming runes satisfy the provided rune matches.
// This is a lot like the match method, with the difference that
// this one only returns the boolean value.
func (l *Lexer) upcoming(matches ...string) bool {
	if _, _, ok := l.match(matches...); ok {
		return true
	}
	return false
}
// next returns the next rune from the input and a boolean indicating if
// reading the input was successful.
// When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned and an appropriate error item is emitted.
func (l *Lexer) next() (rune, bool) {
	r, w, ok := l.peek()
	if !ok {
		// A zero width together with RuneError means there was no
		// input left; any other failure is a malformed character.
		if r == utf8.RuneError && w == 0 {
			l.emitError("unexpected end of file")
		} else {
			l.emitError("invalid UTF8 character")
		}
		return r, false
	}
	l.pos += w
	l.advanceCursor(r)
	return r, true
}
// match checks if the upcoming runes satisfy the provided rune matches.
// It returns a slice of runes that were found, their total byte width
// and a boolean indicating whether or not all provided matches matched
// the input data.
func (l *Lexer) match(matches ...string) ([]rune, int, bool) {
	peeked, width, ok := l.peekMulti(len(matches))
	if !ok {
		return peeked, width, false
	}
	for i, set := range matches {
		if !strings.ContainsRune(set, peeked[i]) {
			return peeked, width, false
		}
	}
	return peeked, width, true
}
// unexpectedInputError emits an error item telling the client that an
// unexpected character was read, describing what was expected instead.
// Returns nil so it can be used as a state function return value.
func (l *Lexer) unexpectedInputError(expected string) stateFn {
	// next() takes care of emitting errors for ok == false.
	if r, ok := l.next(); ok {
		// BUG FIX: the message used to be pre-formatted with fmt.Sprintf
		// and then passed as the *format* argument to emitError, which
		// runs Sprintf on it again. A literal '%' in the scanned rune or
		// in `expected` would then corrupt the message (e.g. "%!q(MISSING)").
		// Pass the format and arguments through unformatted instead.
		return l.emitError("unexpected character %q (expected %s)", r, expected)
	}
	return nil
}
// unexpectedEndOfFile emits an error item telling the client that the
// end of the input was reached while more input was expected.
// Returns nil so it can be used as a state function return value.
func (l *Lexer) unexpectedEndOfFile(expected string) stateFn {
	// Message lowercased for consistency with the "unexpected end of file"
	// error emitted by next(), and with Go error-string conventions.
	return l.emitError("unexpected end of file (expected %s)", expected)
}

View File

@ -1,62 +0,0 @@
package lexer
import (
"bytes"
"strconv"
"strings"
)
// StringBuffer is a string buffer implementation, which is used by the lexer
// to efficiently accumulate runes from the input and eventually turn these
// into a string, either literal or interpreted.
type StringBuffer struct {
	buffer bytes.Buffer
}

// Reset clears the buffer contents, in order to build a new string.
// It returns the buffer itself, to allow for method chaining.
func (sb *StringBuffer) Reset() *StringBuffer {
	sb.buffer.Reset()
	return sb
}

// WriteString appends every rune of the input string to the buffer.
// It returns the buffer itself, to allow for method chaining.
func (sb *StringBuffer) WriteString(s string) *StringBuffer {
	for _, r := range s {
		sb.WriteRune(r)
	}
	return sb
}

// WriteRune appends a single rune to the buffer.
// It returns the buffer itself, to allow for method chaining.
func (sb *StringBuffer) WriteRune(r rune) *StringBuffer {
	sb.buffer.WriteRune(r)
	return sb
}

// AsLiteralString returns the buffered data as a literal string.
// Literal means that no escape sequences are processed.
func (sb *StringBuffer) AsLiteralString() string {
	return sb.buffer.String()
}

// AsInterpretedString returns the string in its interpreted form.
// Interpreted means that escape sequences are handled the way Go would
// have handled them inside double quotes, translating for example escape
// sequences like "\n", "\t", "\uXXXX" and "\UXXXXXXXX" into their string
// representations.
// Since the input might contain invalid escape sequences, this method
// also returns an error. When an error is returned, the returned string
// contains the part that could be interpreted up to that point.
func (sb *StringBuffer) AsInterpretedString() (string, error) {
	var out strings.Builder
	remainder := sb.buffer.String()
	for remainder != "" {
		r, _, rest, err := strconv.UnquoteChar(remainder, '"')
		if err != nil {
			return out.String(), err
		}
		out.WriteRune(r)
		remainder = rest
	}
	return out.String(), nil
}

View File

@ -1,87 +0,0 @@
package lexer_test
import "testing"
import "github.com/mmakaay/toml/lexer"
// TestGeneratingStringDoesNotResetBuffer verifies that producing an
// interpreted string leaves the buffer intact, so the same data can
// afterwards still be read back as a literal string.
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
	var b lexer.StringBuffer
	s1, _ := b.WriteString(`hi\nthere`).AsInterpretedString()
	s2 := b.AsLiteralString()
	// FIX: the failure messages contained a stale placeholder (`string"X"`);
	// report the actual expected value instead.
	if s1 != "hi\nthere" {
		t.Fatalf("Did not get expected interpreted string \"hi\\nthere\", but %q", s1)
	}
	if s2 != "hi\\nthere" {
		t.Fatalf("Did not get expected literal string \"hi\\\\nthere\", but %q", s2)
	}
}
// TestResetResetsBuffer verifies that Reset() discards previously
// written data, yielding an empty literal string.
func TestResetResetsBuffer(t *testing.T) {
	var buf lexer.StringBuffer
	result := buf.WriteRune('X').Reset().AsLiteralString()
	if result != "" {
		t.Fatalf("Did not get expected empty string, but %q", result)
	}
}
// TestAsLiteralString runs a table of inputs through the buffer and
// checks that AsLiteralString returns them verbatim, without any
// escape sequence processing.
func TestAsLiteralString(t *testing.T) {
	b := lexer.StringBuffer{}
	cases := []stringbufT{
		{"empty string", ``, ``, OK},
		{"simple string", `Simple string!`, `Simple string!`, OK},
		{"single quote", `'`, `'`, OK},
		{"double quote", `"`, `"`, OK},
		{"escaped single quote", `\'`, `\'`, OK},
		{"escaped double quote", `\"`, `\"`, OK},
		{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
		{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
		{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
	}
	for _, tc := range cases {
		actual := b.Reset().WriteString(tc.in).AsLiteralString()
		if actual != tc.out {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", tc.name, tc.in, tc.out, actual)
		}
	}
}
// TestAsInterpretedString runs a table of inputs through the buffer and
// checks that AsInterpretedString expands escape sequences the way Go
// would inside double quotes, and that invalid sequences yield an error.
func TestAsInterpretedString(t *testing.T) {
	b := lexer.StringBuffer{}
	cases := []stringbufT{
		{"empty string", "", "", OK},
		{"one character", "Simple string!", "Simple string!", OK},
		{"escaped single quote", `\'`, "", FAIL},
		{"escaped double quote", `\"`, `"`, OK},
		{"bare single quote", `'`, "'", OK},
		{"string in single quotes", `'Hello'`, `'Hello'`, OK},
		{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
		{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
		{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
		{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
		{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
		{"example from spec",
			`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
			"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
	}
	for _, tc := range cases {
		actual, err := b.Reset().WriteString(tc.in).AsInterpretedString()
		if tc.isSuccessCase && err != nil {
			t.Fatalf("[%s] unexpected error for input %q: %s", tc.name, tc.in, err)
		}
		if !tc.isSuccessCase && err == nil {
			t.Fatalf("[%s] expected a failure, but no failure occurred", tc.name)
		}
		if actual != tc.out && tc.isSuccessCase {
			t.Fatalf("[%s] %q -> %q failed: actual result = %q", tc.name, tc.in, tc.out, actual)
		}
	}
}
// stringbufT describes a single StringBuffer test case.
type stringbufT struct {
	name          string // descriptive name of the test case
	in            string // input fed to the StringBuffer
	out           string // expected output string
	isSuccessCase bool   // whether interpretation is expected to succeed
}

// Readable aliases for the isSuccessCase field in test tables.
const (
	OK   bool = true
	FAIL bool = false
)