Some huge refactorings before I start moving forward again. I learned a lot about Go in the meantime, and more ideas keep popping up to improve what I've got so far even further.

This commit is contained in:
Maurice Makaay 2019-05-17 19:56:55 +00:00
parent 9f19add210
commit 3f638c59cd
20 changed files with 470 additions and 425 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.vscode .vscode
*-workspace

3
Makefile Normal file
View File

@ -0,0 +1,3 @@
# Run the unit tests of the parser and lexer packages. Each recipe line
# runs in its own shell, so both "cd" commands start from the repo root.
test:
	cd parser && go test
	cd lexer && go test

View File

@ -2,7 +2,7 @@ package lexer
import "github.com/mmakaay/toml/parser" import "github.com/mmakaay/toml/parser"
// Definition of the item types that are emitted by this parser. // Item types that are emitted by this parser.
const ( const (
ItemComment parser.ItemType = iota // An error occurred ItemComment parser.ItemType = iota // An error occurred
ItemKey // Key of a key/value pair ItemKey // Key of a key/value pair
@ -35,6 +35,12 @@ const (
longUtf8Escape string = "U" longUtf8Escape string = "U"
) )
var (
	// doubleQuote3 matches the three consecutive double quotes that
	// delimit a multi-line basic string.
	doubleQuote3 = []string{doubleQuote, doubleQuote, doubleQuote}
	// shortUtf8Match matches a \uXXXX unicode escape sequence.
	shortUtf8Match = []string{backslash, "u", hex, hex, hex, hex}
	// longUtf8Match matches a \UXXXXXXXX unicode escape sequence.
	longUtf8Match = []string{backslash, "U", hex, hex, hex, hex, hex, hex, hex, hex}
)
// NewParser creates a new parser, using the provided input string // NewParser creates a new parser, using the provided input string
// as the data to parse. // as the data to parse.
func NewParser(input string) *parser.Parser { func NewParser(input string) *parser.Parser {

75
lexer/helpers_test.go Normal file
View File

@ -0,0 +1,75 @@
package lexer_test
import (
"fmt"
"strings"
"testing"
"github.com/mmakaay/toml/lexer"
"github.com/mmakaay/toml/parser"
)
// statesT describes a single lexer test case.
type statesT struct {
	name string      // descriptive name of the case, used in failure messages
	in   string      // the input that is fed to the lexer
	out  interface{} // expected output: []string (one per item) or string (all items joined)
	err  string      // expected error message, or "" when no error is expected
}
// runStatesTs feeds every case in the provided test table to runStatesT.
func runStatesTs(t *testing.T, tests []statesT) {
	for i := 0; i < len(tests); i++ {
		runStatesT(t, tests[i])
	}
}
// runStatesT runs a single lexer test case: it lexes the input,
// compares the resulting error (if any) against the expected error,
// and compares the emitted lexer items against the expected output.
//
// The expected output (c.out) can be provided in two forms:
// * []string: every emitted item must match the entry at the same index;
// * string: the string representations of all emitted items, joined
//   together, must equal the expected string.
func runStatesT(t *testing.T, c statesT) {
	l, err := lexer.NewParser(c.in).ToArray()
	if err == nil && c.err != "" {
		t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
	}
	if err != nil && c.err == "" {
		t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
	}
	if err != nil && c.err != "" && err.Error() != c.err {
		t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
	}
	switch expected := c.out.(type) {
	case []string:
		if len(expected) != len(l) {
			t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
		}
		for i, e := range expected {
			v := ParserItemToString(l[i])
			if v != e {
				t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
			}
		}
	case string:
		// The slice must be created with length 0 and capacity len(l);
		// make([]string, len(l)) followed by append would prepend
		// len(l) empty strings to the collected representations.
		a := make([]string, 0, len(l))
		for _, v := range l {
			a = append(a, ParserItemToString(v))
		}
		actual := strings.Join(a, "")
		if actual != expected {
			t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
		}
	}
}
// ParserItemToString renders a parser.Item as a compact string, which
// makes expected test output easy to write and compare:
// comments become #(...), keys become [...], strings become STR(...),
// key dots become "." and assignments become "=".
// It panics when it sees an item type it does not know about.
func ParserItemToString(item parser.Item) string {
	switch item.Type {
	case lexer.ItemKeyDot:
		return "."
	case lexer.ItemAssignment:
		return "="
	case lexer.ItemComment:
		return fmt.Sprintf("#(%s)", item.Value)
	case lexer.ItemKey:
		return fmt.Sprintf("[%s]", item.Value)
	case lexer.ItemString:
		return fmt.Sprintf("STR(%s)", item.Value)
	}
	panic(fmt.Sprintf("No string representation available for parser.Item id %d", item.Type))
}

43
lexer/lexer_test.go Normal file
View File

@ -0,0 +1,43 @@
package lexer_test
import (
"testing"
"github.com/mmakaay/toml/lexer"
)
// TestErrorsIncludeLineAndRowPosition checks that a lexer error
// reports the row and column at which the offending input was found.
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
	input := "# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc"
	_, err := lexer.NewParser(input).ToArray()
	t.Logf("Got error: %s", err.Error())
	const wantRow, wantCol = 4, 6
	if err.Row != wantRow {
		t.Errorf("Unexpected line number: %d (expected %d)", err.Row, wantRow)
	}
	if err.Column != wantCol {
		t.Errorf("Unexpected line position: %d (expected %d)", err.Column, wantCol)
	}
}
// TestEmptyInput checks that lexing an empty string yields no items
// and no error.
func TestEmptyInput(t *testing.T) {
	runStatesT(t, statesT{"empty string", "", "", ""})
}
// TestInvalidUtf8Data checks that invalid UTF8 input is rejected in
// every lexer state: comments, keys, assignments and string values.
func TestInvalidUtf8Data(t *testing.T) {
	const invalidUtf8 = "invalid UTF8 character"
	runStatesTs(t, []statesT{
		{"inside comment", "# \xbc", "", invalidUtf8},
		{"bare key 1", "\xbc", "", invalidUtf8},
		{"bare key 2", "key\xbc", "[key]", invalidUtf8},
		{"assignment", "key \xbc", "[key]", invalidUtf8},
		{"start of value", "key=\xbc", "[key]=", invalidUtf8},
		{"basic string value", "a=\"\xbc\"", "[a]=", invalidUtf8},
	})
}
func TestWhiteSpaceAndNewlines(t *testing.T) {
runStatesTs(t, []statesT{
{"space", " ", "", ""},
{"tab", "\t", "", ""},
{"newline", "\n", "", ""},
{"carriage return", "\r", "", ""},
{"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""},
})
}

View File

@ -1,218 +0,0 @@
package lexer_test
import (
"fmt"
"strings"
"testing"
"github.com/mmakaay/toml/lexer"
"github.com/mmakaay/toml/parser"
)
func TestErrorsIncludeLineAndRowPosition(t *testing.T) {
_, err := lexer.NewParser("# 12345 abcde\t\n\n\n# 67890\r\n# 12345\xbc").ToArray()
t.Logf("Got error: %s", err.Error())
if err.Row != 4 {
t.Errorf("Unexpected line number: %d (expected %d)", err.Row, 4)
}
if err.Column != 6 {
t.Errorf("Unexpected line position: %d (expected %d)", err.Column, 6)
}
}
func TestEmptyInput(t *testing.T) {
runStatesT(t, statesT{"empty string", "", "", ""})
}
func TestInvalidUtf8Data(t *testing.T) {
runStatesTs(t, []statesT{
{"inside comment", "# \xbc", "", "invalid UTF8 character"},
{"bare key 1", "\xbc", "", "invalid UTF8 character"},
{"bare key 2", "key\xbc", "[key]", "invalid UTF8 character"},
{"assignment", "key \xbc", "[key]", "invalid UTF8 character"},
{"start of value", "key=\xbc", "[key]=", "invalid UTF8 character"},
{"basic string value", "a=\"\xbc\"", "[a]=", "invalid UTF8 character"},
})
}
func TestWhiteSpaceAndNewlines(t *testing.T) {
runStatesTs(t, []statesT{
{"space", " ", "", ""},
{"tab", "\t", "", ""},
{"newline", "\n", "", ""},
{"carriage return", "\r", "", ""},
{"all whitespace and newlines", " \t \t \r\r\n\n \n \t", "", ""},
})
}
func TestComments(t *testing.T) {
runStatesTs(t, []statesT{
{"empty comment", "#", "#()", ""},
{"empty comment with spaces", "# \t \r\n", `#()`, ""},
{"basic comment", "#chicken", "#(chicken)", ""},
{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
})
}
func TestKeyWithoutAssignment(t *testing.T) {
err := "unexpected end of file"
runStatesTs(t, []statesT{
{"bare with whitespace", " a ", "[a]", err},
{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
{"bare numbers", "0123456789", "[0123456789]", err},
{"bare underscore", "_", "[_]", err},
{"bare dash", "-", "[-]", err},
{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
{"bare dotted", "a._.c", "[a].[_].[c]", err},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
})
}
func TestKeyWithAssignmentButNoValue(t *testing.T) {
err := "unexpected end of file"
runStatesTs(t, []statesT{
{"bare", "a=", "[a]=", err},
{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
{"bare dotted", "a.b=", "[a].[b]=", err},
{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
})
}
func TestUnterminatedBasicString(t *testing.T) {
runStatesT(t, statesT{
"missing closing quote", `a="value`, "[a]=",
"Unexpected end of file (expected basic string token)"})
}
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
runStatesTs(t, []statesT{
{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
})
// No need to write all test cases for disallowed characters by hand.
for i := 0x00; i <= 0x1F; i++ {
name := fmt.Sprintf("control character %x", rune(i))
runStatesT(
t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
}
}
func TestEmptyBasicString(t *testing.T) {
runStatesTs(t, []statesT{
{"empty", `a=""`, "[a]=STR()", ""},
{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
})
}
func TestBasicString(t *testing.T) {
runStatesTs(t, []statesT{
{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
})
}
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
err := "Invalid escape sequence in basic string"
runStatesTs(t, []statesT{
{"invalid escape sequence", `a="\x"`, "[a]=", err},
{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
})
}
func TestBasicStringEscapes(t *testing.T) {
runStatesTs(t, []statesT{
{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
})
}
type statesT struct {
name string
in string
out interface{}
err string
}
func runStatesTs(t *testing.T, tests []statesT) {
for _, c := range tests {
runStatesT(t, c)
}
}
func runStatesT(t *testing.T, c statesT) {
l, err := lexer.NewParser(c.in).ToArray()
if err == nil && c.err != "" {
t.Errorf("[%s] Expected error '%s', but no error occurred", c.name, c.err)
}
if err != nil && c.err == "" {
t.Errorf("[%s] Expected no error, but got error '%s'", c.name, err)
}
if err != nil && c.err != "" && err.Error() != c.err {
t.Errorf("[%s] Got an unexpected error:\nexpected: %s\nactual: %s\n", c.name, c.err, err)
}
switch expected := c.out.(type) {
case []string:
if len(expected) != len(l) {
t.Errorf("[%s] Unexpected number of lexer items:\nexpected: %d\nactual: %d\n", c.name, len(expected), len(l))
}
for i, e := range expected {
v := ParserItemToString(l[i])
if v != e {
t.Errorf("[%s] Unexpected lexer item at index %d:\nexpected: %s\nactual: %s\n", c.name, i, e, v)
}
}
case string:
a := make([]string, len(l))
for _, v := range l {
a = append(a, ParserItemToString(v))
}
actual := strings.Join(a, "")
if actual != expected {
t.Errorf("[%s] Unexpected lexer output:\nexpected: %s\nactual: %s\n", c.name, expected, actual)
}
}
}
// ParserItemToString returns a string representation of the
// parser.Item. This is used for unit testing purposes.
func ParserItemToString(i parser.Item) string {
switch i.Type {
case lexer.ItemComment:
return fmt.Sprintf("#(%s)", i.Value)
case lexer.ItemKey:
return fmt.Sprintf("[%s]", i.Value)
case lexer.ItemString:
return fmt.Sprintf("STR(%s)", i.Value)
case lexer.ItemKeyDot:
return "."
case lexer.ItemAssignment:
return "="
default:
panic(fmt.Sprintf("No string representation available for parser.Item id %d", i.Type))
}
}

View File

@ -1,88 +0,0 @@
package lexer
import "github.com/mmakaay/toml/parser"
// There are four ways to express strings: basic, multi-line basic, literal,
// and multi-line literal. All strings must contain only valid UTF-8 characters.
func stateStringValue(l *parser.Parser) parser.StateFn {
switch {
case l.SkipMatching(doubleQuote, doubleQuote, doubleQuote):
// Multi-line basic strings are surrounded by three quotation marks on each side.
return stateMultiLineBasicString
case l.SkipMatching(doubleQuote):
// Basic strings are surrounded by quotation marks.
return stateSingleLineBasicString
}
return l.UnexpectedInputError("a string value")
}
func stateSingleLineBasicString(l *parser.Parser) parser.StateFn {
if l.Upcoming(doubleQuote, doubleQuote) {
return stateMultiLineBasicString
}
return stateBasicString
}
func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
l.EmitError("Not yet implemented")
return nil
}
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
const invalidBasicStringCharacters string = "\"\\" +
"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
"\u007F"
func stateParseBasicString(l *parser.Parser) parser.StateFn {
for {
switch {
case l.AtEndOfFile():
return l.UnexpectedEndOfFile("basic string token")
case l.SkipMatching(doubleQuote):
return l.PopState()
case l.AcceptMatching(backslash, escapeChars):
// For convenience, some popular characters have a compact escape sequence.
// \b - backspace (U+0008)
// \t - tab (U+0009)
// \n - linefeed (U+000A)
// \f - form feed (U+000C)
// \r - carriage return (U+000D)
// \" - quote (U+0022)
// \\ - backslash (U+005C)
case l.AcceptMatching(backslash, shortUtf8Escape, hex, hex, hex, hex):
// \uXXXX - unicode (U+XXXX)
case l.AcceptMatching(backslash, longUtf8Escape, hex, hex, hex, hex, hex, hex, hex, hex):
// \UXXXXXXXX - unicode (U+XXXXXXXX)
case l.Upcoming(backslash):
// All other escape sequences not listed above are reserved and,
// if used, TOML should produce an error.
return l.EmitError("Invalid escape sequence in basic string")
case l.Upcoming(invalidBasicStringCharacters):
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
r, _, _ := l.Match(invalidBasicStringCharacters)
l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
return nil
default:
if !l.AcceptAny() {
return nil
}
}
}
}
func stateBasicString(l *parser.Parser) parser.StateFn {
l.PushState(func(l *parser.Parser) parser.StateFn {
err := l.EmitInterpreted(ItemString)
if err != nil {
l.EmitError("Invalid data in string: %s", err)
return nil
}
return stateKeyValuePair
})
return stateParseBasicString
}

View File

@ -0,0 +1,20 @@
package lexer_test
import (
"testing"
)
// TestComments checks lexing of comments. The expectations encode that
// surrounding whitespace is trimmed, that extra hash characters at the
// start are not part of the comment text, and that escape-like
// sequences and embedded carriage returns are taken literally.
func TestComments(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty comment", "#", "#()", ""},
		{"empty comment with spaces", "# \t \r\n", `#()`, ""},
		{"basic comment", "#chicken", "#(chicken)", ""},
		{"basic comment starting after whitespace", "# \tchicken", "#(chicken)", ""},
		{"basic comment with surrounding whitespace", "#\t cow \t", "#(cow)", ""},
		{"two lines of comments", "# one \r\n#two", "#(one)#(two)", ""},
		{"comment with escape-y chars", `# \xxx/ \u can't escape/`, `#(\xxx/ \u can't escape/)`, ""},
		{"comment with multiple hashes", `#### Just Jack!`, `#(Just Jack!)`, ""},
		{"comment with hashes inside", `# Follow #me2`, `#(Follow #me2)`, ""},
		{"carriage returns in comment", "# \tlexe\r accepts embedded ca\r\riage \returns\r", "#(lexe\r accepts embedded ca\r\riage \returns)", ""},
	})
}

View File

@ -10,7 +10,7 @@ func stateKeyValuePair(l *parser.Parser) parser.StateFn {
case l.Upcoming(hash): case l.Upcoming(hash):
return l.ToChildState(stateCommentStart) return l.ToChildState(stateCommentStart)
case l.Upcoming(startOfKey): case l.Upcoming(startOfKey):
return l.ToChildState(stateKey) return stateKey
default: default:
return stateEndOfFile return stateEndOfFile
} }
@ -61,13 +61,3 @@ func stateKeyAssignment(l *parser.Parser) parser.StateFn {
} }
return l.UnexpectedInputError("a value assignment") return l.UnexpectedInputError("a value assignment")
} }
// Values must be of the following types: String, Integer, Float, Boolean,
// Datetime, Array, or Inline Table. Unspecified values are invalid.
func stateValue(l *parser.Parser) parser.StateFn {
l.SkipConsecutive(whitespace)
if l.Upcoming(quoteChars) {
return stateStringValue
}
return l.UnexpectedInputError("a value")
}

36
lexer/syn_key_test.go Normal file
View File

@ -0,0 +1,36 @@
package lexer_test
import (
"testing"
)
// TestKeyWithoutAssignment checks lexing of bare keys (letters, digits,
// underscores and dashes). Since no value assignment follows the key,
// lexing must stop with an "unexpected end of file" error.
func TestKeyWithoutAssignment(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare with whitespace", " a ", "[a]", err},
		{"bare lower", "abcdefghijklmnopqrstuvwxyz", "[abcdefghijklmnopqrstuvwxyz]", err},
		{"bare upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "[ABCDEFGHIJKLMNOPQRSTUVWXYZ]", err},
		{"bare numbers", "0123456789", "[0123456789]", err},
		{"bare underscore", "_", "[_]", err},
		{"bare dash", "-", "[-]", err},
		{"bare big mix", "-hey_good_Lookin123-", "[-hey_good_Lookin123-]", err},
	})
}
func TestDottedKey(t *testing.T) {
err := "unexpected end of file"
runStatesTs(t, []statesT{
{"bare dotted", "a._.c", "[a].[_].[c]", err},
{"bare dotted with whitespace", " a .\t\t b\t ", "[a].[b]", err},
})
}
// TestKeyWithAssignmentButNoValue checks lexing of a key followed by an
// equals sign but no value. A missing value yields an "unexpected end
// of file" error; a second equals sign is rejected explicitly.
func TestKeyWithAssignmentButNoValue(t *testing.T) {
	err := "unexpected end of file"
	runStatesTs(t, []statesT{
		{"bare", "a=", "[a]=", err},
		{"double equal sign", "a==", "[a]=", "unexpected character '=' (expected a value)"},
		{"bare dotted", "a.b=", "[a].[b]=", err},
		{"bare dotted with whitespace", " a .\tb\t = ", "[a].[b]=", err},
	})
}

84
lexer/syn_strings.go Normal file
View File

@ -0,0 +1,84 @@
package lexer
import "github.com/mmakaay/toml/parser"
// There are four ways to express strings: basic, multi-line basic,
// literal, and multi-line literal. All strings must contain only valid
// UTF-8 characters.
// stateStringValue dispatches on the opening quotes:
// * three double quotes open a multi-line basic string;
// * a single double quote opens a basic string.
// Anything else is reported as unexpected input.
func stateStringValue(l *parser.Parser) parser.StateFn {
	if l.SkipMatching(doubleQuote3...) {
		return stateMultiLineBasicString
	}
	if l.SkipMatching(doubleQuote) {
		return l.QueueStates(stateParseString, stateBasicStringSpecific)
	}
	return l.UnexpectedInputError("a string value")
}
// stateBasicStringSpecific handles the input that is specific to basic
// (single-line) strings:
// * a double quote terminates the string, after which the buffered
//   data is emitted as an interpreted string;
// * a backslash at this point means the generic string state did not
//   recognize the escape sequence; per the spec, "All other escape
//   sequences [..] are reserved and, if used, TOML should produce an
//   error.", so an error is emitted;
// * otherwise, generic string parsing continues.
func stateBasicStringSpecific(p *parser.Parser) parser.StateFn {
	if p.SkipMatching(doubleQuote) {
		if err := p.EmitInterpreted(ItemString); err != nil {
			return p.EmitError("Invalid data in string: %s", err)
		}
		return stateKeyValuePair
	}
	if p.Upcoming(backslash) {
		return p.EmitError("Invalid escape sequence")
	}
	return p.QueueStates(stateParseString, stateBasicStringSpecific)
}
// stateMultiLineBasicString will handle multi-line basic strings.
// Multi-line parsing has not been implemented yet, so for now this
// state always emits an error.
func stateMultiLineBasicString(l *parser.Parser) parser.StateFn {
	// EmitError emits the error item and returns a nil StateFn, so its
	// result can be returned directly; this matches how the other
	// states in this file use it.
	return l.EmitError("Not yet implemented")
}
// Any Unicode character may be used except those that must be escaped:
// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
// This constant enumerates exactly those disallowed characters, so it
// can be used as a match pattern for detecting them.
const invalidBasicStringCharacters string = "\"\\" +
	"\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" +
	"\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F" +
	"\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" +
	"\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F" +
	"\u007F"
// stateParseString consumes string content that is shared between the
// string variants: recognized escape sequences and plain characters.
// It returns to the parent state when it runs into a character that
// needs string-type-specific handling (a backslash that starts a
// reserved escape sequence, or a double quote).
// NOTE(review): the case order is significant — the escape-sequence
// cases must be tried before the bare backslash/doubleQuote case.
func stateParseString(l *parser.Parser) parser.StateFn {
	switch {
	case l.AtEndOfFile():
		return l.UnexpectedEndOfFile("basic string token")
	case l.AcceptMatching(backslash, escapeChars):
		// For convenience, some popular characters have a compact escape sequence.
		// \b - backspace (U+0008)
		// \t - tab (U+0009)
		// \n - linefeed (U+000A)
		// \f - form feed (U+000C)
		// \r - carriage return (U+000D)
		// \" - quote (U+0022)
		// \\ - backslash (U+005C)
	case l.AcceptMatching(shortUtf8Match...):
		// \uXXXX - unicode (U+XXXX)
	case l.AcceptMatching(longUtf8Match...):
		// \UXXXXXXXX - unicode (U+XXXXXXXX)
	case l.Upcoming(backslash) || l.Upcoming(doubleQuote):
		// Returning to the parent state to have special cases handled,
		// because there are differences between single and multi line strings.
		return l.ToParentState()
	case l.Upcoming(invalidBasicStringCharacters):
		// Any Unicode character may be used except those that must be escaped:
		// quotation mark, backslash, and the control characters (U+0000 to U+001F, U+007F).
		r, _, _ := l.Match(invalidBasicStringCharacters)
		l.EmitError("Invalid character in basic string: %q (must be escaped)", r[0])
		return nil
	default:
		// A plain character: accept it into the string buffer.
		l.AcceptAny()
	}
	// Loop by re-entering this state for the next piece of input.
	return stateParseString
}

73
lexer/syn_strings_test.go Normal file
View File

@ -0,0 +1,73 @@
package lexer_test
import (
"fmt"
"testing"
)
func TestUnterminatedBasicString(t *testing.T) {
runStatesT(t, statesT{
"missing closing quote", `a="value`, "[a]=",
"Unexpected end of file (expected basic string token)"})
}
// TestBasicStringWithUnescapedControlCharacters checks that basic
// strings reject unescaped control characters (U+0000 to U+001F and
// U+007F), which must be escaped according to the TOML specification.
func TestBasicStringWithUnescapedControlCharacters(t *testing.T) {
	runStatesTs(t, []statesT{
		{"null char", "a=\"\u0000\"", "[a]=", `Invalid character in basic string: '\x00' (must be escaped)`},
		{"newline", "a=\"b\nc\nd\"", "[a]=", `Invalid character in basic string: '\n' (must be escaped)`},
		{"delete", "a=\"\u007F\"", "[a]=", `Invalid character in basic string: '\u007f' (must be escaped)`},
	})
	// No need to write all test cases for disallowed characters by hand.
	// This loop generates a case for every character in U+0000..U+001F.
	for i := 0x00; i <= 0x1F; i++ {
		name := fmt.Sprintf("control character %x", rune(i))
		runStatesT(
			t, statesT{name, fmt.Sprintf(`_="%c"`, rune(i)), "[_]=",
				fmt.Sprintf(`Invalid character in basic string: %q (must be escaped)`, rune(i))})
	}
}
// TestEmptyBasicString checks lexing of empty basic strings ("") in
// various positions: standalone, followed by a comment, surrounded by
// whitespace, after a dotted key, and in multiple assignments on one
// or several lines.
func TestEmptyBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"empty", `a=""`, "[a]=STR()", ""},
		{"with comment", `a="" #cool`, "[a]=STR()#(cool)", ""},
		{"with whitespaces", ` a = "" `, "[a]=STR()", ""},
		{"dotted", ` a.b = "" `, "[a].[b]=STR()", ""},
		{"multiple same line", `a=""b=""`, "[a]=STR()[b]=STR()", ""},
		{"multiple lines", "a=\"\" \n b = \"\" ", "[a]=STR()[b]=STR()", ""},
	})
}
// TestBasicString checks lexing of non-empty basic strings, both with
// plain ASCII content and with multi-byte UTF-8 content.
func TestBasicString(t *testing.T) {
	runStatesTs(t, []statesT{
		{"ascii value", `_ = "Nothing fancy!"`, "[_]=STR(Nothing fancy!)", ""},
		{"UTF8 value", `_ = "A cool ƃuıɹʇs" # what!?`, "[_]=STR(A cool ƃuıɹʇs)#(what!?)", ""},
	})
}
// TestBasicStringWithInvalidEscapeSequence checks that reserved escape
// sequences, and \u / \U escapes with too few digits or non-hex
// digits, are rejected.
func TestBasicStringWithInvalidEscapeSequence(t *testing.T) {
	err := "Invalid escape sequence"
	runStatesTs(t, []statesT{
		{"invalid escape sequence", `a="\x"`, "[a]=", err},
		{"too short \\u UTF8", `a="\u123"`, "[a]=", err},
		{"invalid hex in \\u UTF8", `a="\u000P"`, "[a]=", err},
		{"too short \\U UTF8", `a="\U1234567"`, "[a]=", err},
		{"invalid hex in \\U UTF8", `a="\U0000000P"`, "[a]=", err},
	})
}
// TestBasicStringEscapes checks that the compact escape sequences
// (\b, \t, \n, \f, \r, \", \\) and the \uXXXX / \UXXXXXXXX unicode
// escapes are interpreted into their actual characters.
func TestBasicStringEscapes(t *testing.T) {
	runStatesTs(t, []statesT{
		{"bell escape", `_="\b"`, "[_]=STR(\b)", ""},
		{"tab escape", `_="\t"`, "[_]=STR(\t)", ""},
		{"newline escape", `_="\n"`, "[_]=STR(\n)", ""},
		{"form feed escape", `_="\f"`, "[_]=STR(\f)", ""},
		{"carriage return escape", `_="\r"`, "[_]=STR(\r)", ""},
		{"double quote escape", `_="\""`, `[_]=STR(")`, ""},
		{"backslash escape", `_="\\"`, `[_]=STR(\)`, ""},
		{"mix of escapes", `_="\b\t\nhuh\f\r\""`, "[_]=STR(\b\t\nhuh\f\r\")", ""},
		{"UTF8 escape short", `_="\u2318"`, "[_]=STR(⌘)", ""},
		{"UTF8 escape long", `_="\U0001014D"`, "[_]=STR(𐅍)", ""},
		{"UTF8 vertical tab", `_="\u000B"`, "[_]=STR(\v)", ""},
	})
}

13
lexer/syn_value.go Normal file
View File

@ -0,0 +1,13 @@
package lexer
import "github.com/mmakaay/toml/parser"
// Values must be of the following types: String, Integer, Float,
// Boolean, Datetime, Array, or Inline Table. Unspecified values are
// invalid. Currently only string values (opened by a quote character)
// are recognized; anything else is reported as unexpected input.
func stateValue(l *parser.Parser) parser.StateFn {
	l.SkipConsecutive(whitespace)
	if !l.Upcoming(quoteChars) {
		return l.UnexpectedInputError("a value")
	}
	return stateStringValue
}

View File

@ -17,33 +17,9 @@ func New(input string, startState StateFn) *Parser {
} }
} }
func (p *Parser) ToChildState(state StateFn) StateFn {
p.PushState(p.state)
return state
}
func (p *Parser) ToParentState() StateFn {
state := p.PopState()
return state
}
// PushState adds the state function to the state stack.
// This is used for implementing nested parsing.
func (l *Parser) PushState(state StateFn) {
l.stack = append(l.stack, state)
}
// PopState pops the last pushed state from the state stack.
func (l *Parser) PopState() StateFn {
last := len(l.stack) - 1
head, tail := l.stack[:last], l.stack[last]
l.stack = head
return tail
}
// AtEndOfFile returns true when there is no more data available in the input. // AtEndOfFile returns true when there is no more data available in the input.
func (l *Parser) AtEndOfFile() bool { func (p *Parser) AtEndOfFile() bool {
return l.pos >= l.len return p.pos >= p.len
} }
func (p *Parser) AtEndOfLine() bool { func (p *Parser) AtEndOfLine() bool {
@ -67,29 +43,29 @@ func (p *Parser) AcceptEndOfLine() bool {
// normalized newline, which is just a '\n'. This will normalize // normalized newline, which is just a '\n'. This will normalize
// '\r\n' into '\n'. // '\r\n' into '\n'.
if p.SkipEndOfLine() { if p.SkipEndOfLine() {
p.buffer.WriteRune('\n') p.buffer.writeRune('\n')
return true return true
} }
return false return false
} }
// Emit passes a Parser item to the client, including the provided string. // Emit passes a Parser item to the client, including the provided string.
func (l *Parser) Emit(t ItemType, s string) { func (p *Parser) Emit(t ItemType, s string) {
l.items <- Item{t, s} p.items <- Item{t, s}
l.buffer.Reset() p.buffer.reset()
} }
// EmitLiteral passes a Parser item to the client, including the accumulated // EmitLiteral passes a Parser item to the client, including the accumulated
// string buffer data as a literal string. // string buffer data as a literal string.
func (l *Parser) EmitLiteral(t ItemType) { func (p *Parser) EmitLiteral(t ItemType) {
l.Emit(t, l.buffer.AsLiteralString()) p.Emit(t, p.buffer.asLiteralString())
} }
// EmitLiteralTrim passes a Parser item to the client, including the // EmitLiteralTrim passes a Parser item to the client, including the
// accumulated string buffer data as a literal string with whitespace // accumulated string buffer data as a literal string with whitespace
// trimmed from it. // trimmed from it.
func (l *Parser) EmitLiteralTrim(t ItemType) { func (p *Parser) EmitLiteralTrim(t ItemType) {
l.Emit(t, strings.TrimSpace(l.buffer.AsLiteralString())) p.Emit(t, strings.TrimSpace(p.buffer.asLiteralString()))
} }
// EmitInterpreted passes a Parser item to the client, including the // EmitInterpreted passes a Parser item to the client, including the
@ -97,19 +73,19 @@ func (l *Parser) EmitLiteralTrim(t ItemType) {
// (handling escape codes like \n, \t, \uXXXX, etc.) // (handling escape codes like \n, \t, \uXXXX, etc.)
// This method might return an error, in case there is data in the // This method might return an error, in case there is data in the
// string buffer that is not valid for string interpretation. // string buffer that is not valid for string interpretation.
func (l *Parser) EmitInterpreted(t ItemType) error { func (p *Parser) EmitInterpreted(t ItemType) error {
s, err := l.buffer.AsInterpretedString() s, err := p.buffer.asInterpretedString()
if err != nil { if err != nil {
return err return err
} }
l.Emit(t, s) p.Emit(t, s)
return nil return nil
} }
// EmitError emits a Parser error item to the client. // EmitError emits a Parser error item to the client.
func (l *Parser) EmitError(format string, args ...interface{}) StateFn { func (p *Parser) EmitError(format string, args ...interface{}) StateFn {
message := fmt.Sprintf(format, args...) message := fmt.Sprintf(format, args...)
l.Emit(ItemError, message) p.Emit(ItemError, message)
return nil return nil
} }
@ -117,8 +93,8 @@ func (l *Parser) EmitError(format string, args ...interface{}) StateFn {
// It returns a slice of runes that were found, their total byte width // It returns a slice of runes that were found, their total byte width
// and a boolean indicating whether or not all provided patterns were // and a boolean indicating whether or not all provided patterns were
// satisfied by the input data. // satisfied by the input data.
func (l *Parser) Match(patterns ...string) ([]rune, int, bool) { func (p *Parser) Match(patterns ...string) ([]rune, int, bool) {
peeked, width, ok := l.peekMulti(len(patterns)) peeked, width, ok := p.peekMulti(len(patterns))
if ok { if ok {
for i, r := range patterns { for i, r := range patterns {
if strings.IndexRune(r, peeked[i]) < 0 { if strings.IndexRune(r, peeked[i]) < 0 {
@ -132,17 +108,17 @@ func (l *Parser) Match(patterns ...string) ([]rune, int, bool) {
// Upcoming checks if the upcoming runes satisfy all provided patterns. // Upcoming checks if the upcoming runes satisfy all provided patterns.
// Returns true if all provided patterns are satisfied. // Returns true if all provided patterns are satisfied.
func (l *Parser) Upcoming(patterns ...string) bool { func (p *Parser) Upcoming(patterns ...string) bool {
_, _, ok := l.Match(patterns...) _, _, ok := p.Match(patterns...)
return ok return ok
} }
// AcceptAny adds the next rune from the input to the string buffer. // AcceptAny adds the next rune from the input to the string buffer.
// If no rune could be read (end of file or invalid UTF8 data), // If no rune could be read (end of file or invalid UTF8 data),
// then false is returned. // then false is returned.
func (l *Parser) AcceptAny() bool { func (p *Parser) AcceptAny() bool {
if r, ok := l.next(); ok { if r, ok := p.next(); ok {
l.buffer.WriteRune(r) p.buffer.writeRune(r)
return true return true
} }
return false return false
@ -151,16 +127,16 @@ func (l *Parser) AcceptAny() bool {
// AcceptMatching adds the next runes to the string buffer, but only // AcceptMatching adds the next runes to the string buffer, but only
// if the upcoming runes satisfy the provided patterns. // if the upcoming runes satisfy the provided patterns.
// When runes were added then true is returned, false otherwise. // When runes were added then true is returned, false otherwise.
func (l *Parser) AcceptMatching(patterns ...string) bool { func (p *Parser) AcceptMatching(patterns ...string) bool {
return l.progress(func(r rune) { l.buffer.WriteRune(r) }, patterns...) return p.progress(func(r rune) { p.buffer.writeRune(r) }, patterns...)
} }
// AcceptConsecutive adds consecutive runes from the input to the string // AcceptConsecutive adds consecutive runes from the input to the string
// buffer, as long as they exist in the pattern. // buffer, as long as they exist in the pattern.
// If any runes were added then true is returned, false otherwise. // If any runes were added then true is returned, false otherwise.
func (l *Parser) AcceptConsecutive(pattern string) bool { func (p *Parser) AcceptConsecutive(pattern string) bool {
accepted := false accepted := false
for l.AcceptMatching(pattern) { for p.AcceptMatching(pattern) {
accepted = true accepted = true
} }
return accepted return accepted
@ -168,11 +144,11 @@ func (l *Parser) AcceptConsecutive(pattern string) bool {
// SkipMatching skips runes, but only when all provided patterns are satisfied. // SkipMatching skips runes, but only when all provided patterns are satisfied.
// Returns true when one or more runes were skipped. // Returns true when one or more runes were skipped.
func (l *Parser) SkipMatching(patterns ...string) bool { func (p *Parser) SkipMatching(patterns ...string) bool {
if runes, w, ok := l.Match(patterns...); ok { if runes, w, ok := p.Match(patterns...); ok {
l.pos += w p.pos += w
for _, r := range runes { for _, r := range runes {
l.advanceCursor(r) p.advanceCursor(r)
} }
return true return true
} }
@ -181,9 +157,9 @@ func (l *Parser) SkipMatching(patterns ...string) bool {
// SkipConsecutive skips consecutive runes from the provided pattern. // SkipConsecutive skips consecutive runes from the provided pattern.
// Returns true when one or more runes were skipped. // Returns true when one or more runes were skipped.
func (l *Parser) SkipConsecutive(pattern string) bool { func (p *Parser) SkipConsecutive(pattern string) bool {
didSkip := false didSkip := false
for l.SkipMatching(pattern) { for p.SkipMatching(pattern) {
didSkip = true didSkip = true
} }
return didSkip return didSkip
@ -197,10 +173,10 @@ func (l *Parser) SkipConsecutive(pattern string) bool {
// error item that tells the client that an unexpected rune was // error item that tells the client that an unexpected rune was
// encountered in the input. // encountered in the input.
// The parameter 'expected' is used to provide some context to the error. // The parameter 'expected' is used to provide some context to the error.
func (l *Parser) UnexpectedInputError(expected string) StateFn { func (p *Parser) UnexpectedInputError(expected string) StateFn {
// next() takes care of error messages for ok == false. // next() takes care of error messages for ok == false.
if r, ok := l.next(); ok { if r, ok := p.next(); ok {
return l.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected)) return p.EmitError(fmt.Sprintf("unexpected character %q (expected %s)", r, expected))
} }
return nil return nil
} }
@ -209,8 +185,8 @@ func (l *Parser) UnexpectedInputError(expected string) StateFn {
// error item that tells the client that more data was expected from // error item that tells the client that more data was expected from
// the input. // the input.
// The parameter 'expected' is used to provide some context to the error. // The parameter 'expected' is used to provide some context to the error.
func (l *Parser) UnexpectedEndOfFile(expected string) StateFn { func (p *Parser) UnexpectedEndOfFile(expected string) StateFn {
return l.EmitError("Unexpected end of file (expected %s)", expected) return p.EmitError("Unexpected end of file (expected %s)", expected)
} }
// ============================================================================ // ============================================================================
@ -220,8 +196,8 @@ func (l *Parser) UnexpectedEndOfFile(expected string) StateFn {
// peek returns but does not advance to the next rune(s) in the input. // peek returns but does not advance to the next rune(s) in the input.
// Returns the rune, its width and a boolean. The boolean will be false in case // Returns the rune, its width and a boolean. The boolean will be false in case
// no upcoming rune can be peeked (end of data or invalid UTF8 character). // no upcoming rune can be peeked (end of data or invalid UTF8 character).
func (l *Parser) peek() (rune, int, bool) { func (p *Parser) peek() (rune, int, bool) {
peeked, width := utf8.DecodeRuneInString(l.input[l.pos:]) peeked, width := utf8.DecodeRuneInString(p.input[p.pos:])
return peeked, width, peeked != utf8.RuneError return peeked, width, peeked != utf8.RuneError
} }
@ -229,11 +205,11 @@ func (l *Parser) peek() (rune, int, bool) {
// Returns a slice of runes, their total width in bytes and a boolean. // Returns a slice of runes, their total width in bytes and a boolean.
// The boolean will be false in case less runes can be peeked than // The boolean will be false in case less runes can be peeked than
// the requested amount (end of data or invalid UTF8 character). // the requested amount (end of data or invalid UTF8 character).
func (l *Parser) peekMulti(amount int) ([]rune, int, bool) { func (p *Parser) peekMulti(amount int) ([]rune, int, bool) {
width := 0 width := 0
var peeked []rune var peeked []rune
for i := 0; i < amount; i++ { for i := 0; i < amount; i++ {
r, w := utf8.DecodeRuneInString(l.input[l.pos+width:]) r, w := utf8.DecodeRuneInString(p.input[p.pos+width:])
switch { switch {
case r == utf8.RuneError: case r == utf8.RuneError:
return peeked, width, false return peeked, width, false
@ -252,12 +228,12 @@ func (l *Parser) peekMulti(amount int) ([]rune, int, bool) {
// moved forward, false otherwise. // moved forward, false otherwise.
// A callback function can be provided to specify what to do with // A callback function can be provided to specify what to do with
// the runes that are encountered in the input. // the runes that are encountered in the input.
func (l *Parser) progress(callback func(rune), patterns ...string) bool { func (p *Parser) progress(callback func(rune), patterns ...string) bool {
if runes, w, ok := l.Match(patterns...); ok { if runes, w, ok := p.Match(patterns...); ok {
l.pos += w p.pos += w
for _, r := range runes { for _, r := range runes {
callback(r) callback(r)
l.advanceCursor(r) p.advanceCursor(r)
} }
return true return true
} }
@ -269,17 +245,17 @@ func (l *Parser) progress(callback func(rune), patterns ...string) bool {
// When the end of input is reached, or an invalid UTF8 character is // When the end of input is reached, or an invalid UTF8 character is
// read, then false is returned. Both are considered error cases, // read, then false is returned. Both are considered error cases,
// and for that reason these automatically emit an error to the client. // and for that reason these automatically emit an error to the client.
func (l *Parser) next() (rune, bool) { func (p *Parser) next() (rune, bool) {
r, w, ok := l.peek() r, w, ok := p.peek()
if ok { if ok {
l.pos += w p.pos += w
l.advanceCursor(r) p.advanceCursor(r)
return r, true return r, true
} }
if r == utf8.RuneError && w == 0 { if r == utf8.RuneError && w == 0 {
l.EmitError("unexpected end of file") p.EmitError("unexpected end of file")
} else { } else {
l.EmitError("invalid UTF8 character") p.EmitError("invalid UTF8 character")
} }
return r, false return r, false
} }
@ -287,12 +263,12 @@ func (l *Parser) next() (rune, bool) {
// advanceCursor advances the rune cursor one position in the // advanceCursor advances the rune cursor one position in the
// input data. While doing so, it keeps tracks of newlines, // input data. While doing so, it keeps tracks of newlines,
// so we can report on row + column positions on error. // so we can report on row + column positions on error.
func (l *Parser) advanceCursor(r rune) { func (p *Parser) advanceCursor(r rune) {
if l.newline { if p.newline {
l.cursorColumn = 0 p.cursorColumn = 0
l.cursorRow++ p.cursorRow++
} else { } else {
l.cursorColumn++ p.cursorColumn++
} }
l.newline = r == '\n' p.newline = r == '\n'
} }

33
parser/statestack.go Normal file
View File

@ -0,0 +1,33 @@
package parser
func (p *Parser) QueueStates(states ...StateFn) StateFn {
first, followup := states[0], states[1:]
for reverse := range followup {
p.PushState(followup[len(followup)-reverse-1])
}
return first
}
func (p *Parser) ToChildState(state StateFn) StateFn {
p.PushState(p.state)
return state
}
func (p *Parser) ToParentState() StateFn {
state := p.PopState()
return state
}
// PushState adds the state function to the state stack.
// This is used for implementing nested parsing.
func (p *Parser) PushState(state StateFn) {
p.stack = append(p.stack, state)
}
// PopState pops the last pushed state from the state stack.
func (p *Parser) PopState() StateFn {
last := len(p.stack) - 1
head, tail := p.stack[:last], p.stack[last]
p.stack = head
return tail
}

View File

@ -6,40 +6,40 @@ import (
"strings" "strings"
) )
// StringBuffer is a string buffer implementation, which is used by the parser // stringBuffer is a string buffer implementation, which is used by the parser
// to efficiently accumulate runes from the input and eventually turn these // to efficiently accumulate runes from the input and eventually turn these
// into a string, either literal or interpreted. // into a string, either literal or interpreted.
type StringBuffer struct { type stringBuffer struct {
buffer bytes.Buffer buffer bytes.Buffer
} }
// Reset resets the string buffer, in order to build a new string. // reset resets the string buffer, in order to build a new string.
func (b *StringBuffer) Reset() *StringBuffer { func (b *stringBuffer) reset() *stringBuffer {
b.buffer.Reset() b.buffer.Reset()
return b return b
} }
// WriteString adds the runes of the input string to the string buffer. // writeString adds the runes of the input string to the string buffer.
func (b *StringBuffer) WriteString(s string) *StringBuffer { func (b *stringBuffer) writeString(s string) *stringBuffer {
for _, r := range s { for _, r := range s {
b.WriteRune(r) b.writeRune(r)
} }
return b return b
} }
// WriteRune adds a single rune to the string buffer. // writeRune adds a single rune to the string buffer.
func (b *StringBuffer) WriteRune(r rune) *StringBuffer { func (b *stringBuffer) writeRune(r rune) *stringBuffer {
b.buffer.WriteRune(r) b.buffer.WriteRune(r)
return b return b
} }
// AsLiteralString returns the string buffer as a literal string. // asLiteralString returns the string buffer as a literal string.
// Literal means that no escape sequences are processed. // Literal means that no escape sequences are processed.
func (b *StringBuffer) AsLiteralString() string { func (b *stringBuffer) asLiteralString() string {
return b.buffer.String() return b.buffer.String()
} }
// AsInterpretedString returns the string in its interpreted form. // asInterpretedString returns the string in its interpreted form.
// Interpreted means that escape sequences are handled in the way that Go would // Interpreted means that escape sequences are handled in the way that Go would
// have, had it been inside double quotes. It translates for example escape // have, had it been inside double quotes. It translates for example escape
// sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string // sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string
@ -47,7 +47,7 @@ func (b *StringBuffer) AsLiteralString() string {
// Since the input might contain invalid escape sequences, this method // Since the input might contain invalid escape sequences, this method
// also returns an error. When an error is returned, the returned string will // also returns an error. When an error is returned, the returned string will
// contain the string as far as it could be interpreted. // contain the string as far as it could be interpreted.
func (b *StringBuffer) AsInterpretedString() (string, error) { func (b *stringBuffer) asInterpretedString() (string, error) {
var sb strings.Builder var sb strings.Builder
tail := b.buffer.String() tail := b.buffer.String()
for len(tail) > 0 { for len(tail) > 0 {

View File

@ -1,15 +1,13 @@
package parser_test package parser
import ( import (
"testing" "testing"
"github.com/mmakaay/toml/parser"
) )
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) { func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
var b parser.StringBuffer var b stringBuffer
s1, _ := b.WriteString(`hi\nthere`).AsInterpretedString() s1, _ := b.writeString(`hi\nthere`).asInterpretedString()
s2 := b.AsLiteralString() s2 := b.asLiteralString()
if s1 != "hi\nthere" { if s1 != "hi\nthere" {
t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1) t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1)
} }
@ -19,15 +17,15 @@ func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
} }
func TestResetResetsBuffer(t *testing.T) { func TestResetResetsBuffer(t *testing.T) {
var b parser.StringBuffer var b stringBuffer
s := b.WriteRune('X').Reset().AsLiteralString() s := b.writeRune('X').reset().asLiteralString()
if s != "" { if s != "" {
t.Fatalf("Did not get expected empty string, but %q", s) t.Fatalf("Did not get expected empty string, but %q", s)
} }
} }
func TestAsLiteralString(t *testing.T) { func TestAsLiteralString(t *testing.T) {
b := parser.StringBuffer{} b := stringBuffer{}
for _, c := range []stringbufT{ for _, c := range []stringbufT{
{"empty string", ``, ``, OK}, {"empty string", ``, ``, OK},
{"simple string", `Simple string!`, `Simple string!`, OK}, {"simple string", `Simple string!`, `Simple string!`, OK},
@ -39,7 +37,7 @@ func TestAsLiteralString(t *testing.T) {
{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK}, {"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK}, {"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
} { } {
s := b.Reset().WriteString(c.in).AsLiteralString() s := b.reset().writeString(c.in).asLiteralString()
if s != c.out { if s != c.out {
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s) t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
} }
@ -47,7 +45,7 @@ func TestAsLiteralString(t *testing.T) {
} }
func TestAsInterpretedString(t *testing.T) { func TestAsInterpretedString(t *testing.T) {
b := parser.StringBuffer{} b := stringBuffer{}
for _, c := range []stringbufT{ for _, c := range []stringbufT{
{"empty string", "", "", OK}, {"empty string", "", "", OK},
{"one character", "Simple string!", "Simple string!", OK}, {"one character", "Simple string!", "Simple string!", OK},
@ -64,7 +62,7 @@ func TestAsInterpretedString(t *testing.T) {
`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`, `I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK}, "I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
} { } {
s, err := b.Reset().WriteString(c.in).AsInterpretedString() s, err := b.reset().writeString(c.in).asInterpretedString()
if c.isSuccessCase && err != nil { if c.isSuccessCase && err != nil {
t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err) t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
} }

View File

@ -10,7 +10,7 @@ type Parser struct {
newline bool // keep track of when we have scanned a newline newline bool // keep track of when we have scanned a newline
cursorRow int // current row number in the input cursorRow int // current row number in the input
cursorColumn int // current column position in the input cursorColumn int // current column position in the input
buffer StringBuffer // an efficient buffer, used to build string values buffer stringBuffer // an efficient buffer, used to build string values
items chan Item // channel of resulting Parser items items chan Item // channel of resulting Parser items
item Item // the current item as reached by Next() and retrieved by Get() item Item // the current item as reached by Next() and retrieved by Get()
err *Error // an error when lexing failed, retrieved by Error() err *Error // an error when lexing failed, retrieved by Error()