Back up a load of work on typed token support, making it easy to produce tokens directly from parser/combinator-based parsing rules.

Maurice Makaay 2019-06-04 00:03:08 +00:00
parent 21f1aa597c
commit 4580962fb8
33 changed files with 2179 additions and 934 deletions


@ -0,0 +1,19 @@
package assert
import (
"testing"
)
func Equal(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
func NotEqual(t *testing.T, notExpected interface{}, actual interface{}, forWhat string) {
if notExpected == actual {
t.Errorf("Unexpected value for %s: %q", forWhat, actual)
}
}

assert/assert_panic.go (new file, 34 lines)

@ -0,0 +1,34 @@
package assert
import (
"regexp"
"testing"
)
type PanicT struct {
Function func()
Expect string
Regexp bool
}
func Panic(t *testing.T, p PanicT) {
defer func() {
if r := recover(); r != nil {
mismatch := false
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
mismatch = true
}
if !p.Regexp && p.Expect != r.(string) {
mismatch = true
}
if mismatch {
t.Errorf(
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
p.Expect, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
}
}()
p.Function()
}
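
For reference, a minimal usage sketch of the Panic helper above (an editor's sketch, not part of this commit; the test and the panic message are made up for illustration):

    func TestSomethingBroken_Panics(t *testing.T) {
        assert.Panic(t, assert.PanicT{
            Function: func() { panic("something went wrong: code 42") },
            Expect:   `code \d+`, // treated as a regular expression because Regexp is true
            Regexp:   true,
        })
    }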

cursor.go (new file, 29 lines)

@ -0,0 +1,29 @@
package parsekit
import "fmt"
// Cursor represents the position of the input cursor in various ways.
type Cursor struct {
Byte int // The cursor offset in bytes
Rune int // The cursor offset in UTF8 runes
Column int // The column at which the cursor is (0-indexed)
Line int // The line at which the cursor is (0-indexed)
}
func (c *Cursor) String() string {
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}
// move updates the position of the cursor, based on the provided input string.
func (c *Cursor) move(input string) {
c.Byte += len(input)
for _, r := range input {
c.Rune++
if r == '\n' {
c.Column = 0
c.Line++
} else {
c.Column++
}
}
}

cursor_test.go (new file, 42 lines)

@ -0,0 +1,42 @@
package parsekit
import (
"testing"
)
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
for _, test := range []struct {
name string
input []string
byte int
rune int
line int
column int
}{
{"No input at all", []string{""}, 0, 0, 0, 0},
{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
{"One newline", []string{"\n"}, 1, 1, 1, 0},
{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
c := Cursor{}
for _, s := range test.input {
c.move(s)
}
if c.Byte != test.byte {
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
}
if c.Rune != test.rune {
t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
}
if c.Line != test.line {
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
}
if c.Column != test.column {
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
}
}
}


@ -9,7 +9,6 @@ package parsekit_test
import (
"fmt"
"strconv"
"git.makaay.nl/mauricem/go-parsekit"
)
@ -28,7 +27,6 @@ func Example_basicCalculator1() {
{"+", 0},
{"10.8 + 12", 0},
{"42+ ", 0},
{"9999999999999999999 + 8888888", 0},
} {
output, err := ComputeSimple(c.input)
if err != nil {
@ -47,7 +45,6 @@ func Example_basicCalculator1() {
// Input: "+", got error: unexpected character '+' (expected integer number)
// Input: "10.8 + 12", got error: unexpected character '.' (expected operator, '+' or '-')
// Input: "42+ ", got error: unexpected character ' ' (expected integer number)
// Input: "9999999999999999999 + 8888888", got error: invalid value: strconv.ParseInt: parsing "9999999999999999999": value out of range
}
// ---------------------------------------------------------------------------
@ -72,23 +69,16 @@ type simpleCalculator struct {
op int64 // represents operation for next term (+1 = add, -1 = subtract)
}
// A definition of bareInteger, which conveniently drops surrounding whitespace.
// A definition of an int64, which conveniently drops surrounding whitespace.
var dropWhitespace = parsekit.M.Drop(parsekit.C.Opt(parsekit.A.Whitespace))
var bareInteger = parsekit.C.Seq(dropWhitespace, parsekit.A.Integer, dropWhitespace)
var int64Token = parsekit.T.Int64(nil, bareInteger)
func (c *simpleCalculator) number(p *parsekit.ParseAPI) {
if p.On(bareInteger).Accept() {
value, err := strconv.ParseInt(p.BufLiteral(), 10, 64)
p.BufClear()
if err != nil {
p.Error("invalid value: %s", err)
} else {
c.Result += c.op * value
p.Handle(c.operatorOrEndOfFile)
}
} else {
p.Expects("integer number")
p.UnexpectedInput()
p.Expects("integer number")
if p.On(int64Token).Accept() {
c.Result += c.op * p.Result().Value(0).(int64)
p.Handle(c.operatorOrEndOfFile)
}
}


@ -15,7 +15,6 @@ package parsekit_test
import (
"fmt"
"math"
"strconv"
"git.makaay.nl/mauricem/go-parsekit"
)
@ -97,8 +96,8 @@ func (c *calculator) expr(p *parsekit.ParseAPI) {
var pc, a = parsekit.C, parsekit.A
if p.Handle(c.term) {
for p.On(pc.Any(a.Add, a.Subtract)).Skip() {
op := p.LastMatch
for p.On(pc.Any(a.Add, a.Subtract)).Accept() {
op := p.Result().Rune(0)
if !p.Handle(c.term) {
return
}
@ -115,8 +114,8 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
var pc, a = parsekit.C, parsekit.A
if p.Handle(c.factor) {
for p.On(pc.Any(a.Multiply, a.Divide)).Skip() {
op := p.LastMatch
for p.On(pc.Any(a.Multiply, a.Divide)).Accept() {
op := p.Result().Rune(0)
if !p.Handle(c.factor) {
return
}
@ -130,19 +129,12 @@ func (c *calculator) term(p *parsekit.ParseAPI) {
// <space> = (<space> (SPACE|TAB) | "")
// <factor> = <space> (FLOAT | LPAREN <expr> RPAREN) <space>
func (c *calculator) factor(p *parsekit.ParseAPI) {
var pc, a = parsekit.C, parsekit.A
var a, tok = parsekit.A, parsekit.T
p.On(a.Whitespace).Skip()
switch {
case p.On(pc.Signed(a.Float)).Accept():
floatStr := p.BufLiteral()
p.BufClear()
value, err := strconv.ParseFloat(floatStr, 64)
if err != nil {
p.Error("invalid number %s: %s", floatStr, err)
return
} else {
c.interpreter.pushValue(value)
}
case p.On(tok.Float64(nil, a.Signed(a.Float))).Accept():
value := p.Result().Value(0).(float64)
c.interpreter.pushValue(value)
case p.On(a.LeftParen).Skip():
if !p.Handle(c.expr) {
return
@ -194,16 +186,16 @@ func (i *interpreter) pushValue(value float64) {
i.top.a, i.top.b = i.top.b, value
}
func (i *interpreter) eval(op string) float64 {
func (i *interpreter) eval(op rune) float64 {
value := i.top.a
switch op {
case "+":
case '+':
value += i.top.b
case "-":
case '-':
value -= i.top.b
case "*":
case '*':
value *= i.top.b
case "/":
case '/':
value /= i.top.b
}
i.top.b = value


@ -36,11 +36,11 @@ func Example_dutchPostcodeUsingMatcher() {
// [1] Input: "2233Ab" Output: 2233 AB
// [2] Input: "1001\t\tab" Output: 1001 AB
// [3] Input: "1818ab" Output: 1818 AB
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at line 1, column 1
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at line 1, column 1
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at line 1, column 1
// [8] Input: "\xcd2222AB" Error: invalid UTF8 character in input (expected a Dutch postcode) at line 1, column 1
// [4] Input: "1212abc" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [5] Input: "1234" Error: unexpected character '1' (expected a Dutch postcode) at start of file
// [6] Input: "huh" Error: unexpected character 'h' (expected a Dutch postcode) at start of file
// [7] Input: "" Error: unexpected end of file (expected a Dutch postcode) at start of file
// [8] Input: "\xcd2222AB" Error: unexpected character '<27>' (expected a Dutch postcode) at start of file
}
// ---------------------------------------------------------------------------
@ -57,7 +57,7 @@ func createPostcodeMatcher() *parsekit.Matcher {
// - A space between letters and digits is optional.
// - It is good form to write the letters in upper case.
// - It is good form to use a single space between digits and letters.
digitNotZero := c.Except(c.Rune('0'), a.Digit)
digitNotZero := c.Except(a.Rune('0'), a.Digit)
pcDigits := c.Seq(digitNotZero, c.Rep(3, a.Digit))
pcLetter := c.Any(a.ASCIILower, a.ASCIIUpper)
pcLetters := m.ToUpper(c.Seq(pcLetter, pcLetter))


@ -3,15 +3,15 @@
//
// This implementation uses a state-based Parser for it, and it does not
// implement any custom parser/combinator TokenHandler functions. Note that
// things are much easier to implement using custom TokenHandlers (see the other
// HelloWorldUsingMatcher example for this). Doing this fully parser-based
// things are much easier to implement using custom TokenHandlers (see the
// helloParserCombinator example for this). Doing this fully parser-based
// implementation is mainly for your learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that
// this parser reports errors much more fine-grained. This might or might not be
// useful for your specific use case. If you need error reporting like this,
// then also take a look at the HelloWorldUsingParser2 example, which does the
// same thing as this version, only more concise.
// One big difference between the parser/combinator-based example and this one,
// is that this parser reports errors much more fine-grained. This might or
// might not be useful for your specific use case. If you need error reporting
// like this, then also take a look at the helloSingleState example, which does
// the same thing as this version, only more concise.
package parsekit_test
@ -56,11 +56,11 @@ func Example_helloWorldUsingParser1() {
// [6] Input: "hello" Error: unexpected end of file (expected comma)
// [7] Input: "hello," Error: unexpected end of file (expected name)
// [8] Input: "hello , " Error: unexpected end of file (expected name)
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected name)
// [9] Input: "hello , Droopy" Error: unexpected end of file (expected exclamation)
// [10] Input: "hello , Droopy!" Output: Droopy
// [11] Input: "hello , \t \t Droopy \t !" Output: Droopy
// [12] Input: "Oh no!" Error: unexpected character 'O' (expected hello)
// [13] Input: "hello,!" Error: The name cannot be empty
// [13] Input: "hello,!" Error: unexpected character '!' (expected name)
}
// ---------------------------------------------------------------------------
@ -78,9 +78,9 @@ func (h *helloparser1) Parse(input string) (string, *parsekit.Error) {
}
func (h *helloparser1) start(p *parsekit.ParseAPI) {
c := parsekit.C
a := parsekit.A
p.Expects("hello")
if p.On(c.StrNoCase("hello")).Skip() {
if p.On(a.StrNoCase("hello")).Skip() {
p.Handle(h.comma)
}
}
@ -88,20 +88,42 @@ func (h *helloparser1) start(p *parsekit.ParseAPI) {
func (h *helloparser1) comma(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("comma")
p.On(a.Whitespace).Skip()
if p.On(a.Comma).Skip() {
switch {
case p.On(a.Whitespace).Skip():
p.Handle(h.comma)
case p.On(a.Comma).Skip():
p.Handle(h.startName)
}
}
func (h *helloparser1) startName(p *parsekit.ParseAPI) {
c, a := parsekit.C, parsekit.A
p.Expects("name")
switch {
case p.On(a.Whitespace).Skip():
p.Handle(h.startName)
case p.On(c.Not(a.Excl)).Stay():
p.Handle(h.name)
}
}
func (h *helloparser1) name(p *parsekit.ParseAPI) {
a := parsekit.A
c, a := parsekit.C, parsekit.A
p.Expects("name")
switch {
case p.On(a.Excl).Skip():
p.Handle(h.end)
case p.On(a.AnyRune).Accept():
case p.On(c.Not(a.Excl)).Accept():
h.greetee += p.Result().String()
p.Handle(h.name)
default:
p.Handle(h.exclamation)
}
}
func (h *helloparser1) exclamation(p *parsekit.ParseAPI) {
a := parsekit.A
p.Expects("exclamation")
if p.On(a.Excl).Accept() {
p.Handle(h.end)
}
}
@ -115,7 +137,7 @@ func (h *helloparser1) end(p *parsekit.ParseAPI) {
return
}
h.greetee = strings.TrimSpace(p.BufLiteral())
h.greetee = strings.TrimSpace(h.greetee)
if h.greetee == "" {
p.Error("The name cannot be empty")
} else {


@ -4,7 +4,7 @@
// The implementation uses only parser/combinator TokenHandler functions and does
// not implement a full-fledged state-based Parser for it. If you want to see the
// same kind of functionality, implementated using a Parser, take a look at the
// HelloWorldUsingParser examples.
// other hello examples.
package parsekit_test
import (
@ -37,9 +37,9 @@ func Example_helloWorldUsingMatcher() {
// [1] Input: "HELLO ,Johnny!" Output: Johnny
// [2] Input: "hello , Bob123!" Output: Bob123
// [3] Input: "hello Pizza!" Output: Pizza
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at line 1, column 1
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at line 1, column 1
// [4] Input: "Oh no!" Error: unexpected character 'O' (expected a friendly greeting) at start of file
// [5] Input: "Hello, world" Error: unexpected character 'H' (expected a friendly greeting) at start of file
// [6] Input: "Hello,!" Error: unexpected character 'H' (expected a friendly greeting) at start of file
}
// ---------------------------------------------------------------------------
@ -53,7 +53,7 @@ func createHelloMatcher() *parsekit.Matcher {
// Using the parser/combinator support of parsekit, we create a TokenHandler function
// that does all the work. The 'greeting' TokenHandler matches the whole input and
// drops all but the name from it.
hello := c.StrNoCase("hello")
hello := a.StrNoCase("hello")
comma := c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))
separator := c.Any(comma, a.Whitespace)
name := c.OneOrMore(c.Not(a.Excl))


@ -1,15 +1,15 @@
// This is the same as the example HelloWorldUsingParser1, except that in this
// This is the same as the other hello examples, except that in this
// implementation the state machine is implemented using a combination of some
// TokenHandlers and only a single state, in which multiple ParseAPI.On() calls
// are combined to do all the work in one go.
//
// Note that things are much easier to implement using custom TokenHandlers (see
// the other HelloWorldUsingMatcher example for this). Doing this implementation
// the other helloParserCombinator example for this). Doing this implementation
// is mainly for your learning pleasure.
//
// One big difference between the Matcher-based example and this one, is that
// this parser reports errors much more fine-grained. This might or might not be
// useful for your specific use case.:0
// One big difference between the parser/combinator-based example and this one,
// is that this parser reports errors much more fine-grained. This might or
// might not be useful for your specific use case.
package parsekit_test
@ -80,21 +80,29 @@ func (h *helloparser2) Parse(input string) (string, *parsekit.Error) {
func (h *helloparser2) start(p *parsekit.ParseAPI) {
c, a, m := parsekit.C, parsekit.A, parsekit.M
if !p.On(c.StrNoCase("hello")).Skip() {
if !p.On(a.StrNoCase("hello")).Skip() {
p.Error("the greeting is not being friendly")
} else if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
return
}
if !p.On(c.Seq(c.Opt(a.Whitespace), a.Comma, c.Opt(a.Whitespace))).Skip() {
p.Error("the greeting is not properly separated")
} else if !p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
return
}
if p.On(m.TrimSpace(c.OneOrMore(c.Except(a.Excl, a.AnyRune)))).Accept() {
h.greetee = p.Result().String()
if h.greetee == "" {
p.Error("the name cannot be empty")
return
}
} else {
p.Error("the greeting is targeted at thin air")
} else if !p.On(a.Excl).Skip() {
return
}
if !p.On(a.Excl).Skip() {
p.Error("the greeting is not loud enough")
} else if !p.On(a.EndOfFile).Stay() {
p.Error("too much stuff going on after the closing '!'")
} else {
h.greetee = p.BufLiteral()
if h.greetee == "" {
p.Error("the name cannot be empty")
}
p.Stop()
}
}


@ -1,7 +1,7 @@
// In this example, we show that any type can be extended into a parser,
// filling that type with data from the ParseHandler methods.
//
// Here, we create a custom type 'letterCollection', which is an alias
// Here, we create a custom type 'Chunks', which is an alias
// for []string. We add a ParseHandler method directly to that type
// and let the parsing code fill the slice with strings during parsing.
@ -21,8 +21,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(chunkOfRunes).Accept() {
*l = append(*l, p.BufLiteral())
p.BufClear()
*l = append(*l, p.Result().String())
}
})
return parser.Execute(s)
@ -30,10 +29,10 @@ func (l *Chunks) AddChopped(s string, chunkSize int) *parsekit.Error {
func Example_usingSliceAsParserState() {
chunks := &Chunks{}
chunks.AddChopped("This string will", 4)
chunks.AddChopped("be cut to bits!!!!!!", 8)
chunks.AddChopped("123412341234xxx", 4)
chunks.AddChopped("1234567812345678xxxxx", 8)
fmt.Printf("Matches = %q", *chunks)
// Output:
// Matches = ["This" " str" "ing " "will" "be cut t" "o bits!!" "!!!!"]
// Matches = ["1234" "1234" "1234" "xxx" "12345678" "12345678" "xxxxx"]
}


@ -48,23 +48,47 @@ func ExampleError_Full() {
// it broke down at line 10, column 42
}
func ExampleMatchAnyRune() {
func ExampleMatchAnyRune_usingAcceptedRunes() {
// Easy access to the parsekit definitions.
a := parsekit.A
matches := []string{}
stateHandler := func(p *parsekit.ParseAPI) {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(a.AnyRune).Accept() {
matches = append(matches, p.BufLiteral())
p.BufClear()
matches = append(matches, p.Result().String())
}
p.ExpectEndOfFile()
}
parser := parsekit.NewParser(stateHandler)
})
err := parser.Execute("¡Any will dö!")
fmt.Printf("Matches = %q, Error = %s\n", matches, err)
// Output:
// Matches = ["¡" "A" "n" "y" " " "w" "i" "l" "l" " " "d" "ö" "!"], Error = <nil>
}
func ExampleMatchAnyRune_usingTokens() {
// Easy access to the parsekit definitions.
c, a, tok := parsekit.C, parsekit.A, parsekit.T
var tokens []*parsekit.Token
var accepted string
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
if p.On(c.OneOrMore(tok.Rune("a rune", a.AnyRune))).Accept() {
tokens = p.Result().Tokens()
accepted = p.Result().String()
}
p.ExpectEndOfFile()
})
parser.Execute("¡Any will dö!")
fmt.Printf("Runes accepted: %q\n", accepted)
fmt.Printf("Token values: ")
for _, t := range tokens {
fmt.Printf("%c ", t.Value)
}
// Output:
// Runes accepted: "¡Any will dö!"
// Token values: ¡ A n y w i l l d ö !
}

parseapi.go (new file, 68 lines)

@ -0,0 +1,68 @@
package parsekit
import (
"fmt"
"runtime"
"strings"
)
// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser.
type ParseAPI struct {
tokenAPI *TokenAPI // the input reader
loopCheck map[string]bool // used for parser loop detection
expecting string // a description of what the current state expects to find (see Expects())
result *Result // TokenHandler result, as received from On(...).Accept()
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
// or when it has been stopped. It is used from the ParseAPI methods, to
// prevent further calls to the ParseAPI on these occasions.
//
// Basically, this guard ensures proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *ParseAPI) panicWhenStoppedOrInError() {
if !p.isStoppedOrInError() {
return
}
called, _ := p.getCaller(1)
parts := strings.Split(called, ".")
calledShort := parts[len(parts)-1]
caller, filepos := p.getCaller(2)
after := "Error()"
if p.stopped {
after = "Stop()"
}
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
}
func (p *ParseAPI) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
func (p *ParseAPI) initLoopCheck() {
p.loopCheck = map[string]bool{}
}
func (p *ParseAPI) checkForLoops() {
caller, filepos := p.getCaller(2)
if _, ok := p.loopCheck[filepos]; ok {
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
}
p.loopCheck[filepos] = true
}
// TODO delete this one
func (p *ParseAPI) getCaller(depth int) (string, string) {
// No error handling, because we call this method ourselves with safe depth values.
pc, file, line, _ := runtime.Caller(depth + 1)
filepos := fmt.Sprintf("%s:%d", file, line)
caller := runtime.FuncForPC(pc)
return caller.Name(), filepos
}


@ -1,12 +1,5 @@
package parsekit
import (
"fmt"
"runtime"
"strings"
"unicode/utf8"
)
// ParseHandler defines the type of function that must be implemented to handle
// a parsing state in a Parser state machine.
//
@ -14,103 +7,3 @@ import (
// all the internal state for the parsing state machine and provides the
// interface that the ParseHandler uses to interact with the parser.
type ParseHandler func(*ParseAPI)
// ParseAPI holds the internal state of a parse run and provides an API to
// ParseHandler methods to communicate with the parser.
type ParseAPI struct {
input string // the input that is being scanned by the parser
inputPos int // current byte cursor position in the input
loopCheck map[string]bool // used for parser loop detection
cursorLine int // current rune cursor row number in the input
cursorColumn int // current rune cursor column position in the input
len int // the total length of the input in bytes
newline bool // keep track of when we have scanned a newline
expecting string // a description of what the current state expects to find (see P.Expects())
buffer stringBuffer // an efficient buffer, used to build string values (see P.Accept())
err *Error // error during parsing, retrieved by Error(), further ParseAPI calls are ignored
stopped bool // a boolean set to true by Stop(), further ParseAPI calls are ignored
LastMatch string // a string representation of the last matched input data
}
// panicWhenStoppedOrInError will panic when the parser has produced an error
// or when it has been stopped. It is used from the ParseAPI methods, to
// prevent further calls to the ParseAPI on these occasions.
//
// Basically, this guard ensures proper coding of parsers, making sure
// that clean routes are followed. You can consider this check a runtime
// unit test.
func (p *ParseAPI) panicWhenStoppedOrInError() {
if !p.isStoppedOrInError() {
return
}
called, _ := p.getCaller(1)
parts := strings.Split(called, ".")
calledShort := parts[len(parts)-1]
caller, filepos := p.getCaller(2)
after := "Error()"
if p.stopped {
after = "Stop()"
}
panic(fmt.Sprintf("Illegal call to ParseAPI.%s() from %s at %s: no calls allowed after ParseAPI.%s", calledShort, caller, filepos, after))
}
func (p *ParseAPI) isStoppedOrInError() bool {
return p.stopped || p.err != nil
}
func (p *ParseAPI) checkForLoops() {
caller, filepos := p.getCaller(2)
if _, ok := p.loopCheck[filepos]; ok {
panic(fmt.Sprintf("Loop detected in parser in %s at %s", caller, filepos))
}
p.loopCheck[filepos] = true
}
// peek returns but does not advance the cursor to the next rune in the input.
// Returns the rune, its width in bytes and a boolean.
//
// The boolean will be false in case no upcoming rune can be peeked
// (end of data or invalid UTF8 character). In this case, the returned rune
// will be one of eofRune or invalidRune.
func (p *ParseAPI) peek(byteOffset int) (rune, int, bool) {
r, w := utf8.DecodeRuneInString(p.input[p.inputPos+byteOffset:])
return handleRuneError(r, w)
}
// eofRune is a special rune that is used to indicate an end of file when
// reading a character from the input.
const eofRune rune = -1
// invalidRune is a special rune that is used to indicate an invalid UTF8
// rune on the input.
const invalidRune rune = utf8.RuneError
// handleRuneError is used to create specific rune value in case of errors.
// When an error occurs, then utf8.RuneError will be in the rune.
// This can however indicate one of two situations:
// 1) w == 0: end of file is reached
// 2) w == 1: invalid UTF character on input
// This function lets these two cases return respectively the
// package's own eofRune or invalidRune, to make it easy for calling code
// to distinct between these two cases.
func handleRuneError(r rune, w int) (rune, int, bool) {
if r == utf8.RuneError {
if w == 0 {
return eofRune, 0, false
}
return invalidRune, w, false
}
return r, w, true
}
func (p *ParseAPI) getCaller(depth int) (string, string) {
// No error handling, because we call this method ourselves with safe depth values.
pc, file, line, _ := runtime.Caller(depth + 1)
filepos := fmt.Sprintf("%s:%d", file, line)
caller := runtime.FuncForPC(pc)
return caller.Name(), filepos
}


@ -20,7 +20,11 @@ func (err *Error) Error() string {
// Full returns the current error message, including information about
// the position in the input where the error occurred.
func (err *Error) Full() string {
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
if err.Line == 0 {
return fmt.Sprintf("%s at start of file", err)
} else {
return fmt.Sprintf("%s at line %d, column %d", err, err.Line, err.Column)
}
}
// Error sets the error message in the parser API. This error message
@ -29,5 +33,5 @@ func (p *ParseAPI) Error(format string, args ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, args...)
p.err = &Error{message, p.cursorLine, p.cursorColumn}
p.err = &Error{message, p.tokenAPI.cursor.Line, p.tokenAPI.cursor.Column}
}


@ -1,5 +1,7 @@
package parsekit
import "fmt"
// On checks if the input at the current cursor position matches the provided
// TokenHandler. On must be chained with another method that tells the parser
// what action to perform when a match was found:
@ -17,7 +19,7 @@ package parsekit
// The chain as a whole returns a boolean that indicates whether or not a match
// was found. When no match was found, false is returned and Skip() and Accept()
// will have no effect. Because of this, typical use of an On() chain is as
// expression for a conditional expression (if, switch/case, for). E.g.:
// expression for a conditional statement (if, switch/case, for). E.g.:
//
// // Skip multiple exclamation marks.
// for p.On(parsekit.A.Excl).Skip() { }
@ -32,70 +34,71 @@ package parsekit
// p.RouteTo(stateHandlerC)
// }
//
// // When there's a "hi" on input, emit a parser item for it.
// // When there's a "hi" on input, then say hello.
// if p.On(parsekit.C.Str("hi")).Accept() {
// p.Emit(SomeItemType, p.BufLiteral())
// fmt.Println("Hello!")
// }
func (p *ParseAPI) On(tokenHandler TokenHandler) *ParseAPIOnAction {
p.panicWhenStoppedOrInError()
p.checkForLoops()
// Perform the matching operation.
m := &TokenAPI{p: p}
if tokenHandler == nil {
panic("ParseHandler bug: tokenHandler argument for On() is nil")
}
ok := tokenHandler(m)
// Keep track of the last match, to allow parser implementations
// to access it in an easy way. Typical use would be something like:
//
// if p.On(somethingBad).End() {
// p.Errorf("This was bad: %s", p.LastMatch)
// }
p.LastMatch = string(m.input)
p.result = nil
p.tokenAPI.result = NewResult()
fork := p.tokenAPI.Fork()
ok := tokenHandler(fork)
return &ParseAPIOnAction{
p: p,
parseAPI: p,
tokenAPI: fork,
ok: ok,
input: m.input,
output: m.output,
inputPos: p.inputPos + m.inputOffset,
}
}
// ParseAPIOnAction is a struct that is used for building the On()-method chain.
// The On() method will return an initialized struct of this type.
type ParseAPIOnAction struct {
p *ParseAPI
parseAPI *ParseAPI
tokenAPI *TokenAPI
ok bool
input []rune
output []rune
inputPos int
}
// Accept tells the parser to move the cursor past a match that was found,
// and to store the input that matched in the parser's string buffer.
// When no match was found, then no action is taken.
// and to make the TokenHandler results available in the ParseAPI through
// the Result() method.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Accept() bool {
if a.ok {
a.p.buffer.writeString(string(a.output))
a.advanceCursor()
a.tokenAPI.Merge()
a.parseAPI.result = a.tokenAPI.root.result
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
if a.tokenAPI.offset > 0 {
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
// Skip tells the parser to move the cursor past a match that was found,
// without storing the actual match in the parser's string buffer.
// without making the results available through the ParseAPI.
//
// Returns true in case a match was found.
// When no match was found, then no action is taken and false is returned.
func (a *ParseAPIOnAction) Skip() bool {
if a.ok {
a.advanceCursor()
a.tokenAPI.root.cursor = a.tokenAPI.cursor
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
if a.tokenAPI.offset > 0 {
a.tokenAPI.root.FlushReaderBuffer(a.tokenAPI.offset)
a.parseAPI.initLoopCheck()
}
}
return a.ok
}
@ -103,25 +106,23 @@ func (a *ParseAPIOnAction) Skip() bool {
// Stay tells the parser to not move the cursor after finding a match.
// Returns true in case a match was found, false otherwise.
func (a *ParseAPIOnAction) Stay() bool {
if a.ok {
a.tokenAPI.root.result = NewResult()
a.tokenAPI.root.detachChilds()
}
return a.ok
}
// advanceCursor advances the input position in the input data.
// While doing so, it keeps tracks of newlines that are encountered, so we
// can report on line + column positions on error.
func (a *ParseAPIOnAction) advanceCursor() {
if a.p.inputPos == a.inputPos {
return
}
a.p.loopCheck = map[string]bool{}
a.p.inputPos = a.inputPos
for _, r := range a.input {
if a.p.newline {
a.p.cursorLine++
a.p.cursorColumn = 1
} else {
a.p.cursorColumn++
}
a.p.newline = r == '\n'
// Result returns a Result struct, containing results as produced by the
// last ParseAPI.On() call.
func (p *ParseAPI) Result() *Result {
result := p.result
if p.result == nil {
caller, filepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.ParseAPI.Result(): Result() called without calling "+
"ParseAPI.Accept() on beforehand to make the result available "+
"from %s at %s", caller, filepos))
}
return result
}
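
As a quick illustration of the full On()/Accept()/Result() chain, here is an editor's sketch (not part of this commit), written in the style of the package examples:

    parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
        // Accept one or more digits, then read the accepted runes back through
        // the Result that Accept() made available.
        if p.On(parsekit.C.OneOrMore(parsekit.A.Digit)).Accept() {
            fmt.Println("digits:", p.Result().String())
        }
        p.ExpectEndOfFile()
    })
    parser.Execute("12345") // would print: digits: 12345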


@ -1,6 +1,9 @@
package parsekit
import "fmt"
import (
"fmt"
"io"
)
// Handle is used to execute other ParseHandler functions from within your
// ParseHandler function.
@ -77,14 +80,14 @@ func (p *ParseAPI) ExpectEndOfFile() {
// expectation is included in the error message.
func (p *ParseAPI) UnexpectedInput() {
p.panicWhenStoppedOrInError()
r, _, ok := p.peek(0)
r, err := p.tokenAPI.NextRune()
switch {
case ok:
case err == nil:
p.Error("unexpected character %q%s", r, fmtExpects(p))
case r == eofRune:
case err == io.EOF:
p.Error("unexpected end of file%s", fmtExpects(p))
case r == invalidRune:
p.Error("invalid UTF8 character in input%s", fmtExpects(p))
default:
p.Error("unexpected error '%s'%s", err, fmtExpects(p))
}
}


@ -1,47 +0,0 @@
package parsekit
// BufLiteral retrieves the contents of the parser's string buffer (all the
// runes that were added to it using ParseAPI.Accept()) as a literal string.
//
// Literal means that if the input had for example the subsequent runes '\' and
// 'n' in it, then the literal string would have a backslash and an 'n' it in,
// not a linefeed (ASCII char 10).
//
// Retrieving the buffer contents will not affect the buffer itself. New runes
// can still be added to it. Only when calling P.BufClear(), the buffer will be
// cleared.
func (p *ParseAPI) BufLiteral() string {
return p.buffer.asLiteralString()
}
// BufInterpreted retrieves the contents of the parser's string buffer (all the
// runes that were added to it using ParseAPI.Accept()) as an interpreted
// string.
//
// Interpreted means that the contents are treated as a Go double quoted
// interpreted string (handling escape codes like \n, \t, \uXXXX, etc.). if the
// input had for example the subsequent runes '\' and 'n' in it, then the
// interpreted string would have an actual linefeed (ASCII char 10) in it.
//
// This method returns a boolean value, indicating whether or not the string
// interpretation was successful. On invalid string data, an error will
// automatically be emitted and the boolean return value will be false.
//
// Retrieving the buffer contents will not affect the buffer itself. New runes
// can still be added to it. Only when calling P.BufClear(), the buffer will be
// cleared.
func (p *ParseAPI) BufInterpreted() (string, bool) {
s, err := p.buffer.asInterpretedString()
if err != nil {
p.Error(
"invalid string: %s (%s, forgot to escape a double quote or backslash maybe?)",
p.buffer.asLiteralString(), err)
return "", false
}
return s, true
}
// BufClear clears the contents of the parser's string buffer.
func (p *ParseAPI) BufClear() {
p.buffer.reset()
}


@ -43,35 +43,6 @@ func TestGivenParserWithError_WhenCallingHandle_ParsekitPanics(t *testing.T) {
`.*/parsehandler_test\.go:\d+: no calls allowed after ParseAPI\.Error\(\)`})
}
func TestGivenFilledStringBuffer_BufInterpreted_ReturnsInterpretedString(t *testing.T) {
var interpreted string
var literal string
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
literal = p.BufLiteral()
interpreted, _ = p.BufInterpreted()
})
p.Execute(`This\tis\ta\tcool\tstring`)
if literal != `This\tis\ta\tcool\tstring` {
t.Fatal("literal string is incorrect")
}
if interpreted != "This\tis\ta\tcool\tstring" {
t.Fatal("interpreted string is incorrect")
}
}
func TestGivenInputInvalidForStringInterpretation_BufInterpreted_SetsError(t *testing.T) {
p := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.On(parsekit.C.OneOrMore(parsekit.A.AnyRune)).Accept()
p.BufInterpreted()
})
err := p.Execute(`This \is wrongly escaped`)
if err.Error() != `invalid string: This \is wrongly escaped (invalid syntax, forgot to escape a double quote or backslash maybe?)` {
t.Fatalf("Got unexpected error: %s", err.Error())
}
}
type parserWithLoop struct {
loopCounter int
}
@ -119,7 +90,6 @@ func TestGivenLoopingParserDefinition_ParserPanics(t *testing.T) {
func TestGivenLoopingParserDefinition2_ParserPanics(t *testing.T) {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
for p.On(c.Max(5, a.AnyRune)).Accept() {
p.BufClear()
}
p.Stop()
})


@ -1,5 +1,9 @@
package parsekit
import (
"strings"
)
// Parser is the top-level struct that holds the configuration for a parser.
// The Parser can be instantiated using the parsekit.NewParser() method.
type Parser struct {
@ -22,11 +26,8 @@ func NewParser(startHandler ParseHandler) *Parser {
// When an error occurs during parsing, then this error is returned. Nil otherwise.
func (p *Parser) Execute(input string) *Error {
api := &ParseAPI{
input: input,
len: len(input),
cursorLine: 1,
cursorColumn: 1,
loopCheck: map[string]bool{},
tokenAPI: NewTokenAPI(strings.NewReader(input)),
loopCheck: map[string]bool{},
}
api.Handle(p.startHandler)
if !api.stopped && api.err == nil {
@ -39,12 +40,10 @@ func (p *Parser) Execute(input string) *Error {
// a parser that is based solely on a TokenHandler function.
// The Matcher can be instantiated using the parsekit.NewMatcher()
// method.
//
// To match input data against the wrapped Matcher function, use the method
// Matcher.Parse().
// TODO Rename to Tokenizer
type Matcher struct {
parser *Parser
match string
result *Result
}
// NewMatcher instantiates a new Matcher.
@ -55,11 +54,12 @@ type Matcher struct {
//
// The 'expects' parameter is used for creating an error message in case parsed
// input does not match the TokenHandler.
// TODO Rename to NewTokenizer, and make matcher Tokenizer, also see if we can use a Reader straight away, no ParseAPI.
func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
matcher := &Matcher{}
matcher.parser = NewParser(func(p *ParseAPI) {
if p.On(tokenHandler).Accept() {
matcher.match = p.BufLiteral()
matcher.result = p.Result()
p.Stop()
} else {
p.Expects(expects)
@ -70,9 +70,9 @@ func NewMatcher(tokenHandler TokenHandler, expects string) *Matcher {
}
// Execute feeds the input to the wrapped TokenHandler function.
// It returns the matched input string and an error. When an error
// occurred during parsing, the error will be set, nil otherwise.
func (m *Matcher) Execute(input string) (string, *Error) {
// It returns the TokenHandler's results. When an error occurred during parsing,
// the error will be set, nil otherwise.
func (m *Matcher) Execute(input string) (*Result, *Error) {
err := m.parser.Execute(input)
return m.match, err
return m.result, err
}
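
A small sketch of driving the reworked Matcher directly (an editor's addition, not part of this commit; it uses parsekit.A.Integer as seen elsewhere in this diff):

    matcher := parsekit.NewMatcher(parsekit.A.Integer, "an integer")
    result, err := matcher.Execute("42")
    if err == nil {
        fmt.Println(result.String()) // the matched input, "42"
    }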


@ -11,7 +11,7 @@ import (
)
// Easy access to the parsekit definitions.
var c, a, m = parsekit.C, parsekit.A, parsekit.M
var c, a, m, tok = parsekit.C, parsekit.A, parsekit.M, parsekit.T
type TokenHandlerTest struct {
Input string
@ -27,11 +27,11 @@ func RunTokenHandlerTests(t *testing.T, testSet []TokenHandlerTest) {
}
func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
output, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output != test.Expected {
} else if output := result.String(); output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
@ -41,6 +41,41 @@ func RunTokenHandlerTest(t *testing.T, test TokenHandlerTest) {
}
}
type TokenMakerTest struct {
Input string
TokenHandler parsekit.TokenHandler
Expected []parsekit.Token
}
func RunTokenMakerTest(t *testing.T, test TokenMakerTest) {
result, err := parsekit.NewMatcher(test.TokenHandler, "a match").Execute(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {
if len(result.Tokens()) != len(test.Expected) {
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
}
for i, expected := range test.Expected {
actual := result.Token(i)
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if string(expected.Runes) != string(actual.Runes) {
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}
}
}
}
func RunTokenMakerTests(t *testing.T, testSet []TokenMakerTest) {
for _, test := range testSet {
RunTokenMakerTest(t, test)
}
}
type PanicTest struct {
function func()
expected string

reader.go (new file, 108 lines)

@ -0,0 +1,108 @@
package parsekit
import (
"bufio"
"fmt"
"io"
"unicode/utf8"
)
// Reader wraps around an io.Reader and provides buffering that allows us to read
// the same runes over and over again. This is useful for implementing a parser
// that must be able to do a lookahead on the input, returning to the original
// input position after finishing that lookahead.
//
// To minimize memory use, it is also possible to flush the buffer when there is
// no more need to go back to previously read runes.
type Reader struct {
bufio *bufio.Reader // Used for ReadRune()
buffer []rune // Input buffer, holding runes that were read from input
bufferOffset int // The offset of the buffer, relative to the start of the input
bufferLen int // Input size, the number of runes in the buffer
}
// NewReader initializes a new Reader struct, wrapped around the provided io.Reader.
func NewReader(r io.Reader) *Reader {
return &Reader{
bufio: bufio.NewReader(r),
buffer: []rune{},
}
}
// RuneAt reads the rune at the provided rune offset.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the first rune that follows
// the flushed input.
//
// The error return value will be nil when reading was successful.
// When an invalid rune is encountered on the input, the error will be nil,
// but the rune will be utf8.RuneError.
//
// When reading fails, the rune will be utf8.RuneError. One special read
// failure is actually a normal situation: end of file reached. In that case,
// the returned error will be io.EOF.
func (r *Reader) RuneAt(offset int) (rune, error) {
// Rune at provided offset is not yet available in the input buffer.
// Read runes until we have enough runes to satisfy the offset.
for r.bufferLen <= offset {
readRune, _, err := r.bufio.ReadRune()
// Handle errors.
if err != nil {
return utf8.RuneError, err
}
// Skip BOM.
if readRune == '\uFEFF' && r.bufferOffset == 0 {
r.bufferOffset++
continue
}
r.buffer = append(r.buffer, readRune)
r.bufferLen++
}
return r.buffer[offset], nil
}
// RunesAt reads a slice of runes of length 'len', starting at offset 'start'.
//
// This offset is relative to the current starting position of the buffer in
// the reader. When starting reading, offset 0 will point at the start of the
// input. After flushing, offset 0 will point at the first rune that follows
// the flushed input.
//
// When an error is encountered during reading (EOF or other error), then the
// error return value will be set. In case of an error, any runes that could be
// successfully read are returned along with the error.
// TODO Do I actually use this interface?
func (r *Reader) RunesAt(start int, len int) ([]rune, error) {
if len == 0 {
return r.buffer[0:0], nil
}
end := start + len
_, err := r.RuneAt(end)
if err != nil {
if end > r.bufferLen {
end = r.bufferLen
}
return r.buffer[start:end], err
}
return r.buffer[start:end], nil
}
// Flush deletes the provided number of runes from the start of the
// reader buffer. After flushing the buffer, offset 0 as used by RuneAt()
// will point to the rune that comes after the flushed runes.
// So what this basically does is turn the Reader into a sliding window.
func (r *Reader) Flush(numberOfRunes int) {
if numberOfRunes > r.bufferLen {
panic(fmt.Sprintf(
"parsekit.Input.Reader.Flush(): number of runes to flush (%d) "+
"exceeds size of the buffer (%d)", numberOfRunes, r.bufferLen))
}
r.bufferOffset += numberOfRunes
r.bufferLen -= numberOfRunes
r.buffer = r.buffer[numberOfRunes:]
}

reader_test.go (new file, 134 lines)

@ -0,0 +1,134 @@
package parsekit
import (
"fmt"
"io"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
func ExampleNewReader() {
in := strings.NewReader("Hello, world!")
r := NewReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
fmt.Printf("%c", at(0))
fmt.Printf("%c", at(12))
// Output:
// H!
}
func ExampleReader_RuneAt() {
in := strings.NewReader("Hello, world!")
r := NewReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
// It is possible to go back and forth while reading the input.
fmt.Printf("%c", at(0))
fmt.Printf("%c", at(12))
fmt.Printf("%c", at(7))
fmt.Printf("%c", at(0))
// Output:
// H!wH
}
func ExampleReader_RuneAt_endOfFile() {
in := strings.NewReader("Hello, world!")
r := NewReader(in)
rn, err := r.RuneAt(13)
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
rn, err = r.RuneAt(20)
fmt.Printf("%q %s %t\n", rn, err, err == io.EOF)
// Output:
// '�' EOF true
// '�' EOF true
}
func ExampleReader_RuneAt_invalidRune() {
in := strings.NewReader("Hello, \xcdworld!")
r := NewReader(in)
rn, err := r.RuneAt(6)
fmt.Printf("%q %t\n", rn, err == nil)
rn, err = r.RuneAt(7)
fmt.Printf("%q %t\n", rn, err == nil)
rn, err = r.RuneAt(8)
fmt.Printf("%q %t\n", rn, err == nil)
rn, err = r.RuneAt(9)
fmt.Printf("%q %t\n", rn, err == nil)
// Output:
// ' ' true
// '�' true
// 'w' true
// 'o' true
}
func ExampleReader_RunesAt() {
in := strings.NewReader("Hello, \xcdworld!")
r := NewReader(in)
rs, err := r.RunesAt(4, 6)
fmt.Printf("%q %t\n", string(rs), err == nil)
rs, err = r.RunesAt(4, 0)
fmt.Printf("%q %t\n", string(rs), err == nil)
rs, err = r.RunesAt(8, 100)
fmt.Printf("%q %t\n", string(rs), err == io.EOF)
// Output:
// "o, <20>wo" true
// "" true
// "world!" true
}
func TestRuneAt_SkipsBOMAtStartOfFile(t *testing.T) {
in := strings.NewReader("\uFEFFBommetje!")
r := NewReader(in)
b, _ := r.RuneAt(0)
o, _ := r.RuneAt(1)
m, _ := r.RuneAt(2)
bom := fmt.Sprintf("%c%c%c", b, o, m)
assert.Equal(t, "Bom", bom, "first three runes")
}
func ExampleReader_Flush() {
in := strings.NewReader("Hello, world!")
r := NewReader(in)
at := func(i int) rune { r, _ := r.RuneAt(i); return r }
rb := func(start int, len int) []rune { r, _ := r.RunesAt(start, len); return r }
// Fills the buffer with the first 8 runes on the input: "Hello, w"
fmt.Printf("%c\n", at(7))
// Now flush the first 4 runes from the buffer (dropping "Hell" from it)
r.Flush(4)
// Rune 0 is now pointing at what originally was rune offset 4.
// We can continue reading from there.
fmt.Printf("%s", string(rb(0, 8)))
// Output:
// w
// o, world
}
func TestGivenNumberOfRunesTooHigh_Flush_Panics(t *testing.T) {
in := strings.NewReader("Hello, world!")
r := NewReader(in)
// Fill buffer with "Hello, worl", the first 11 runes.
r.RuneAt(10)
// However, we flush 12 runes, which exceeds the buffer size.
assert.Panic(t, assert.PanicT{
Function: func() { r.Flush(12) },
Expect: "parsekit.Input.Reader.Flush(): number of runes to flush (12) exceeds size of the buffer (11)",
})
}


@ -1,62 +0,0 @@
package parsekit
import (
"bytes"
"strconv"
"strings"
)
// stringBuffer is a string buffer implementation that is used by the parser
// to efficiently accumulate runes from the input and eventually turn these
// into a string, either literal or interpreted.
type stringBuffer struct {
buffer bytes.Buffer
}
// reset resets the string buffer, in order to build a new string.
func (b *stringBuffer) reset() *stringBuffer {
b.buffer.Reset()
return b
}
// writeString adds the runes of the input string to the string buffer.
func (b *stringBuffer) writeString(s string) *stringBuffer {
for _, r := range s {
b.writeRune(r)
}
return b
}
// writeRune adds a single rune to the string buffer.
func (b *stringBuffer) writeRune(r rune) *stringBuffer {
b.buffer.WriteRune(r)
return b
}
// asLiteralString returns the string buffer as a literal string.
// Literal means that no escape sequences are processed.
func (b *stringBuffer) asLiteralString() string {
return b.buffer.String()
}
// asInterpretedString returns the string in its interpreted form.
// Interpreted means that escape sequences are handled in the way that Go would
// have, had it been inside double quotes. It translates for example escape
// sequences like "\n", "\t", \uXXXX" and "\UXXXXXXXX" into their string
// representations.
// Since the input might contain invalid escape sequences, this method
// also returns an error. When an error is returned, the returned string will
// contain the string as far as it could be interpreted.
func (b *stringBuffer) asInterpretedString() (string, error) {
var sb strings.Builder
tail := b.buffer.String()
for len(tail) > 0 {
r, _, newtail, err := strconv.UnquoteChar(tail, '"')
if err != nil {
return sb.String(), err
}
tail = newtail
sb.WriteRune(r)
}
return sb.String(), nil
}


@ -1,88 +0,0 @@
package parsekit
import (
"testing"
)
func TestGeneratingStringDoesNotResetBuffer(t *testing.T) {
var b stringBuffer
s1, _ := b.writeString(`hi\nthere`).asInterpretedString()
s2 := b.asLiteralString()
if s1 != "hi\nthere" {
t.Fatalf("Did not get expected string\"X\" for try 1, but %q", s1)
}
if s2 != "hi\\nthere" {
t.Fatalf("Did not get expected string\"X\" for try 2, but %q", s2)
}
}
func TestResetResetsBuffer(t *testing.T) {
var b stringBuffer
s := b.writeRune('X').reset().asLiteralString()
if s != "" {
t.Fatalf("Did not get expected empty string, but %q", s)
}
}
func TestAsLiteralString(t *testing.T) {
b := stringBuffer{}
for _, c := range []stringbufT{
{"empty string", ``, ``, OK},
{"simple string", `Simple string!`, `Simple string!`, OK},
{"single quote", `'`, `'`, OK},
{"double quote", `"`, `"`, OK},
{"escaped single quote", `\'`, `\'`, OK},
{"escaped double quote", `\"`, `\"`, OK},
{"escape anything", `\x\t\f\n\r\'\"\\`, `\x\t\f\n\r\'\"\\`, OK},
{"UTF8 escapes", `\uceb2\U00e0b8bf`, `\uceb2\U00e0b8bf`, OK},
{"actual newline", "on\nmultiple\nlines", "on\nmultiple\nlines", OK},
} {
s := b.reset().writeString(c.in).asLiteralString()
if s != c.out {
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
}
}
}
func TestAsInterpretedString(t *testing.T) {
b := stringBuffer{}
for _, c := range []stringbufT{
{"empty string", "", "", OK},
{"one character", "Simple string!", "Simple string!", OK},
{"escaped single quote", `\'`, "", FAIL},
{"escaped double quote", `\"`, `"`, OK},
{"bare single quote", `'`, "'", OK},
{"string in single quotes", `'Hello'`, `'Hello'`, OK},
{"string in escaped double quotes", `\"Hello\"`, `"Hello"`, OK},
{"escape something", `\t\f\n\r\"\\`, "\t\f\n\r\"\\", OK},
{"short UTF8 escapes", `\u2318Wh\u00e9\u00e9!`, `⌘Whéé!`, OK},
{"long UTF8 escapes", `\U0001014D \u2318 Wh\u00e9\u00e9!`, `𐅍 ⌘ Whéé!`, OK},
{"UTF8 characters", "Ѝюج wut Ж ?", "Ѝюج wut Ж ?", OK},
{"example from spec",
`I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF.`,
"I'm a string. \"You can quote me\". Name\tJosé\nLocation\tSF.", OK},
} {
s, err := b.reset().writeString(c.in).asInterpretedString()
if c.isSuccessCase && err != nil {
t.Fatalf("[%s] unexpected error for input %q: %s", c.name, c.in, err)
}
if !c.isSuccessCase && err == nil {
t.Fatalf("[%s] expected a failure, but no failure occurred", c.name)
}
if s != c.out && c.isSuccessCase {
t.Fatalf("[%s] %q -> %q failed: actual result = %q", c.name, c.in, c.out, s)
}
}
}
type stringbufT struct {
name string
in string
out string
isSuccessCase bool
}
const (
OK bool = true
FAIL bool = false
)

tokenapi.go (new file, 188 lines)

@ -0,0 +1,188 @@
package parsekit
import (
"fmt"
"io"
)
// TokenAPI wraps a parsekit.Reader and its purpose is to retrieve input data and
// to report back results. For easy lookahead support, a forking strategy is
// provided.
//
// BASIC OPERATION:
//
// To retrieve the next rune from the TokenAPI, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result buffer of the TokenAPI struct.
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// By invoking NextRune() + Accept() multiple times, the result buffer is extended
// with as many runes as needed.
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the TokenAPI must be
// brought back to the original state, so we can try a different route.
//
// The way in which this is supported, is by forking a TokenAPI struct by calling
// Fork(). This will return a forked child TokenAPI, with an empty result buffer,
// but using the same input cursor position as the forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the result buffer. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's result
// buffer to the parent's result buffer, and to move the input cursor position
// to that of the child.
//
// When the lookahead was unsuccessful, then the forked child TokenAPI can simply
// be discarded. The parent TokenAPI was never modified, so it can safely be used
// as if the lookahead never happened.
//
// Note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the input cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient but, in my opinion, not very intuitive to read.
type TokenAPI struct {
reader *Reader
cursor *Cursor // current read cursor position, rel. to the input start
offset int // current rune offset rel. to the Reader's sliding window
result *Result // results as produced by a TokenHandler (runes, Tokens)
root *TokenAPI // the root TokenAPI
parent *TokenAPI // parent TokenAPI in case this TokenAPI is a fork child
child *TokenAPI // child TokenAPI in case this TokenAPI is a fork parent
}
// NewTokenAPI initializes a new TokenAPI struct, wrapped around the provided io.Reader.
func NewTokenAPI(r io.Reader) *TokenAPI {
input := &TokenAPI{
reader: NewReader(r),
cursor: &Cursor{},
result: NewResult(),
}
input.root = input
return input
}
// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic.
func (i *TokenAPI) NextRune() (rune, error) {
if i.result.lastRune != nil {
caller, linepos := getCaller(1)
panic(fmt.Sprintf("parsekit.TokenAPI.NextRune(): NextRune() called without a prior call "+
"to Accept() from %s at %s", caller, linepos))
}
i.detachChilds()
readRune, err := i.reader.RuneAt(i.offset)
i.result.lastRune = &runeInfo{r: readRune, err: err}
return readRune, err
}
// Accept the last rune as read by NextRune() into the result buffer and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *TokenAPI) Accept() {
if i.result.lastRune == nil {
caller, linepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called without first "+
"calling NextRune() from %s at %s", caller, linepos))
} else if i.result.lastRune.err != nil {
caller, linepos := getCaller(1)
panic(fmt.Sprintf(
"parsekit.TokenAPI.Accept(): Accept() called while the previous "+
"call to NextRune() failed from %s at %s", caller, linepos))
}
i.result.runes = append(i.result.runes, i.result.lastRune.r)
i.cursor.move(fmt.Sprintf("%c", i.result.lastRune.r))
i.offset++
i.result.lastRune = nil
}
// Fork forks off a child of the TokenAPI struct. It will reuse the same Reader and
// read cursor position, but for the rest this is a fresh TokenAPI.
func (i *TokenAPI) Fork() *TokenAPI {
i.detachChilds()
// Create the new fork.
child := &TokenAPI{
reader: i.reader,
cursor: &Cursor{},
offset: i.offset,
root: i.root,
parent: i,
}
child.result = NewResult()
*child.cursor = *i.cursor
i.child = child
i.result.lastRune = nil
return child
}
// Merge adds the data of the forked child TokenAPI that Merge() is called on to the
// data of its parent (results and read cursor position).
func (i *TokenAPI) Merge() {
if i.parent == nil {
panic("parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI")
}
i.parent.result.runes = append(i.parent.result.runes, i.result.runes...)
i.parent.result.tokens = append(i.parent.result.tokens, i.result.tokens...)
i.parent.offset = i.offset
i.parent.cursor = i.cursor
i.detachChilds()
i.result = NewResult()
}
// Result returns the result data for the TokenAPI. The returned struct
// can be used to retrieve and modify the result data.
func (i *TokenAPI) Result() *Result {
return i.result
}
// Cursor retrieves the current read cursor data.
// TODO make this and offset part of Result struct?
func (i *TokenAPI) Cursor() Cursor {
return *i.cursor
}
// FlushReaderBuffer delegates to the Flush() method of the contained
// parsekit.Reader. It flushes the provided number of runes from the reader
// cache and resets the read offset.
func (i *TokenAPI) FlushReaderBuffer(numberOfRunes int) {
if i != i.root {
panic("parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() can only be called on the root TokenAPI, not on a forked child")
}
i.detachChilds()
i.reader.Flush(numberOfRunes)
i.offset = 0
}
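// exampleFlush is an illustrative sketch (not part of the package API): code
// that drives the root TokenAPI can periodically flush runes it no longer
// needs, so the reader cache does not keep growing on large streaming inputs.
// The processedRunes count is a hypothetical value tracked by the caller.
func exampleFlush(root *TokenAPI, processedRunes int) {
	root.FlushReaderBuffer(processedRunes) // the read offset is reset to 0
}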
func (i *TokenAPI) detachChilds() {
if i.child != nil {
i.child.detachChildsRecurse()
i.child = nil
}
}
func (i *TokenAPI) detachChildsRecurse() {
if i.child != nil {
i.child.detachChildsRecurse()
}
i.child = nil
i.parent = nil
}

106
tokenapi_result.go Normal file
View File

@ -0,0 +1,106 @@
package parsekit
import (
"fmt"
)
// Result holds results as produced by a TokenHandler.
type Result struct {
lastRune *runeInfo // Information about the last rune read using NextRune()
runes []rune
tokens []*Token
}
type runeInfo struct {
r rune
err error
}
// Token defines a lexical token as produced by TokenHandlers.
type Token struct {
Type interface{} // token type, can be any type that a parser author sees fit
Runes []rune // the runes that make up the token
Value interface{} // an optional value of any type
}
// NewResult initializes an empty result struct.
func NewResult() *Result {
return &Result{
runes: []rune{},
tokens: []*Token{},
}
}
// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s interface{}) {
r.ClearRunes()
r.AddRunes(s)
}
// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(s interface{}) {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
panic(fmt.Sprintf("parsekit.Result.SetRunes(): unsupported type '%T' used", s))
}
}
// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the Result as a string.
func (r *Result) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
r.tokens = []*Token{}
}
// AddToken is used to add a Token to the results.
func (r *Result) AddToken(t *Token) {
r.tokens = append(r.tokens, t)
}
// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []*Token {
return r.tokens
}
// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) *Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
return r.tokens[idx].Value
}
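// exampleCollectValues is an illustrative sketch (not part of the package API)
// showing how a TokenHandler can attach typed tokens to its Result and how the
// token values can be read back afterwards.
func exampleCollectValues(t *TokenAPI) []interface{} {
	t.Result().AddToken(&Token{Type: "PI", Runes: []rune("π"), Value: 3.1415})
	t.Result().AddToken(&Token{Type: nil, Runes: []rune("yes"), Value: true})
	return t.Result().Values() // []interface{}{3.1415, true}
}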

27
tokenapi_result_test.go Normal file
View File

@ -0,0 +1,27 @@
package parsekit
import (
"testing"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := mkInput()
i.Result().SetRunes("string")
assert.Equal(t, "string", string(i.Result().String()), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
assert.Equal(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
i.Result().SetRunes('X')
assert.Equal(t, "X", string(i.Result().String()), "i.Result() with rune input")
}
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.Result().SetRunes(1234567)
},
Expect: "parsekit.Result.SetRunes(): unsupported type 'int' used",
})
}

288
tokenapi_test.go Normal file
View File

@ -0,0 +1,288 @@
package parsekit
import (
"io"
"strings"
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
r, _ := mkInput().NextRune()
assert.Equal(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
}
func TestCallingNextRuneTwice_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.NextRune()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
`a prior call to Accept\(\) from .*TestCallingNextRuneTwice_Panics.* at /.*_test.go:\d+`,
})
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: mkInput().Accept,
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
`first calling NextRune\(\) from .* at /.*:\d+`,
})
}
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.NextRune()
f.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
i.Fork()
f.Merge()
},
Expect: "parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI",
})
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := mkInput()
f1 := i.Fork()
f2 := f1.Fork()
f3 := f2.Fork()
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
f5 := f4.Fork()
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
assert.Equal(t, true, i.child == f1, "i.child == f1")
assert.Equal(t, true, f1.parent == i, "f1.parent == i")
assert.Equal(t, true, f1.child == f4, "f1.child == f4")
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
assert.Equal(t, true, f4.parent == f1, "f4.parent == f1")
assert.Equal(t, true, f4.child == f5, "f4.child == f5")
assert.Equal(t, true, f5.parent == f4, "f5.parent == f4")
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
i.NextRune()
assert.Equal(t, true, i.parent == nil, "i.parent == nil")
assert.Equal(t, true, i.child == nil, "i.child == nil")
assert.Equal(t, true, f1.parent == nil, "f1.parent == nil")
assert.Equal(t, true, f1.child == nil, "f1.child == nil")
assert.Equal(t, true, f2.child == nil, "f2.child == nil")
assert.Equal(t, true, f2.parent == nil, "f2.parent == nil")
assert.Equal(t, true, f3.child == nil, "f3.child == nil")
assert.Equal(t, true, f3.parent == nil, "f3.parent == nil")
assert.Equal(t, true, f4.parent == nil, "f4.parent == nil")
assert.Equal(t, true, f4.child == nil, "f4.child == nil")
assert.Equal(t, true, f5.parent == nil, "f5.parent == nil")
assert.Equal(t, true, f5.child == nil, "f5.child == nil")
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
i.NextRune()
i.Fork()
i.Accept()
},
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called without ` +
`first calling NextRune\(\) from .* at /.*:\d+`,
})
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
i := mkInput()
r, _ := i.NextRune()
assert.Equal(t, 'T', r, "result from 1st call to NextRune()")
// TODO still (*runeInfo) case needed?
assert.NotEqual(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after NextRune()")
i.Accept()
assert.Equal(t, (*runeInfo)(nil), i.result.lastRune, "Input.lastRune after Accept()")
assert.Equal(t, 1, i.offset, "Input.offset")
assert.Equal(t, 'T', i.reader.buffer[0], "Input.buffer[0]")
r, _ = i.NextRune()
assert.Equal(t, 'e', r, "result from 2nd call to NextRune()")
}
func TestCallingMultipleAccepts_FillsInputWithData(t *testing.T) {
i := mkInput()
for j := 0; j < 7; j++ {
i.NextRune()
i.Accept()
}
assert.Equal(t, "Testing", string(i.reader.buffer), "reader input buffer")
assert.Equal(t, "Testing", i.Result().String(), "i.Result().String()")
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := NewTokenAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
assert.Equal(t, "line 1, column 1", i.cursor.String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
assert.Equal(t, "line 1, column 7", i.cursor.String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
assert.Equal(t, "line 2, column 1", i.cursor.String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
assert.Equal(t, "line 3, column 5", i.cursor.String(), "cursor 4")
assert.Equal(t, *i.cursor, i.Cursor(), "i.Cursor()")
}
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := mkInput()
i.NextRune()
i.Accept() // T
assert.Equal(t, "T", i.Result().String(), "accepted rune in input")
// Fork
f := i.Fork()
assert.Equal(t, f, i.child, "Input.child (must be f)")
assert.Equal(t, i, f.parent, "Input.parent (must be i)")
	assert.Equal(t, 1, i.cursor.Byte, "i.cursor.Byte")
assert.Equal(t, 1, i.child.cursor.Byte, "i.child.cursor.Byte")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
assert.Equal(t, "es", f.Result().String(), "result runes in fork")
	assert.Equal(t, 1, i.cursor.Byte, "i.cursor.Byte")
assert.Equal(t, 3, i.child.cursor.Byte, "i.child.cursor.Byte")
// Merge fork back into parent
f.Merge()
assert.Equal(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
	assert.Equal(t, 3, i.cursor.Byte, "i.cursor.Byte")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
f1 := i.Fork()
f1.NextRune()
f1.Accept()
f2 := f1.Fork()
f2.NextRune()
f2.Accept()
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
assert.Equal(t, 1, i.offset, "i.offset")
assert.Equal(t, "e", f1.Result().String(), "f1.Result().String()")
assert.Equal(t, 2, f1.offset, "f1.offset")
assert.Equal(t, "s", f2.Result().String(), "f2.Result().String()")
assert.Equal(t, 3, f2.offset, "f2.offset")
f2.Merge()
assert.Equal(t, "T", i.Result().String(), "i.Result().String()")
assert.Equal(t, 1, i.offset, "i.offset")
assert.Equal(t, "es", f1.Result().String(), "f1.Result().String()")
assert.Equal(t, 3, f1.offset, "f1.offset")
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
assert.Equal(t, 3, f2.offset, "f2.offset")
f1.Merge()
assert.Equal(t, "Tes", i.Result().String(), "i.Result().String()")
assert.Equal(t, 3, i.offset, "i.offset")
assert.Equal(t, "", f1.Result().String(), "f1.Result().String()")
assert.Equal(t, 3, f1.offset, "f1.offset")
assert.Equal(t, "", f2.Result().String(), "f2.Result().String()")
assert.Equal(t, 3, f2.offset, "f2.offset")
}
func TestGivenForkedChild_FlushReaderBuffer_Panics(t *testing.T) {
assert.Panic(t, assert.PanicT{
Function: func() {
i := mkInput()
f := i.Fork()
f.FlushReaderBuffer(1)
},
Expect: "parsekit.input.TokenAPI.FlushReaderBuffer(): Flushbuffer() " +
"can only be called on the root TokenAPI, not on a forked child",
})
}
func TestGivenRootWithSomeRunesRead_FlushReaderBuffer_ClearsReaderBuffer(t *testing.T) {
i := mkInput()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.FlushReaderBuffer(2)
assert.Equal(t, "Te", i.Result().String(), "i.Result()")
assert.Equal(t, 0, i.offset, "i.offset")
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
assert.Equal(t, 2, i.offset, "i.offset")
i.FlushReaderBuffer(2)
assert.Equal(t, "Test", i.Result().String(), "i.Result()")
assert.Equal(t, 0, i.offset, "i.offset")
}
func TestWhenCallingNextRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := NewTokenAPI(strings.NewReader("X"))
i.NextRune()
i.Accept()
r, err := i.NextRune()
assert.Equal(t, true, r == utf8.RuneError, "returned rune from NextRune()")
assert.Equal(t, true, err == io.EOF, "returned error from NextRune()")
}
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := NewTokenAPI(strings.NewReader("X"))
f := i.Fork()
f.NextRune()
f.Accept()
r, err := f.NextRune()
assert.Equal(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
r, err = i.NextRune()
	assert.Equal(t, 'X', r, "returned rune from NextRune() on parent")
	assert.Equal(t, true, err == nil, "returned error from NextRune() on parent")
}
func mkInput() *TokenAPI {
return NewTokenAPI(strings.NewReader("Testing"))
}

View File

@ -2,113 +2,55 @@ package parsekit
import (
"fmt"
"runtime"
)
// TokenHandler is the function type that is involved in turning a low level
// stream of UTF8 runes into parsing tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the match.
//
// A TokenHandler is to be used in conjunction with parsekit.P.On() or
// parsekit.Matcher().
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the token(s).
//
// A TokenHandler function gets a TokenAPI as its input and returns a boolean to
// indicate whether or not it found a match on the input. The TokenAPI is used
// for retrieving input data to match against and for reporting back results.
type TokenHandler func(t *TokenAPI) bool
// TokenAPI is used by TokenHandler functions to retrieve runes from the
// input to match against and to report back results.
//
// Basic operation:
//
// To retrieve the next rune from the input, the TokenHandler function can call
// the TokenAPI.NextRune() method.
//
// The TokenHandler function can then evaluate the retrieved rune and either
// accept or skip the rune. When accepting it using TokenAPI.Accept(), the rune
// is added to the resulting output of the TokenAPI. When using TokenAPI.Skip(),
// the rune will not be added to the output. It is mandatory for a TokenHandler
// to call either Accept() or Skip() after retrieving a rune, before calling
// NextRune() again.
//
// Eventually, the TokenHandler function must return a boolean value, indicating
// whether or not a match was found. When true, then the calling code will
// use the runes that were accepted into the TokenAPI's resulting output.
//
// Forking operation for easy lookahead support:
//
// Sometimes, a TokenHandler function must be able to perform a lookahead, which
// might either succeed or fail. In case of a failing lookahead, the state
// of the TokenAPI must be brought back to the original state.
//
// The way in which this is supported, is by forking a TokenAPI by calling
// TokenAPI.Fork(). This will return a child TokenAPI, with an empty
// output buffer, but using the same input cursor position as the forked parent.
//
// The TokenHandler function can then use the same interface as described for
// normal operation to retrieve runes from the input and to fill the resulting
// output. When the TokenHandler function decides that the lookahead was successful,
// then the method TokenAPI.Merge() can be called on the forked child to
// append the resulting output from the child to the parent's resulting output,
// and to update the parent input cursor position to that of the child.
//
// When the TokenHandler function decides that the lookahead was unsuccessful,
// then it can simply discard the forked child. The parent TokenAPI was never
// modified, so a new match can be safely started using that parent, as if the
// lookahead never happened.
type TokenAPI struct {
p *ParseAPI // parser state, used to retrieve input data to match against (TODO should be tiny interface)
inputOffset int // the byte offset into the input
input []rune // a slice of runes that represents all retrieved input runes for the Matcher
output []rune // a slice of runes that represents the accepted output runes for the Matcher
currRune *runeInfo // hold information for the last rune that was read from the input
parent *TokenAPI // the parent MatchDialog, in case this one was forked
}
// runeInfo describes a single rune and its metadata.
type runeInfo struct {
Rune rune // an UTF8 rune
ByteSize int // the number of bytes in the rune
OK bool // false when the rune represents an invalid UTF8 rune or EOF
}
// NextRune retrieves the next rune from the input.
//
// It returns the rune and a boolean. The boolean will be false in case an
// invalid UTF8 rune or the end of the file was encountered.
//
// After using NextRune() to retrieve a rune, Accept() or Skip() can be called
// to respectively add the rune to the TokenAPI's resulting output or to
// fully ignore it. This way, a TokenHandler has full control over what runes are
// significant for the resulting output of that TokenHandler.
// After retrieving a rune, Accept() or Skip() can be called to respectively add
// the rune to the TokenAPIold's string buffer or to fully ignore it. This way,
// a TokenHandler has full control over what runes are significant for the
// resulting output of that TokenHandler.
//
// After using NextRune(), this method can not be reinvoked, until the last read
// rune is explicitly accepted or skipped as described above.
func (t *TokenAPI) NextRune() (rune, bool) {
if t.currRune != nil {
caller, filepos := t.p.getCaller(1)
panic(fmt.Sprintf(
"TokenHandler bug: NextRune() was called from %s at %s "+
"without accepting or skipping the previously read rune", caller, filepos))
}
r, w, ok := t.p.peek(t.inputOffset)
t.currRune = &runeInfo{r, w, ok}
if ok {
t.input = append(t.input, r)
}
return r, ok
}
// func (t *TokenAPIold) NextRune() (rune, bool) {
// if t.lastRune != nil {
// caller, filepos := getCaller(1)
// panic(fmt.Sprintf(
// "TokenHandler bug: NextRune() was called from %s at %s "+
// "without accepting or skipping the previously read rune", caller, filepos))
// }
// r, w, ok := 'X', 10, true // t.input.peek(t.inputOffset)
// t.lastRune = &runeInfo{r, w, ok}
// if ok {
// t.result.Input = append(t.result.Input, r)
// }
// return r, ok
// }
// Fork splits off a child TokenAPI, containing the same input cursor position
// as the parent TokenAPI, but with all other data in a fresh state.
// Fork splits off a child TokenAPIold, containing the same input cursor position
// as the parent TokenAPIold, but with all other data in a fresh state.
//
// By forking, a TokenHandler function can freely work with a TokenAPI, without
// affecting the parent TokenAPI. This is for example useful when the
// By forking, a TokenHandler function can freely work with a TokenAPIold, without
// affecting the parent TokenAPIold. This is for example useful when the
// TokenHandler function must perform some form of lookahead.
//
// When a successful match was found, the TokenHandler function can call
// TokenAPI.Merge() on the forked child to have the resulting output added
// to the parent TokenAPI.
// TokenAPIold.Merge() on the forked child to have the resulting output added
// to the parent TokenAPIold.
//
// When no match was found, the forked child can simply be discarded.
//
@ -118,7 +60,7 @@ func (t *TokenAPI) NextRune() (rune, bool) {
// case could look like this (yes, it's naive, but it shows the point):
// TODO make proper tested example
//
// func MatchAbcd(t *TokenAPI) bool {
// func MatchAbcd(t *TokenAPIold) bool {
// child := t.Fork() // fork to keep m from input untouched
// for _, letter := []rune {'a', 'b', 'c', 'd'} {
// if r, ok := t.NextRune(); !ok || r != letter {
@ -129,73 +71,69 @@ func (t *TokenAPI) NextRune() (rune, bool) {
// child.Merge() // we have a match, add resulting output to parent
// return true // and report the successful match
// }
func (t *TokenAPI) Fork() *TokenAPI {
return &TokenAPI{
p: t.p,
inputOffset: t.inputOffset,
parent: t,
}
}
// Accept will add the last rune as read by TokenAPI.NextRune() to the resulting
// output of the TokenAPI.
func (t *TokenAPI) Accept() {
t.checkAllowedCall("Accept()")
t.output = append(t.output, t.currRune.Rune)
t.inputOffset += t.currRune.ByteSize
t.currRune = nil
}
// Accept will add the last rune as read by TokenAPIold.NextRune() to the resulting
// output of the TokenAPIold.
// func (t *TokenAPIold) Accept() {
// t.checkAllowedCall("Accept()")
// t.buffer = append(t.buffer, t.lastRune.Rune)
// t.result.Accepted = append(t.result.Accepted, t.lastRune.Rune)
// t.inputOffset += t.lastRune.ByteSize
// t.lastRune = nil
// }
// Skip will ignore the last rune as read by NextRune().
func (t *TokenAPI) Skip() {
t.checkAllowedCall("Skip()")
t.inputOffset += t.currRune.ByteSize
t.currRune = nil
}
// func (t *TokenAPIold) Skip() {
// t.checkAllowedCall("Skip()")
// t.inputOffset += t.lastRune.ByteSize
// t.lastRune = nil
// }
func (t *TokenAPI) checkAllowedCall(name string) {
if t.currRune == nil {
caller, filepos := t.p.getCaller(2)
panic(fmt.Sprintf(
"TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
name, caller, filepos))
}
if !t.currRune.OK {
caller, filepos := t.p.getCaller(2)
panic(fmt.Sprintf(
"TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
"did not return OK (EOF or invalid rune)", name, caller, filepos))
}
}
// func (t *TokenAPIold) checkAllowedCall(name string) {
// if t.lastRune == nil {
// caller, filepos := getCaller(2)
// panic(fmt.Sprintf(
// "TokenHandler bug: %s was called from %s at %s without a prior call to NextRune()",
// name, caller, filepos))
// }
// if !t.lastRune.OK {
// caller, filepos := getCaller(2)
// panic(fmt.Sprintf(
// "TokenHandler bug: %s was called from %s at %s, but prior call to NextRune() "+
// "did not return OK (EOF or invalid rune)", name, caller, filepos))
// }
// }
// Merge merges the resulting output from a forked child TokenAPI back into
// AddToken is used to add a token to the results of the TokenHandler.
// func (t *TokenAPIold) AddToken(tok *Token) {
// t.result.Tokens = append(t.result.Tokens, tok)
// }
// Merge merges the resulting output from a forked child TokenAPIold back into
// its parent: The runes that are accepted in the child are added to the parent
// runes and the parent's input cursor position is advanced to the child's
// cursor position.
//
// After the merge, the child TokenAPI is reset so it can immediately be
// After the merge, the child TokenAPIold is reset so it can immediately be
// reused for performing another match (all data are cleared, except for the
// input offset which is kept at its current position).
func (t *TokenAPI) Merge() bool {
if t.parent == nil {
panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
}
t.parent.input = append(t.parent.input, t.input...)
t.parent.output = append(t.parent.output, t.output...)
t.parent.inputOffset = t.inputOffset
t.ClearOutput()
t.ClearInput()
return true
}
// func (t *TokenAPIold) Merge() bool {
// if t.parent == nil {
// panic("TokenHandler bug: Cannot call Merge a a non-forked MatchDialog")
// }
// t.parent.buffer = append(t.parent.buffer, t.result.Accepted...)
// t.parent.result.Input = append(t.parent.result.Input, t.result.Input...)
// t.parent.result.Accepted = append(t.parent.result.Accepted, t.result.Accepted...)
// t.parent.result.Tokens = append(t.parent.result.Tokens, t.result.Tokens...)
// t.parent.inputOffset = t.inputOffset
// t.result = &TokResult{}
// return true
// }
// ClearOutput clears the resulting output for the TokenAPI, but it keeps
// the input and input offset as-is.
func (t *TokenAPI) ClearOutput() {
t.output = []rune{}
}
// ClearInput clears the input for the TokenAPI, but it keeps the output
// and input offset as-is.
func (t *TokenAPI) ClearInput() {
t.input = []rune{}
func getCaller(depth int) (string, string) {
// No error handling, because we call this method ourselves with safe depth values.
pc, file, line, _ := runtime.Caller(depth + 1)
filepos := fmt.Sprintf("%s:%d", file, line)
caller := runtime.FuncForPC(pc)
return caller.Name(), filepos
}

View File

@ -4,34 +4,107 @@ import (
"testing"
"git.makaay.nl/mauricem/go-parsekit"
"git.makaay.nl/mauricem/go-parsekit/assert"
)
func TestWithinTokenHandler_AcceptIncludesAndSkipIgnoresRuneInOutput(t *testing.T) {
func TestWithinTokenHandler_AcceptIncludesRuneInOutput(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
for i := 0; i < 33; i++ {
for i := 0; i < 20; i++ {
t.NextRune()
t.Accept()
t.NextRune()
t.Skip()
}
return true
}, "test")
output, _ := parser.Execute("Txhxixsx xsxhxoxuxlxdx xbxexcxoxmxex xqxuxixtxex xrxexaxdxaxbxlxex")
if output != "This should become quite readable" {
t.Fatalf("Got unexpected output from TokenHandler: %s", output)
result, _ := parser.Execute("This is some random data to parse")
if result.String() != "This is some random " {
t.Fatalf("Got unexpected output from TokenHandler: %s", result.String())
}
}
func TestGivenNextRuneCalled_WithoutAcceptOrSkip_NextCallToNextRunePanics(t *testing.T) {
func TestWithinTokenHandler_TokensCanBeEmitted(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
t.NextRune()
t.NextRune()
return false
t.Result().AddToken(&parsekit.Token{
Type: "PI",
Runes: []rune("π"),
Value: 3.1415,
})
t.Result().AddToken(&parsekit.Token{
Type: nil,
Runes: []rune("yes"),
Value: true,
})
return true
}, "test")
RunPanicTest(t, PanicTest{
func() { parser.Execute("input string") },
`TokenHandler bug: NextRune\(\) was called from .*NextCallToNextRunePanics.* ` +
`at .*/tokenhandler_test\.go:\d+ without accepting or skipping the previously read rune`})
result, _ := parser.Execute("doesn't matter")
if len(result.Tokens()) != 2 {
t.Fatalf("Wrong number of tokens in result, expected 2, got %d", len(result.Tokens()))
}
if result.Token(0).Value != 3.1415 {
t.Fatal("Token 0 value not 3.1415")
}
if string(result.Token(0).Runes) != "π" {
t.Fatal("Token 0 runes not \"π\"")
}
if result.Token(0).Type != "PI" {
t.Fatal("Token 0 type not \"PI\"")
}
if result.Token(1).Value != true {
t.Fatal("Token 1 value not true")
}
if string(result.Token(1).Runes) != "yes" {
t.Fatal("Token 1 runes not \"yes\"")
}
if result.Token(1).Type != nil {
t.Fatal("Token 1 type not nil")
}
}
func TestUsingTokenParserCombinators_TokensCanBeEmitted(t *testing.T) {
fooToken := tok.StrLiteral("ASCII", c.OneOrMore(a.ASCII))
parser := parsekit.NewMatcher(fooToken, "something")
input := "This is fine ASCII Åltho hère öt endĩt!"
result, err := parser.Execute(input)
if err != nil {
t.Fatalf("Unexpected error from parser: %s", err)
}
if result.String() != "This is fine ASCII " {
t.Fatalf("result.String() contains unexpected data: %s", result.String())
}
}
func TestUsingTokenParserCombinators_TokensCanBeNested(t *testing.T) {
fooToken := c.Seq(
m.Drop(c.ZeroOrMore(a.Asterisk)),
tok.StrLiteral("COMBI", c.Seq(
tok.StrLiteral("ASCII", m.TrimSpace(c.OneOrMore(a.ASCII))),
tok.StrLiteral("UTF8", m.TrimSpace(c.OneOrMore(c.Except(a.Asterisk, a.AnyRune)))),
)),
m.Drop(c.ZeroOrMore(a.Asterisk)),
)
parser := parsekit.NewMatcher(fooToken, "something")
input := "*** This is fine ASCII Åltho hère öt endĩt! ***"
output := "This is fine ASCIIÅltho hère öt endĩt!"
result, err := parser.Execute(input)
if err != nil {
t.Fatalf("Unexpected error from parser: %s", err)
}
if result.String() != output {
t.Fatalf("result.String() contains unexpected data: %s", result.String())
}
if result.Token(0).Type != "COMBI" {
t.Fatalf("Token 0 has unexpected type: %s", result.Token(0).Type)
}
if result.Token(0).Value != "This is fine ASCIIÅltho hère öt endĩt!" {
t.Fatalf("Token 0 has unexpected value: %s", result.Token(0).Value)
}
if result.Token(1).Value != "This is fine ASCII" {
t.Fatalf("Token 1 has unexpected value: %s", result.Token(0).Value)
}
if result.Token(2).Value != "Åltho hère öt endĩt!" {
t.Fatalf("Token 2 has unexpected value: %s", result.Token(0).Value)
}
}
func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
@ -39,21 +112,25 @@ func TestGivenNextRuneNotCalled_CallToAcceptPanics(t *testing.T) {
t.Accept()
return false
}, "test")
RunPanicTest(t, PanicTest{
func() { parser.Execute("input string") },
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
`at .*/tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
assert.Panic(t, assert.PanicT{
Function: func() { parser.Execute("input string") },
Regexp: true,
Expect: `parsekit.TokenAPI.Accept\(\): Accept\(\) called without first ` +
`calling NextRune\(\) from .*CallToAcceptPanics.* at /.*_test.go`,
})
}
func TestGivenNextRuneNotCalled_CallToSkipPanics(t *testing.T) {
func TestGivenAcceptNotCalled_CallToNextRunePanics(t *testing.T) {
parser := parsekit.NewMatcher(func(t *parsekit.TokenAPI) bool {
t.Skip()
t.NextRune()
t.NextRune()
return false
}, "test")
RunPanicTest(t, PanicTest{
func() { parser.Execute("input string") },
`TokenHandler bug: Skip\(\) was called from .*CallToSkipPanics.* ` +
`at .*tokenhandler_test\.go:\d+ without a prior call to NextRune\(\)`})
assert.Panic(t, assert.PanicT{
Function: func() { parser.Execute("input string") },
Regexp: true,
Expect: `parsekit\.TokenAPI\.NextRune\(\): NextRune\(\) called without ` +
`a prior call to Accept\(\) from .*CallToNextRunePanics.* at /.*/tokenhandler_test.go:\d+`})
}
func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
@ -62,19 +139,19 @@ func TestGivenNextRuneReturningNotOk_CallToAcceptPanics(t *testing.T) {
t.Accept()
return false
}, "test")
RunPanicTest(t, PanicTest{
func() { parser.Execute("\xcd") },
`TokenHandler bug: Accept\(\) was called from .*CallToAcceptPanics.* ` +
`at .*tokenhandler_test\.go:\d+, but prior call to NextRune\(\) did not ` +
`return OK \(EOF or invalid rune\)`})
assert.Panic(t, assert.PanicT{
Function: func() { parser.Execute("") },
Regexp: true,
Expect: `parsekit\.TokenAPI\.Accept\(\): Accept\(\) called while the previous call to ` +
`NextRune\(\) failed from .*CallToAcceptPanics.* at .*_test\.go:\d+`})
}
func TestGivenRootTokenAPI_CallingMergePanics(t *testing.T) {
RunPanicTest(t, PanicTest{
func() {
assert.Panic(t, assert.PanicT{
Function: func() {
a := parsekit.TokenAPI{}
a.Merge()
},
`TokenHandler bug: Cannot call Merge a a non-forked MatchDialog`,
Expect: `parsekit.TokenAPI.Merge(): Cannot call Merge() on a non-forked TokenAPI`,
})
}

View File

@ -2,6 +2,9 @@ package parsekit
import (
"fmt"
"io"
"runtime"
"strconv"
"strings"
"unicode"
)
@ -9,6 +12,11 @@ import (
// C provides convenient access to a range of parser/combinators that can be
// used to construct TokenHandler functions.
//
// Parser/combinators are so-called higher-order functions that take in one
// or more other TokenHandlers and output a new TokenHandler. They can be
// used to combine TokenHandlers in useful ways to create new, more complex
// TokenHandlers.
//
// When using C in your own parser, then it is advised to create a variable
// to reference it:
//
@ -16,11 +24,6 @@ import (
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var C = struct {
Rune func(rune) TokenHandler
Runes func(...rune) TokenHandler
RuneRange func(rune, rune) TokenHandler
Str func(string) TokenHandler
StrNoCase func(string) TokenHandler
Any func(...TokenHandler) TokenHandler
Not func(TokenHandler) TokenHandler
Opt func(TokenHandler) TokenHandler
@ -31,15 +34,9 @@ var C = struct {
ZeroOrMore func(TokenHandler) TokenHandler
OneOrMore func(TokenHandler) TokenHandler
MinMax func(min int, max int, handler TokenHandler) TokenHandler
Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency
	Separated func(separated TokenHandler, separator TokenHandler) TokenHandler // TODO reverse args for consistency, use string?
Except func(except TokenHandler, handler TokenHandler) TokenHandler
Signed func(TokenHandler) TokenHandler
}{
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
Str: MatchStr,
StrNoCase: MatchStrNoCase,
Opt: MatchOpt,
Any: MatchAny,
Not: MatchNot,
@ -52,15 +49,217 @@ var C = struct {
MinMax: MatchMinMax,
Separated: MatchSeparated,
Except: MatchExcept,
Signed: MatchSigned,
}
// A provides convenient access to a range of atoms or functions to build atoms.
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
// var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
Rune func(rune) TokenHandler
Runes func(...rune) TokenHandler
RuneRange func(rune, rune) TokenHandler
Str func(string) TokenHandler
StrNoCase func(string) TokenHandler
EndOfFile TokenHandler
AnyRune TokenHandler
Space TokenHandler
Tab TokenHandler
CR TokenHandler
LF TokenHandler
CRLF TokenHandler
Excl TokenHandler
DoubleQuote TokenHandler
Hash TokenHandler
Dollar TokenHandler
Percent TokenHandler
Amp TokenHandler
SingleQuote TokenHandler
RoundOpen TokenHandler
LeftParen TokenHandler
RoundClose TokenHandler
RightParen TokenHandler
Asterisk TokenHandler
Multiply TokenHandler
Plus TokenHandler
Add TokenHandler
Comma TokenHandler
Minus TokenHandler
Subtract TokenHandler
Dot TokenHandler
Slash TokenHandler
Divide TokenHandler
Colon TokenHandler
Semicolon TokenHandler
AngleOpen TokenHandler
LessThan TokenHandler
Equal TokenHandler
AngleClose TokenHandler
GreaterThan TokenHandler
Question TokenHandler
At TokenHandler
SquareOpen TokenHandler
Backslash TokenHandler
SquareClose TokenHandler
Caret TokenHandler
Underscore TokenHandler
Backquote TokenHandler
CurlyOpen TokenHandler
Pipe TokenHandler
CurlyClose TokenHandler
Tilde TokenHandler
Newline TokenHandler
Whitespace TokenHandler
WhitespaceAndNewlines TokenHandler
EndOfLine TokenHandler
Digit TokenHandler
DigitNotZero TokenHandler
Digits TokenHandler
Float TokenHandler
Boolean TokenHandler
Integer TokenHandler
Signed func(TokenHandler) TokenHandler
IntegerBetween func(min int64, max int64) TokenHandler
ASCII TokenHandler
ASCIILower TokenHandler
ASCIIUpper TokenHandler
HexDigit TokenHandler
Octet TokenHandler
IPv4 TokenHandler
IPv4MaskBits TokenHandler
}{
Rune: MatchRune,
Runes: MatchRunes,
RuneRange: MatchRuneRange,
Str: MatchStr,
StrNoCase: MatchStrNoCase,
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
Space: MatchRune(' '),
Tab: MatchRune('\t'),
CR: MatchRune('\r'),
LF: MatchRune('\n'),
CRLF: MatchStr("\r\n"),
Excl: MatchRune('!'),
DoubleQuote: MatchRune('"'),
Hash: MatchRune('#'),
Dollar: MatchRune('$'),
Percent: MatchRune('%'),
Amp: MatchRune('&'),
SingleQuote: MatchRune('\''),
RoundOpen: MatchRune('('),
LeftParen: MatchRune('('),
RoundClose: MatchRune(')'),
RightParen: MatchRune(')'),
Asterisk: MatchRune('*'),
Multiply: MatchRune('*'),
Plus: MatchRune('+'),
Add: MatchRune('+'),
Comma: MatchRune(','),
Minus: MatchRune('-'),
Subtract: MatchRune('-'),
Dot: MatchRune('.'),
Slash: MatchRune('/'),
Divide: MatchRune('/'),
Colon: MatchRune(':'),
Semicolon: MatchRune(';'),
AngleOpen: MatchRune('<'),
LessThan: MatchRune('<'),
Equal: MatchRune('='),
AngleClose: MatchRune('>'),
GreaterThan: MatchRune('>'),
Question: MatchRune('?'),
At: MatchRune('@'),
SquareOpen: MatchRune('['),
Backslash: MatchRune('\\'),
SquareClose: MatchRune(']'),
Caret: MatchRune('^'),
Underscore: MatchRune('_'),
Backquote: MatchRune('`'),
CurlyOpen: MatchRune('{'),
Pipe: MatchRune('|'),
CurlyClose: MatchRune('}'),
Tilde: MatchRune('~'),
Whitespace: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'))),
WhitespaceAndNewlines: MatchOneOrMore(MatchAny(MatchRune(' '), MatchRune('\t'), MatchStr("\r\n"), MatchRune('\n'))),
EndOfLine: MatchAny(MatchStr("\r\n"), MatchRune('\n'), MatchEndOfFile()),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
Integer: MatchInteger(),
Signed: MatchSigned,
IntegerBetween: MatchIntegerBetween,
Float: MatchFloat(),
Boolean: MatchBoolean(),
ASCII: MatchRuneRange('\x00', '\x7F'),
ASCIILower: MatchRuneRange('a', 'z'),
ASCIIUpper: MatchRuneRange('A', 'Z'),
HexDigit: MatchAny(MatchRuneRange('0', '9'), MatchRuneRange('a', 'f'), MatchRuneRange('A', 'F')),
Octet: MatchOctet(false),
IPv4: MatchIPv4(),
IPv4MaskBits: MatchIntegerBetween(0, 32),
}
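// exampleAtoms is an illustrative sketch (not part of the package API): atoms
// from A are plain TokenHandlers, so they can be combined freely, for example
// into a handler that matches an assignment like "x=123".
func exampleAtoms() TokenHandler {
	return MatchSeq(A.ASCIILower, A.Equal, A.Digits)
}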
// T provides convenient access to a range of Token producers (which in their
// nature are parser/combinators) that can be used when creating TokenHandler
// functions.
//
// When using T in your own parser, then it is advised to create a variable
// to reference it:
//
// var t = parsekit.T
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var T = struct {
StrLiteral func(interface{}, TokenHandler) TokenHandler
StrInterpreted func(interface{}, TokenHandler) TokenHandler
Byte func(interface{}, TokenHandler) TokenHandler
Rune func(interface{}, TokenHandler) TokenHandler
Int func(interface{}, TokenHandler) TokenHandler
Int8 func(interface{}, TokenHandler) TokenHandler
Int16 func(interface{}, TokenHandler) TokenHandler
Int32 func(interface{}, TokenHandler) TokenHandler
Int64 func(interface{}, TokenHandler) TokenHandler
Uint func(interface{}, TokenHandler) TokenHandler
Uint8 func(interface{}, TokenHandler) TokenHandler
Uint16 func(interface{}, TokenHandler) TokenHandler
Uint32 func(interface{}, TokenHandler) TokenHandler
Uint64 func(interface{}, TokenHandler) TokenHandler
Float32 func(interface{}, TokenHandler) TokenHandler
Float64 func(interface{}, TokenHandler) TokenHandler
Boolean func(interface{}, TokenHandler) TokenHandler
ByCallback func(TokenHandler, func(t *TokenAPI) *Token) TokenHandler
}{
StrLiteral: MakeStrLiteralToken,
StrInterpreted: MakeStrInterpretedToken,
Byte: MakeByteToken,
Rune: MakeRuneToken,
Int: MakeIntToken,
Int8: MakeInt8Token,
Int16: MakeInt16Token,
Int32: MakeInt32Token,
Int64: MakeInt64Token,
Uint: MakeUintToken,
Uint8: MakeUint8Token,
Uint16: MakeUint16Token,
Uint32: MakeUint32Token,
Uint64: MakeUint64Token,
Float32: MakeFloat32Token,
Float64: MakeFloat64Token,
Boolean: MakeBooleanToken,
ByCallback: MakeTokenByCallback,
}
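// exampleTypedToken is an illustrative sketch (not part of the package API):
// T wraps an existing TokenHandler so that, on a match, the matched input is
// also emitted as a typed Token (here an int value for a run of digits).
func exampleTypedToken() TokenHandler {
	return T.Int("NUMBER", A.Digits)
}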
// MatchRune creates a TokenHandler function that checks if the next rune from
// the input matches the provided rune.
func MatchRune(expected rune) TokenHandler {
return func(t *TokenAPI) bool {
input, ok := t.NextRune()
if ok && input == expected {
input, err := t.NextRune()
if err == nil && input == expected {
t.Accept()
return true
}
@ -73,8 +272,8 @@ func MatchRune(expected rune) TokenHandler {
func MatchRunes(expected ...rune) TokenHandler {
s := string(expected)
return func(t *TokenAPI) bool {
input, ok := t.NextRune()
if ok {
input, err := t.NextRune()
if err == nil {
if strings.ContainsRune(s, input) {
t.Accept()
return true
@ -97,8 +296,8 @@ func MatchRuneRange(start rune, end rune) TokenHandler {
panic(fmt.Sprintf("TokenHandler bug: MatchRuneRange definition error: start %q must not be < end %q", start, end))
}
return func(t *TokenAPI) bool {
input, ok := t.NextRune()
if ok && input >= start && input <= end {
input, err := t.NextRune()
if err == nil && input >= start && input <= end {
t.Accept()
return true
}
@ -167,7 +366,8 @@ func MatchAny(handlers ...TokenHandler) TokenHandler {
for _, handler := range handlers {
child := t.Fork()
if handler(child) {
return child.Merge()
child.Merge()
return true
}
}
return false
@ -183,8 +383,8 @@ func MatchNot(handler TokenHandler) TokenHandler {
if handler(probe) {
return false
}
_, ok := t.NextRune()
if ok {
_, err := t.NextRune()
if err == nil {
t.Accept()
return true
}
@ -311,138 +511,24 @@ func MatchSigned(handler TokenHandler) TokenHandler {
return MatchSeq(sign, handler)
}
// A provides convenient access to a range of atoms that can be used to
// build TokenHandlers or parser rules.
//
// In parsekit, an atom is defined as a ready for use TokenHandler function.
//
// When using A in your own parser, then it is advised to create a variable
// to reference it:
//
// var a = parsekit.A
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var A = struct {
EndOfFile TokenHandler
AnyRune TokenHandler
Space TokenHandler
Tab TokenHandler
CR TokenHandler
LF TokenHandler
CRLF TokenHandler
Excl TokenHandler
DoubleQuote TokenHandler
Hash TokenHandler
Dollar TokenHandler
Percent TokenHandler
Amp TokenHandler
SingleQuote TokenHandler
RoundOpen TokenHandler
LeftParen TokenHandler
RoundClose TokenHandler
RightParen TokenHandler
Asterisk TokenHandler
Multiply TokenHandler
Plus TokenHandler
Add TokenHandler
Comma TokenHandler
Minus TokenHandler
Subtract TokenHandler
Dot TokenHandler
Slash TokenHandler
Divide TokenHandler
Colon TokenHandler
Semicolon TokenHandler
AngleOpen TokenHandler
LessThan TokenHandler
Equal TokenHandler
AngleClose TokenHandler
GreaterThan TokenHandler
Question TokenHandler
At TokenHandler
SquareOpen TokenHandler
Backslash TokenHandler
SquareClose TokenHandler
Caret TokenHandler
Underscore TokenHandler
Backquote TokenHandler
CurlyOpen TokenHandler
Pipe TokenHandler
CurlyClose TokenHandler
Tilde TokenHandler
Newline TokenHandler
Whitespace TokenHandler
WhitespaceAndNewlines TokenHandler
EndOfLine TokenHandler
Digit TokenHandler
DigitNotZero TokenHandler
Digits TokenHandler
Float TokenHandler
Integer TokenHandler
ASCII TokenHandler
ASCIILower TokenHandler
ASCIIUpper TokenHandler
HexDigit TokenHandler
}{
EndOfFile: MatchEndOfFile(),
AnyRune: MatchAnyRune(),
Space: C.Rune(' '),
Tab: C.Rune('\t'),
CR: C.Rune('\r'),
LF: C.Rune('\n'),
CRLF: C.Str("\r\n"),
Excl: C.Rune('!'),
DoubleQuote: C.Rune('"'),
Hash: C.Rune('#'),
Dollar: C.Rune('$'),
Percent: C.Rune('%'),
Amp: C.Rune('&'),
SingleQuote: C.Rune('\''),
RoundOpen: C.Rune('('),
LeftParen: C.Rune('('),
RoundClose: C.Rune(')'),
RightParen: C.Rune(')'),
Asterisk: C.Rune('*'),
Multiply: C.Rune('*'),
Plus: C.Rune('+'),
Add: C.Rune('+'),
Comma: C.Rune(','),
Minus: C.Rune('-'),
Subtract: C.Rune('-'),
Dot: C.Rune('.'),
Slash: C.Rune('/'),
Divide: C.Rune('/'),
Colon: C.Rune(':'),
Semicolon: C.Rune(';'),
AngleOpen: C.Rune('<'),
LessThan: C.Rune('<'),
Equal: C.Rune('='),
AngleClose: C.Rune('>'),
GreaterThan: C.Rune('>'),
Question: C.Rune('?'),
At: C.Rune('@'),
SquareOpen: C.Rune('['),
Backslash: C.Rune('\\'),
SquareClose: C.Rune(']'),
Caret: C.Rune('^'),
Underscore: C.Rune('_'),
Backquote: C.Rune('`'),
CurlyOpen: C.Rune('{'),
Pipe: C.Rune('|'),
CurlyClose: C.Rune('}'),
Tilde: C.Rune('~'),
Whitespace: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'))),
WhitespaceAndNewlines: C.OneOrMore(C.Any(C.Rune(' '), C.Rune('\t'), C.Str("\r\n"), C.Rune('\n'))),
EndOfLine: C.Any(C.Str("\r\n"), C.Rune('\n'), MatchEndOfFile()),
Digit: MatchDigit(),
DigitNotZero: MatchDigitNotZero(),
Digits: MatchDigits(),
Integer: MatchInteger(),
Float: MatchFloat(),
ASCII: C.RuneRange('\x00', '\x7F'),
ASCIILower: C.RuneRange('a', 'z'),
ASCIIUpper: C.RuneRange('A', 'Z'),
HexDigit: C.Any(C.RuneRange('0', '9'), C.RuneRange('a', 'f'), C.RuneRange('A', 'F')),
// MatchIntegerBetween creates a TokenHandler that checks for an integer
// value between the provided min and max boundaries (inclusive).
// It uses an int64 for checking internally, so you can check values
// ranging from -9223372036854775808 to 9223372036854775807.
func MatchIntegerBetween(min int64, max int64) TokenHandler {
digits := MatchSigned(MatchDigits())
return func(t *TokenAPI) bool {
fork := t.Fork()
if !digits(fork) {
return false
}
value, _ := strconv.ParseInt(fork.Result().String(), 10, 64)
if value < min || value > max {
return false
}
fork.Merge()
return true
}
}
// MatchEndOfFile creates a TokenHandler that checks if the end of the input data
@ -451,8 +537,8 @@ var A = struct {
func MatchEndOfFile() TokenHandler {
return func(t *TokenAPI) bool {
fork := t.Fork()
input, ok := fork.NextRune()
return !ok && input == eofRune
_, err := fork.NextRune()
return err == io.EOF
}
}
@ -461,8 +547,8 @@ func MatchEndOfFile() TokenHandler {
// input has not yet been reached and the upcoming input is a valid UTF8 rune.
func MatchAnyRune() TokenHandler {
return func(t *TokenAPI) bool {
_, ok := t.NextRune()
if ok {
_, err := t.NextRune()
if err == nil {
t.Accept()
return true
}
@ -494,7 +580,7 @@ func MatchDigitNotZero() TokenHandler {
// hexadecimal.
func MatchInteger() TokenHandler {
justZero := MatchRune('0')
integer := C.Seq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
integer := MatchSeq(MatchDigitNotZero(), MatchZeroOrMore(MatchDigit()))
return MatchAny(integer, justZero)
}
@ -506,6 +592,56 @@ func MatchFloat() TokenHandler {
return MatchSeq(digits, MatchOpt(MatchSeq(MatchRune('.'), digits)))
}
// MatchBoolean creates a TokenHandler function that checks if a valid boolean
// value can be read from the input. It supports the boolean values as understood
// by Go's strconv.ParseBool() function.
func MatchBoolean() TokenHandler {
trues := MatchAny(MatchStr("true"), MatchStr("TRUE"), MatchStr("True"), MatchRune('1'), MatchRune('t'), MatchRune('T'))
falses := MatchAny(MatchStr("false"), MatchStr("FALSE"), MatchStr("False"), MatchRune('0'), MatchRune('f'), MatchRune('F'))
return MatchAny(trues, falses)
}
// MatchOctet creates a TokenHandler function that checks if a valid octet value
// can be read from the input (octet = byte value representation, with a value
// between 0 and 255 inclusive). It only looks at the upcoming 1 to 3 digits and
// does not check whether a non-digit follows them, meaning that "123255" would
// be matched as a valid sequence of two octets.
//
// When the normalize parameter is set to true, then leading zeroes will be
// stripped from the octet.
func MatchOctet(normalize bool) TokenHandler {
digits := MatchMinMax(1, 3, MatchDigit())
return func(t *TokenAPI) bool {
fork := t.Fork()
if !digits(fork) {
return false
}
value, _ := strconv.ParseInt(fork.Result().String(), 10, 16)
if value <= 255 {
if normalize {
runes := fork.Result().Runes()
for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:]
}
fork.Result().SetRunes(runes)
}
fork.Merge()
return true
}
return false
}
}
// MatchIPv4 creates a TokenHandler function that checks if a valid IPv4
// IP address value can be read from the input.
// It will normalize IP-addresses that look like "192.168.001.012" to
// "192.168.1.12".
func MatchIPv4() TokenHandler {
octet := MatchOctet(true)
dot := MatchRune('.')
return MatchSeq(octet, dot, octet, dot, octet, dot, octet)
}
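// exampleIPv4 is an illustrative sketch (not part of the package API), based
// on the NewMatcher()/Execute() usage from the tests: MatchIPv4 normalizes
// the octets, so "192.168.001.012" is reported back as "192.168.1.12".
func exampleIPv4() string {
	parser := NewMatcher(MatchIPv4(), "IPv4 address")
	result, _ := parser.Execute("192.168.001.012")
	return result.String() // "192.168.1.12"
}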
// M provides convenient access to a range of modifiers (which in their nature are
// parser/combinators) that can be used when creating TokenHandler functions.
//
@ -520,25 +656,25 @@ func MatchFloat() TokenHandler {
//
// Doing so saves you a lot of typing, and it makes your code a lot cleaner.
var M = struct {
Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
ModifyByCallback func(TokenHandler, func(string) string) TokenHandler
Drop func(TokenHandler) TokenHandler
Trim func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimLeft func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimRight func(handler TokenHandler, cutset string) TokenHandler // TODO reverse arguments?
TrimSpace func(handler TokenHandler) TokenHandler
ToLower func(TokenHandler) TokenHandler
ToUpper func(TokenHandler) TokenHandler
Replace func(handler TokenHandler, replaceWith string) TokenHandler // TODO reverse arguments?
ByCallback func(TokenHandler, func(string) string) TokenHandler
}{
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ModifyByCallback: ModifyByCallback,
Drop: ModifyDrop,
Trim: ModifyTrim,
TrimLeft: ModifyTrimLeft,
TrimRight: ModifyTrimRight,
TrimSpace: ModifyTrimSpace,
ToLower: ModifyToLower,
ToUpper: ModifyToUpper,
Replace: ModifyReplace,
ByCallback: ModifyByCallback,
}
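// exampleTrimmed is an illustrative sketch (not part of the package API):
// modifiers post-process the output of a TokenHandler; here a run of ASCII
// characters is matched, but surrounding whitespace is trimmed from the result.
func exampleTrimmed() TokenHandler {
	return M.TrimSpace(MatchOneOrMore(A.ASCII))
}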
// ModifyDrop creates a TokenHandler that checks if the provided TokenHandler applies.
@ -635,11 +771,222 @@ func ModifyByCallback(handler TokenHandler, modfunc func(string) string) TokenHa
return func(t *TokenAPI) bool {
child := t.Fork()
if handler(child) {
s := modfunc(string(child.output))
child.output = []rune(s)
s := modfunc(child.Result().String())
child.Result().SetRunes(s)
child.Merge()
return true
}
return false
}
}
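// MakeStrLiteralToken creates a TokenHandler that, on a match by the provided
// handler, emits a Token of the given type with the literal matched string as
// its Value.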
func MakeStrLiteralToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
literal := t.Result().String()
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: literal}
})
}
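// MakeStrInterpretedToken creates a TokenHandler that, on a match by the
// provided handler, emits a Token of the given type with the matched string as
// its Value, after interpreting backslash escape sequences in it.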
func MakeStrInterpretedToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING
interpreted, _ := interpretString(t.Result().String())
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: interpreted}
})
}
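// MakeRuneToken creates a TokenHandler that, on a match by the provided
// handler, emits a Token of the given type with the first matched rune as its
// Value.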
func MakeRuneToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 rune input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: t.Result().Rune(0)}
})
}
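// MakeByteToken creates a TokenHandler that, on a match by the provided
// handler, emits a Token of the given type with the first matched rune as its
// Value, converted to a byte.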
func MakeByteToken(toktype interface{}, handler TokenHandler) TokenHandler {
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
// TODO ERROR HANDLING --- not a 1 byte input
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: byte(t.Result().Rune(0))}
})
}
func interpretString(str string) (string, error) {
var sb strings.Builder
for len(str) > 0 {
r, _, remainder, err := strconv.UnquoteChar(str, '"')
if err != nil {
return sb.String(), err
}
str = remainder
sb.WriteRune(r)
}
return sb.String(), nil
}
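// MakeIntToken creates a TokenHandler that, on a match by the provided
// handler, emits a Token of the given type with the matched input converted to
// an int as its Value. The MakeInt8Token through MakeBooleanToken functions
// below follow the same pattern for their respective Go types.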
func MakeIntToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler, func(s string) (interface{}, error) {
return strconv.Atoi(s)
})
}
// TODO allow other Go types for oct and hex too.
func MakeInt8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 8)
if err == nil {
return int8(value), err
}
return value, err
})
}
func MakeInt16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 16)
if err == nil {
return int16(value), err
}
return value, err
})
}
func MakeInt32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 32)
if err == nil {
return int32(value), err
}
return value, err
})
}
func MakeInt64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseInt(s, 10, 64)
if err == nil {
return int64(value), err
}
return value, err
})
}
func MakeUintToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 0)
if err == nil {
return uint(value), err
}
return value, err
})
}
// TODO allow other Go types for oct and hex too.
func MakeUint8Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 8)
if err == nil {
return uint8(value), err
}
return value, err
})
}
func MakeUint16Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 16)
if err == nil {
return uint16(value), err
}
return value, err
})
}
func MakeUint32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 32)
if err == nil {
return uint32(value), err
}
return value, err
})
}
func MakeUint64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseUint(s, 10, 64)
if err == nil {
return uint64(value), err
}
return value, err
})
}
func MakeFloat32Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 32)
if err == nil {
return float32(value), err
}
return value, err
})
}
func MakeFloat64Token(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseFloat(s, 64)
if err == nil {
return float64(value), err
}
return value, err
})
}
func MakeBooleanToken(toktype interface{}, handler TokenHandler) TokenHandler {
return makeStrconvToken(toktype, handler,
func(s string) (interface{}, error) {
value, err := strconv.ParseBool(s)
if err == nil {
return bool(value), err
}
return value, err
})
}
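// makeStrconvToken is the generic helper behind the typed token makers above:
// it wraps the handler using MakeTokenByCallback and panics with a descriptive
// message when the matched input cannot be converted to the requested type.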
func makeStrconvToken(toktype interface{}, handler TokenHandler, convert func(s string) (interface{}, error)) TokenHandler {
pc, _, _, _ := runtime.Caller(1)
fullName := runtime.FuncForPC(pc).Name()
parts := strings.Split(fullName, ".")
name := parts[len(parts)-1]
return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
value, err := convert(t.Result().String())
if err != nil {
panic(fmt.Sprintf(
"TokenHandler error: %s cannot handle input %q: %s "+
"(only use a type conversion token maker, when the input has been "+
"validated on beforehand)", name, t.Result().String(), err))
}
return &Token{Type: toktype, Runes: t.Result().Runes(), Value: value}
})
}
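// MakeTokenByCallback creates a TokenHandler that, on a match by the provided
// handler, lets the provided callback construct the Token that is added to the
// results. The callback receives the forked TokenAPI on which the handler matched.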
func MakeTokenByCallback(handler TokenHandler, callback func(t *TokenAPI) *Token) TokenHandler {
return func(t *TokenAPI) bool {
fork := t.Fork()
if handler(fork) {
t.Result().AddToken(callback(fork))
fork.Merge()
return true
}
return false
}
}
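// exampleLengthToken is an illustrative sketch (not part of the package API)
// of MakeTokenByCallback: the callback can build any kind of Token from the
// forked result, here one whose Value is the number of matched runes.
func exampleLengthToken(handler TokenHandler) TokenHandler {
	return MakeTokenByCallback(handler, func(t *TokenAPI) *Token {
		return &Token{Type: "LENGTH", Runes: t.Result().Runes(), Value: len(t.Result().Runes())}
	})
}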

View File

@ -9,72 +9,57 @@ import (
func TestCombinators(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{
{"xxx", c.Rune('x'), true, "x"},
{"x ", c.Rune(' '), false, ""},
{"aa", c.RuneRange('b', 'e'), false, ""},
{"bb", c.RuneRange('b', 'e'), true, "b"},
{"cc", c.RuneRange('b', 'e'), true, "c"},
{"dd", c.RuneRange('b', 'e'), true, "d"},
{"ee", c.RuneRange('b', 'e'), true, "e"},
{"ff", c.RuneRange('b', 'e'), false, ""},
{"Hello, world!", c.Str("Hello"), true, "Hello"},
{"HellÖ, world!", c.StrNoCase("hellö"), true, "HellÖ"},
{"+X", c.Runes('+', '-', '*', '/'), true, "+"},
{"-X", c.Runes('+', '-', '*', '/'), true, "-"},
{"*X", c.Runes('+', '-', '*', '/'), true, "*"},
{"/X", c.Runes('+', '-', '*', '/'), true, "/"},
{"!X", c.Runes('+', '-', '*', '/'), false, ""},
{"abc", c.Not(c.Rune('b')), true, "a"},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"bcd", c.Not(c.Rune('b')), false, ""},
{"1010", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), true, "1"},
{"2020", c.Not(c.Seq(c.Rune('2'), c.Rune('0'))), false, ""},
{"abc", c.Any(c.Rune('a'), c.Rune('b')), true, "a"},
{"bcd", c.Any(c.Rune('a'), c.Rune('b')), true, "b"},
{"cde", c.Any(c.Rune('a'), c.Rune('b')), false, ""},
{"ababc", c.Rep(4, c.Runes('a', 'b')), true, "abab"},
{"ababc", c.Rep(5, c.Runes('a', 'b')), false, ""},
{"", c.Min(0, c.Rune('a')), true, ""},
{"a", c.Min(0, c.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, c.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, c.Rune('a')), false, ""},
{"", c.Max(4, c.Rune('b')), true, ""},
{"X", c.Max(4, c.Rune('b')), true, ""},
{"bbbbbX", c.Max(4, c.Rune('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, c.Rune('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, c.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccc", c.MinMax(0, 5, c.Rune('c')), true, "cccc"},
{"ccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccc", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 0, c.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, c.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, c.Rune('c')), true, "c"},
{"", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"X", c.MinMax(1, 1, c.Rune('c')), false, ""},
{"cccccX", c.MinMax(1, 3, c.Rune('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, c.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, c.Rune('c')), true, "cccc"},
{"", c.OneOrMore(c.Rune('d')), false, ""},
{"X", c.OneOrMore(c.Rune('d')), false, ""},
{"dX", c.OneOrMore(c.Rune('d')), true, "d"},
{"dddddX", c.OneOrMore(c.Rune('d')), true, "ddddd"},
{"", c.ZeroOrMore(c.Rune('e')), true, ""},
{"X", c.ZeroOrMore(c.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(c.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(c.Rune('e')), true, "eeeee"},
{"Hello, world!X", c.Seq(c.Str("Hello"), a.Comma, a.Space, c.Str("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Seq(c.Rune('1'), c.Rune('0'))), true, "101010"},
{"", c.Opt(c.OneOrMore(c.Rune('f'))), true, ""},
{"ghijkl", c.Opt(c.Rune('h')), true, ""},
{"ghijkl", c.Opt(c.Rune('g')), true, "g"},
{"fffffX", c.Opt(c.OneOrMore(c.Rune('f'))), true, "fffff"},
{"abc", c.Not(a.Rune('b')), true, "a"},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
{"ababc", c.Rep(4, a.Runes('a', 'b')), true, "abab"},
{"ababc", c.Rep(5, a.Runes('a', 'b')), false, ""},
{"", c.Min(0, a.Rune('a')), true, ""},
{"a", c.Min(0, a.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
{"", c.Max(4, a.Rune('b')), true, ""},
{"X", c.Max(4, a.Rune('b')), true, ""},
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
{"", c.OneOrMore(a.Rune('d')), false, ""},
{"X", c.OneOrMore(a.Rune('d')), false, ""},
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
{"", c.Opt(c.OneOrMore(a.Rune('f'))), true, ""},
{"ghijkl", c.Opt(a.Rune('h')), true, ""},
{"ghijkl", c.Opt(a.Rune('g')), true, "g"},
{"fffffX", c.Opt(c.OneOrMore(a.Rune('f'))), true, "fffff"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, c.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Rep(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
@ -83,27 +68,42 @@ func TestCombinators(t *testing.T) {
func TestCombinatorPanics(t *testing.T) {
RunPanicTests(t, []PanicTest{
{func() { parsekit.C.RuneRange('z', 'a') },
{func() { a.RuneRange('z', 'a') },
"TokenHandler bug: MatchRuneRange definition error: start 'z' must not be < end 'a'"},
{func() { parsekit.C.MinMax(-1, 1, parsekit.A.Space) },
{func() { c.MinMax(-1, 1, parsekit.A.Space) },
"TokenHandler bug: MatchMinMax definition error: min must be >= 0"},
{func() { parsekit.C.MinMax(1, -1, parsekit.A.Space) },
{func() { c.MinMax(1, -1, parsekit.A.Space) },
"TokenHandler bug: MatchMinMax definition error: max must be >= 0"},
{func() { parsekit.C.MinMax(10, 5, parsekit.A.Space) },
{func() { c.MinMax(10, 5, parsekit.A.Space) },
"TokenHandler bug: MatchMinMax definition error: max 5 must not be < min 10"},
{func() { parsekit.C.Min(-10, parsekit.A.Space) },
{func() { c.Min(-10, parsekit.A.Space) },
"TokenHandler bug: MatchMin definition error: min must be >= 0"},
{func() { parsekit.C.Max(-42, parsekit.A.Space) },
{func() { c.Max(-42, parsekit.A.Space) },
"TokenHandler bug: MatchMax definition error: max must be >= 0"},
})
}
func TestAtoms(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{
{"dd", a.RuneRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""},
{"Hello, world!", a.Str("Hello"), true, "Hello"},
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
{"xxx", a.Rune('x'), true, "x"},
{"x ", a.Rune(' '), false, ""},
{"aa", a.RuneRange('b', 'e'), false, ""},
{"bb", a.RuneRange('b', 'e'), true, "b"},
{"cc", a.RuneRange('b', 'e'), true, "c"},
{"", a.EndOfFile, true, ""},
{"⌘", a.AnyRune, true, "⌘"},
{"\xbc", a.AnyRune, false, ""}, // invalid UTF8 rune
{"", a.AnyRune, false, ""}, // end of file
{"\xbc", a.AnyRune, true, "\ufffd"}, // invalid UTF8 rune, yields the replacement character
{"", a.AnyRune, false, ""}, // false is for end of file
{" ", a.Space, true, " "},
{"X", a.Space, false, ""},
{"\t", a.Tab, true, "\t"},
@ -187,32 +187,128 @@ func TestAtoms(t *testing.T) {
{"1", a.Integer, true, "1"},
{"-10X", a.Integer, false, ""},
{"+10X", a.Integer, false, ""},
{"-10X", c.Signed(a.Integer), true, "-10"},
{"+10X", c.Signed(a.Integer), true, "+10"},
{"+10.1X", c.Signed(a.Integer), true, "+10"},
{"-10X", a.Signed(a.Integer), true, "-10"},
{"+10X", a.Signed(a.Integer), true, "+10"},
{"+10.1X", a.Signed(a.Integer), true, "+10"},
{"0X", a.Float, true, "0"},
{"0X", a.Float, true, "0"},
{"1X", a.Float, true, "1"},
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
{"123.321X", a.Float, true, "123.321"},
{"-3.14X", a.Float, false, ""},
{"-3.14X", c.Signed(a.Float), true, "-3.14"},
{"-003.0014X", c.Signed(a.Float), true, "-003.0014"},
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "00"},
{"000X", a.Octet, true, "000"},
{"10X", a.Octet, true, "10"},
{"010X", a.Octet, true, "010"},
{"255123", a.Octet, true, "255"},
{"256123", a.Octet, false, ""},
{"300", a.Octet, false, ""},
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
{"256.255.255.255", a.IPv4, false, ""},
{"0", a.IPv4MaskBits, true, "0"},
{"32", a.IPv4MaskBits, true, "32"},
{"33", a.IPv4MaskBits, false, ""},
{"-11", a.IntegerBetween(-10, 10), false, ""},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
})
}
func TestModifiers(t *testing.T) {
RunTokenHandlerTests(t, []TokenHandlerTest{
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), c.Str("cool")), true, "cool"},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
{"abcdefghijk", m.ModifyByCallback(c.Str("abc"), func(s string) string { return "X" }), true, "X"},
{"NoTaLlUpPeR", m.ToUpper(c.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
{"NoTaLlLoWeR", m.ToLower(c.StrNoCase("NOTALLlower")), true, "notalllower"},
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
})
}
// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called unless the input has already been validated
// to match the expected pattern. Therefore, token makers will panic when the
// input cannot be processed successfully. (A sketch of the intended, panic-free
// pairing follows the test below.)
func TestTokenMakerErrorHandling(t *testing.T) {
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
parser := parsekit.NewMatcher(invalid, "boolean")
RunPanicTest(t, PanicTest{
func() { parser.Execute("no") },
`TokenHandler error: MakeBooleanToken cannot handle input "no": strconv.ParseBool: parsing "no": ` +
	`invalid syntax \(only use a type conversion token maker when the input has already been validated\)`,
})
}
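// Illustration only, not part of this commit: in the intended, panic-free
// pairing, a.Boolean validates the input first, so tok.Boolean never feeds
// strconv.ParseBool anything it cannot parse. The tok, a and parsekit names are
// assumed to be the same package-level shortcuts used by the surrounding tests.
func illustrateSafeBooleanPairing() {
	valid := tok.Boolean("BOOL", a.Boolean)
	parser := parsekit.NewMatcher(valid, "boolean")
	parser.Execute("TRUE") // a.Boolean matches this, so the token maker cannot panic
}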
func TestTokenMakers(t *testing.T) {
RunTokenMakerTests(t, []TokenMakerTest{
{`empty token`, tok.StrLiteral("A", c.ZeroOrMore(a.Digit)),
[]parsekit.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
{`Ѝюج literal \string`, tok.StrLiteral("B", c.OneOrMore(a.AnyRune)),
[]parsekit.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
[]parsekit.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
{"Ø*", tok.Byte("Q", a.AnyRune), []parsekit.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []parsekit.Token{
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
{Type: "bar", Runes: []rune("K"), Value: byte('K')},
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
}},
{"Ø*", tok.Rune("P", a.AnyRune), []parsekit.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []parsekit.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []parsekit.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []parsekit.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []parsekit.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []parsekit.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []parsekit.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []parsekit.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []parsekit.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []parsekit.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
{`4294967295`, tok.Uint("I", a.Integer), []parsekit.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []parsekit.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []parsekit.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []parsekit.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []parsekit.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []parsekit.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []parsekit.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
{Type: "P", Runes: []rune("1"), Value: true},
{Type: "P", Runes: []rune("t"), Value: true},
{Type: "P", Runes: []rune("T"), Value: true},
{Type: "P", Runes: []rune("true"), Value: true},
{Type: "P", Runes: []rune("TRUE"), Value: true},
{Type: "P", Runes: []rune("True"), Value: true},
}},
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []parsekit.Token{
{Type: "P", Runes: []rune("0"), Value: false},
{Type: "P", Runes: []rune("f"), Value: false},
{Type: "P", Runes: []rune("F"), Value: false},
{Type: "P", Runes: []rune("false"), Value: false},
{Type: "P", Runes: []rune("FALSE"), Value: false},
{Type: "P", Runes: []rune("False"), Value: false},
}},
})
}
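// Illustration only, not shown in this commit: the StrInterpreted cases above
// interpret Go-style escape sequences (\n, \u2318, ...). One way to get that
// behaviour is strconv.Unquote (assuming a "strconv" import); whether parsekit
// implements it exactly like this is not visible here, and inputs containing a
// literal double quote or a raw newline would need extra handling.
func interpretEscapes(s string) (string, error) {
	// Wrap the input in double quotes so strconv.Unquote applies Go's
	// escape-sequence rules to it.
	return strconv.Unquote(`"` + s + `"`)
}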
@ -229,7 +325,7 @@ func TestSequenceOfRunes(t *testing.T) {
parser := parsekit.NewParser(func(p *parsekit.ParseAPI) {
p.Expects("Sequence of runes")
if p.On(sequence).Accept() {
output = p.BufLiteral()
output = p.Result().String()
p.Stop()
}
})
@ -250,7 +346,7 @@ func TestCombination(t *testing.T) {
c.Seq(
c.Opt(a.Whitespace),
c.Rep(3, a.AngleClose),
m.ModifyByCallback(c.OneOrMore(c.StrNoCase("hello")), func(s string) string {
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s))
}),
m.Replace(c.Separated(a.Comma, c.Opt(a.Whitespace)), ", "),