Optimization round completed (for now :-) All tests successful.
parent 7598b62dd0, commit c532af67ca
@@ -77,7 +77,7 @@ var int64Token = tokenize.T.Int64(nil, bareInteger)
 func (c *simpleCalculator) number(p *parse.API) {
 	if p.Accept(int64Token) {
-		c.Result += c.op * p.Result().Value(0).(int64)
+		c.Result += c.op * p.Result.Tokens[0].Value.(int64)
 		p.Handle(c.operatorOrEndOfFile)
 	} else {
 		p.Expected("integer number")
@@ -98,7 +98,7 @@ func (calc *calculator) expr(p *parse.API) {
 	var A = tokenize.A
 	if p.Handle(calc.term) {
 		for p.Accept(A.Add.Or(A.Subtract)) {
-			op := p.Result().Rune(0)
+			op := p.Result.Runes[0]
 			if !p.Handle(calc.term) {
 				return
 			}
@@ -116,7 +116,7 @@ func (calc *calculator) term(p *parse.API) {
 	var A = tokenize.A
 	if p.Handle(calc.factor) {
 		for p.Accept(A.Multiply.Or(A.Divide)) {
-			op := p.Result().Rune(0)
+			op := p.Result.Runes[0]
 			if !p.Handle(calc.factor) {
 				return
 			}
@@ -134,7 +134,7 @@ func (calc *calculator) factor(p *parse.API) {
 	p.Accept(A.Blanks)
 	switch {
 	case p.Accept(T.Float64(nil, A.Signed(A.Float))):
-		value := p.Result().Value(0).(float64)
+		value := p.Result.Tokens[0].Value.(float64)
 		calc.interpreter.pushValue(value)
 	case p.Accept(A.LeftParen):
 		if !p.Handle(calc.expr) {
@@ -116,7 +116,7 @@ func (h *helloparser1) name(p *parse.API) {
 	case p.Peek(a.Excl):
 		p.Handle(h.exclamation)
 	case p.Accept(a.AnyRune):
-		h.greetee += p.Result().String()
+		h.greetee += p.Result.String()
 		p.Handle(h.name)
 	default:
 		p.Expected("exclamation mark")
@@ -90,7 +90,7 @@ func (h *helloparser2) start(p *parse.API) {
 		return
 	}
 	if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
-		h.greetee = p.Result().String()
+		h.greetee = p.Result.String()
 		if h.greetee == "" {
 			p.Error("the name cannot be empty")
 			return
@@ -22,7 +22,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) error {
 	parseChunks := parse.New(func(p *parse.API) {
 		for p.Accept(chunkOfRunes) {
-			*l = append(*l, p.Result().String())
+			*l = append(*l, p.Result.String())
 		}
 	})
 	return parseChunks(s)
parse/api.go (61 changed lines)
@@ -16,14 +16,24 @@ import (
 //
 // • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
 type API struct {
-	tokenAPI tokenize.API     // the tokenize.API, used for communicating with tokenize.Handler functions
-	result   *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
+	tokenAPI *tokenize.API  // the tokenize.API, used for communicating with tokenize.Handler functions
+	Result   TokenizeResult // a struct, holding the results of the last Peek() or Accept() call
 	sanityChecksEnabled bool             // whether or not runtime sanity checks are enabled
 	loopCheck           map[uintptr]bool // used for parser loop detection
 	err                 error            // parse error, retrieved by Error(), using API methods is denied when set
 	stopped             bool             // a boolean set to true by Stop()
 }
 
+// TokenizeResult holds the results of the last Peek() or Accept() call.
+type TokenizeResult struct {
+	Tokens []tokenize.Token // the resulting tokens from the last call to Peek() or Accept()
+	Runes  []rune           // the resulting runes from the last call to Peek() or Accept()
+}
+
+func (result *TokenizeResult) String() string {
+	return string(result.Runes)
+}
+
 // DisableSanityChecks disables the built-in parser implementation sanity checks,
 // which detects parser implementation errors like loops and continuing parsing
 // after an error or invoking Stop().
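With this commit, parser results live in a public struct field instead of behind a method with a panic path. A minimal sketch of the new calling convention (hypothetical handler, reusing int64Token from the calculator example above):

	func printNumber(p *parse.API) {
		if p.Accept(int64Token) {
			// p.Result is overwritten by every Peek()/Accept(),
			// so read it before issuing the next call.
			fmt.Println(p.Result.Tokens[0].Value.(int64))
		}
	}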
@@ -40,16 +50,13 @@ func (p *API) DisableSanityChecks() {
 // If it does, then true will be returned, false otherwise. The read cursor
 // will be kept at the same position, so the next call to Peek() or Accept()
 // will start from the same cursor position.
-//
-// After calling this method, you can retrieve the produced tokenize.Result
-// struct using the Result() method.
 func (p *API) Peek(tokenHandler tokenize.Handler) bool {
-	p.result = nil
 	forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
 	if ok {
-		p.result = forkedAPI.Result()
-		p.tokenAPI.Reset()
+		p.Result.Tokens = p.tokenAPI.Tokens()
+		p.Result.Runes = p.tokenAPI.Runes()
 	}
+	p.tokenAPI.Dispose(forkedAPI)
 	return ok
 }
@@ -58,24 +65,31 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
 // forward to beyond the match that was found. Otherwise false will be
 // and the read cursor will stay at the same position.
 //
-// After calling this method, you can retrieve the tokenize.Result
-// using the Result() method.
+// After calling this method, you can retrieve the results using the Result() method.
 func (p *API) Accept(tokenHandler tokenize.Handler) bool {
-	p.result = nil
 	forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
 	if ok {
-		forkedAPI.Merge()
-		p.result = p.tokenAPI.Result()
+		// Keep track of the results.
+		p.Result.Tokens = p.tokenAPI.Tokens()
+		p.Result.Runes = p.tokenAPI.Runes()
+
+		// Merge to the parent level.
+		p.tokenAPI.Merge(forkedAPI)
+		p.tokenAPI.Dispose(forkedAPI)
+
+		// And flush the input reader buffer.
 		if p.tokenAPI.FlushInput() {
 			if p.sanityChecksEnabled {
 				p.initLoopCheck()
 			}
 		}
+	} else {
+		p.tokenAPI.Dispose(forkedAPI)
 	}
 	return ok
 }
 
-func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokenize.API, bool) {
+func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
 	if p.sanityChecksEnabled {
 		p.panicWhenStoppedOrInError(name)
 		p.checkForLoops(name)
@@ -84,10 +98,9 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokeniz
 		}
 	}
 
-	p.result = nil
 	p.tokenAPI.Reset()
 	child := p.tokenAPI.Fork()
-	ok := tokenHandler(child)
+	ok := tokenHandler(p.tokenAPI)
 
 	return child, ok
 }
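Fork handles on the tokenizer are now plain int stack levels, so every call site in the parse layer follows the same bookkeeping shape. Condensed from Peek() and Accept() above:

	fork := p.tokenAPI.Fork()      // push a child stack level
	ok := tokenHandler(p.tokenAPI) // the handler runs against the same *API
	if ok {
		p.tokenAPI.Merge(fork) // fold the child's runes/tokens into the parent
	}
	p.tokenAPI.Dispose(fork)       // always pop back to the parent level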
@@ -138,20 +151,6 @@ func (p *API) checkForLoops(name string) {
 	p.loopCheck[filepos] = true
 }
 
-// Result returns the tokenize.Result struct, containing results as produced by the
-// last Peek() or Accept() call.
-//
-// When Result() is called without first doing a Peek() or Accept(), then no
-// result will be available and the method will panic.
-func (p *API) Result() *tokenize.Result {
-	result := p.result
-	if p.result == nil {
-		callerPanic("Result", "parsekit.parse.API.{name}(): {name}() called "+
-			"at {caller} without calling API.Peek() or API.Accept() on beforehand")
-	}
-	return result
-}
-
 // Handle executes other parse.Handler functions from within the active
 // parse.Handler function.
 //
@@ -215,7 +214,7 @@ func (p *API) Error(format string, data ...interface{}) {
 	// No call to p.panicWhenStoppedOrInError(), to allow a parser to
 	// set a different error message when needed.
 	message := fmt.Sprintf(format, data...)
-	p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Result().Cursor())
+	p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor())
 }
 
 // ExpectEndOfFile can be used to check if the input is at end of file.
@@ -5,8 +5,6 @@ package parse
 import (
 	"regexp"
 	"testing"
-
-	"git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@@ -56,39 +54,3 @@ func AssertPanic(t *testing.T, p PanicT) {
 	}()
 	p.Function()
 }
-
-type TokenMakerT struct {
-	Input    string
-	Handler  tokenize.Handler
-	Expected []tokenize.Token
-}
-
-func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
-	for _, test := range testSet {
-		AssertTokenMaker(t, test)
-	}
-}
-
-func AssertTokenMaker(t *testing.T, test TokenMakerT) {
-	tokenizer := tokenize.New(test.Handler)
-	result, err := tokenizer(test.Input)
-	if err != nil {
-		t.Errorf("Test %q failed with error: %s", test.Input, err)
-	} else {
-		if len(result.Tokens()) != len(test.Expected) {
-			t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
-		}
-		for i, expected := range test.Expected {
-			actual := result.Token(i)
-			if expected.Type != actual.Type {
-				t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
-			}
-			if string(expected.Runes) != string(actual.Runes) {
-				t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
-			}
-			if expected.Value != actual.Value {
-				t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
-			}
-		}
-	}
-}
@@ -16,7 +16,7 @@ func ExampleNew_usingAcceptedRunes() {
 	parser := parse.New(func(p *parse.API) {
 		for p.Accept(a.AnyRune) {
-			matches = append(matches, p.Result().String())
+			matches = append(matches, p.Result.String())
 		}
 		p.ExpectEndOfFile()
 	})
@@ -33,9 +33,9 @@ func ExampleNew_usingTokens() {
 	parser := parse.New(func(p *parse.API) {
 		if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
-			fmt.Printf("Runes accepted: %q\n", p.Result().String())
+			fmt.Printf("Runes accepted: %q\n", p.Result.String())
 			fmt.Printf("Tokens:\n")
-			for i, token := range p.Result().Tokens() {
+			for i, token := range p.Result.Tokens {
 				fmt.Printf("[%d] %s\n", i, token)
 			}
 		}
@@ -46,10 +46,10 @@ func ExampleNew_usingTokens() {
 	// Output:
 	// Runes accepted: "¡ök!"
 	// Tokens:
-	// [0] RUNE(161)
-	// [1] RUNE(246)
-	// [2] RUNE(107)
-	// [3] RUNE(33)
+	// [0] RUNE('¡')
+	// [1] RUNE('ö')
+	// [2] RUNE('k')
+	// [3] RUNE('!')
 }
 
 func ExampleAPI_Expected() {
@@ -71,7 +71,7 @@ func ExampleAPI_Accept_inIfStatement() {
 		if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
 			// Result.String() returns a string containing all
 			// accepted runes that were matched against.
-			fmt.Println(p.Result().String())
+			fmt.Println(p.Result.String())
 		}
 	})
 	parser("YOWZA!")
@@ -88,7 +88,7 @@ func ExampleAPI_Accept_inSwitchStatement() {
 		case p.Accept(tokenize.A.Rune('X')):
 			// NOOP, skip this rune
 		case p.Accept(tokenize.A.AnyRune):
-			result += p.Result().String()
+			result += p.Result.String()
 		default:
 			loop = false
 		}
@@ -107,7 +107,7 @@ func ExampleAPI_Stop() {
 	parser := parse.New(func(p *parse.API) {
 		fmt.Printf("First word: ")
 		for p.Accept(c.Not(a.Space)) {
-			fmt.Printf("%s", p.Result())
+			fmt.Printf("%s", p.Result.String())
 		}
 		p.Stop()
 	})
@@ -123,7 +123,7 @@ func ExampleAPI_Stop_notCalledAndNoInputPending() {
 	parser := parse.New(func(p *parse.API) {
 		fmt.Printf("Word: ")
 		for p.Accept(c.Not(a.Space)) {
-			fmt.Printf("%s", p.Result())
+			fmt.Printf("%s", p.Result.String())
 		}
 		fmt.Printf("\n")
 	})
@@ -141,7 +141,7 @@ func ExampleAPI_Stop_notCalledButInputPending() {
 	parser := parse.New(func(p *parse.API) {
 		fmt.Printf("First word: ")
 		for p.Accept(c.Not(a.Space)) {
-			fmt.Printf("%s", p.Result())
+			fmt.Printf("%s", p.Result.String())
 		}
 		fmt.Printf("\n")
 	})
@@ -161,7 +161,7 @@ func ExampleAPI_Peek() {
 	// This handler is able to handle serial numbers.
 	serialnrHandler := func(p *parse.API) {
 		if p.Accept(serialnr) {
-			fmt.Println(p.Result().String())
+			fmt.Println(p.Result.String())
 		}
 	}
@@ -255,17 +255,6 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
 		`at /.*/parse_test\.go:\d+: no calls allowed after API\.Error\(\)`})
 }
 
-func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) {
-	p := parse.New(func(p *parse.API) {
-		p.Result()
-	})
-	parse.AssertPanic(t, parse.PanicT{
-		Function: func() { p("") },
-		Regexp:   true,
-		Expect: `parsekit\.parse\.API\.Result\(\): Result\(\) called at ` +
-			`/.*/parse_test.go:\d+ without calling API.Peek\(\) or API.Accept\(\) on beforehand`})
-}
-
 func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
 	p := parse.New(func(p *parse.API) {})
 	err := p("")
tokenize/api.go (361 changed lines)
@@ -25,7 +25,7 @@ import (
 //
 // By invoking NextRune() + Accept() multiple times, the result can be extended
 // with as many runes as needed. Runes collected this way can later on be
-// retrieved using the method Result().Runes().
+// retrieved using the method Runes().
 //
 // It is mandatory to call Accept() after retrieving a rune, before calling
 // NextRune() again. Failing to do so will result in a panic.
@@ -74,39 +74,40 @@ type API struct {
 	runeRead bool    // whether or not a rune was read using NextRune()
 	runes    []rune  // the rune stack
 	tokens   []Token // the token stack
-	runeStart  int
-	runeEnd    int
-	tokenStart int
-	tokenEnd   int
-	stackLevel int       // the stack level for this API object
-	state      *apiState // shared API state data
+	stackFrames []stackFrame // the stack frames, containing stack level-specific data
+	stackLevel  int          // the current stack level
+	stackFrame  *stackFrame  // the current stack frame
 }
 
-type apiState struct {
-	stack []Result // the stack, used for forking / merging the API.
-	top   int      // the index of the current top item in the stack
+type stackFrame struct {
+	offset     int // current rune offset relative to the Reader's sliding window
+	runeStart  int
+	runeEnd    int
+	tokenStart int
+	tokenEnd   int
+	cursor     Cursor
+
+	// TODO
+	err error // can be used by a Handler to report a specific issue with the input
 }
 
-// initialAPIstackDepth determines the initial stack depth for the API.
-// When a parser requires a higher stack depth, then this is no problem.
-// The API will automatically scale the stack when forking beyond this
-// default number of stack levels.
-const initialAPIstackDepth = 10
+const initialStackDepth = 10
+const initialTokenDepth = 10
+const initialRuneDepth = 10
 
 // NewAPI initializes a new API struct, wrapped around the provided input.
 // For an overview of allowed inputs, take a look at the documentation
 // for parsekit.read.New().
-func NewAPI(input interface{}) API {
-	stack := make([]Result, 1, initialAPIstackDepth)
-	state := apiState{
-		stack: stack,
-	}
-	return API{
-		runes:  make([]rune, initialAPIstackDepth),
-		tokens: make([]Token, initialAPIstackDepth),
-		reader: read.New(input),
-		state:  &state,
-	}
+func NewAPI(input interface{}) *API {
+	api := &API{
+		reader:      read.New(input),
+		runes:       make([]rune, 0, initialRuneDepth),
+		tokens:      make([]Token, 0, initialTokenDepth),
+		stackFrames: make([]stackFrame, 1, initialStackDepth),
+	}
+	api.stackFrame = &api.stackFrames[0]
+
+	return api
 }
 
 // NextRune returns the rune at the current read offset.
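The per-fork Result structs are gone: all stack levels now share two flat slices (runes, tokens), and each stackFrame records only the start/end window it owns, with a child's window beginning where its parent's ends. A stripped-down, runnable model of that invariant (illustrative names only, not the repository types):

	package main

	import "fmt"

	type frame struct{ start, end int }

	func main() {
		runes := []rune("abc")
		parent := frame{start: 0, end: 2}         // parent owns "ab"
		child := frame{start: parent.end, end: 3} // child owns "c"; no copying

		// Merging just moves the parent's end marker: the child's data
		// already sits in the right place inside the shared slice.
		parent.end = child.end
		fmt.Println(string(runes[parent.start:parent.end])) // prints: abc
	}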
@@ -120,25 +121,16 @@ func NewAPI(input interface{}) API {
 // without explicitly accepting, this method will panic. You can see this as a
 // built-in unit test, enforcing correct serialization of API method calls.
 func (i *API) NextRune() (rune, error) {
-	if i.stackLevel > i.state.top {
-		callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"using a non-active API fork (a parent was read, forked or merged, "+
-			"causing this fork to be invalidated)")
-	}
-
-	result := &(i.state.stack[i.stackLevel])
 	if i.runeRead {
 		callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
 			"without a prior call to Accept()")
 	}
 
-	readRune, err := i.reader.RuneAt(result.offset)
+	readRune, err := i.reader.RuneAt(i.stackFrame.offset)
 	i.lastRune = readRune
 	i.lastRuneErr = err
 	i.runeRead = true
 
-	i.DisposeChilds()
-
 	return readRune, err
 }
@@ -148,22 +140,31 @@ func (i *API) NextRune() (rune, error) {
 // It is not allowed to call Accept() when the previous call to NextRune()
 // returned an error. Calling Accept() in such case will result in a panic.
 func (i *API) Accept() {
-	if i.stackLevel > i.state.top {
-		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"using a non-active API fork (a parent was read, forked or merged, "+
-			"causing this fork to be invalidated)")
-	}
-
-	result := &(i.state.stack[i.stackLevel])
+	// TODO can go after completing the code for performance.
+	//fmt.Println("STACK [", i.stackLevel, "] runes", len(i.runes), "/", cap(i.runes), "tokens", len(i.tokens), "/", cap(i.tokens))
 	if !i.runeRead {
-		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
+		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
+			"without first calling NextRune()")
 	} else if i.lastRuneErr != nil {
-		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
+		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
+			"but the prior call to NextRune() failed")
 	}
 
-	result.runes = append(result.runes, i.lastRune)
-	result.cursor.moveByRune(i.lastRune)
-	result.offset++
+	newRuneEnd := i.stackFrame.runeEnd + 1
+
+	// Grow the runes capacity when needed.
+	if cap(i.runes) < newRuneEnd {
+		newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+		copy(newRunes, i.runes)
+		i.runes = newRunes
+	} else {
+		i.runes = i.runes[0:newRuneEnd]
+	}
+
+	i.runes[newRuneEnd-1] = i.lastRune
+	i.stackFrame.runeEnd++
+	i.stackFrame.cursor.moveByRune(i.lastRune)
+	i.stackFrame.offset++
 	i.runeRead = false
 }
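Accept() grows the shared rune slice by hand rather than through append(), doubling the capacity when it runs out so that the frames' index windows stay valid. The same pattern recurs throughout this commit; isolated as a sketch:

	// grow returns runes extended to newLen, doubling the capacity
	// whenever the current backing array is too small.
	func grow(runes []rune, newLen int) []rune {
		if cap(runes) < newLen {
			grown := make([]rune, newLen, newLen*2)
			copy(grown, runes)
			return grown
		}
		return runes[0:newLen]
	}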
@@ -184,44 +185,30 @@ func (i *API) Accept() {
 // Garbage collection will take care of this automatically.
 // The parent API was never modified, so it can safely be used after disposal
 // as if the lookahead never happened.
-func (i *API) Fork() API {
-	if i.stackLevel > i.state.top {
-		callerPanic("Fork", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"using a non-active API fork (a parent was read, forked or merged, "+
-			"causing this fork to be invalidated)")
-	}
-
-	i.DisposeChilds()
-	result := &(i.state.stack[i.stackLevel])
-
-	// Grow the stack storage when needed.
-	newStackSize := i.stackLevel + 2
-	if cap(i.state.stack) < newStackSize {
-		newStack := make([]Result, newStackSize, newStackSize+initialAPIstackDepth)
-		copy(newStack, i.state.stack)
-		i.state.stack = newStack
-	}
-	i.state.stack = i.state.stack[0 : i.stackLevel+1]
-
-	// Create the new fork.
-	child := API{
-		state:      i.state,
-		stackLevel: i.stackLevel + 1,
-		reader:     i.reader,
-	}
-	childResult := Result{
-		cursor: result.cursor,
-		offset: result.offset,
-	}
-	i.state.stack = append(i.state.stack, childResult)
-	//i.state.stack[i.stackLevel+1] = childResult
-
-	// Invalidate parent's last read rune.
+func (i *API) Fork() int {
+	newStackLevel := i.stackLevel + 1
+	newStackSize := newStackLevel + 1
+
+	// Grow the stack frames capacity when needed.
+	if cap(i.stackFrames) < newStackSize {
+		newFrames := make([]stackFrame, newStackSize, newStackSize*2)
+		copy(newFrames, i.stackFrames)
+		i.stackFrames = newFrames
+	} else {
+		i.stackFrames = i.stackFrames[0:newStackSize]
+	}
+
+	i.stackLevel++
 	i.runeRead = false
 
-	i.state.top = child.stackLevel
-
-	return child
+	parent := i.stackFrame
+	i.stackFrame = &i.stackFrames[i.stackLevel]
+	*i.stackFrame = *parent
+	i.stackFrame.runeStart = parent.runeEnd
+	i.stackFrame.tokenStart = parent.tokenEnd
+
+	return i.stackLevel
 }
 
 // Merge appends the results of a forked child API (runes, tokens) to the
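Because Fork() now returns an int level, a handler keeps working on the one *API value and uses the level purely as a handle for Merge() and Dispose(). A sketch of a lookahead written against the new signatures (hypothetical matcher, mirroring the updated ExampleAPI_Fork further down):

	func matchTwoDigits(t *tokenize.API) bool {
		child := t.Fork() // lookahead starts at a new stack level
		for n := 0; n < 2; n++ {
			r, err := t.NextRune()
			if err != nil || r < '0' || r > '9' {
				t.Dispose(child) // drop the lookahead; the parent is unchanged
				return false
			}
			t.Accept()
		}
		t.Merge(child)   // keep both digits in the parent
		t.Dispose(child) // pop back to the parent level
		return true
	}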
@@ -232,56 +219,68 @@ func (i *API) Fork() {
 // be reused for performing another match. This means that all Result data are
 // cleared, but the read cursor position is kept at its current position.
 // This allows a child to feed results in chunks to its parent.
-func (i *API) Merge() {
-	if i.stackLevel == 0 {
-		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on the top-level API")
-	}
-	if i.stackLevel > i.state.top {
+//
+// Once the child is no longer needed, it can be disposed of by using the
+// method Dispose(), which will return the tokenizer to the parent.
+func (i *API) Merge(stackLevel int) {
+	if stackLevel == 0 {
 		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"using a non-active API fork (a parent was read, forked or merged, "+
-			"causing this fork to be invalidated)")
+			"on the top-level API stack level 0")
+	}
+	if stackLevel != i.stackLevel {
+		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
+			"on API stack level %d, but the current stack level is %d "+
+			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
 	}
 
-	result := &(i.state.stack[i.stackLevel])
-	parentResult := &(i.state.stack[i.stackLevel-1])
-
-	// // Grow parent rune storage when needed.
-	// newRuneSize := len(parentResult.runes) + len(result.runes)
-	// if cap(parentResult.runes) < newRuneSize {
-	// 	newRunes := make([]rune, len(parentResult.runes), 2*newRuneSize)
-	// 	copy(newRunes, parentResult.runes)
-	// 	parentResult.runes = newRunes
-	// 	//fmt.Println("Beefed up runes", i.stackLevel-1, newRuneSize*2)
-	// }
-
-	// // Grow parent token storage when needed.
-	// newTokenSize := len(parentResult.tokens) + len(result.tokens)
-	// if cap(parentResult.tokens) < newTokenSize {
-	// 	newTokens := make([]Token, len(parentResult.tokens), 2*newTokenSize)
-	// 	copy(newTokens, parentResult.tokens)
-	// 	parentResult.tokens = newTokens
-	// 	//fmt.Println("Beefed up tokens", i.stackLevel-1, newTokenSize*2)
-	// }
-
-	parentResult.runes = append(parentResult.runes, result.runes...)
-	parentResult.tokens = append(parentResult.tokens, result.tokens...)
-	parentResult.offset = result.offset
-	parentResult.cursor = result.cursor
-	i.DisposeChilds()
-	i.Reset()
+	parent := &i.stackFrames[stackLevel-1]
+
+	// The end of the parent slice aligns with the start of the child slice.
+	// Because of this, to merge the parent slice can simply be expanded
+	// to include the child slice.
+	// parent : |----------|
+	// child:              |------|
+	// After merge operation:
+	// parent: |-----------------|
+	// child:                    |---> continue reading from here
+	parent.runeEnd = i.stackFrame.runeEnd
+	i.stackFrame.runeStart = i.stackFrame.runeEnd
+
+	// The same logic applies to tokens.
+	parent.tokenEnd = i.stackFrame.tokenEnd
+	i.stackFrame.tokenStart = i.stackFrame.tokenEnd
+
+	parent.offset = i.stackFrame.offset
+	parent.cursor = i.stackFrame.cursor
+
+	i.stackFrame.err = nil
+	i.runeRead = false
 }
 
-func (i *API) DisposeChilds() {
-	i.state.stack = i.state.stack[:i.stackLevel+1]
-	i.state.top = i.stackLevel
+func (i *API) Dispose(stackLevel int) {
+	if stackLevel == 0 {
+		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
+			"on the top-level API stack level 0")
+	}
+	if stackLevel != i.stackLevel {
+		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
+			"on API stack level %d, but the current stack level is %d "+
+			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
+	}
+
+	i.runeRead = false
+	i.stackLevel = stackLevel - 1
+	i.stackFrames = i.stackFrames[:stackLevel]
+	i.stackFrame = &i.stackFrames[stackLevel-1]
+	i.runes = i.runes[0:i.stackFrame.runeEnd]
+	i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
 }
 
 func (i *API) Reset() {
-	result := &(i.state.stack[i.stackLevel])
 	i.runeRead = false
-	result.runes = result.runes[:0]
-	result.tokens = result.tokens[:0]
-	result.err = nil
+	i.stackFrame.runeEnd = i.stackFrame.runeStart
+	i.stackFrame.tokenEnd = i.stackFrame.tokenStart
+	i.stackFrame.err = nil
 }
 
 // FlushInput flushes processed input data from the read.Buffer.
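Merge() moves the child's start markers up to its end markers instead of clearing state, so a merged child remains usable and can stream results to its parent in chunks, as the doc comment above describes. A sketch of that pattern (hypothetical handler):

	func feedInChunks(t *tokenize.API, chunk tokenize.Handler) bool {
		child := t.Fork()
		for chunk(t) {
			t.Merge(child) // hand the chunk to the parent, keep reading
		}
		t.Dispose(child)
		return true
	}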
@@ -291,18 +290,126 @@ func (i *API) Reset() {
 // Note:
 // When writing your own TokenHandler, you normally won't have to call this
 // method yourself. It is automatically called by parsekit when needed.
-func (i API) FlushInput() bool {
-	result := &(i.state.stack[i.stackLevel])
-	if result.offset > 0 {
-		i.reader.Flush(result.offset)
-		result.offset = 0
+func (i *API) FlushInput() bool {
+	// result := &(i.state.stack[i.stackLevel])
+	if i.stackFrame.offset > 0 {
+		i.reader.Flush(i.stackFrame.offset)
+		i.stackFrame.offset = 0
 		return true
 	}
 	return false
 }
 
-// Result returns the Result struct from the API. The returned struct
-// can be used to retrieve and to modify result data.
-func (i API) Result() *Result {
-	return &(i.state.stack[i.stackLevel])
+func (i *API) String() string {
+	return string(i.Runes())
+}
+
+func (i *API) Runes() []rune {
+	return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
+}
+
+func (i *API) Rune(offset int) rune {
+	return i.runes[i.stackFrame.runeStart+offset]
+}
+
+func (i *API) ClearRunes() {
+	i.runes = i.runes[:i.stackFrame.runeStart]
+	i.stackFrame.runeEnd = i.stackFrame.runeStart
+}
+
+func (i *API) SetRunes(runes ...rune) {
+	// Grow the runes capacity when needed.
+	newRuneEnd := i.stackFrame.runeStart + len(runes)
+	if cap(i.runes) < newRuneEnd {
+		newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+		copy(newRunes, i.runes)
+		i.runes = newRunes
+	} else {
+		i.runes = i.runes[0:newRuneEnd]
+	}
+
+	for offset, r := range runes {
+		i.runes[i.stackFrame.runeStart+offset] = r
+	}
+	i.stackFrame.runeEnd = newRuneEnd
+}
+
+func (i *API) AddRunes(runes ...rune) {
+	// Grow the runes capacity when needed.
+	newRuneEnd := i.stackFrame.runeEnd + len(runes)
+	if cap(i.runes) < newRuneEnd {
+		newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+		copy(newRunes, i.runes)
+		i.runes = newRunes
+	} else {
+		i.runes = i.runes[0:newRuneEnd]
+	}
+
+	for offset, r := range runes {
+		i.runes[i.stackFrame.runeEnd+offset] = r
+	}
+	i.stackFrame.runeEnd = newRuneEnd
+}
+
+func (i *API) AddString(s string) {
+	i.AddRunes([]rune(s)...)
+}
+
+func (i *API) SetString(s string) {
+	i.SetRunes([]rune(s)...)
+}
+
+func (i *API) Cursor() Cursor {
+	return i.stackFrame.cursor
+}
+
+func (i *API) Tokens() []Token {
+	return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
+}
+
+func (i *API) Token(offset int) Token {
+	return i.tokens[i.stackFrame.tokenStart+offset]
+}
+
+func (i *API) TokenValue(offset int) interface{} {
+	return i.tokens[i.stackFrame.tokenStart+offset].Value
+}
+
+func (i *API) ClearTokens() {
+	i.tokens = i.tokens[:i.stackFrame.tokenStart]
+	i.stackFrame.tokenEnd = i.stackFrame.tokenStart
+}
+
+func (i *API) SetTokens(tokens ...Token) {
+	// Grow the tokens capacity when needed.
+	newTokenEnd := i.stackFrame.tokenStart + len(tokens)
+	if cap(i.tokens) < newTokenEnd {
+		newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
+		copy(newTokens, tokens)
+		i.tokens = newTokens
+	} else {
+		i.tokens = i.tokens[0:newTokenEnd]
+	}
+
+	for offset, t := range tokens {
+		i.tokens[i.stackFrame.tokenStart+offset] = t
+	}
+	i.stackFrame.tokenEnd = newTokenEnd
+}
+
+func (i *API) AddTokens(tokens ...Token) {
+	// Grow the tokens capacity when needed.
+	newTokenEnd := i.stackFrame.tokenEnd + len(tokens)
+	if cap(i.tokens) < newTokenEnd {
+		newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
+		copy(newTokens, i.tokens)
+		i.tokens = newTokens
+	} else {
+		i.tokens = i.tokens[0:newTokenEnd]
+	}
+
+	for offset, t := range tokens {
+		i.tokens[i.stackFrame.tokenEnd+offset] = t
+	}
+	i.stackFrame.tokenEnd = newTokenEnd
 }
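Note the receiver change on FlushInput(), from func (i API) to func (i *API): with a value receiver the method mutates a copy, so the offset reset would be lost, and every call would copy the whole struct. A generic illustration of the difference (not repository code):

	type counter struct{ n int }

	func (c counter) incByValue()    { c.n++ } // mutates a copy; the caller sees nothing
	func (c *counter) incByPointer() { c.n++ } // mutates the caller's value

After c.incByValue() the field is unchanged; after c.incByPointer() it is incremented.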
@@ -18,7 +18,7 @@ func ExampleAPI_NextRune() {
 	r, err := api.NextRune()
 	fmt.Printf("Rune read from input; %c\n", r)
 	fmt.Printf("The error: %v\n", err)
-	fmt.Printf("API results: %q\n", api.Result().String())
+	fmt.Printf("API results: %q\n", api.String())
 
 	// Output:
 	// Rune read from input; T
@@ -34,38 +34,38 @@ func ExampleAPI_Accept() {
 	api.Accept()   // adds 'h' to the API results
 	api.NextRune() // reads 'e', but it is not added to the API results
 
-	fmt.Printf("API results: %q\n", api.Result().String())
+	fmt.Printf("API results: %q\n", api.String())
 
 	// Output:
 	// API results: "Th"
 }
 
-func ExampleAPI_Result() {
+func ExampleAPI_modifyingResults() {
 	api := tokenize.NewAPI("")
 
-	result := api.Result()
-
-	result.AddRunes("Some runes")
-	result.AddRunes([]rune{' ', 'a', 'd', 'd', 'e', 'd'})
-	result.AddRunes(' ', 'i', 'n', ' ', "various ways")
-	fmt.Printf("API result first 10 runes: %q\n", api.Result().Runes()[0:10])
-	fmt.Printf("API result runes as string: %q\n", api.Result().String())
-
-	result.SetRunes("new ", "set ", "of ", 'r', 'u', 'n', 'e', 's')
-	fmt.Printf("API result runes as string: %q\n", api.Result().String())
-	fmt.Printf("API result runes: %q\n", api.Result().Runes())
-	fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
-
-	result.AddTokens(tokenize.Token{
-		Runes: []rune("demo 1"),
+	api.AddString("Some runes")
+	api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
+	api.AddRunes(' ', 'i', 'n', ' ')
+	api.AddString("various ways")
+	fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
+	fmt.Printf("API result runes as string: %q\n", api.String())
+
+	api.SetString("new ")
+	api.AddString("set ")
+	api.AddString("of ")
+	api.AddRunes('r', 'u', 'n', 'e', 's')
+	fmt.Printf("API result runes as string: %q\n", api.String())
+	fmt.Printf("API result runes: %q\n", api.Runes())
+	fmt.Printf("API third rune: %q\n", api.Rune(2))
+
+	api.AddTokens(tokenize.Token{
 		Type:  42,
 		Value: "towel"})
-	result.AddTokens(tokenize.Token{
-		Runes: []rune("demo 2"),
+	api.AddTokens(tokenize.Token{
 		Type:  73,
 		Value: "Zaphod"})
-	fmt.Printf("API result tokens: %v\n", api.Result().Tokens())
-	fmt.Printf("API second result token: %v\n", api.Result().Token(1))
+	fmt.Printf("API result tokens: %v\n", api.Tokens())
+	fmt.Printf("API second result token: %v\n", api.Token(1))
 
 	// Output:
 	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
@@ -84,17 +84,17 @@ func ExampleAPI_Reset() {
 	api.Accept()
 	api.NextRune()
 	api.Accept()
-	fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
 	// Reset clears the results, but keeps the cursor position.
 	api.Reset()
-	fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
 	api.NextRune()
 	api.Accept()
 	api.NextRune()
 	api.Accept()
-	fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
 	// Output:
 	// API results: "Ve" at line 1, column 3
@@ -104,14 +104,16 @@ func ExampleAPI_Reset() {
 
 func ExampleAPI_Fork() {
 	// This custom Handler checks for input 'a', 'b' or 'c'.
-	abcHandler := func(t tokenize.API) bool {
+	abcHandler := func(t *tokenize.API) bool {
 		a := tokenize.A
 		for _, r := range []rune{'a', 'b', 'c'} {
 			child := t.Fork() // fork, so we won't change parent t
-			if a.Rune(r)(child) {
-				child.Merge() // accept results into parent t
-				return true   // and report a successful match
+			if a.Rune(r)(t) {
+				t.Merge(child)   // accept results into parent of child
+				t.Dispose(child) // return to the parent level
+				return true      // and report a successful match
 			}
+			t.Dispose(child) // return to the parent level
 		}
 		// If we get here, then no match was found. Return false to communicate
 		// this to the caller.
@@ -141,25 +143,27 @@ func ExampleAPI_Fork() {
 }
 
 func ExampleAPI_Merge() {
-	tokenHandler := func(t tokenize.API) bool {
+	tokenHandler := func(t *tokenize.API) bool {
 		child1 := t.Fork()
-		child1.NextRune() // reads 'H'
-		child1.Accept()
-		child1.NextRune() // reads 'i'
-		child1.Accept()
+		t.NextRune() // reads 'H'
+		t.Accept()
+		t.NextRune() // reads 'i'
+		t.Accept()
 
-		child2 := child1.Fork()
-		child2.NextRune() // reads ' '
-		child2.Accept()
-		child2.NextRune() // reads 'd'
-		child2.Accept()
+		child2 := t.Fork()
+		t.NextRune() // reads ' '
+		t.Accept()
+		t.NextRune() // reads 'm'
+		t.Accept()
+		t.Dispose(child2)
 
-		child1.Merge() // We merge child1, which has read 'H' and 'i' only.
+		t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
+		t.Dispose(child1) // and clean up child1 to return to the parent
 		return true
 	}
 
 	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
-	fmt.Println(result)
+	fmt.Println(result.String())
 
 	// Output:
 	// Hi
@@ -170,75 +174,157 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
 	// Fork a few levels.
 	child1 := api.Fork()
-	child2 := child1.Fork()
-	child3 := child2.Fork()
-	child4 := child3.Fork()
+	child2 := api.Fork()
+	child3 := api.Fork()
+	child4 := api.Fork()
 
-	// Read some data from child4.
-	r, _ := child4.NextRune()
-	child4.Accept()
+	// Read a rune 'a' from child4.
+	r, _ := api.NextRune()
 	AssertEqual(t, 'a', r, "child4 rune 1")
+	api.Accept()
+	AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
 
-	r, _ = child4.NextRune()
-	child4.Accept()
+	// Read another rune 'b' from child4.
+	r, _ = api.NextRune()
 	AssertEqual(t, 'b', r, "child4 rune 2")
+	api.Accept()
+	AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
 
-	// Merge it to child3.
-	child4.Merge()
+	// Merge "ab" from child4 to child3.
+	api.Merge(child4)
+	AssertEqual(t, "", api.String(), "child4 runes after first merge")
 
 	// Read some more from child4.
-	r, _ = child4.NextRune()
-	child4.Accept()
+	r, _ = api.NextRune()
 	AssertEqual(t, 'c', r, "child4 rune 3")
-	AssertEqual(t, "line 1, column 4", child4.Result().Cursor().String(), "cursor child4 rune 3")
+	api.Accept()
+	AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
+	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
 
-	AssertEqual(t, "line 1, column 3", child3.Result().Cursor().String(), "cursor child3 rune 3, before merge of child 4")
+	// Merge "c" from child4 to child3.
+	api.Merge(child4)
 
-	// Again, merge it to child3.
-	child4.Merge()
-	AssertEqual(t, "line 1, column 4", child3.Result().Cursor().String(), "cursor child3 rune 3, after merge of child 4")
+	// And dispose of child4, making child3 the active stack level.
+	api.Dispose(child4)
+
+	// Child3 should now have the compbined results "abc" from child4's work.
+	AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
+	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
 
 	// Now read some data from child3.
-	r, _ = child3.NextRune()
-	child3.Accept()
-	r, _ = child3.NextRune()
-	child3.Accept()
-	r, _ = child3.NextRune()
-	child3.Accept()
-	AssertEqual(t, 'f', r, "child3 rune 5")
-
-	AssertEqual(t, "abcdef", child3.Result().String(), "child3 total result after rune 6")
+	r, _ = api.NextRune()
+	AssertEqual(t, 'd', r, "child3 rune 5")
+	api.Accept()
+
+	r, _ = api.NextRune()
+	AssertEqual(t, 'e', r, "child3 rune 5")
+	api.Accept()
+
+	r, _ = api.NextRune()
+	AssertEqual(t, 'f', r, "child3 rune 5")
+	api.Accept()
+
+	AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
 
 	// Temporarily go some new forks from here, but don't use their outcome.
-	child3sub1 := child3.Fork()
-	child3sub1.NextRune()
-	child3sub1.Accept()
-	child3sub1.NextRune()
-	child3sub1.Accept()
-	child3sub2 := child3sub1.Fork()
-	child3sub2.NextRune()
-	child3sub2.Accept()
-	child3sub2.Merge()
+	child3sub1 := api.Fork()
+	api.NextRune()
+	api.Accept()
+	api.NextRune()
+	api.Accept()
+	child3sub2 := api.Fork()
+	api.NextRune()
+	api.Accept()
+	api.Merge(child3sub2)   // do merge sub2 down to sub1
+	api.Dispose(child3sub2) // and dispose of sub2
+	api.Dispose(child3sub1) // but dispose of sub1 without merging
 
-	// Instead merge the pre-forking results from child3 to child2.
-	child3.Merge()
+	// Instead merge the results from before this forking segway from child3 to child2
+	// and dispose of it.
+	api.Merge(child3)
+	api.Dispose(child3)
 
-	AssertEqual(t, "abcdef", child2.Result().String(), "child2 total result after merge of child3")
-	AssertEqual(t, "line 1, column 7", child2.Result().Cursor().String(), "cursor child2 after merge child3")
+	AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
+	AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
 
-	// Merge child2 to child1.
-	child2.Merge()
+	// Merge child2 to child1 and dispose of it.
+	api.Merge(child2)
+	api.Dispose(child2)
 
 	// Merge child1 a few times to the top level api.
-	child1.Merge()
-	child1.Merge()
-	child1.Merge()
-	child1.Merge()
+	api.Merge(child1)
+	api.Merge(child1)
+	api.Merge(child1)
+	api.Merge(child1)
+
+	// And dispose of it.
+	api.Dispose(child1)
 
 	// Read some data from the top level api.
 	r, _ = api.NextRune()
 	api.Accept()
 
-	AssertEqual(t, "abcdefg", api.Result().String(), "api string end result")
-	AssertEqual(t, "line 1, column 8", api.Result().Cursor().String(), "api cursor end result")
+	AssertEqual(t, "abcdefg", api.String(), "api string end result")
+	AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
+}
+
+func TestClearRunes(t *testing.T) {
+	api := tokenize.NewAPI("Laphroaig")
+	api.NextRune()   // Read 'L'
+	api.Accept()     // Add to runes
+	api.NextRune()   // Read 'a'
+	api.Accept()     // Add to runes
+	api.ClearRunes() // Clear the runes, giving us a fresh start.
+	api.NextRune()   // Read 'p'
+	api.Accept()     // Add to runes
+	api.NextRune()   // Read 'r'
+	api.Accept()     // Add to runes
+
+	AssertEqual(t, "ph", api.String(), "api string end result")
+}
+
+func TestMergeScenariosForTokens(t *testing.T) {
+	api := tokenize.NewAPI("")
+
+	token1 := tokenize.Token{Value: 1}
+	token2 := tokenize.Token{Value: 2}
+	token3 := tokenize.Token{Value: 3}
+	token4 := tokenize.Token{Value: 4}
+
+	api.SetTokens(token1)
+	tokens := api.Tokens()
+	AssertEqual(t, 1, len(tokens), "Tokens 1")
+
+	child := api.Fork()
+
+	tokens = api.Tokens()
+	AssertEqual(t, 0, len(tokens), "Tokens 2")
+
+	api.AddTokens(token2)
+
+	// Here we can merge by expanding the token slice on the parent,
+	// because the end of the parent slice and the start of the child
+	// slice align.
+	api.Merge(child)
+	api.Dispose(child)
+
+	tokens = api.Tokens()
+	AssertEqual(t, 2, len(tokens), "Tokens 3")
+
+	child = api.Fork()
+	api.AddTokens(token3)
+	api.Reset()
+	api.AddTokens(token4)
+
+	// Here the merge means that token4 will be copied to the end of
+	// the token slice of the parent, since there's a gap at the place
+	// where token3 used to be.
+	api.Merge(child)
+	api.Dispose(child)
+
+	tokens = api.Tokens()
+	AssertEqual(t, 3, len(tokens), "Tokens 4")
+	AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
+	AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
+	AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
 }
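One behavior the token test pins down: immediately after Fork(), Tokens() reports an empty slice even though the parent holds token1, because the accessors expose only the current frame's window (tokens[tokenStart:tokenEnd]) and a fresh fork's window starts at its parent's end. Annotated walk-through of the first scenario (window values are illustrative):

	api.SetTokens(token1) // parent window [0,1)
	child := api.Fork()   // child window [1,1): Tokens() -> empty
	api.AddTokens(token2) // child window [1,2)
	api.Merge(child)      // parent window grows to [0,2)
	api.Dispose(child)    // back at the parent: Tokens() -> [token1, token2]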
@@ -6,7 +6,7 @@ import (
 	"regexp"
 	"testing"
 
-	"git.makaay.nl/mauricem/go-parsekit/tokenize"
+	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@@ -110,9 +110,6 @@ func AssertTokenMaker(t *testing.T, test TokenMakerT) {
 		if expected.Type != actual.Type {
 			t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
 		}
-		if string(expected.Runes) != string(actual.Runes) {
-			t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
-		}
 		if expected.Value != actual.Value {
 			t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
 		}
@@ -1,4 +1,4 @@
-package tokenize2
+package tokenize
 
 import (
 	"strings"
@@ -7,11 +7,11 @@ package tokenize
 // A Handler function gets an API as its input and returns a boolean to
 // indicate whether or not it found a match on the input. The API is used
 // for retrieving input data to match against and for reporting back results.
-type Handler func(t API) bool
+type Handler func(t *API) bool
 
 // Match is syntactic sugar that allows you to write a construction like
 // NewTokenizer(handler).Execute(input) as handler.Match(input).
-func (handler Handler) Match(input interface{}) (*Result, error) {
+func (handler Handler) Match(input interface{}) (*API, error) {
     tokenizer := New(handler)
     return tokenizer(input)
 }
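With this change, a Handler's Match() hands back the *API itself rather than a separate *Result. A minimal calling sketch, assuming the post-refactor accessors used elsewhere in this commit (the input string and printed value are illustrative only, not from the repository):

package main

import (
    "fmt"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func main() {
    // handler.Match(input) is sugar for tokenize.New(handler)(input).
    handler := tokenize.A.Integer
    api, err := handler.Match("42!") // hypothetical input
    if err != nil {
        fmt.Println("no match:", err)
        return
    }
    fmt.Println(api.String()) // results are now read straight from the *API
}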
@@ -4,7 +4,7 @@ import (
     "fmt"
     "testing"
 
-    "git.makaay.nl/mauricem/go-parsekit/tokenize"
+    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func TestSyntacticSugar(t *testing.T) {
@@ -230,7 +230,7 @@ var A = struct {
     Lower:        MatchUnicodeLower(),
     Upper:        MatchUnicodeUpper(),
     HexDigit:     MatchHexDigit(),
-    Octet:        MatchOctet(false),
+    Octet:        MatchOctet(true),
     IPv4:         MatchIPv4(true),
     IPv4CIDRMask: MatchIPv4CIDRMask(true),
     IPv4Netmask:  MatchIPv4Netmask(true),
@@ -306,7 +306,7 @@ var T = struct {
     Float64    func(interface{}, Handler) Handler
     Boolean    func(interface{}, Handler) Handler
     ByValue    func(toktype interface{}, handler Handler, value interface{}) Handler
-    ByCallback func(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler
+    ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler
     Group      func(interface{}, Handler) Handler
 }{
     Str: MakeStrLiteralToken,
@@ -405,9 +405,9 @@ func MatchUnicodeSpace() Handler {
 // Note that the callback function matches the signature of the unicode.Is* functions,
 // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
 func MatchRuneByCallback(callback func(rune) bool) Handler {
-    return func(t API) bool {
-        input, err := t.NextRune()
-        if err == nil && callback(input) {
+    return func(t *API) bool {
+        r, err := t.NextRune()
+        if err == nil && callback(r) {
             t.Accept()
             return true
         }
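As the doc comment notes, the unicode.Is* functions already have the required callback signature. A small sketch, assuming package-level access to MatchRuneByCallback (the same way the tests in this commit call tokenize.MatchOctet directly):

// assumes: import "unicode" plus the tokenize package from this repository
var upper = tokenize.MatchRuneByCallback(unicode.IsUpper)
var greek = tokenize.MatchRuneByCallback(func(r rune) bool {
    return unicode.Is(unicode.Greek, r) // any func(rune) bool predicate works
})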
@@ -422,9 +422,9 @@ func MatchEndOfLine() Handler {
 
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-    var handlers = []Handler{}
-    for _, r := range expected {
-        handlers = append(handlers, MatchRune(r))
+    var handlers = make([]Handler, len(expected))
+    for i, r := range expected {
+        handlers[i] = MatchRune(r)
     }
     return MatchSeq(handlers...)
 }
@@ -453,16 +453,20 @@ func MatchOptional(handler Handler) Handler {
 // applied in their exact order. Only if all Handlers apply, the sequence
 // reports successful match.
 func MatchSeq(handlers ...Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         child := t.Fork()
         for _, handler := range handlers {
-            subchild := child.Fork()
-            if !handler(subchild) {
+            subchild := t.Fork()
+            if !handler(t) {
+                t.Dispose(subchild)
+                t.Dispose(child)
                 return false
             }
-            subchild.Merge()
+            t.Merge(subchild)
+            t.Dispose(subchild)
         }
-        child.Merge()
+        t.Merge(child)
+        t.Dispose(child)
         return true
     }
 }
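Usage-wise nothing changes for grammar authors: a sequence still only matches when every handler in it matches, in order. A sketch grounded in the test table later in this commit:

// c.Seq maps to MatchSeq; a.Str, a.Comma, a.Space and a.Excl are atoms from tokenize.A.
var c, a = tokenize.C, tokenize.A
var greeting = c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl)
// "Hello, world!X" matches and yields "Hello, world!"; a missing comma fails the whole Seq.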
@@ -471,14 +475,17 @@ func MatchSeq(handlers ...Handler) Handler {
 // can be applied. They are applied in their provided order. The first Handler
 // that applies is used for reporting back a match.
 func MatchAny(handlers ...Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         for _, handler := range handlers {
             child := t.Fork()
-            if handler(child) {
-                child.Merge()
+            if handler(t) {
+                t.Merge(child)
+                t.Dispose(child)
                 return true
             }
+            t.Dispose(child) // TODO switch to Reset() and move forking outside the loop?
         }
 
         return false
     }
 }
@@ -487,10 +494,13 @@ func MatchAny(handlers ...Handler) Handler {
 // the current input. If it does, then a failed match will be reported. If it
 // does not, then the next rune from the input will be reported as a match.
 func MatchNot(handler Handler) Handler {
-    return func(t API) bool {
-        if handler(t.Fork()) {
+    return func(t *API) bool {
+        child := t.Fork()
+        if handler(t) {
+            t.Dispose(child)
             return false
         }
+        t.Dispose(child)
         _, err := t.NextRune()
         if err == nil {
             t.Accept()
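A sketch of the behavior, matching the updated test rows: MatchNot consumes exactly one rune, and only when the wrapped handler does not match at the current position:

var c, a = tokenize.C, tokenize.A
var notB = c.Not(a.Rune('b')) // c.Not maps to MatchNot
// "abc not" matches and yields "a"; "bcd not" fails, because 'b' matches the wrapped handler.
// c.OneOrMore(c.Not(a.Rune('b'))) on "aaaxxxb" therefore yields "aaaxxx".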
@@ -568,28 +578,30 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
     if max >= 0 && min > max {
         callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
     }
-    return func(t API) bool {
+    return func(t *API) bool {
         total := 0
 
         // Check for the minimum required amount of matches.
+        child := t.Fork()
         for total < min {
             total++
-            child := t.Fork()
-            if !handler(child) {
+            if !handler(t) {
+                t.Dispose(child)
                 return false
             }
-            child.Merge()
         }
 
         // No specified max: include the rest of the available matches.
         // Specified max: include the rest of the availble matches, up to the max.
         //child.Merge()
         for max < 0 || total < max {
             total++
-            child := t.Fork()
-            if !handler(child) {
+            if !handler(t) {
                 break
             }
-            child.Merge()
         }
+        t.Merge(child)
+        t.Dispose(child)
         return true
     }
 }
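matchMinMax backs the public Min/Max/Repeated-style combinators. A sketch grounded in the test table in this commit:

var c, a = tokenize.C, tokenize.A
var fourAB = c.Repeated(4, a.Runes('a', 'b')) // exactly four runes from {'a','b'}
var manyA = c.Min(4, a.Rune('a'))             // at least four 'a's, greedy beyond that
// "ababc" matches fourAB with "abab"; "aaaaa" matches manyA with "aaaaa".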
@@ -607,10 +619,13 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
 // applied. If the handler applies, but the except Handler as well, then the match
 // as a whole will be treated as a mismatch.
 func MatchExcept(handler Handler, except Handler) Handler {
-    return func(t API) bool {
-        if except(t.Fork()) {
+    return func(t *API) bool {
+        child := t.Fork()
+        if except(t) {
+            t.Dispose(child)
             return false
         }
+        t.Dispose(child)
         return handler(t)
     }
 }
@@ -620,11 +635,12 @@ func MatchExcept(handler Handler, except Handler) Handler {
 // When both handlers match, the match for the handler is accepted and the match
 // for the lookAhead handler is ignored.
 func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
-    return func(t API) bool {
-        child := t.Fork()
-        if handler(child) && lookAhead(child.Fork()) {
-            child.Merge()
-            return true
+    return func(t *API) bool {
+        if handler(t) {
+            child := t.Fork()
+            result := lookAhead(t)
+            t.Dispose(child)
+            return result
         }
         return false
     }
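The look-ahead part is forked and then disposed without merging, so it never contributes to the output. A sketch from the test rows added in this commit:

var c, a = tokenize.C, tokenize.A
var aThenB = c.FollowedBy(a.Rune('b'), a.Rune('a'))
// "ab" matches and yields only "a": the trailing 'b' is required but not consumed.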
@@ -635,11 +651,12 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
 // If the handler matches and the lookAhead handler doesn't, then the match for
 // the handler is accepted.
 func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
-    return func(t API) bool {
-        child := t.Fork()
-        if handler(child) && !lookAhead(child.Fork()) {
-            child.Merge()
-            return true
+    return func(t *API) bool {
+        if handler(t) {
+            child := t.Fork()
+            result := !lookAhead(t)
+            t.Dispose(child)
+            return result
         }
         return false
     }
@@ -654,14 +671,14 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
 //
 // Without flushing the input, the input reader will allocate memory
 // during the parsing process, eventually enough to hold the full input
-// in memory. By wrapping Handlers with DoFlushInput, you can tell parsekit
+// in memory. By wrapping Handlers with an input flusher, you can tell parsekit
 // that the accumulated input so far will no longer be needed, allowing
 // this input to be flushed from memory.
 //
 // Rule of thumb is: only use it when you have to actually fix a memory
 // hogging issue for your use case.
 func MakeInputFlusher(handler Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         if handler(t) {
             t.FlushInput()
             return true
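A sketch of the pattern the comment describes, for a streaming, line-oriented input (MatchUntilEndOfLine is a handler from this same file):

// Flush the reader's buffer after every matched line, so memory use stays
// bounded by line length instead of total input size.
var line = tokenize.MakeInputFlusher(tokenize.MatchUntilEndOfLine())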
|
@ -689,11 +706,12 @@ func MatchIntegerBetween(min int64, max int64) Handler {
|
||||||
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
|
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
|
||||||
}
|
}
|
||||||
digits := MatchSigned(MatchDigits())
|
digits := MatchSigned(MatchDigits())
|
||||||
return func(t API) bool {
|
|
||||||
|
return func(t *API) bool {
|
||||||
if !digits(t) {
|
if !digits(t) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
value, _ := strconv.ParseInt(t.Result().String(), 10, 64)
|
value, _ := strconv.ParseInt(t.String(), 10, 64)
|
||||||
if value < min || value > max {
|
if value < min || value > max {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
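A sketch grounded in the test table: the handler parses the signed digits first and then range-checks the value, so out-of-range input is a clean mismatch rather than an error (the variable name below is mine, not from the repository):

var a = tokenize.A
var smallInt = a.IntegerBetween(-10, 10) // hypothetical name
// "-10", "0" and "10" match; "11" and "fifteen" do not.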
|
@ -705,9 +723,10 @@ func MatchIntegerBetween(min int64, max int64) Handler {
|
||||||
// has been reached. This Handler will never produce output. It only reports
|
// has been reached. This Handler will never produce output. It only reports
|
||||||
// a successful or a failing match through its boolean return value.
|
// a successful or a failing match through its boolean return value.
|
||||||
func MatchEndOfFile() Handler {
|
func MatchEndOfFile() Handler {
|
||||||
return func(t API) bool {
|
return func(t *API) bool {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
_, err := child.NextRune()
|
_, err := t.NextRune()
|
||||||
|
t.Dispose(child)
|
||||||
return err == io.EOF
|
return err == io.EOF
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -723,7 +742,7 @@ func MatchUntilEndOfLine() Handler {
|
||||||
// read from the input. Invalid runes on the input are replaced with the UTF8
|
// read from the input. Invalid runes on the input are replaced with the UTF8
|
||||||
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as <20>.
|
||||||
func MatchAnyRune() Handler {
|
func MatchAnyRune() Handler {
|
||||||
return func(t API) bool {
|
return func(t *API) bool {
|
||||||
_, err := t.NextRune()
|
_, err := t.NextRune()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Accept()
|
t.Accept()
|
||||||
|
@@ -736,7 +755,7 @@ func MatchAnyRune() Handler {
 // MatchValidRune creates a Handler function that checks if a valid
 // UTF8 rune can be read from the input.
 func MatchValidRune() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         r, err := t.NextRune()
         if err == nil && r != utf8.RuneError {
             t.Accept()
@@ -749,7 +768,7 @@
 // MatchInvalidRune creates a Handler function that checks if an invalid
 // UTF8 rune can be read from the input.
 func MatchInvalidRune() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         r, err := t.NextRune()
         if err == nil && r == utf8.RuneError {
             t.Accept()
@@ -860,20 +879,20 @@ func MatchHexDigit() Handler {
 // stripped from the octet.
 func MatchOctet(normalize bool) Handler {
     max3Digits := MatchMinMax(1, 3, MatchDigit())
-    return func(t API) bool {
+    return func(t *API) bool {
         if !max3Digits(t) {
             return false
         }
-        value, _ := strconv.ParseInt(t.Result().String(), 10, 16)
+        value, _ := strconv.ParseInt(t.String(), 10, 16)
         if value > 255 {
             return false
         }
         if normalize {
-            runes := t.Result().Runes()
+            runes := t.Runes()
             for len(runes) > 1 && runes[0] == '0' {
                 runes = runes[1:]
             }
-            t.Result().SetRunes(runes)
+            t.SetRunes(runes...)
         }
         return true
     }
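The normalize flag is what changed in the A.Octet atom earlier in this commit. A sketch of the difference, grounded in the updated tests:

var normalizing = tokenize.MatchOctet(true) // "010X" matches, output "10"
var verbatim = tokenize.MatchOctet(false)   // "010X" matches, output "010"
// Both reject "256" and "300", since the parsed value must fit in 0..255.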
@@ -909,20 +928,19 @@ func MatchIPv4Netmask(normalize bool) Handler {
     dot := MatchRune('.')
     netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
 
-    return func(t API) bool {
+    return func(t *API) bool {
         if !netmask(t) {
             return false
         }
 
-        // Check if the mask is provided in canonical form (ones followed by zeroes).
-        r := t.Result()
-        mask := net.IPv4Mask(r.Value(0).(byte), r.Value(1).(byte), r.Value(2).(byte), r.Value(3).(byte))
+        // Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes).
+        mask := net.IPv4Mask(t.TokenValue(0).(byte), t.TokenValue(1).(byte), t.TokenValue(2).(byte), t.TokenValue(3).(byte))
         ones, bits := mask.Size()
         if ones == 0 && bits == 0 {
             return false
         }
 
-        r.ClearTokens()
+        t.ClearTokens()
         return true
     }
 }
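The canonical-form check leans on the standard library: net.IPMask.Size() returns 0, 0 for any mask whose bits are not a run of ones followed by zeroes. A self-contained illustration:

package main

import (
    "fmt"
    "net"
)

func main() {
    fmt.Println(net.IPv4Mask(255, 255, 128, 0).Size()) // 17 32: canonical mask
    fmt.Println(net.IPv4Mask(255, 255, 132, 0).Size()) // 0 0: 132 breaks the run of ones
}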
@@ -942,7 +960,7 @@ func MatchIPv4Net(normalize bool) Handler {
         MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
     ipnet := MatchSeq(ip, slash, mask)
 
-    return func(t API) bool {
+    return func(t *API) bool {
         if !ipnet(t) {
             return false
         }
@@ -951,19 +969,18 @@
             return true
         }
 
-        r := t.Result()
-        maskToken := r.Token(1)
+        maskToken := t.Token(1)
         if maskToken.Type == "cidr" {
-            r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), r.Value(1).(uint8)))
+            t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), t.TokenValue(1).(uint8)))
         } else {
-            o := strings.Split(r.Value(1).(string), ".")
+            o := strings.Split(t.TokenValue(1).(string), ".")
             b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) }
             mask := net.IPv4Mask(b(0), b(1), b(2), b(3))
             bits, _ := mask.Size()
-            r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), bits))
+            t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), bits))
         }
 
-        r.ClearTokens()
+        t.ClearTokens()
         return true
     }
 }
@@ -975,7 +992,7 @@ func MatchIPv6(normalize bool) Handler {
     colon := MatchRune(':')
     empty := MatchSeq(colon, colon)
 
-    return func(t API) bool {
+    return func(t *API) bool {
         nrOfHextets := 0
         for nrOfHextets < 8 {
             if hextet(t) {
@@ -992,13 +1009,13 @@ func MatchIPv6(normalize bool) Handler {
         }
 
         // Invalid IPv6, when net.ParseIP() cannot handle it.
-        parsed := net.ParseIP(t.Result().String())
+        parsed := net.ParseIP(t.String())
         if parsed == nil {
             return false
         }
 
         if normalize {
-            t.Result().SetRunes(parsed.String())
+            t.SetString(parsed.String())
         }
         return true
     }
@@ -1017,13 +1034,12 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
         return mask
     }
 
-    return func(t API) bool {
+    return func(t *API) bool {
         if !mask(t) {
             return false
         }
-        r := t.Result()
-        bits, _ := strconv.Atoi(r.String())
-        t.Result().SetRunes(fmt.Sprintf("%d", bits))
+        bits, _ := strconv.Atoi(t.String())
+        t.SetString(fmt.Sprintf("%d", bits))
         return true
     }
 }
@@ -1057,13 +1073,15 @@ func MatchIPv6Net(normalize bool) Handler {
 // string "bork" would not match against the second form, but " bork" would.
 // In both cases, it would match the first form.
 func ModifyDrop(handler Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         child := t.Fork()
-        if handler(child) {
-            child.Reset()
-            child.Merge()
+        if handler(t) {
+            t.Reset()
+            t.Merge(child)
+            t.Dispose(child)
             return true
         }
+        t.Dispose(child)
         return false
     }
 }
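A sketch from the modifier tests in this commit: the dropped handler must still match, its output is simply discarded:

var c, a, m = tokenize.C, tokenize.A, tokenize.M
var coolFlag = c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool"))
// "--cool" matches and outputs just "cool"; the dashes are consumed but dropped.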
|
@ -1137,14 +1155,16 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
|
||||||
// modified string on output. The return value of the modfunc will replace the
|
// modified string on output. The return value of the modfunc will replace the
|
||||||
// resulting output.
|
// resulting output.
|
||||||
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
||||||
return func(t API) bool {
|
return func(t *API) bool {
|
||||||
child := t.Fork()
|
child := t.Fork()
|
||||||
if handler(child) {
|
if handler(t) {
|
||||||
s := modfunc(child.Result().String())
|
s := modfunc(t.String())
|
||||||
child.Result().SetRunes(s)
|
t.SetString(s)
|
||||||
child.Merge()
|
t.Merge(child)
|
||||||
|
t.Dispose(child)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
t.Dispose(child)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1155,8 +1175,8 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
|
||||||
// escape sequence like "\n" is kept as-is (a backslash character, followed by
|
// escape sequence like "\n" is kept as-is (a backslash character, followed by
|
||||||
// an 'n'-character).
|
// an 'n'-character).
|
||||||
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
|
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
|
||||||
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
|
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
|
||||||
literal := t.Result().String()
|
literal := t.String()
|
||||||
return literal
|
return literal
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -1166,9 +1186,9 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
|
||||||
// representation of the read Runes. This string is interpreted, meaning that an
|
// representation of the read Runes. This string is interpreted, meaning that an
|
||||||
// escape sequence like "\n" is translated to an actual newline control character
|
// escape sequence like "\n" is translated to an actual newline control character
|
||||||
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
|
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
|
||||||
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
|
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
|
||||||
// TODO ERROR HANDLING
|
// TODO ERROR HANDLING
|
||||||
interpreted, _ := interpretString(t.Result().String())
|
interpreted, _ := interpretString(t.String())
|
||||||
return interpreted
|
return interpreted
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -1190,9 +1210,9 @@ func interpretString(str string) (string, error) {
|
||||||
// Result, for which the Token.Value is set to a Rune-representation
|
// Result, for which the Token.Value is set to a Rune-representation
|
||||||
// of the read Rune.
|
// of the read Rune.
|
||||||
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
|
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
|
||||||
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
|
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
|
||||||
// TODO ERROR HANDLING --- not a 1 rune input
|
// TODO ERROR HANDLING --- not a 1 rune input
|
||||||
return t.Result().Rune(0)
|
return t.Rune(0)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1200,9 +1220,9 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler {
|
||||||
// Result, for which the Token.Value is set to a Byte-representation
|
// Result, for which the Token.Value is set to a Byte-representation
|
||||||
// of the read Rune.
|
// of the read Rune.
|
||||||
func MakeByteToken(toktype interface{}, handler Handler) Handler {
|
func MakeByteToken(toktype interface{}, handler Handler) Handler {
|
||||||
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
|
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
|
||||||
// TODO ERROR HANDLING --- not a 1 byte input
|
// TODO ERROR HANDLING --- not a 1 byte input
|
||||||
return byte(t.Result().Rune(0))
|
return byte(t.Rune(0))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1406,8 +1426,8 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
 }
 
 func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
-    return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
-        value, err := convert(t.Result().String())
+    return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
+        value, err := convert(t.String())
         if err != nil {
             // TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
             panic(fmt.Sprintf("%s token invalid (%s)", name, err))
@@ -1419,17 +1439,17 @@ func makeStrconvToken(name string, toktype interface{}, handler Handler, convert
 // MakeTokenByValue creates a Handler that will add a static Token value
 // to the Result.
 func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler {
-    return MakeTokenByCallback(toktype, handler, func(t API) interface{} { return value })
+    return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value })
 }
 
 // MakeTokenByCallback creates a Handler that will add a Token to the
 // Result, for which the Token.Value is to be generated by the provided
 // makeValue() callback function. The function gets the current API as
 // its input and must return the token value.
-func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler {
-    return func(t API) bool {
+func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler {
+    return func(t *API) bool {
         child := t.Fork()
-        if handler(child) {
+        if handler(t) {
             // The token is not added to the child here. The child might have produced its own
             // tokens and we want those to come after the token for the current parsing level.
             // By adding the token to the input API and then merging the child tokens, the order
@@ -1437,12 +1457,14 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
             // e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
             // tokens will end up in the order "date", "year", "month", "day". When we'd add the
             // token to the child here, the order would have been "year", "month", "day", "date".
-            token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
-            t.Result().AddTokens(token)
-            child.Merge()
+            token := Token{Type: toktype, Value: makeValue(t)}
+            t.AddTokens(token)
+            t.Merge(child)
+            t.Dispose(child)
 
             return true
         }
+        t.Dispose(child)
         return false
     }
 }
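The ordering comment inside MakeTokenByCallback is easiest to see with nested token makers. A hypothetical date grammar (the names and layout here are mine, not from the repository):

var c, a, tok = tokenize.C, tokenize.A, tokenize.T
var digits = c.OneOrMore(a.Digit)
var date = tok.Str("date",
    c.Seq(tok.Str("year", digits), a.Minus, tok.Str("month", digits), a.Minus, tok.Str("day", digits)))
// Because the outer token is added to the parent before the child's tokens are
// merged, the tokens come out as "date", "year", "month", "day".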
@@ -1450,15 +1472,18 @@
 // MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
 // take the tokens as produced by the handler and group them together in a single token.
 func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         child := t.Fork()
-        if handler(child) {
-            result := child.Result()
-            token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
-            result.SetTokens(token)
-            child.Merge()
+        if handler(t) {
+            tokens := t.Tokens()
+            tokensCopy := make([]Token, len(tokens))
+            copy(tokensCopy, tokens)
+            t.SetTokens(Token{Type: toktype, Value: tokensCopy})
+            t.Merge(child)
+            t.Dispose(child)
             return true
         }
+        t.Dispose(child)
         return false
     }
 }
@@ -4,22 +4,32 @@ import (
     "fmt"
     "testing"
 
-    "git.makaay.nl/mauricem/go-parsekit/tokenize"
+    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
+func TestCombinatorsTempDebug(t *testing.T) {
+    var a = tokenize.A
+    AssertHandlers(t, []HandlerT{
+        // {"024", a.IPv4CIDRMask, true, "24"},
+        // {"024", a.Octet, true, "24"},
+        {"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
+    })
+}
+
 func TestCombinators(t *testing.T) {
     var c, a, m = tokenize.C, tokenize.A, tokenize.M
     AssertHandlers(t, []HandlerT{
-        {"abc", c.Not(a.Rune('b')), true, "a"},
-        {"bcd", c.Not(a.Rune('b')), false, ""},
-        {"bcd", c.Not(a.Rune('b')), false, ""},
-        {"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
-        {"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
-        {"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
-        {"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
-        {"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
-        {"ababc", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
-        {"ababc", c.Repeated(5, a.Runes('a', 'b')), false, ""},
+        {"", c.Not(a.Rune('b')), false, ""},
+        {"abc not", c.Not(a.Rune('b')), true, "a"},
+        {"bcd not", c.Not(a.Rune('b')), false, ""},
+        {"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
+        {"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
+        {"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
+        {"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
+        {"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
+        {"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
+        {"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
+        {"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
         {"", c.Min(0, a.Rune('a')), true, ""},
         {"a", c.Min(0, a.Rune('a')), true, "a"},
        {"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
@@ -53,6 +63,7 @@ func TestCombinators(t *testing.T) {
         {"X", c.ZeroOrMore(a.Rune('e')), true, ""},
         {"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
         {"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
+        {"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
         {"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
         {"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
         {"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
@@ -62,8 +73,20 @@ func TestCombinators(t *testing.T) {
         {"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
         {`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
         {" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
-        {" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
-        {" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
+        {" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
+        {"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
+        {" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
+        {"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
+        {"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
+        {"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
+        {"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
+        {"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
+        {"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
+        {"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
+        {"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
+        {"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
+        {"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
+        {"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
     })
 }
@@ -110,8 +133,10 @@ func TestAtoms(t *testing.T) {
         {"\xbc with AnyRune", a.AnyRune, true, "�"},
         {"", a.AnyRune, false, ""},
         {"⌘", a.ValidRune, true, "⌘"},
-        {"\xbc with ValidRune", a.ValidRune, false, "�"},
+        {"\xbc with ValidRune", a.ValidRune, false, ""},
         {"", a.ValidRune, false, ""},
+        {"\xbc with InvalidRune", a.InvalidRune, true, "�"},
+        {"ok with InvalidRune", a.InvalidRune, false, ""},
         {" ", a.Space, true, " "},
         {"X", a.Space, false, ""},
         {"\t", a.Tab, true, "\t"},
@@ -225,38 +250,73 @@ func TestAtoms(t *testing.T) {
         {"0", a.IntegerBetween(-10, 10), true, "0"},
         {"10", a.IntegerBetween(-10, 10), true, "10"},
         {"11", a.IntegerBetween(0, 10), false, ""},
+        {"fifteen", a.IntegerBetween(0, 10), false, ""},
     })
 }
 
 func TestIPv4Atoms(t *testing.T) {
     var a = tokenize.A
     AssertHandlers(t, []HandlerT{
+        // Not normalized octet.
+        {"0X", tokenize.MatchOctet(false), true, "0"},
+        {"00X", tokenize.MatchOctet(false), true, "00"},
+        {"000X", tokenize.MatchOctet(false), true, "000"},
+        {"10X", tokenize.MatchOctet(false), true, "10"},
+        {"010X", tokenize.MatchOctet(false), true, "010"},
+        {"255123", tokenize.MatchOctet(false), true, "255"},
+        {"256123", tokenize.MatchOctet(false), false, ""},
+        {"300", tokenize.MatchOctet(false), false, ""},
+
+        // Octet.
+        {"0", tokenize.MatchOctet(false), true, "0"},
+        {"02", tokenize.MatchOctet(false), true, "02"},
+        {"003", tokenize.MatchOctet(false), true, "003"},
+        {"256", tokenize.MatchOctet(false), false, ""},
         {"0X", a.Octet, true, "0"},
-        {"00X", a.Octet, true, "00"},
-        {"000X", a.Octet, true, "000"},
+        {"00X", a.Octet, true, "0"},
+        {"000X", a.Octet, true, "0"},
         {"10X", a.Octet, true, "10"},
-        {"010X", a.Octet, true, "010"},
+        {"010X", a.Octet, true, "10"},
         {"255123", a.Octet, true, "255"},
         {"256123", a.Octet, false, ""},
         {"300", a.Octet, false, ""},
+
+        // IPv4 address.
+        {"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
+        {"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
         {"0.0.0.0", a.IPv4, true, "0.0.0.0"},
         {"10.20.30.40", a.IPv4, true, "10.20.30.40"},
         {"010.020.003.004", a.IPv4, true, "10.20.3.4"},
         {"255.255.255.255", a.IPv4, true, "255.255.255.255"},
         {"256.255.255.255", a.IPv4, false, ""},
+
+        // IPv4 CIDR netmask.
+        {"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
+        {"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
         {"0", a.IPv4CIDRMask, true, "0"},
+        {"00", a.IPv4CIDRMask, true, "0"},
+        {"000", a.IPv4CIDRMask, true, "0"},
         {"32", a.IPv4CIDRMask, true, "32"},
+        {"032", a.IPv4CIDRMask, true, "32"},
         {"33", a.IPv4CIDRMask, false, ""},
+
+        // IPv4 netmask in dotted quad format.
+        {"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
+        {"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
         {"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
         {"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
         {"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
         {"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
+
+        // IPv4 address + CIDR or dotted quad netmask.
         {"192.168.6.123", a.IPv4Net, false, ""},
+        {"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
+        {"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
         {"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
         {"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
         {"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
         {"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
-        {"10.0.0.10/16.0.0.0", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
+        {"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
     })
 }
@@ -292,7 +352,10 @@ func TestIPv6Atoms(t *testing.T) {
 func TestModifiers(t *testing.T) {
     var c, a, m = tokenize.C, tokenize.A, tokenize.M
     AssertHandlers(t, []HandlerT{
+        {"missed me!", m.Drop(a.Rune('w')), false, ""},
+        {"where are you?", m.Drop(a.Rune('w')), true, ""},
         {"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
+        {"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
         {" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
         {" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
         {" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
@@ -300,6 +363,7 @@ func TestModifiers(t *testing.T) {
         {" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
         {"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
         {"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
+        {"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
         {"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
         {"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
     })
|
||||||
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||||
AssertTokenMakers(t, []TokenMakerT{
|
AssertTokenMakers(t, []TokenMakerT{
|
||||||
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
|
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
|
||||||
[]tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
|
[]tokenize.Token{{Type: "A", Value: ""}}},
|
||||||
|
|
||||||
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
|
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
|
||||||
[]tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
|
[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},
|
||||||
|
|
||||||
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
|
||||||
[]tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
|
[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},
|
||||||
|
|
||||||
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
|
{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "<22> invalid rune"}}},
|
||||||
|
|
||||||
|
// I don't check the returned error here, but it's good enough to see that the parsing
|
||||||
|
// stopped after the illegal \g escape sequence.
|
||||||
|
{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
|
||||||
|
|
||||||
|
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
|
||||||
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
|
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
|
||||||
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
|
{Type: "bar", Value: byte('R')},
|
||||||
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
|
{Type: "bar", Value: byte('O')},
|
||||||
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
|
{Type: "bar", Value: byte('C')},
|
||||||
{Type: "bar", Runes: []rune("K"), Value: byte('K')},
|
{Type: "bar", Value: byte('K')},
|
||||||
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
|
{Type: "bar", Value: byte('S')},
|
||||||
}},
|
}},
|
||||||
|
|
||||||
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
|
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},
|
||||||
|
|
||||||
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
|
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
|
||||||
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
|
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
|
||||||
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
|
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
|
||||||
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
|
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
|
||||||
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
|
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
|
||||||
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
|
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
|
||||||
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
|
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
|
||||||
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
|
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
|
||||||
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
|
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},
|
||||||
|
|
||||||
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
|
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
|
||||||
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
|
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
|
||||||
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
|
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
|
||||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
|
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
|
||||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
|
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
|
||||||
|
|
||||||
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
|
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
|
||||||
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
|
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
|
||||||
|
|
||||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||||
{Type: "P", Runes: []rune("1"), Value: true},
|
{Type: "P", Value: true},
|
||||||
{Type: "P", Runes: []rune("t"), Value: true},
|
{Type: "P", Value: true},
|
||||||
{Type: "P", Runes: []rune("T"), Value: true},
|
{Type: "P", Value: true},
|
||||||
{Type: "P", Runes: []rune("true"), Value: true},
|
{Type: "P", Value: true},
|
||||||
{Type: "P", Runes: []rune("TRUE"), Value: true},
|
{Type: "P", Value: true},
|
||||||
{Type: "P", Runes: []rune("True"), Value: true},
|
{Type: "P", Value: true},
|
||||||
}},
|
}},
|
||||||
|
|
||||||
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||||
{Type: "P", Runes: []rune("0"), Value: false},
|
{Type: "P", Value: false},
|
||||||
{Type: "P", Runes: []rune("f"), Value: false},
|
{Type: "P", Value: false},
|
||||||
{Type: "P", Runes: []rune("F"), Value: false},
|
{Type: "P", Value: false},
|
||||||
{Type: "P", Runes: []rune("false"), Value: false},
|
{Type: "P", Value: false},
|
||||||
{Type: "P", Runes: []rune("FALSE"), Value: false},
|
{Type: "P", Value: false},
|
||||||
{Type: "P", Runes: []rune("False"), Value: false},
|
{Type: "P", Value: false},
|
||||||
}},
|
}},
|
||||||
|
|
||||||
|
{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTokenGroup_Match(t *testing.T) {
|
||||||
|
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||||
|
tokenizer := tokenize.New(tok.Group("Group",
|
||||||
|
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
|
||||||
|
|
||||||
|
api, err := tokenizer("xxxxx")
|
||||||
|
AssertTrue(t, err == nil, "Tokenizer result")
|
||||||
|
tokens := api.Tokens()
|
||||||
|
AssertEqual(t, 1, len(tokens), "Length of tokens slice")
|
||||||
|
contained := tokens[0].Value.([]tokenize.Token)
|
||||||
|
AssertEqual(t, 3, len(contained), "Length of contained tokens")
|
||||||
|
AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
|
||||||
|
AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
|
||||||
|
AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTokenGroup_Mismatch(t *testing.T) {
|
||||||
|
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||||
|
tokenizer := tokenize.New(tok.Group("Group",
|
||||||
|
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
|
||||||
|
|
||||||
|
api, err := tokenizer("12345")
|
||||||
|
AssertTrue(t, err == nil, "Tokenizer result")
|
||||||
|
tokens := api.Tokens()
|
||||||
|
AssertEqual(t, 0, len(tokens), "Length of tokens slice")
|
||||||
|
}
|
||||||
|
|
||||||
// I know, this is hell, but that's the whole point for this test :->
|
// I know, this is hell, but that's the whole point for this test :->
|
||||||
func TestCombination(t *testing.T) {
|
func TestCombination(t *testing.T) {
|
||||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||||
|
|
|
@@ -1,155 +0,0 @@
-package tokenize
-
-import (
-	"fmt"
-)
-
-// Result is a struct that is used for holding tokenizer results as produced
-// by a tokenize.Handler. It also provides the API that Handlers and Parsers
-// can use to store and retrieve the results.
-type Result struct {
-	runes  []rune  // runes as added to the result by tokenize.Handler functions
-	tokens []Token // Tokens as added to the result by tokenize.Handler functions
-	cursor Cursor  // current read cursor position, relative to the start of the file
-	offset int     // current rune offset relative to the Reader's sliding window
-	err    error   // can be used by a Handler to report a specific issue with the input
-}
-
-// Token defines a lexical token as produced by tokenize.Handlers.
-//
-// The only mandatory data in a Token are the Runes. The Type and Value fields
-// are optional fields that can be filled with data at will.
-//
-// The use of the Type field is to let a tokenizer communicate to
-// the parser what type of token it's handling.
-//
-// The use of the Value field is to store any kind af data along with the token.
-// One use of this can be found in the built-in token maker functions like
-// MakeInt8Token(), which store an interpreted version of the input string
-// in the Value field.
-type Token struct {
-	Runes []rune      // the runes that make up the token
-	Type  interface{} // optional token type, can be any type that a parser author sees fit
-	Value interface{} // optional token value, of any type as well
-}
-
-func (t Token) String() string {
-	tokenType := ""
-	if t.Type != nil {
-		tokenType = fmt.Sprintf("%v", t.Type)
-	}
-
-	value := ""
-	if t.Value != nil {
-		switch t.Value.(type) {
-		case []*Token:
-			return fmt.Sprintf("%v%v", tokenType, t.Value)
-		case string:
-			value = fmt.Sprintf("%q", t.Value)
-		case rune:
-			value = fmt.Sprintf("%v", t.Value)
-		case bool:
-			value = fmt.Sprintf("%v", t.Value)
-		default:
-			value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
-		}
-	}
-
-	return fmt.Sprintf("%v(%s)", tokenType, value)
-}
-
-// newResult initializes an empty Result struct.
-func newResult() Result {
-	return Result{}
-}
-
-// ClearRunes clears the runes in the Result.
-func (r *Result) ClearRunes() {
-	r.runes = []rune{}
-}
-
-// SetRunes replaces the Runes from the Result with the provided input.
-func (r *Result) SetRunes(s ...interface{}) {
-	r.ClearRunes()
-	r.addRunes("SetRunes", s...)
-}
-
-// AddRunes is used to add runes to the Result.
-func (r *Result) AddRunes(set ...interface{}) {
-	r.addRunes("AddRunes", set...)
-}
-
-func (r *Result) addRunes(name string, set ...interface{}) {
-	for _, s := range set {
-		switch s := s.(type) {
-		case string:
-			r.runes = append(r.runes, []rune(s)...)
-		case []rune:
-			r.runes = append(r.runes, s...)
-		case rune:
-			r.runes = append(r.runes, s)
-		default:
-			callerPanic(name, "tokenize.Result.{name}(): unsupported type '%T' used at {caller}", s)
-		}
-	}
-}
-
-// Runes retrieves the Runes from the Result.
-func (r *Result) Runes() []rune {
-	return r.runes
-}
-
-// Rune retrieve a single rune from the Result at the specified index.
-func (r *Result) Rune(idx int) rune {
-	return r.runes[idx]
-}
-
-// String returns the Runes from the Result as a string.
-func (r *Result) String() string {
-	return string(r.runes)
-}
-
-// ClearTokens clears the tokens in the Result.
-func (r *Result) ClearTokens() {
-	r.tokens = []Token{}
-}
-
-// SetTokens replaces the Tokens from the Result with the provided tokens.
-func (r *Result) SetTokens(tokens ...Token) {
-	r.tokens = tokens
-}
-
-// AddTokens is used to add Tokens to the Result.
-func (r *Result) AddTokens(tokens ...Token) {
-	r.tokens = append(r.tokens, tokens...)
-}
-
-// Tokens retrieves the Tokens from the Result.
-func (r *Result) Tokens() []Token {
-	return r.tokens
-}
-
-// Token retrieves a single Token from the Result at the specified index.
-func (r *Result) Token(idx int) Token {
-	return r.tokens[idx]
-}
-
-// Values retrieves a slice containing only the Values for the Result Tokens.
-func (r *Result) Values() []interface{} {
-	values := make([]interface{}, len(r.tokens))
-	for i, tok := range r.tokens {
-		values[i] = tok.Value
-	}
-	return values
-}
-
-// Value retrieves a single Value from the Result Token at the specified index.
-func (r *Result) Value(idx int) interface{} {
-	return r.tokens[idx].Value
-}
-
-// Cursor retrieves the read cursor from the Result. This is the first
-// cursor position after the runes that were read and accepted by the Handler.
-func (r *Result) Cursor() Cursor {
-	return r.cursor
-}

@@ -1,58 +0,0 @@
-package tokenize_test
-
-import (
-	"fmt"
-	"strings"
-	"testing"
-
-	"git.makaay.nl/mauricem/go-parsekit/tokenize"
-)
-
-func ExampleToken() {
-	t0 := tokenize.Token{}
-
-	t1 := tokenize.Token{
-		Type:  "Number",
-		Value: 224,
-	}
-
-	const TName = 1
-
-	t2 := tokenize.Token{
-		Type:  TName,
-		Value: "John",
-	}
-
-	t3 := tokenize.Token{
-		Value: 42,
-	}
-
-	fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)
-
-	// Result: [ip("0.0.0.0") mask((int8)0)]
-	// Result: [ip("192.168.0.1") mask((int8)24)]
-	// Result: [ip("255.255.255.255") mask((int8)32)]
-	// Error: mismatch at start of file
-	// Error: mismatch at start of file
-}
-
-func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
-	i := tokenize.NewAPI(strings.NewReader("Testing"))
-	i.Result().SetRunes("string")
-	AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
-	i.Result().SetRunes([]rune("rune slice"))
-	AssertEqual(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
-	i.Result().SetRunes('X')
-	AssertEqual(t, "X", string(i.Result().String()), "i.Result() with rune input")
-}
-
-func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
-	AssertPanic(t, PanicT{
-		Function: func() {
-			i := tokenize.NewAPI(strings.NewReader("Testing"))
-			i.Result().SetRunes(1234567)
-		},
-		Regexp: true,
-		Expect: `tokenize\.Result\.SetRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
-	})
-}

@@ -1,4 +1,4 @@
-package tokenize2
+package tokenize

 import (
 	"fmt"
@@ -1,9 +1,9 @@
-package tokenize2_test
+package tokenize_test

 import (
 	"fmt"

-	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
+	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )

 func ExampleToken_String() {
@@ -9,7 +9,7 @@ import (
 // Func is the function signature as returned by New: a function that takes
 // any supported type of input, executes a tokenizer run and returns a
 // Result struct (possibly nil) and an error (possibly nil).
-type Func func(input interface{}) (*Result, error)
+type Func func(input interface{}) (*API, error)

 // New instantiates a new tokenizer.
 //
@@ -28,7 +28,7 @@ type Func func(input interface{}) (*Result, error)
 // against the provided input data. For an overview of allowed inputs, take a
 // look at the documentation for parsekit.read.New().
 func New(tokenHandler Handler) Func {
-	return func(input interface{}) (*Result, error) {
+	return func(input interface{}) (*API, error) {
 		api := NewAPI(input)
 		ok := tokenHandler(api)

@@ -36,6 +36,6 @@ func New(tokenHandler Handler) Func {
 			err := fmt.Errorf("mismatch at %s", Cursor{})
 			return nil, err
 		}
-		return api.Result(), nil
+		return api, nil
 	}
 }

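Since Func now returns the *API itself rather than a detached *Result, callers read results straight off the API after a run. A hedged sketch (A.Digits is assumed from the library's atom set; any Handler works the same way):

	// Sketch: the function built by tokenize.New now yields (*tokenize.API, error).
	digits := tokenize.New(tokenize.A.Digits)
	if api, err := digits("12345 and more"); err == nil {
		fmt.Println(api.String()) // the accepted runes: "12345"
	}
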
@@ -7,7 +7,7 @@ import (
 	"testing"
 	"unicode/utf8"

-	"git.makaay.nl/mauricem/go-parsekit/tokenize"
+	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )

 // TODO For error handling, it would be really cool if for example the
@@ -55,7 +55,7 @@ func ExampleNew() {

 func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
 	api := makeTokenizeAPI()
-	r, _ := (&api).NextRune()
+	r, _ := api.NextRune()
 	AssertEqual(t, 'T', r, "first rune")
 }

@@ -67,7 +67,7 @@ func TestInputCanAcceptRunesFromReader(t *testing.T) {
 	i.Accept()
 	i.NextRune()
 	i.Accept()
-	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
+	AssertEqual(t, "Tes", i.String(), "i.String()")
 }

 func TestCallingNextRuneTwice_Panics(t *testing.T) {
@@ -78,52 +78,92 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
 			i.NextRune()
 		},
 		Regexp: true,
-		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
+		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
+			`without a prior call to Accept\(\)`,
 	})
 }

 func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
-	input := makeTokenizeAPI()
+	api := makeTokenizeAPI()
 	AssertPanic(t, PanicT{
-		Function: (&input).Accept,
+		Function: api.Accept,
 		Regexp: true,
-		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
+		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
+			`without first calling NextRune\(\)`,
 	})
 }

-func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
+func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
+	api := tokenize.NewAPI("")
+	AssertPanic(t, PanicT{
+		Function: func() {
+			api.NextRune()
+			api.Accept()
+		},
+		Regexp: true,
+		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
+			`, but the prior call to NextRune\(\) failed`,
+	})
+}
+
+func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
 	AssertPanic(t, PanicT{
 		Function: func() {
 			i := makeTokenizeAPI()
-			i.Merge()
+			i.Merge(0)
 		},
 		Regexp: true,
 		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
 }

-func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
+func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
 	AssertPanic(t, PanicT{
 		Function: func() {
 			i := makeTokenizeAPI()
-			f := i.Fork()
-			i.NextRune()
-			f.Merge()
+			child := i.Fork()
+			i.Fork()
+			i.Merge(child)
 		},
 		Regexp: true,
-		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
+		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
+			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
 }

-func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
+func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
 	AssertPanic(t, PanicT{
 		Function: func() {
 			i := makeTokenizeAPI()
-			f := i.Fork()
-			g := f.Fork()
-			i.Fork()
-			g.Merge()
+			i.Dispose(0)
 		},
 		Regexp: true,
-		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
+		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
+}
+
+func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := makeTokenizeAPI()
+			child := i.Fork()
+			i.Fork()
+			i.Dispose(child)
+		},
+		Regexp: true,
+		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
+			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
+}
+
+func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
+	AssertPanic(t, PanicT{
+		Function: func() {
+			i := makeTokenizeAPI()
+			i.Fork()
+			g := i.Fork()
+			i.Fork()
+			i.Merge(g)
+		},
+		Regexp: true,
+		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
+			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
 }

 func TestForkingInput_ClearsLastRune(t *testing.T) {
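These reworked panic tests pin down the new contract: Merge() and Dispose() must be handed the stack level returned by the matching Fork(), and forks must be unwound innermost-first. A sketch of the intended call pattern (the helper name is illustrative; it mirrors ExampleAPI_Fork further down in this commit):

	// lookahead runs a handler on a fork; every Fork() is paired with a
	// Dispose() at the same stack level, merging only on success.
	lookahead := func(t *tokenize.API, handler tokenize.Handler) bool {
		child := t.Fork() // push a new stack level
		ok := handler(t)  // run the wrapped handler on the fork
		if ok {
			t.Merge(child) // keep the fork's runes, tokens and cursor
		}
		t.Dispose(child) // always pop back to the parent level
		return ok
	}
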
@@ -135,26 +175,26 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
 			i.Accept()
 		},
 		Regexp: true,
-		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
+		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
 	})
 }

 func TestAccept_UpdatesCursor(t *testing.T) {
 	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
-	AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
+	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
 	for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
 		i.NextRune()
 		i.Accept()
 	}
-	AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
+	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
 	i.NextRune() // read "\n", cursor ends up at start of new line
 	i.Accept()
-	AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
+	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
 	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
 		i.NextRune()
 		i.Accept()
 	}
-	AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
+	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
 }

 func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
@@ -167,16 +207,17 @@ func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
 }

 func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
 	i := tokenize.NewAPI(strings.NewReader("X"))
-	f := i.Fork()
-	f.NextRune()
-	f.Accept()
-	r, err := f.NextRune()
+	child := i.Fork()
+	i.NextRune()
+	i.Accept()
+	r, err := i.NextRune()
 	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
-	r, err = i.NextRune()
+	i.Dispose(child)      // brings the read offset back to the start
+	r, err = i.NextRune() // so here we should see the same rune
 	AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
 	AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
 }

-func makeTokenizeAPI() tokenize.API {
+func makeTokenizeAPI() *tokenize.API {
 	return tokenize.NewAPI("Testing")
 }

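As the reworked end-of-file test shows, Dispose() does more than pop a stack level: it rewinds the read offset to where the fork started, so input consumed inside the fork becomes readable again from the parent. A condensed sketch:

	// Sketch: input read inside a fork can be re-read after Dispose().
	api := tokenize.NewAPI("X")
	child := api.Fork()
	api.NextRune()
	api.Accept()           // the fork consumed 'X'
	api.Dispose(child)     // rewinds the offset to the fork point
	r, _ := api.NextRune() // reads 'X' again at the parent level
	_ = r
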
@@ -5,33 +5,33 @@ import (
 )

 func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
-	// TODO FIXME Speed change
 	// Create input, accept the first rune.
 	i := NewAPI("Testing")
 	i.NextRune()
 	i.Accept() // T
-	AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
+	AssertEqual(t, "T", i.String(), "accepted rune in input")
 	// Fork
-	f := i.Fork()
-	AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
-	AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
-	AssertEqual(t, 1, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
-	AssertEqual(t, 1, f.state.stack[f.stackLevel].offset, "child offset")
+	child := i.Fork()
+	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "parent cursor.Byte")
+	AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
+	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
+	AssertEqual(t, 1, i.stackFrame.offset, "child offset")
 	// Accept two runes via fork.
-	f.NextRune()
-	f.Accept() // e
-	f.NextRune()
-	f.Accept() // s
-	AssertEqual(t, "es", f.Result().String(), "result runes in fork")
-	AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
-	AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
-	AssertEqual(t, 3, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
-	AssertEqual(t, 3, f.state.stack[f.stackLevel].offset, "child offset")
+	i.NextRune()
+	i.Accept() // e
+	i.NextRune()
+	i.Accept() // s
+	AssertEqual(t, "es", i.String(), "result runes in fork")
+	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
+	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
+	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
+	AssertEqual(t, 3, i.stackFrame.offset, "child offset")
 	// Merge fork back into parent
-	f.Merge()
-	AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
-	AssertEqual(t, 3, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
-	AssertEqual(t, 3, i.state.stack[i.stackLevel].offset, "parent offset")
+	i.Merge(child)
+	i.Dispose(child)
+	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
+	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
+	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
 }

 func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@@ -39,86 +39,83 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
 	i.NextRune()
 	i.Accept()
 	f1 := i.Fork()
-	f1.NextRune()
-	f1.Accept()
-	f2 := f1.Fork()
-	f2.NextRune()
-	f2.Accept()
-	// TODO FIXME Speed changes
-	// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
-	// AssertEqual(t, 1, i.result.offset, "i.offset A")
-	// AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
-	// AssertEqual(t, 2, f1.result.offset, "f1.offset A")
-	// AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
-	// AssertEqual(t, 3, f2.result.offset, "f2.offset A")
-	// f2.Merge()
-	// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
-	// AssertEqual(t, 1, i.result.offset, "i.offset B")
-	// AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
-	// AssertEqual(t, 3, f1.result.offset, "f1.offset B")
-	// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
-	// AssertEqual(t, 3, f2.result.offset, "f2.offset B")
-	// f1.Merge()
-	// AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
-	// AssertEqual(t, 3, i.result.offset, "i.offset C")
-	// AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
-	// AssertEqual(t, 3, f1.result.offset, "f1.offset C")
-	// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
-	// AssertEqual(t, 3, f2.result.offset, "f2.offset C")
-}
-
-func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
-	i := NewAPI("Testing")
-	f1 := i.Fork()
-	f2 := f1.Fork()
-	//f3 := f2.Fork()
-	f2.Fork()
-	f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
-	//f5 := f4.Fork()
-	f4.Fork()
-	// TODO FIXME Speed changes
-	// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
-	// AssertEqual(t, true, i.child == &f1, "i.child == f1")
-	// AssertEqual(t, true, f1.parent == &i, "f1.parent == i")
-	// AssertEqual(t, true, f1.child == &f4, "f1.child == f4")
-	// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
-	// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
-	// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
-	// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
-	// AssertEqual(t, true, f4.parent == &f1, "f4.parent == f1")
-	// AssertEqual(t, true, f4.child == &f5, "f4.child == f5")
-	// AssertEqual(t, true, f5.parent == &f4, "f5.parent == f4")
-	// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
-
 	i.NextRune()
-	// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
-	// AssertEqual(t, true, i.child == nil, "i.child == nil")
-	// AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
-	// AssertEqual(t, true, f1.child == nil, "f1.child == nil")
-	// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
-	// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
-	// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
-	// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
-	// AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
-	// AssertEqual(t, true, f4.child == nil, "f4.child == nil")
-	// AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
-	// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
+	i.Accept()
+	f2 := i.Fork()
+	i.NextRune()
+	i.Accept()
+	AssertEqual(t, "s", i.String(), "f2 String()")
+	AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
+	i.Merge(f2)
+	i.Dispose(f2)
+	AssertEqual(t, "es", i.String(), "f1 String()")
+	AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
+	i.Merge(f1)
+	i.Dispose(f1)
+	AssertEqual(t, "Tes", i.String(), "top-level API String()")
+	AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
 }

 func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
-	// TODO FIXME Speed changes
 	i := NewAPI("Testing")
 	r, _ := i.NextRune()
 	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
-	// AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
+	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
+	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
 	i.Accept()
-	// AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
-	// AssertEqual(t, 1, i.result.offset, "API.result.offset")
+	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
+	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
 	r, _ = i.NextRune()
 	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
 }

+func TestFlushInput(t *testing.T) {
+	api := NewAPI("cool")
+
+	// Flushing without any read data is okay. FlushInput() will return
+	// false in this case, and nothing else happens.
+	AssertTrue(t, api.FlushInput() == false, "flush input at start")
+
+	api.NextRune()
+	api.Accept()
+	api.NextRune()
+	api.Accept()
+
+	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
+	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")
+
+	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")
+
+	// Read offset is now zero, but reading should continue after "co".
+	api.NextRune()
+	api.Accept()
+	api.NextRune()
+	api.Accept()
+
+	AssertEqual(t, "cool", api.String(), "end result")
+}
+
+func TestInputFlusherWrapper(t *testing.T) {
+	runeA := A.Rune('a')
+	flushB := C.FlushInput(A.Rune('b'))
+	api := NewAPI("abaab")
+	runeA(api)
+	AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
+	AssertEqual(t, "a", api.String(), "runes after 1 read")
+	flushB(api)
+	AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
+	AssertEqual(t, "ab", api.String(), "runes after 2 reads")
+	runeA(api)
+	AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
+	AssertEqual(t, "aba", api.String(), "runes after 3 reads")
+	runeA(api)
+	AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
+	AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
+	flushB(api)
+	AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
+	AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
+}
+
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
 	if expected != actual {
 		t.Errorf(
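The new flush tests above document how FlushInput keeps memory bounded on long inputs: once accepted runes are flushed, the reader's sliding window shrinks and the read offset restarts at zero, while the collected result runes stay intact. A hedged sketch of wrapping a handler so each match flushes the buffer (combinator names mirror the tests):

	// Sketch: each matched 'b' also flushes the already accepted input
	// from the read buffer, so the buffer never grows unbounded.
	var c, a = tokenize.C, tokenize.A
	handler := c.OneOrMore(a.Rune('a').Or(c.FlushInput(a.Rune('b'))))
	api := tokenize.NewAPI("abaab")
	handler(api) // accepts "abaab"; the buffer was flushed after each 'b'
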

tokenize2/api.go
@@ -1,374 +0,0 @@
-package tokenize2
-
-import (
-	"git.makaay.nl/mauricem/go-parsekit/read"
-)
-
-// API holds the internal state of a tokenizer run and provides an API that
-// tokenize.Handler functions can use to:
-//
-// • read and accept runes from the input (NextRune, Accept)
-//
-// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
-//
-// • flush already read input data when not needed anymore (FlushInput)
-//
-// • retrieve the tokenizer Result struct (Result) to read or modify the results
-//
-// BASIC OPERATION:
-//
-// To retrieve the next rune from the API, call the NextRune() method.
-//
-// When the rune is to be accepted as input, call the method Accept(). The rune
-// is then added to the result runes of the API and the read cursor is moved
-// forward.
-//
-// By invoking NextRune() + Accept() multiple times, the result can be extended
-// with as many runes as needed. Runes collected this way can later on be
-// retrieved using the method Result().Runes().
-//
-// It is mandatory to call Accept() after retrieving a rune, before calling
-// NextRune() again. Failing to do so will result in a panic.
-//
-// Next to adding runes to the result, it is also possible to modify the
-// stored runes or to add lexical Tokens to the result. For all things
-// concerning results, take a look at the Result struct, which
-// can be accessed though the method Result().
-//
-// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT:
-//
-// Sometimes, we must be able to perform a lookahead, which might either
-// succeed or fail. In case of a failing lookahead, the state of the
-// API must be brought back to the original state, so we can try
-// a different route.
-//
-// The way in which this is supported, is by forking an API struct by
-// calling method Fork(). This will return a forked child API, with
-// empty result data, but using the same read cursor position as the
-// forked parent.
-//
-// After forking, the same interface as described for BASIC OPERATION can be
-// used to fill the results. When the lookahead was successful, then
-// Merge() can be called on the forked child to append the child's results
-// to the parent's results, and to move the read cursor position to that
-// of the child.
-//
-// When the lookahead was unsuccessful, then the forked child API can
-// disposed by calling Dispose() on the forked child. This is not mandatory.
-// Garbage collection will take care of this automatically.
-// The parent API was never modified, so it can safely be used after disposal
-// as if the lookahead never happened.
-//
-// Opinionized note:
-// Many tokenizers/parsers take a different approach on lookaheads by using
-// peeks and by moving the read cursor position back and forth, or by putting
-// read input back on the input stream. That often leads to code that is
-// efficient, however, in my opinion, not very intuitive to read. It can also
-// be tedious to get the cursor position back at the correct position, which
-// can lead to hard to track bugs. I much prefer this forking method, since
-// no bookkeeping has to be implemented when implementing a parser.
-type API struct {
-	reader      *read.Buffer // the input data reader
-	lastRune    rune         // the rune as retrieved by the last NextRune() calll
-	lastRuneErr error        // the error for the last NextRune() call
-	runeRead    bool         // whether or not a rune was read using NextRune()
-	runes       []rune       // the rune stack
-	tokens      []Token      // the token stack
-	stackFrames []stackFrame // the stack frames, containing stack level-specific data
-	stackLevel  int          // the current stack level
-	stackFrame  *stackFrame  // the current stack frame
-}
-
-type stackFrame struct {
-	offset     int // current rune offset relative to the Reader's sliding window
-	runeStart  int
-	runeEnd    int
-	tokenStart int
-	tokenEnd   int
-	cursor     Cursor
-
-	// TODO
-	err error // can be used by a Handler to report a specific issue with the input
-}
-
-const initialStackDepth = 10
-const initialTokenDepth = 10
-const initialRuneDepth = 10
-
-// NewAPI initializes a new API struct, wrapped around the provided input.
-// For an overview of allowed inputs, take a look at the documentation
-// for parsekit.read.New().
-func NewAPI(input interface{}) *API {
-	api := &API{
-		reader:      read.New(input),
-		runes:       make([]rune, 0, initialRuneDepth),
-		tokens:      make([]Token, 0, initialTokenDepth),
-		stackFrames: make([]stackFrame, 1, initialStackDepth),
-	}
-	api.stackFrame = &api.stackFrames[0]
-
-	return api
-}
-
-// NextRune returns the rune at the current read offset.
-//
-// When an invalid UTF8 rune is encountered on the input, it is replaced with
-// the utf.RuneError rune. It's up to the caller to handle this as an error
-// when needed.
-//
-// After reading a rune it must be Accept()-ed to move the read cursor forward
-// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
-// without explicitly accepting, this method will panic. You can see this as a
-// built-in unit test, enforcing correct serialization of API method calls.
-func (i *API) NextRune() (rune, error) {
-	if i.runeRead {
-		callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"without a prior call to Accept()")
-	}
-
-	readRune, err := i.reader.RuneAt(i.stackFrame.offset)
-	i.lastRune = readRune
-	i.lastRuneErr = err
-	i.runeRead = true
-
-	return readRune, err
-}
-
-// Accept the last rune as read by NextRune() into the Result runes and move
-// the cursor forward.
-//
-// It is not allowed to call Accept() when the previous call to NextRune()
-// returned an error. Calling Accept() in such case will result in a panic.
-func (i *API) Accept() {
-	if !i.runeRead {
-		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"without first calling NextRune()")
-	} else if i.lastRuneErr != nil {
-		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
-			"but the prior call to NextRune() failed")
-	}
-
-	i.runes = append(i.runes, i.lastRune)
-	i.stackFrame.runeEnd++
-	i.stackFrame.cursor.moveByRune(i.lastRune)
-	i.stackFrame.offset++
-	i.runeRead = false
-}
-
-// Fork forks off a child of the API struct. It will reuse the same
-// read buffer and cursor position, but for the rest this is a fresh API.
-//
-// By forking an API, you can freely work with the forked child, without
-// affecting the parent API. This is for example useful when you must perform
-// some form of lookahead.
-//
-// When processing of the Handler was successful and you want to add the results
-// to the parent API, you can call Merge() on the forked child.
-// This will add the results to the results of the parent (runes, tokens).
-// It also updates the read cursor position of the parent to that of the child.
-//
-// When the lookahead was unsuccessful, then the forked child API can
-// disposed by calling Dispose() on the forked child. This is not mandatory.
-// Garbage collection will take care of this automatically.
-// The parent API was never modified, so it can safely be used after disposal
-// as if the lookahead never happened.
-func (i *API) Fork() int {
-	newStackLevel := i.stackLevel + 1
-	newStackSize := newStackLevel + 1
-
-	// Grow the stack frames capacity when needed.
-	if cap(i.stackFrames) < newStackSize {
-		newFrames := make([]stackFrame, newStackSize, newStackSize*2)
-		copy(newFrames, i.stackFrames)
-		i.stackFrames = newFrames
-	} else {
-		i.stackFrames = i.stackFrames[0:newStackSize]
-	}
-
-	parent := i.stackFrame
-	i.stackLevel++
-	i.stackFrame = &i.stackFrames[i.stackLevel]
-	*i.stackFrame = *parent
-	i.stackFrame.runeStart = parent.runeEnd
-	i.stackFrame.tokenStart = parent.tokenEnd
-	i.runeRead = false
-
-	return i.stackLevel
-}
-
-// Merge appends the results of a forked child API (runes, tokens) to the
-// results of its parent. The read cursor of the parent is also updated
-// to that of the forked child.
-//
-// After the merge operation, the child results are reset so it can immediately
-// be reused for performing another match. This means that all Result data are
-// cleared, but the read cursor position is kept at its current position.
-// This allows a child to feed results in chunks to its parent.
-//
-// Once the child is no longer needed, it can be disposed of by using the
-// method Dispose(), which will return the tokenizer to the parent.
-func (i *API) Merge(stackLevel int) {
-	if stackLevel == 0 {
-		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"on the top-level API stack level 0")
-	}
-	if stackLevel != i.stackLevel {
-		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"on API stack level %d, but the current stack level is %d "+
-			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
-	}
-
-	parent := &i.stackFrames[stackLevel-1]
-
-	if parent.runeEnd == i.stackFrame.runeStart {
-		// The end of the parent slice aligns with the start of the child slice.
-		// Because of this, to merge the parent slice can simply be expanded
-		// to include the child slice.
-		// parent : |----------|
-		// child:              |------|
-		// After merge operation:
-		// parent: |-----------------|
-		// child:                    |---> continue reading from here
-		parent.runeEnd = i.stackFrame.runeEnd
-		i.stackFrame.runeStart = i.stackFrame.runeEnd
-	} else {
-		// The end of the parent slice does not align with the start of the
-		// child slice. The child slice has to be copied onto the end of
-		// the parent slice.
-		// parent : |----------|
-		// child:                |------|
-		// After merge operation:
-		// parent: |-----------------|
-		// child:                    |---> continue reading from here
-		i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
-		parent.runeEnd = len(i.runes)
-		i.stackFrame.runeStart = parent.runeEnd
-		i.stackFrame.runeEnd = parent.runeEnd
-	}
-
-	// The same logic applies to tokens.
-	if parent.tokenEnd == i.stackFrame.tokenStart {
-		parent.tokenEnd = i.stackFrame.tokenEnd
-		i.stackFrame.tokenStart = i.stackFrame.tokenEnd
-	} else {
-		i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
-		parent.tokenEnd = len(i.tokens)
-		i.stackFrame.tokenStart = parent.tokenEnd
-		i.stackFrame.tokenEnd = parent.tokenEnd
-	}
-
-	parent.offset = i.stackFrame.offset
-	parent.cursor = i.stackFrame.cursor
-
-	i.stackFrame.err = nil
-	i.runeRead = false
-}
-
-func (i *API) Dispose(stackLevel int) {
-	if stackLevel == 0 {
-		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"on the top-level API stack level 0")
-	}
-	if stackLevel != i.stackLevel {
-		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
-			"on API stack level %d, but the current stack level is %d "+
-			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
-	}
-
-	i.runeRead = false
-	i.stackLevel = stackLevel - 1
-	i.stackFrames = i.stackFrames[:stackLevel]
-	i.stackFrame = &i.stackFrames[stackLevel-1]
-	i.runes = i.runes[0:i.stackFrame.runeEnd]
-	i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
-}
-
-func (i *API) Reset() {
-	i.runeRead = false
-	i.stackFrame.runeStart = i.stackFrame.runeEnd
-	i.stackFrame.tokenStart = i.stackFrame.tokenEnd
-	i.stackFrame.err = nil
-}
-
-// FlushInput flushes processed input data from the read.Buffer.
-// In this context 'processed' means all runes that were read using NextRune()
-// and that were added to the results using Accept().
-//
-// Note:
-// When writing your own TokenHandler, you normally won't have to call this
-// method yourself. It is automatically called by parsekit when needed.
-func (i *API) FlushInput() bool {
-	// result := &(i.state.stack[i.stackLevel])
-	if i.stackFrame.offset > 0 {
-		i.reader.Flush(i.stackFrame.offset)
-		i.stackFrame.offset = 0
-		return true
-	}
-	return false
-}
-
-func (i *API) String() string {
-	return string(i.Runes())
-}
-
-func (i *API) Runes() []rune {
-	return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
-}
-
-func (i *API) Rune(offset int) rune {
-	return i.runes[i.stackFrame.runeStart+offset]
-}
-
-func (i *API) ClearRunes() {
-	i.runes = i.runes[:i.stackFrame.runeStart]
-	i.stackFrame.runeEnd = i.stackFrame.runeStart
-}
-
-func (i *API) SetRunes(runes ...rune) {
-	i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
-	i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
-}
-
-func (i *API) AddRunes(runes ...rune) {
-	i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
-	i.stackFrame.runeEnd += len(runes)
-}
-
-func (i *API) AddString(s string) {
-	i.AddRunes([]rune(s)...)
-}
-
-func (i *API) SetString(s string) {
-	i.SetRunes([]rune(s)...)
-}
-
-func (i *API) Cursor() Cursor {
-	return i.stackFrame.cursor
-}
-
-func (i *API) Tokens() []Token {
-	return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
-}
-
-func (i *API) Token(offset int) Token {
-	return i.tokens[i.stackFrame.tokenStart+offset]
-}
-
-func (i *API) TokenValue(offset int) interface{} {
-	return i.tokens[i.stackFrame.tokenStart+offset].Value
-}
-
-func (i *API) ClearTokens() {
-	i.tokens = i.tokens[:i.stackFrame.tokenStart]
-	i.stackFrame.tokenEnd = i.stackFrame.tokenStart
-}
-
-func (i *API) SetTokens(tokens ...Token) {
-	i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
-	i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
-}
-
-func (i *API) AddTokens(tokens ...Token) {
-	i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
-	i.stackFrame.tokenEnd += len(tokens)
-}

@ -1,330 +0,0 @@
|
||||||
package tokenize2_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
|
|
||||||
)
|
|
||||||
|
|
||||||
func ExampleNewAPI() {
|
|
||||||
tokenize.NewAPI("The input that the API will handle")
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_NextRune() {
|
|
||||||
api := tokenize.NewAPI("The input that the API will handle")
|
|
||||||
r, err := api.NextRune()
|
|
||||||
fmt.Printf("Rune read from input; %c\n", r)
|
|
||||||
fmt.Printf("The error: %v\n", err)
|
|
||||||
fmt.Printf("API results: %q\n", api.String())
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// Rune read from input; T
|
|
||||||
// The error: <nil>
|
|
||||||
// API results: ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_Accept() {
|
|
||||||
api := tokenize.NewAPI("The input that the API will handle")
|
|
||||||
api.NextRune() // reads 'T'
|
|
||||||
api.Accept() // adds 'T' to the API results
|
|
||||||
api.NextRune() // reads 'h'
|
|
||||||
api.Accept() // adds 'h' to the API results
|
|
||||||
api.NextRune() // reads 'e', but it is not added to the API results
|
|
||||||
|
|
||||||
fmt.Printf("API results: %q\n", api.String())
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// API results: "Th"
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_modifyingResults() {
|
|
||||||
api := tokenize.NewAPI("")
|
|
||||||
|
|
||||||
api.AddString("Some runes")
|
|
||||||
api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
|
|
||||||
api.AddRunes(' ', 'i', 'n', ' ')
|
|
||||||
api.AddString("various ways")
|
|
||||||
fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
|
|
||||||
fmt.Printf("API result runes as string: %q\n", api.String())
|
|
||||||
|
|
||||||
api.SetString("new ")
|
|
||||||
api.AddString("set ")
|
|
||||||
api.AddString("of ")
|
|
||||||
api.AddRunes('r', 'u', 'n', 'e', 's')
|
|
||||||
fmt.Printf("API result runes as string: %q\n", api.String())
|
|
||||||
fmt.Printf("API result runes: %q\n", api.Runes())
|
|
||||||
fmt.Printf("API third rune: %q\n", api.Rune(2))
|
|
||||||
|
|
||||||
api.AddTokens(tokenize.Token{
|
|
||||||
Type: 42,
|
|
||||||
Value: "towel"})
|
|
||||||
api.AddTokens(tokenize.Token{
|
|
||||||
Type: 73,
|
|
||||||
Value: "Zaphod"})
|
|
||||||
fmt.Printf("API result tokens: %v\n", api.Tokens())
|
|
||||||
fmt.Printf("API second result token: %v\n", api.Token(1))
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
|
|
||||||
// API result runes as string: "Some runes added in various ways"
|
|
||||||
// API result runes as string: "new set of runes"
|
|
||||||
// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
|
|
||||||
// API third rune: 'w'
|
|
||||||
// API result tokens: [42("towel") 73("Zaphod")]
|
|
||||||
// API second result token: 73("Zaphod")
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_Reset() {
|
|
||||||
api := tokenize.NewAPI("Very important input!")
|
|
||||||
|
|
||||||
api.NextRune()
|
|
||||||
api.Accept()
|
|
||||||
api.NextRune()
|
|
||||||
api.Accept()
|
|
||||||
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
|
|
||||||
|
|
||||||
// Reset clears the results, but keeps the cursor position.
|
|
||||||
api.Reset()
|
|
||||||
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
|
|
||||||
|
|
||||||
api.NextRune()
|
|
||||||
api.Accept()
|
|
||||||
api.NextRune()
|
|
||||||
api.Accept()
|
|
||||||
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// API results: "Ve" at line 1, column 3
|
|
||||||
// API results: "" at line 1, column 3
|
|
||||||
// API results: "ry" at line 1, column 5
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_Fork() {
|
|
||||||
// This custom Handler checks for input 'a', 'b' or 'c'.
|
|
||||||
abcHandler := func(t *tokenize.API) bool {
|
|
||||||
a := tokenize.A
|
|
||||||
for _, r := range []rune{'a', 'b', 'c'} {
|
|
||||||
child := t.Fork() // fork, so we won't change parent t
|
|
||||||
if a.Rune(r)(t) {
|
|
||||||
t.Merge(child) // accept results into parent of child
|
|
||||||
t.Dispose(child) // return to the parent level
|
|
||||||
return true // and report a successful match
|
|
||||||
}
|
|
||||||
t.Dispose(child) // return to the parent level
|
|
||||||
}
|
|
||||||
// If we get here, then no match was found. Return false to communicate
|
|
||||||
// this to the caller.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: a custom Handler is normally not what you need.
|
|
||||||
// You can make use of the parser/combinator tooling to make the
|
|
||||||
// implementation a lot simpler and to take care of forking at
|
|
||||||
// the appropriate places. The handler from above can be replaced with:
|
|
||||||
simpler := tokenize.A.RuneRange('a', 'c')
|
|
||||||
|
|
||||||
result, err := tokenize.New(abcHandler)("another test")
|
|
||||||
fmt.Println(result, err)
|
|
||||||
result, err = tokenize.New(simpler)("curious")
|
|
||||||
fmt.Println(result, err)
|
|
||||||
result, err = tokenize.New(abcHandler)("bang on!")
|
|
||||||
fmt.Println(result, err)
|
|
||||||
result, err = tokenize.New(abcHandler)("not a match")
|
|
||||||
fmt.Println(result, err)
|
|
||||||
|
|
||||||
// Output:
|
|
||||||
// a <nil>
|
|
||||||
// c <nil>
|
|
||||||
// b <nil>
|
|
||||||
// <nil> mismatch at start of file
|
|
||||||
}
|
|
||||||
|
|
||||||
func ExampleAPI_Merge() {
|
|
||||||
tokenHandler := func(t *tokenize.API) bool {
|
|
||||||
child1 := t.Fork()
|
|
||||||
t.NextRune() // reads 'H'
|
|
||||||
t.Accept()
|
|
||||||
t.NextRune() // reads 'i'
|
|
||||||
t.Accept()
|
|
||||||
|
|
||||||
child2 := t.Fork()
|
|
||||||
t.NextRune() // reads ' '
|
|
||||||
t.Accept()
|
|
||||||
t.NextRune() // reads 'm'
|
|
||||||
t.Accept()
|
|
||||||
t.Dispose(child2)
|
|
||||||
|
|
||||||
t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
|
|
		t.Dispose(child1) // and clean up child1 to return to the parent
		return true
	}

	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
	fmt.Println(result.String())

	// Output:
	// Hi
}

func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
	api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

	// Fork a few levels.
	child1 := api.Fork()
	child2 := api.Fork()
	child3 := api.Fork()
	child4 := api.Fork()

	// Read a rune 'a' from child4.
	r, _ := api.NextRune()
	AssertEqual(t, 'a', r, "child4 rune 1")
	api.Accept()
	AssertEqual(t, "a", api.String(), "child4 runes after rune 1")

	// Read another rune 'b' from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'b', r, "child4 rune 2")
	api.Accept()
	AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")

	// Merge "ab" from child4 to child3.
	api.Merge(child4)
	AssertEqual(t, "", api.String(), "child4 runes after first merge")

	// Read some more from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'c', r, "child4 rune 3")
	api.Accept()
	AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")

	// Merge "c" from child4 to child3.
	api.Merge(child4)

	// And dispose of child4, making child3 the active stack level.
	api.Dispose(child4)

	// Child3 should now have the combined results "abc" from child4's work.
	AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")

	// Now read some data from child3.
	r, _ = api.NextRune()
	AssertEqual(t, 'd', r, "child3 rune 4")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'e', r, "child3 rune 5")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'f', r, "child3 rune 6")
	api.Accept()

	AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")

	// Temporarily create some new forks from here, but don't use their outcome.
	child3sub1 := api.Fork()
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	child3sub2 := api.Fork()
	api.NextRune()
	api.Accept()
	api.Merge(child3sub2)   // do merge sub2 down to sub1
	api.Dispose(child3sub2) // and dispose of sub2
	api.Dispose(child3sub1) // but dispose of sub1 without merging

	// Instead, merge the results from before this forking segue from child3
	// to child2 and dispose of child3.
	api.Merge(child3)
	api.Dispose(child3)

	AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
	AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")

	// Merge child2 to child1 and dispose of it.
	api.Merge(child2)
	api.Dispose(child2)

	// Merge child1 a few times to the top level api.
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)

	// And dispose of it.
	api.Dispose(child1)

	// Read some data from the top level api.
	r, _ = api.NextRune()
	api.Accept()

	AssertEqual(t, "abcdefg", api.String(), "api string end result")
	AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}

func TestClearRunes(t *testing.T) {
	api := tokenize.NewAPI("Laphroaig")
	api.NextRune()   // Read 'L'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'a'
	api.Accept()     // Add to runes
	api.ClearRunes() // Clear the runes, giving us a fresh start.
	api.NextRune()   // Read 'p'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'h'
	api.Accept()     // Add to runes

	AssertEqual(t, "ph", api.String(), "api string end result")
}

func TestMergeScenariosForTokens(t *testing.T) {
	api := tokenize.NewAPI("")

	token1 := tokenize.Token{Value: 1}
	token2 := tokenize.Token{Value: 2}
	token3 := tokenize.Token{Value: 3}
	token4 := tokenize.Token{Value: 4}

	api.SetTokens(token1)
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Tokens 1")

	child := api.Fork()

	tokens = api.Tokens()
	AssertEqual(t, 0, len(tokens), "Tokens 2")

	api.AddTokens(token2)

	// Here we can merge by expanding the token slice on the parent,
	// because the end of the parent slice and the start of the child
	// slice align.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 2, len(tokens), "Tokens 3")

	child = api.Fork()
	api.AddTokens(token3)
	api.Reset()
	api.AddTokens(token4)

	// Here the merge means that token4 will be copied to the end of
	// the token slice of the parent, since there's a gap at the place
	// where token3 used to be.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 3, len(tokens), "Tokens 4")
	AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
	AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
	AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}
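
The two merge scenarios exercised above boil down to a slice trick. As a loose, self-contained illustration of the idea (plain ints stand in for tokenize.Token values; this is not the library's actual internal code):

	package main

	import "fmt"

	func main() {
		// Backing store shared by a parent scope and a forked child scope.
		store := make([]int, 0, 8)
		store = append(store, 1) // the parent's token
		parentLen := 1

		// Case 1: the child's tokens start exactly where the parent's end,
		// so "merging" is just growing the parent's length over them.
		store = append(store, 2) // child token, contiguous with the parent
		parentLen = len(store)

		// Case 2: the child's tokens start past a gap (as after a Reset),
		// so they are copied down to the parent's end instead.
		childStart := parentLen + 1          // simulate a one-element gap
		store = store[:childStart+1]
		store[childStart] = 4                // child token, non-contiguous
		store[parentLen] = store[childStart] // copy over the gap
		parentLen++
		store = store[:parentLen]

		fmt.Println(store) // [1 2 4]
	}
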
@@ -1,118 +0,0 @@
package tokenize2_test

// This file contains some tools that are used for writing tests.

import (
	"regexp"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}

type PanicT struct {
	Function func()
	Regexp   bool
	Expect   string
}

func AssertPanics(t *testing.T, testSet []PanicT) {
	for _, test := range testSet {
		AssertPanic(t, test)
	}
}

func AssertPanic(t *testing.T, p PanicT) {
	defer func() {
		if r := recover(); r != nil {
			mismatch := false
			if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
				mismatch = true
			}
			if !p.Regexp && p.Expect != r.(string) {
				mismatch = true
			}
			if mismatch {
				t.Errorf(
					"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
					p.Expect, r)
			}
		} else {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
		}
	}()
	p.Function()
}

type HandlerT struct {
	Input     string
	Handler   tokenize.Handler
	MustMatch bool
	Expected  string
}

func AssertHandlers(t *testing.T, testSet []HandlerT) {
	for _, test := range testSet {
		AssertHandler(t, test)
	}
}

func AssertHandler(t *testing.T, test HandlerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if test.MustMatch {
		if err != nil {
			t.Errorf("Test %q failed with error: %s", test.Input, err)
		} else if output := result.String(); output != test.Expected {
			t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
		}
	} else {
		if err == nil {
			t.Errorf("Test %q failed: should not match, but it did", test.Input)
		}
	}
}

type TokenMakerT struct {
	Input    string
	Handler  tokenize.Handler
	Expected []tokenize.Token
}

func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
	for _, test := range testSet {
		AssertTokenMaker(t, test)
	}
}

func AssertTokenMaker(t *testing.T, test TokenMakerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if err != nil {
		t.Errorf("Test %q failed with error: %s", test.Input, err)
	} else {
		if len(result.Tokens()) != len(test.Expected) {
			t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
		}
		for i, expected := range test.Expected {
			actual := result.Token(i)
			if expected.Type != actual.Type {
				t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
			}
			if expected.Value != actual.Value {
				t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
			}
		}
	}
}
@@ -1,33 +0,0 @@
package tokenize2

import (
	"fmt"
	"runtime"
	"strings"
)

// callerPanic panics with the message produced by applying data to the
// format string f, after replacing the placeholders {caller} and {name}
// with the caller's file position and the provided name.
func callerPanic(name, f string, data ...interface{}) {
	filepos := callerBefore(name)
	m := fmt.Sprintf(f, data...)
	m = strings.Replace(m, "{caller}", filepos, -1)
	m = strings.Replace(m, "{name}", name, -1)
	panic(m)
}

// callerBefore walks up the call stack and returns the file:line position
// of the code that called the function with the provided name.
func callerBefore(name string) string {
	found := false
	for i := 1; ; i++ {
		pc, file, line, ok := runtime.Caller(i)
		if found {
			return fmt.Sprintf("%s:%d", file, line)
		}
		if !ok {
			return "unknown caller"
		}
		f := runtime.FuncForPC(pc)

		if strings.HasSuffix(f.Name(), "."+name) {
			found = true
		}
	}
}
@@ -1,45 +0,0 @@
package tokenize2

import (
	"fmt"
	"unicode/utf8"
)

// Cursor represents the position of a cursor in various ways.
type Cursor struct {
	Byte   int // The cursor offset in bytes
	Rune   int // The cursor offset in UTF8 runes
	Column int // The column at which the cursor is (0-indexed)
	Line   int // The line at which the cursor is (0-indexed)
}

// String produces a string representation of the cursor position.
func (c Cursor) String() string {
	if c.Line == 0 && c.Column == 0 {
		return "start of file"
	}
	return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}

// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
	for _, r := range input {
		c.moveByRune(r)
	}
	return c
}

func (c *Cursor) moveByRune(r rune) *Cursor {
	c.Byte += utf8.RuneLen(r)
	c.Rune++
	if r == '\n' {
		c.Column = 0
		c.Line++
	} else {
		c.Column++
	}
	return c
}
@@ -1,69 +0,0 @@
package tokenize2

import (
	"fmt"
	"testing"
)

func ExampleCursor_move() {
	c := Cursor{}
	fmt.Printf("after initialization : %s\n", c)
	fmt.Printf("after 'some words' : %s\n", c.move("some words"))
	fmt.Printf("after '\\n' : %s\n", c.move("\n"))
	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))

	// Output:
	// after initialization : start of file
	// after 'some words' : line 1, column 11
	// after '\n' : line 2, column 1
	// after '\r\nskip\nlines' : line 4, column 6
}

func ExampleCursor_String() {
	c := Cursor{}
	fmt.Println(c.String())

	c.move("\nfoobar")
	fmt.Println(c.String())

	// Output:
	// start of file
	// line 2, column 7
}

func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
	for _, test := range []struct {
		name   string
		input  []string
		byte   int
		rune   int
		line   int
		column int
	}{
		{"No input at all", []string{""}, 0, 0, 0, 0},
		{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
		{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
		{"One newline", []string{"\n"}, 1, 1, 1, 0},
		{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
		{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
	} {
		c := Cursor{}
		for _, s := range test.input {
			c.move(s)
		}
		if c.Byte != test.byte {
			t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
		}
		if c.Rune != test.rune {
			t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
		}
		if c.Line != test.line {
			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
		}
		if c.Column != test.column {
			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
		}
	}
}
@@ -1,53 +0,0 @@
package tokenize2

// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool
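
// As an illustration (not part of the original file), a minimal hand-written
// Handler under this contract could look like the sketch below. It matches a
// single 'x' rune, using the Fork/NextRune/Accept/Merge/Dispose calls that the
// tests in this commit demonstrate; treat it as a sketch, not as one of the
// built-in handlers.
//
//	func matchX(t *API) bool {
//		child := t.Fork()
//		if r, err := t.NextRune(); err == nil && r == 'x' {
//			t.Accept()
//			t.Merge(child)
//			t.Dispose(child)
//			return true
//		}
//		t.Dispose(child)
//		return false
//	}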

// Match is syntactic sugar that allows you to write a construction like
// New(handler)(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
	tokenizer := New(handler)
	return tokenizer(input)
}

// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
	return MatchAny(handler, otherHandler)
}

// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
	return MatchRep(n, handler)
}

// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
	return MatchSeq(handler, otherHandler)
}

// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
	return MatchSeparated(separator, handler)
}

// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
	return MatchOptional(handler)
}

// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
	return MatchExcept(handler, exceptHandler)
}
@@ -1,101 +0,0 @@
package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestSyntacticSugar(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
		{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
		{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
		{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
		{"cccccc", a.Rune('c').Optional(), true, "c"},
		{"dddddd", a.Rune('c').Optional(), true, ""},
		{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
		{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
		{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
	})
}

func ExampleHandler_Times() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))

	fmt.Println(phoneNumber.Match("0201234567"))
	// Output:
	// 0201234567 <nil>
}

func ExampleHandler_Then() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))

	fmt.Println(phoneNumber.Match("0208888888"))
	// Output:
	// 0208888888 <nil>
}

func ExampleHandler_Or() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))

	fmt.Println(phoneNumber.Match("+31209876543"))
	fmt.Println(phoneNumber.Match("0031209876543"))
	fmt.Println(phoneNumber.Match("0031020991234"))
	fmt.Println(phoneNumber.Match("0031201234"))
	// Output:
	// +31209876543 <nil>
	// 0031209876543 <nil>
	// <nil> mismatch at start of file
	// <nil> mismatch at start of file
}

func ExampleHandler_SeparatedBy() {
	a, t := tokenize.A, tokenize.T
	csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)

	r, _ := csv.Match("123,456,7,8,9")
	for i, token := range r.Tokens() {
		fmt.Printf("[%d] %v\n", i, token)
	}
	// Output:
	// [0] number((int)123)
	// [1] number((int)456)
	// [2] number((int)7)
	// [3] number((int)8)
	// [4] number((int)9)
}

func ExampleHandler_Optional() {
	c, a := tokenize.C, tokenize.A

	spanish := c.Seq(
		a.Rune('¿').Optional(),
		c.OneOrMore(a.AnyRune.Except(a.Question)),
		a.Rune('?').Optional())

	fmt.Println(spanish.Match("¿Habla español María?"))
	fmt.Println(spanish.Match("Sí, María habla español."))
	// Output:
	// ¿Habla español María? <nil>
	// Sí, María habla español. <nil>
}

func ExampleHandler_Match() {
	r, err := tokenize.A.IPv4.Match("001.002.003.004")
	fmt.Println(r, err)

	r, err = tokenize.A.IPv4.Match("1.2.3")
	fmt.Println(r, err)

	// Output:
	// 1.2.3.4 <nil>
	// <nil> mismatch at start of file
}
File diff suppressed because it is too large
@@ -1,512 +0,0 @@
package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestCombinatorsTempDebug(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// {"024", a.IPv4CIDRMask, true, "24"},
		// {"024", a.Octet, true, "24"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
	})
}

func TestCombinators(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"", c.Not(a.Rune('b')), false, ""},
		{"abc not", c.Not(a.Rune('b')), true, "a"},
		{"bcd not", c.Not(a.Rune('b')), false, ""},
		{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
		{"", c.Max(4, a.Rune('b')), true, ""},
		{"X", c.Max(4, a.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
		{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(a.Rune('d')), false, ""},
		{"X", c.OneOrMore(a.Rune('d')), false, ""},
		{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
		{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(a.Rune('h')), true, ""},
		{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
		{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
		{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
		{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
		{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
		{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
	})
}

func TestCombinatorPanics(t *testing.T) {
	var c, a = tokenize.C, tokenize.A
	AssertPanics(t, []PanicT{
		{func() { a.RuneRange('z', 'a') }, true,
			`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
		{func() { c.MinMax(-1, 1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.MinMax(1, -1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { c.MinMax(10, 5, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
		{func() { c.Min(-10, a.Space) }, true,
			`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.Max(-42, a.Space) }, true,
			`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { a.IntegerBetween(10, -10) }, true,
			`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
	})
}

func TestAtoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"dd", a.RuneRange('b', 'e'), true, "d"},
		{"ee", a.RuneRange('b', 'e'), true, "e"},
		{"ff", a.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", a.Str("Hello"), true, "Hello"},
		{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
		{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", a.Runes('+', '-', '*', '/'), false, ""},
		{"xxx", a.Rune('x'), true, "x"},
		{"x ", a.Rune(' '), false, ""},
		{"aa", a.RuneRange('b', 'e'), false, ""},
		{"bb", a.RuneRange('b', 'e'), true, "b"},
		{"cc", a.RuneRange('b', 'e'), true, "c"},
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		{"\xbc with AnyRune", a.AnyRune, true, "�"},
		{"", a.AnyRune, false, ""},
		{"⌘", a.ValidRune, true, "⌘"},
		{"\xbc with ValidRune", a.ValidRune, false, ""},
		{"", a.ValidRune, false, ""},
		{"\xbc with InvalidRune", a.InvalidRune, true, "�"},
		{"ok with InvalidRune", a.InvalidRune, false, ""},
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.LeftParen, true, "("},
		{"(", a.RoundOpen, true, "("},
		{")", a.RightParen, true, ")"},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"*", a.Multiply, true, "*"},
		{"+", a.Plus, true, "+"},
		{"+", a.Add, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{"-", a.Subtract, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{"/", a.Divide, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"<", a.LessThan, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{">", a.GreaterThan, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},
		{"\t \t \r\n", a.Blank, true, "\t"},
		{" \t \t \r\n", a.Blanks, true, " \t \t "},
		{"xxx", a.Whitespace, false, ""},
		{" ", a.Whitespace, true, " "},
		{"\t", a.Whitespace, true, "\t"},
		{"\n", a.Whitespace, true, "\n"},
		{"\r\n", a.Whitespace, true, "\r\n"},
		{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
		{"xxx", a.UnicodeSpace, false, ""},
		{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"1", a.Letter, false, ""},
		{"a", a.Letter, true, "a"},
		{"Ø", a.Letter, true, "Ø"},
		{"Ë", a.Lower, false, ""},
		{"ë", a.Lower, true, "ë"},
		{"ä", a.Upper, false, "ä"},
		{"Ä", a.Upper, true, "Ä"},
		{"0", a.HexDigit, true, "0"},
		{"9", a.HexDigit, true, "9"},
		{"a", a.HexDigit, true, "a"},
		{"f", a.HexDigit, true, "f"},
		{"A", a.HexDigit, true, "A"},
		{"F", a.HexDigit, true, "F"},
		{"g", a.HexDigit, false, "g"},
		{"G", a.HexDigit, false, "G"},
		{"0", a.Integer, true, "0"},
		{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
		{"1", a.Integer, true, "1"},
		{"-10X", a.Integer, false, ""},
		{"+10X", a.Integer, false, ""},
		{"-10X", a.Signed(a.Integer), true, "-10"},
		{"+10X", a.Signed(a.Integer), true, "+10"},
		{"+10.1X", a.Signed(a.Integer), true, "+10"},
		{"0X", a.Float, true, "0"},
		{"0X", a.Float, true, "0"},
		{"1X", a.Float, true, "1"},
		{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
		{"123.321X", a.Float, true, "123.321"},
		{"-3.14X", a.Float, false, ""},
		{"-3.14X", a.Signed(a.Float), true, "-3.14"},
		{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
		{"-11", a.IntegerBetween(-10, 10), false, "0"},
		{"-10", a.IntegerBetween(-10, 10), true, "-10"},
		{"0", a.IntegerBetween(-10, 10), true, "0"},
		{"10", a.IntegerBetween(-10, 10), true, "10"},
		{"11", a.IntegerBetween(0, 10), false, ""},
		{"fifteen", a.IntegerBetween(0, 10), false, ""},
	})
}

func TestIPv4Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// Not normalized octet.
		{"0X", tokenize.MatchOctet(false), true, "0"},
		{"00X", tokenize.MatchOctet(false), true, "00"},
		{"000X", tokenize.MatchOctet(false), true, "000"},
		{"10X", tokenize.MatchOctet(false), true, "10"},
		{"010X", tokenize.MatchOctet(false), true, "010"},
		{"255123", tokenize.MatchOctet(false), true, "255"},
		{"256123", tokenize.MatchOctet(false), false, ""},
		{"300", tokenize.MatchOctet(false), false, ""},

		// Octet.
		{"0", tokenize.MatchOctet(false), true, "0"},
		{"02", tokenize.MatchOctet(false), true, "02"},
		{"003", tokenize.MatchOctet(false), true, "003"},
		{"256", tokenize.MatchOctet(false), false, ""},
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "0"},
		{"000X", a.Octet, true, "0"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "10"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},

		// IPv4 address.
		{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
		{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},

		// IPv4 CIDR netmask.
		{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
		{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
		{"0", a.IPv4CIDRMask, true, "0"},
		{"00", a.IPv4CIDRMask, true, "0"},
		{"000", a.IPv4CIDRMask, true, "0"},
		{"32", a.IPv4CIDRMask, true, "32"},
		{"032", a.IPv4CIDRMask, true, "32"},
		{"33", a.IPv4CIDRMask, false, ""},

		// IPv4 netmask in dotted quad format.
		{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
		{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
		{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)

		// IPv4 address + CIDR or dotted quad netmask.
		{"192.168.6.123", a.IPv4Net, false, ""},
		{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
		{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
		{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask, and 193 is also an invalid CIDR mask
		{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but the 16 CIDR mask is ok; remainder input = ".0.0.0"
	})
}

func TestIPv6Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"", a.IPv6, false, ""},
		{"::", a.IPv6, true, "::"},
		{"1::", a.IPv6, true, "1::"},
		{"1::1", a.IPv6, true, "1::1"},
		{"::1", a.IPv6, true, "::1"},
		{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
		{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
		{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
		{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
		{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
		{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
		{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
		{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
		{"0", a.IPv6CIDRMask, true, "0"},
		{"128", a.IPv6CIDRMask, true, "128"},
		{"129", a.IPv6CIDRMask, false, ""},
		{"::1/128", a.IPv6Net, true, "::1/128"},
		{"::1/129", a.IPv6Net, false, ""},
		{"1.1.1.1/24", a.IPv6Net, false, ""},
		{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
		{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
	})
}

func TestModifiers(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"missed me!", m.Drop(a.Rune('w')), false, ""},
		{"where are you?", m.Drop(a.Rune('w')), true, ""},
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
		{" trim  ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim  ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim  "},
		{" trim  ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
		{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	})
}

// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called unless the input has already been validated
// to follow the correct pattern. Therefore, token makers will panic when the
// input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
	var a, tok = tokenize.A, tokenize.T
	invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
	tokenizer := tokenize.New(invalid)
	AssertPanic(t, PanicT{
		func() { tokenizer("no") }, false,
		`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
	})
}
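
// Illustration (not part of the original file): because token makers panic on
// input that was not validated up front, a caller that prefers an error value
// can wrap the tokenizer with recover. This sketch assumes the fmt package and
// the tokenize.Func type from this commit.
//
//	func tokenizeSafely(f tokenize.Func, input string) (result *tokenize.API, err error) {
//		defer func() {
//			if r := recover(); r != nil {
//				err = fmt.Errorf("tokenizer panicked: %v", r)
//			}
//		}()
//		return f(input)
//	}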

func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},

		// I don't check the returned error here, but it's good enough to see that the parsing
		// stopped after the illegal \g escape sequence.
		{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},

		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},

		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},

		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},

		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},

		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},

		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},

		{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
	})
}

func TestTokenGroup_Match(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))

	api, err := tokenizer("xxxxx")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Length of tokens slice")
	contained := tokens[0].Value.([]tokenize.Token)
	AssertEqual(t, 3, len(contained), "Length of contained tokens")
	AssertEqual(t, 1, contained[0].Type.(int), "Type of contained Token 1")
	AssertEqual(t, 2, contained[1].Type.(int), "Type of contained Token 2")
	AssertEqual(t, 3, contained[2].Type.(int), "Type of contained Token 3")
}

func TestTokenGroup_Mismatch(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())

	api, err := tokenizer("12345")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 0, len(tokens), "Length of tokens slice")
}

// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	demonic := c.Seq(
		c.Optional(a.SquareOpen),
		m.Trim(
			c.Seq(
				c.Optional(a.Blanks),
				c.Repeated(3, a.AngleClose),
				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
					return fmt.Sprintf("%d", len(s))
				}),
				m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
				m.ToUpper(c.Min(1, a.ASCIILower)),
				m.Drop(a.Excl),
				c.Repeated(3, a.AngleOpen),
				c.Optional(a.Blanks),
			),
			" \t",
		),
		c.Optional(a.SquareClose),
	)

	AssertHandlers(t, []HandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	})
}
@@ -1,41 +0,0 @@
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize2

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns an
// API struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*API, error)

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler function
// that must be executed at the start of the tokenizing process. From there on
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*API, error) {
		api := NewAPI(input)
		ok := tokenHandler(api)

		if !ok {
			err := fmt.Errorf("mismatch at %s", Cursor{})
			return nil, err
		}
		return api, nil
	}
}
@ -1,223 +0,0 @@
|
||||||
package tokenize2_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
|
|
||||||
)
|
|
||||||
|
|
||||||
// TODO For error handling, it would be really cool if for example the
|
|
||||||
// 10.0.300.1/24 case would return an actual error stating that
|
|
||||||
// 300 is not a valid octet for an IPv4 address.
|
|
||||||
// Biggest thing to take care of here, is that errors should not stop
|
|
||||||
// a Parser flow (since we might be trying to match different cases in
|
|
||||||
// sequence), but a Parser flow should optionally be able to make use
|
|
||||||
// of the actual error.
|
|
||||||
// The same goes for a Tokenizer, since those can also make use of
|
|
||||||
// optional matching using tokenize.C.Any(...) for example. If matching
|
|
||||||
// for Any(IPv4, Digits), the example case should simply end up with 10
|
|
||||||
// after the IPv4 mismatch.
|
|
||||||
func ExampleNew() {
|
|
||||||
// Build the tokenizer for ip/mask.
|
|
||||||
var c, a, t = tokenize.C, tokenize.A, tokenize.T
|
|
||||||
ip := t.Str("ip", a.IPv4)
|
|
||||||
mask := t.Int8("mask", a.IPv4CIDRMask)
|
|
||||||
cidr := c.Seq(ip, a.Slash, mask)
|
|
||||||
tokenizer := tokenize.New(cidr)
|
|
||||||
|
|
||||||
for _, input := range []string{
|
|
||||||
"000.000.000.000/000",
|
|
||||||
"192.168.0.1/24",
|
|
||||||
"255.255.255.255/32",
|
|
||||||
"10.0.300.1/24",
|
|
||||||
"not an IPv4 CIDR",
|
|
||||||
} {
|
|
||||||
// Execute returns a Result and an error, which is nil on success.
|
|
||||||
result, err := tokenizer(input)
|
|
||||||
|
|
||||||
if err == nil {
|
|
||||||
fmt.Printf("Result: %s\n", result.Tokens())
|
|
||||||
} else {
|
|
||||||
fmt.Printf("Error: %s\n", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Output:
|
|
||||||
// Result: [ip("0.0.0.0") mask((int8)0)]
|
|
||||||
// Result: [ip("192.168.0.1") mask((int8)24)]
|
|
||||||
// Result: [ip("255.255.255.255") mask((int8)32)]
|
|
||||||
// Error: mismatch at start of file
|
|
||||||
// Error: mismatch at start of file
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
|
||||||
api := makeTokenizeAPI()
|
|
||||||
r, _ := api.NextRune()
|
|
||||||
AssertEqual(t, 'T', r, "first rune")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
AssertEqual(t, "Tes", i.String(), "i.String()")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.NextRune()
|
|
||||||
i.NextRune()
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
|
|
||||||
`without a prior call to Accept\(\)`,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
|
||||||
api := makeTokenizeAPI()
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: api.Accept,
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
|
|
||||||
`without first calling NextRune\(\)`,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
|
|
||||||
api := tokenize.NewAPI("")
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
api.NextRune()
|
|
||||||
api.Accept()
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
|
|
||||||
`, but the prior call to NextRune\(\) failed`,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.Merge(0)
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
child := i.Fork()
|
|
||||||
i.Fork()
|
|
||||||
i.Merge(child)
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
|
|
||||||
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.Dispose(0)
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
child := i.Fork()
|
|
||||||
i.Fork()
|
|
||||||
i.Dispose(child)
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
|
|
||||||
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.Fork()
|
|
||||||
g := i.Fork()
|
|
||||||
i.Fork()
|
|
||||||
i.Merge(g)
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
|
|
||||||
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
|
||||||
AssertPanic(t, PanicT{
|
|
||||||
Function: func() {
|
|
||||||
i := makeTokenizeAPI()
|
|
||||||
i.NextRune()
|
|
||||||
i.Fork()
|
|
||||||
i.Accept()
|
|
||||||
},
|
|
||||||
Regexp: true,
|
|
||||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
|
||||||
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
|
||||||
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
|
|
||||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
}
|
|
||||||
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
|
|
||||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
|
||||||
i.Accept()
|
|
||||||
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
|
|
||||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
}
|
|
||||||
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
|
||||||
i := tokenize.NewAPI(strings.NewReader("X"))
|
|
||||||
i.NextRune()
|
|
||||||
i.Accept()
|
|
||||||
r, err := i.NextRune()
|
|
||||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
|
||||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
|
||||||
}
|
|
||||||
func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	i.Dispose(child) // brings the read offset back to the start
	r, err = i.NextRune() // so here we should see the first rune again
	AssertEqual(t, 'X', r, "returned rune from 3rd NextRune()")
	AssertEqual(t, true, err == nil, "returned error from 3rd NextRune()")
}

func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}

@@ -1,131 +0,0 @@
package tokenize2

import (
	"testing"
)

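// Fork() pushes a new frame on the API's internal stack: i.stackFrame then
// points at the child frame, while i.stackFrames[i.stackLevel-1] still holds
// the parent's cursor and offset.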
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.String(), "accepted rune in input")
	// Fork
	child := i.Fork()
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "child offset")
	// Accept two runes via the fork.
	i.NextRune()
	i.Accept() // e
	i.NextRune()
	i.Accept() // s
	AssertEqual(t, "es", i.String(), "result runes in fork")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "child offset")
	// Merge fork back into parent.
	i.Merge(child)
	i.Dispose(child)
	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}

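// Merging propagates accepted runes one level up at a time: f2's output is
// merged into f1, and f1's into the top-level API, accumulating "Tes".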
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	i.NextRune()
	i.Accept()
	f2 := i.Fork()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "s", i.String(), "f2 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f2 offset")
	i.Merge(f2)
	i.Dispose(f2)
	AssertEqual(t, "es", i.String(), "f1 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1 offset")
	i.Merge(f1)
	i.Dispose(f1)
	AssertEqual(t, "Tes", i.String(), "top-level API String()")
	AssertEqual(t, 3, i.stackFrame.offset, "top-level API offset")
}

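// NextRune() only reads; Accept() is what actually consumes. The runeRead
// flag below tracks whether a read is pending between the two calls.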
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := NewAPI("Testing")
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
	i.Accept()
	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}

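// FlushInput() drops already-read input from the buffer and resets the read
// offset to zero; accepted output is left untouched, so the final String()
// below still yields the full "cool".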
func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Flushing without any read data is okay. FlushInput() will return
	// false in this case, and nothing else happens.
	AssertTrue(t, api.FlushInput() == false, "flush input at start")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")

	// Read offset is now zero, but reading should continue after "co".
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertEqual(t, "cool", api.String(), "end result")
}

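// C.FlushInput wraps a handler so that, on a successful match, the input
// consumed so far is flushed automatically, as the offsets below show.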
func TestInputFlusherWrapper(t *testing.T) {
	runeA := A.Rune('a')
	flushB := C.FlushInput(A.Rune('b'))
	api := NewAPI("abaab")
	runeA(api)
	AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
	AssertEqual(t, "a", api.String(), "runes after 1 read")
	flushB(api)
	AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
	AssertEqual(t, "ab", api.String(), "runes after 2 reads")
	runeA(api)
	AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
	AssertEqual(t, "aba", api.String(), "runes after 3 reads")
	runeA(api)
	AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
	AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
	flushB(api)
	AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
	AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
}

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}