Optimization round completed (for now :-) All tests successful.
parent 7598b62dd0
commit c532af67ca
@@ -77,7 +77,7 @@ var int64Token = tokenize.T.Int64(nil, bareInteger)
 func (c *simpleCalculator) number(p *parse.API) {
     if p.Accept(int64Token) {
-        c.Result += c.op * p.Result().Value(0).(int64)
+        c.Result += c.op * p.Result.Tokens[0].Value.(int64)
         p.Handle(c.operatorOrEndOfFile)
     } else {
         p.Expected("integer number")
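
The change above is the pattern that repeats throughout this commit: parse.API.Result is now a plain struct field, so handlers index Result.Tokens and Result.Runes directly instead of calling the removed Result() method. A minimal self-contained sketch of the new style (the token definition is illustrative; tokenize.A.Digits is assumed here and is not part of this diff):

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/parse"
        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    func main() {
        // Hypothetical token definition, assuming tokenize.A.Digits exists.
        int64Token := tokenize.T.Int64(nil, tokenize.A.Digits)
        var value int64
        parser := parse.New(func(p *parse.API) {
            if p.Accept(int64Token) {
                // Old style: p.Result().Value(0).(int64)
                value = p.Result.Tokens[0].Value.(int64)
            } else {
                p.Expected("integer number")
            }
        })
        if err := parser("42"); err == nil {
            fmt.Println(value) // 42
        }
    }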
@@ -98,7 +98,7 @@ func (calc *calculator) expr(p *parse.API) {
     var A = tokenize.A
     if p.Handle(calc.term) {
         for p.Accept(A.Add.Or(A.Subtract)) {
-            op := p.Result().Rune(0)
+            op := p.Result.Runes[0]
             if !p.Handle(calc.term) {
                 return
             }
@@ -116,7 +116,7 @@ func (calc *calculator) term(p *parse.API) {
     var A = tokenize.A
     if p.Handle(calc.factor) {
         for p.Accept(A.Multiply.Or(A.Divide)) {
-            op := p.Result().Rune(0)
+            op := p.Result.Runes[0]
             if !p.Handle(calc.factor) {
                 return
             }
@@ -134,7 +134,7 @@ func (calc *calculator) factor(p *parse.API) {
     p.Accept(A.Blanks)
     switch {
     case p.Accept(T.Float64(nil, A.Signed(A.Float))):
-        value := p.Result().Value(0).(float64)
+        value := p.Result.Tokens[0].Value.(float64)
         calc.interpreter.pushValue(value)
     case p.Accept(A.LeftParen):
         if !p.Handle(calc.expr) {
@@ -116,7 +116,7 @@ func (h *helloparser1) name(p *parse.API) {
     case p.Peek(a.Excl):
         p.Handle(h.exclamation)
     case p.Accept(a.AnyRune):
-        h.greetee += p.Result().String()
+        h.greetee += p.Result.String()
         p.Handle(h.name)
     default:
         p.Expected("exclamation mark")
@@ -90,7 +90,7 @@ func (h *helloparser2) start(p *parse.API) {
         return
     }
     if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
-        h.greetee = p.Result().String()
+        h.greetee = p.Result.String()
         if h.greetee == "" {
             p.Error("the name cannot be empty")
             return
@@ -22,7 +22,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) error {
     parseChunks := parse.New(func(p *parse.API) {
         for p.Accept(chunkOfRunes) {
-            *l = append(*l, p.Result().String())
+            *l = append(*l, p.Result.String())
         }
     })
     return parseChunks(s)
parse/api.go (61 changed lines)
@@ -16,14 +16,24 @@ import (
 //
 // • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
 type API struct {
-    tokenAPI tokenize.API     // the tokenize.API, used for communicating with tokenize.Handler functions
-    result   *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
+    tokenAPI *tokenize.API  // the tokenize.API, used for communicating with tokenize.Handler functions
+    Result   TokenizeResult // a struct, holding the results of the last Peek() or Accept() call
     sanityChecksEnabled bool             // whether or not runtime sanity checks are enabled
     loopCheck           map[uintptr]bool // used for parser loop detection
     err                 error            // parse error, retrieved by Error(), using API methods is denied when set
     stopped             bool             // a boolean set to true by Stop()
 }
 
+// TokenizeResult holds the results of the last Peek() or Accept() call.
+type TokenizeResult struct {
+    Tokens []tokenize.Token // the resulting tokens from the last call to Peek() or Accept()
+    Runes  []rune           // the resulting runes from the last call to Peek() or Accept()
+}
+
+func (result *TokenizeResult) String() string {
+    return string(result.Runes)
+}
+
 // DisableSanityChecks disables the built-in parser implementation sanity checks,
 // which detect parser implementation errors like loops and continuing parsing
 // after an error or invoking Stop().
@@ -40,16 +50,13 @@ func (p *API) DisableSanityChecks() {
 // If it does, then true will be returned, false otherwise. The read cursor
 // will be kept at the same position, so the next call to Peek() or Accept()
 // will start from the same cursor position.
 //
-// After calling this method, you can retrieve the produced tokenize.Result
-// struct using the Result() method.
 func (p *API) Peek(tokenHandler tokenize.Handler) bool {
-    p.result = nil
     forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
     if ok {
-        p.result = forkedAPI.Result()
-        p.tokenAPI.Reset()
+        p.Result.Tokens = p.tokenAPI.Tokens()
+        p.Result.Runes = p.tokenAPI.Runes()
     }
     p.tokenAPI.Dispose(forkedAPI)
     return ok
 }
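
Given the Peek()/Accept() pair above, the practical difference is that both fill p.Result, but only Accept() moves the read cursor. A small usage sketch (hypothetical parser, built only from handlers that appear elsewhere in this diff):

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/parse"
        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    func main() {
        parser := parse.New(func(p *parse.API) {
            if p.Peek(tokenize.A.Rune('X')) {
                fmt.Println(p.Result.String()) // "X", cursor still at the start
                p.Accept(tokenize.A.Rune('X')) // same match, cursor moves past 'X'
            }
            p.Stop()
        })
        parser("X-ray")
    }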
@@ -58,24 +65,31 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
 // forward to beyond the match that was found. Otherwise false will be returned
 // and the read cursor will stay at the same position.
 //
-// After calling this method, you can retrieve the tokenize.Result
-// using the Result() method.
+// After calling this method, you can retrieve the results through the Result field.
 func (p *API) Accept(tokenHandler tokenize.Handler) bool {
-    p.result = nil
     forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
     if ok {
-        forkedAPI.Merge()
-        p.result = p.tokenAPI.Result()
+        // Keep track of the results.
+        p.Result.Tokens = p.tokenAPI.Tokens()
+        p.Result.Runes = p.tokenAPI.Runes()
+
+        // Merge to the parent level.
+        p.tokenAPI.Merge(forkedAPI)
+        p.tokenAPI.Dispose(forkedAPI)
+
+        // And flush the input reader buffer.
         if p.tokenAPI.FlushInput() {
             if p.sanityChecksEnabled {
                 p.initLoopCheck()
             }
         }
+    } else {
+        p.tokenAPI.Dispose(forkedAPI)
     }
     return ok
 }
 
-func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokenize.API, bool) {
+func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
     if p.sanityChecksEnabled {
         p.panicWhenStoppedOrInError(name)
         p.checkForLoops(name)
@@ -84,10 +98,9 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokenize.API, bool) {
         }
     }
 
-    p.result = nil
     p.tokenAPI.Reset()
     child := p.tokenAPI.Fork()
-    ok := tokenHandler(child)
+    ok := tokenHandler(p.tokenAPI)
 
     return child, ok
 }
@@ -138,20 +151,6 @@ func (p *API) checkForLoops(name string) {
     p.loopCheck[filepos] = true
 }
 
-// Result returns the tokenize.Result struct, containing results as produced by the
-// last Peek() or Accept() call.
-//
-// When Result() is called without first doing a Peek() or Accept(), then no
-// result will be available and the method will panic.
-func (p *API) Result() *tokenize.Result {
-    result := p.result
-    if p.result == nil {
-        callerPanic("Result", "parsekit.parse.API.{name}(): {name}() called "+
-            "at {caller} without calling API.Peek() or API.Accept() on beforehand")
-    }
-    return result
-}
-
 // Handle executes other parse.Handler functions from within the active
 // parse.Handler function.
 //
@@ -215,7 +214,7 @@ func (p *API) Error(format string, data ...interface{}) {
     // No call to p.panicWhenStoppedOrInError(), to allow a parser to
     // set a different error message when needed.
     message := fmt.Sprintf(format, data...)
-    p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Result().Cursor())
+    p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor())
 }
 
 // ExpectEndOfFile can be used to check if the input is at end of file.
@@ -5,8 +5,6 @@ package parse
 import (
     "regexp"
     "testing"
-
-    "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@@ -56,39 +54,3 @@ func AssertPanic(t *testing.T, p PanicT) {
     }()
     p.Function()
 }
-
-type TokenMakerT struct {
-    Input    string
-    Handler  tokenize.Handler
-    Expected []tokenize.Token
-}
-
-func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
-    for _, test := range testSet {
-        AssertTokenMaker(t, test)
-    }
-}
-
-func AssertTokenMaker(t *testing.T, test TokenMakerT) {
-    tokenizer := tokenize.New(test.Handler)
-    result, err := tokenizer(test.Input)
-    if err != nil {
-        t.Errorf("Test %q failed with error: %s", test.Input, err)
-    } else {
-        if len(result.Tokens()) != len(test.Expected) {
-            t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
-        }
-        for i, expected := range test.Expected {
-            actual := result.Token(i)
-            if expected.Type != actual.Type {
-                t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
-            }
-            if string(expected.Runes) != string(actual.Runes) {
-                t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
-            }
-            if expected.Value != actual.Value {
-                t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
-            }
-        }
-    }
-}
@@ -16,7 +16,7 @@ func ExampleNew_usingAcceptedRunes() {
     parser := parse.New(func(p *parse.API) {
         for p.Accept(a.AnyRune) {
-            matches = append(matches, p.Result().String())
+            matches = append(matches, p.Result.String())
         }
         p.ExpectEndOfFile()
     })
@@ -33,9 +33,9 @@ func ExampleNew_usingTokens() {
     parser := parse.New(func(p *parse.API) {
         if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
-            fmt.Printf("Runes accepted: %q\n", p.Result().String())
+            fmt.Printf("Runes accepted: %q\n", p.Result.String())
             fmt.Printf("Tokens:\n")
-            for i, token := range p.Result().Tokens() {
+            for i, token := range p.Result.Tokens {
                 fmt.Printf("[%d] %s\n", i, token)
             }
         }
@@ -46,10 +46,10 @@ func ExampleNew_usingTokens() {
     // Output:
     // Runes accepted: "¡ök!"
     // Tokens:
-    // [0] RUNE(161)
-    // [1] RUNE(246)
-    // [2] RUNE(107)
-    // [3] RUNE(33)
+    // [0] RUNE('¡')
+    // [1] RUNE('ö')
+    // [2] RUNE('k')
+    // [3] RUNE('!')
 }
 
 func ExampleAPI_Expected() {
@@ -71,7 +71,7 @@ func ExampleAPI_Accept_inIfStatement() {
     if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
         // Result.String() returns a string containing all
         // accepted runes that were matched against.
-        fmt.Println(p.Result().String())
+        fmt.Println(p.Result.String())
     }
 })
 parser("YOWZA!")
@@ -88,7 +88,7 @@ func ExampleAPI_Accept_inSwitchStatement() {
         case p.Accept(tokenize.A.Rune('X')):
             // NOOP, skip this rune
         case p.Accept(tokenize.A.AnyRune):
-            result += p.Result().String()
+            result += p.Result.String()
         default:
             loop = false
         }
@@ -107,7 +107,7 @@ func ExampleAPI_Stop() {
     parser := parse.New(func(p *parse.API) {
         fmt.Printf("First word: ")
         for p.Accept(c.Not(a.Space)) {
-            fmt.Printf("%s", p.Result())
+            fmt.Printf("%s", p.Result.String())
         }
         p.Stop()
     })
@@ -123,7 +123,7 @@ func ExampleAPI_Stop_notCalledAndNoInputPending() {
     parser := parse.New(func(p *parse.API) {
         fmt.Printf("Word: ")
         for p.Accept(c.Not(a.Space)) {
-            fmt.Printf("%s", p.Result())
+            fmt.Printf("%s", p.Result.String())
         }
         fmt.Printf("\n")
     })
@@ -141,7 +141,7 @@ func ExampleAPI_Stop_notCalledButInputPending() {
     parser := parse.New(func(p *parse.API) {
         fmt.Printf("First word: ")
         for p.Accept(c.Not(a.Space)) {
-            fmt.Printf("%s", p.Result())
+            fmt.Printf("%s", p.Result.String())
         }
         fmt.Printf("\n")
     })
@@ -161,7 +161,7 @@ func ExampleAPI_Peek() {
     // This handler is able to handle serial numbers.
     serialnrHandler := func(p *parse.API) {
         if p.Accept(serialnr) {
-            fmt.Println(p.Result().String())
+            fmt.Println(p.Result.String())
         }
     }
@@ -255,17 +255,6 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
         `at /.*/parse_test\.go:\d+: no calls allowed after API\.Error\(\)`})
 }
 
-func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) {
-    p := parse.New(func(p *parse.API) {
-        p.Result()
-    })
-    parse.AssertPanic(t, parse.PanicT{
-        Function: func() { p("") },
-        Regexp:   true,
-        Expect: `parsekit\.parse\.API\.Result\(\): Result\(\) called at ` +
-            `/.*/parse_test.go:\d+ without calling API.Peek\(\) or API.Accept\(\) on beforehand`})
-}
-
 func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
     p := parse.New(func(p *parse.API) {})
     err := p("")
tokenize/api.go (355 changed lines)
@@ -25,7 +25,7 @@ import (
 //
 // By invoking NextRune() + Accept() multiple times, the result can be extended
 // with as many runes as needed. Runes collected this way can later on be
-// retrieved using the method Result().Runes().
+// retrieved using the method Runes().
 //
 // It is mandatory to call Accept() after retrieving a rune, before calling
 // NextRune() again. Failing to do so will result in a panic.
@@ -74,39 +74,40 @@ type API struct {
     runeRead bool    // whether or not a rune was read using NextRune()
     runes    []rune  // the rune stack
     tokens   []Token // the token stack
+    stackFrames []stackFrame // the stack frames, containing stack level-specific data
-    stackLevel int       // the stack level for this API object
-    state      *apiState // shared API state data
+    stackLevel int         // the current stack level
+    stackFrame *stackFrame // the current stack frame
 }
 
+type stackFrame struct {
+    offset     int // current rune offset relative to the Reader's sliding window
+    runeStart  int
+    runeEnd    int
+    tokenStart int
+    tokenEnd   int
+    cursor     Cursor
+
+    // TODO
+    err error // can be used by a Handler to report a specific issue with the input
+}
+
-type apiState struct {
-    stack []Result // the stack, used for forking / merging the API.
-    top   int      // the index of the current top item in the stack
-}
-
-// initialAPIstackDepth determines the initial stack depth for the API.
+// initialStackDepth determines the initial stack depth for the API.
 // When a parser requires a higher stack depth, then this is no problem.
 // The API will automatically scale the stack when forking beyond this
 // default number of stack levels.
-const initialAPIstackDepth = 10
+const initialStackDepth = 10
+const initialTokenDepth = 10
+const initialRuneDepth = 10
 
 // NewAPI initializes a new API struct, wrapped around the provided input.
 // For an overview of allowed inputs, take a look at the documentation
 // for parsekit.read.New().
-func NewAPI(input interface{}) API {
-    stack := make([]Result, 1, initialAPIstackDepth)
-    state := apiState{
-        stack: stack,
-    }
-    return API{
-        reader: read.New(input),
-        state:  &state,
-        runes:  make([]rune, initialAPIstackDepth),
-        tokens: make([]Token, initialAPIstackDepth),
-    }
+func NewAPI(input interface{}) *API {
+    api := &API{
+        reader:      read.New(input),
+        runes:       make([]rune, 0, initialRuneDepth),
+        tokens:      make([]Token, 0, initialTokenDepth),
+        stackFrames: make([]stackFrame, 1, initialStackDepth),
+    }
+    api.stackFrame = &api.stackFrames[0]
+
+    return api
 }
 
 // NextRune returns the rune at the current read offset.
@@ -120,25 +121,16 @@ func NewAPI(input interface{}) API {
 // without explicitly accepting, this method will panic. You can see this as a
 // built-in unit test, enforcing correct serialization of API method calls.
 func (i *API) NextRune() (rune, error) {
-    if i.stackLevel > i.state.top {
-        callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "using a non-active API fork (a parent was read, forked or merged, "+
-            "causing this fork to be invalidated)")
-    }
-
-    result := &(i.state.stack[i.stackLevel])
     if i.runeRead {
         callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
            "without a prior call to Accept()")
     }
 
-    readRune, err := i.reader.RuneAt(result.offset)
+    readRune, err := i.reader.RuneAt(i.stackFrame.offset)
     i.lastRune = readRune
     i.lastRuneErr = err
     i.runeRead = true
 
-    i.DisposeChilds()
-
     return readRune, err
 }
@@ -148,22 +140,31 @@ func (i *API) NextRune() (rune, error) {
 // It is not allowed to call Accept() when the previous call to NextRune()
 // returned an error. Calling Accept() in such case will result in a panic.
 func (i *API) Accept() {
-    if i.stackLevel > i.state.top {
-        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "using a non-active API fork (a parent was read, forked or merged, "+
-            "causing this fork to be invalidated)")
-    }
-
-    result := &(i.state.stack[i.stackLevel])
+    // TODO can go after completing the code, for performance.
+    //fmt.Println("STACK [", i.stackLevel, "] runes", len(i.runes), "/", cap(i.runes), "tokens", len(i.tokens), "/", cap(i.tokens))
     if !i.runeRead {
-        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
+        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
+            "without first calling NextRune()")
     } else if i.lastRuneErr != nil {
-        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
+        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
+            "but the prior call to NextRune() failed")
     }
 
-    result.runes = append(result.runes, i.lastRune)
-    result.cursor.moveByRune(i.lastRune)
-    result.offset++
+    newRuneEnd := i.stackFrame.runeEnd + 1
+
+    // Grow the runes capacity when needed.
+    if cap(i.runes) < newRuneEnd {
+        newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+        copy(newRunes, i.runes)
+        i.runes = newRunes
+    } else {
+        i.runes = i.runes[0:newRuneEnd]
+    }
+
+    i.runes[newRuneEnd-1] = i.lastRune
+    i.stackFrame.runeEnd++
+    i.stackFrame.cursor.moveByRune(i.lastRune)
+    i.stackFrame.offset++
     i.runeRead = false
 }
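
The capacity check in Accept() above is the same amortized doubling that SetRunes(), AddRunes() and the token variants use later in this file: grow to twice the required size, so repeated single-rune appends do not reallocate every time. The idea in isolation (illustrative helper, not part of the commit):

    package main

    import "fmt"

    // grow returns a rune slice with length need, doubling the capacity when
    // the current backing array is too small, so repeated single-rune appends
    // do not reallocate on every call.
    func grow(runes []rune, need int) []rune {
        if cap(runes) < need {
            bigger := make([]rune, need, need*2)
            copy(bigger, runes)
            return bigger
        }
        return runes[0:need]
    }

    func main() {
        r := make([]rune, 0, 2)
        for _, c := range "abcdef" {
            r = grow(r, len(r)+1)
            r[len(r)-1] = c
        }
        fmt.Println(string(r), cap(r)) // "abcdef" with capacity >= 6
    }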
@@ -184,44 +185,30 @@ func (i *API) Accept() {
 // Garbage collection will take care of this automatically.
 // The parent API was never modified, so it can safely be used after disposal
 // as if the lookahead never happened.
-func (i *API) Fork() API {
-    if i.stackLevel > i.state.top {
-        callerPanic("Fork", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "using a non-active API fork (a parent was read, forked or merged, "+
-            "causing this fork to be invalidated)")
-    }
+func (i *API) Fork() int {
+    newStackLevel := i.stackLevel + 1
+    newStackSize := newStackLevel + 1
+
+    // Grow the stack frames capacity when needed.
+    if cap(i.stackFrames) < newStackSize {
+        newFrames := make([]stackFrame, newStackSize, newStackSize*2)
+        copy(newFrames, i.stackFrames)
+        i.stackFrames = newFrames
+    } else {
+        i.stackFrames = i.stackFrames[0:newStackSize]
+    }
 
-    i.DisposeChilds()
-    result := &(i.state.stack[i.stackLevel])
-
-    // Grow the stack storage when needed.
-    newStackSize := i.stackLevel + 2
-    if cap(i.state.stack) < newStackSize {
-        newStack := make([]Result, newStackSize, newStackSize+initialAPIstackDepth)
-        copy(newStack, i.state.stack)
-        i.state.stack = newStack
-    }
-    i.state.stack = i.state.stack[0 : i.stackLevel+1]
-
-    // Create the new fork.
-    child := API{
-        state:      i.state,
-        stackLevel: i.stackLevel + 1,
-        reader:     i.reader,
-    }
-    childResult := Result{
-        cursor: result.cursor,
-        offset: result.offset,
-    }
-    i.state.stack = append(i.state.stack, childResult)
-    //i.state.stack[i.stackLevel+1] = childResult
-
-    // Invalidate parent's last read rune.
+    i.stackLevel++
     i.runeRead = false
 
-    i.state.top = child.stackLevel
+    parent := i.stackFrame
 
-    return child
+    i.stackFrame = &i.stackFrames[i.stackLevel]
+    *i.stackFrame = *parent
+    i.stackFrame.runeStart = parent.runeEnd
+    i.stackFrame.tokenStart = parent.tokenEnd
+
+    return i.stackLevel
 }
 
 // Merge appends the results of a forked child API (runes, tokens) to the
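
With Fork() returning an int stack level and the *API itself acting as the active child, every combinator in this commit follows the same fork/merge/dispose shape. A hedged sketch of that shape as a reusable wrapper (the wrapper itself is illustrative; MatchSeq and MatchAny below do the same thing inline):

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    // tryHandler runs a handler inside a fork, merging its results on success
    // and always disposing the fork to return to the parent stack level.
    func tryHandler(handler tokenize.Handler) tokenize.Handler {
        return func(t *tokenize.API) bool {
            child := t.Fork()
            if handler(t) {
                t.Merge(child)
                t.Dispose(child)
                return true
            }
            t.Dispose(child)
            return false
        }
    }

    func main() {
        result, err := tryHandler(tokenize.A.Rune('H')).Match("Hello")
        if err == nil {
            fmt.Println(result.String()) // "H"
        }
    }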
@@ -232,56 +219,68 @@ func (i *API) Fork() API {
 // be reused for performing another match. This means that all Result data are
 // cleared, but the read cursor position is kept at its current position.
 // This allows a child to feed results in chunks to its parent.
-func (i *API) Merge() {
-    if i.stackLevel == 0 {
-        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on the top-level API")
-    }
-    if i.stackLevel > i.state.top {
-        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
-            "using a non-active API fork (a parent was read, forked or merged, "+
-            "causing this fork to be invalidated)")
-    }
+//
+// Once the child is no longer needed, it can be disposed of by using the
+// method Dispose(), which will return the tokenizer to the parent.
+func (i *API) Merge(stackLevel int) {
+    if stackLevel == 0 {
+        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on the top-level API stack level 0")
+    }
+    if stackLevel != i.stackLevel {
+        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on API stack level %d, but the current stack level is %d "+
+            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
+    }
 
-    result := &(i.state.stack[i.stackLevel])
-    parentResult := &(i.state.stack[i.stackLevel-1])
+    parent := &i.stackFrames[stackLevel-1]
 
-    // // Grow parent rune storage when needed.
-    // newRuneSize := len(parentResult.runes) + len(result.runes)
-    // if cap(parentResult.runes) < newRuneSize {
-    //     newRunes := make([]rune, len(parentResult.runes), 2*newRuneSize)
-    //     copy(newRunes, parentResult.runes)
-    //     parentResult.runes = newRunes
-    //     //fmt.Println("Beefed up runes", i.stackLevel-1, newRuneSize*2)
-    // }
+    // The end of the parent slice aligns with the start of the child slice.
+    // Because of this, to merge, the parent slice can simply be expanded
+    // to include the child slice.
+    // parent: |----------|
+    // child:             |------|
+    // After merge operation:
+    // parent: |-----------------|
+    // child:                    |---> continue reading from here
+    parent.runeEnd = i.stackFrame.runeEnd
+    i.stackFrame.runeStart = i.stackFrame.runeEnd
 
-    // // Grow parent token storage when needed.
-    // newTokenSize := len(parentResult.tokens) + len(result.tokens)
-    // if cap(parentResult.tokens) < newTokenSize {
-    //     newTokens := make([]Token, len(parentResult.tokens), 2*newTokenSize)
-    //     copy(newTokens, parentResult.tokens)
-    //     parentResult.tokens = newTokens
-    //     //fmt.Println("Beefed up tokens", i.stackLevel-1, newTokenSize*2)
-    // }
+    // The same logic applies to tokens.
+    parent.tokenEnd = i.stackFrame.tokenEnd
+    i.stackFrame.tokenStart = i.stackFrame.tokenEnd
 
-    parentResult.runes = append(parentResult.runes, result.runes...)
-    parentResult.tokens = append(parentResult.tokens, result.tokens...)
-    parentResult.offset = result.offset
-    parentResult.cursor = result.cursor
-    i.DisposeChilds()
-    i.Reset()
+    parent.offset = i.stackFrame.offset
+    parent.cursor = i.stackFrame.cursor
+
+    i.stackFrame.err = nil
+    i.runeRead = false
 }
 
-func (i *API) DisposeChilds() {
-    i.state.stack = i.state.stack[:i.stackLevel+1]
-    i.state.top = i.stackLevel
-}
+func (i *API) Dispose(stackLevel int) {
+    if stackLevel == 0 {
+        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on the top-level API stack level 0")
+    }
+    if stackLevel != i.stackLevel {
+        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
+            "on API stack level %d, but the current stack level is %d "+
+            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
+    }
+
+    i.runeRead = false
+    i.stackLevel = stackLevel - 1
+    i.stackFrames = i.stackFrames[:stackLevel]
+    i.stackFrame = &i.stackFrames[stackLevel-1]
+    i.runes = i.runes[0:i.stackFrame.runeEnd]
+    i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
+}
 
 func (i *API) Reset() {
-    result := &(i.state.stack[i.stackLevel])
     i.runeRead = false
-    result.runes = result.runes[:0]
-    result.tokens = result.tokens[:0]
-    result.err = nil
+    i.stackFrame.runeEnd = i.stackFrame.runeStart
+    i.stackFrame.tokenEnd = i.stackFrame.tokenStart
+    i.stackFrame.err = nil
 }
 
 // FlushInput flushes processed input data from the read.Buffer.
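
The diagram in Merge() above is the heart of this optimization round: parent and child frames are just [start:end) windows into one shared backing slice, so merging is an index update rather than a copy. The idea reduced to plain Go (not the actual parsekit types):

    package main

    import "fmt"

    type frame struct{ start, end int }

    func main() {
        backing := []rune("abc")
        parent := frame{start: 0, end: 2} // owns "ab"
        child := frame{start: 2, end: 3}  // owns "c"; starts where the parent ends

        // Merge: the parent window simply expands to cover the child's runes,
        // and the child restarts at its own end, ready to read more input.
        parent.end = child.end
        child.start = child.end

        fmt.Println(string(backing[parent.start:parent.end])) // "abc"
    }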
@@ -291,18 +290,126 @@ func (i *API) Reset() {
 // Note:
 // When writing your own TokenHandler, you normally won't have to call this
 // method yourself. It is automatically called by parsekit when needed.
-func (i API) FlushInput() bool {
-    result := &(i.state.stack[i.stackLevel])
-    if result.offset > 0 {
-        i.reader.Flush(result.offset)
-        result.offset = 0
+func (i *API) FlushInput() bool {
+    if i.stackFrame.offset > 0 {
+        i.reader.Flush(i.stackFrame.offset)
+        i.stackFrame.offset = 0
         return true
     }
     return false
 }
 
-// Result returns the Result struct from the API. The returned struct
-// can be used to retrieve and to modify result data.
-func (i API) Result() *Result {
-    return &(i.state.stack[i.stackLevel])
+func (i *API) String() string {
+    return string(i.Runes())
 }
+
+func (i *API) Runes() []rune {
+    return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
+}
+
+func (i *API) Rune(offset int) rune {
+    return i.runes[i.stackFrame.runeStart+offset]
+}
+
+func (i *API) ClearRunes() {
+    i.runes = i.runes[:i.stackFrame.runeStart]
+    i.stackFrame.runeEnd = i.stackFrame.runeStart
+}
+
+func (i *API) SetRunes(runes ...rune) {
+    // Grow the runes capacity when needed.
+    newRuneEnd := i.stackFrame.runeStart + len(runes)
+    if cap(i.runes) < newRuneEnd {
+        newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+        copy(newRunes, i.runes)
+        i.runes = newRunes
+    } else {
+        i.runes = i.runes[0:newRuneEnd]
+    }
+
+    for offset, r := range runes {
+        i.runes[i.stackFrame.runeStart+offset] = r
+    }
+    i.stackFrame.runeEnd = newRuneEnd
+}
+
+func (i *API) AddRunes(runes ...rune) {
+    // Grow the runes capacity when needed.
+    newRuneEnd := i.stackFrame.runeEnd + len(runes)
+    if cap(i.runes) < newRuneEnd {
+        newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
+        copy(newRunes, i.runes)
+        i.runes = newRunes
+    } else {
+        i.runes = i.runes[0:newRuneEnd]
+    }
+
+    for offset, r := range runes {
+        i.runes[i.stackFrame.runeEnd+offset] = r
+    }
+    i.stackFrame.runeEnd = newRuneEnd
+}
+
+func (i *API) AddString(s string) {
+    i.AddRunes([]rune(s)...)
+}
+
+func (i *API) SetString(s string) {
+    i.SetRunes([]rune(s)...)
+}
+
+func (i *API) Cursor() Cursor {
+    return i.stackFrame.cursor
+}
+
+func (i *API) Tokens() []Token {
+    return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
+}
+
+func (i *API) Token(offset int) Token {
+    return i.tokens[i.stackFrame.tokenStart+offset]
+}
+
+func (i *API) TokenValue(offset int) interface{} {
+    return i.tokens[i.stackFrame.tokenStart+offset].Value
+}
+
+func (i *API) ClearTokens() {
+    i.tokens = i.tokens[:i.stackFrame.tokenStart]
+    i.stackFrame.tokenEnd = i.stackFrame.tokenStart
+}
+
+func (i *API) SetTokens(tokens ...Token) {
+    // Grow the tokens capacity when needed.
+    newTokenEnd := i.stackFrame.tokenStart + len(tokens)
+    if cap(i.tokens) < newTokenEnd {
+        newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
+        copy(newTokens, i.tokens)
+        i.tokens = newTokens
+    } else {
+        i.tokens = i.tokens[0:newTokenEnd]
+    }
+
+    for offset, t := range tokens {
+        i.tokens[i.stackFrame.tokenStart+offset] = t
+    }
+    i.stackFrame.tokenEnd = newTokenEnd
+}
+
+func (i *API) AddTokens(tokens ...Token) {
+    // Grow the tokens capacity when needed.
+    newTokenEnd := i.stackFrame.tokenEnd + len(tokens)
+    if cap(i.tokens) < newTokenEnd {
+        newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
+        copy(newTokens, i.tokens)
+        i.tokens = newTokens
+    } else {
+        i.tokens = i.tokens[0:newTokenEnd]
+    }
+
+    for offset, t := range tokens {
+        i.tokens[i.stackFrame.tokenEnd+offset] = t
+    }
+    i.stackFrame.tokenEnd = newTokenEnd
+}
@@ -18,7 +18,7 @@ func ExampleAPI_NextRune() {
     r, err := api.NextRune()
     fmt.Printf("Rune read from input; %c\n", r)
     fmt.Printf("The error: %v\n", err)
-    fmt.Printf("API results: %q\n", api.Result().String())
+    fmt.Printf("API results: %q\n", api.String())
 
     // Output:
     // Rune read from input; T
@@ -34,38 +34,38 @@ func ExampleAPI_Accept() {
     api.Accept()   // adds 'h' to the API results
     api.NextRune() // reads 'e', but it is not added to the API results
 
-    fmt.Printf("API results: %q\n", api.Result().String())
+    fmt.Printf("API results: %q\n", api.String())
 
     // Output:
     // API results: "Th"
 }
 
-func ExampleAPI_Result() {
+func ExampleAPI_modifyingResults() {
     api := tokenize.NewAPI("")
 
-    result := api.Result()
-
-    result.AddRunes("Some runes")
-    result.AddRunes([]rune{' ', 'a', 'd', 'd', 'e', 'd'})
-    result.AddRunes(' ', 'i', 'n', ' ', "various ways")
-    fmt.Printf("API result first 10 runes: %q\n", api.Result().Runes()[0:10])
-    fmt.Printf("API result runes as string: %q\n", api.Result().String())
+    api.AddString("Some runes")
+    api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
+    api.AddRunes(' ', 'i', 'n', ' ')
+    api.AddString("various ways")
+    fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
+    fmt.Printf("API result runes as string: %q\n", api.String())
 
-    result.SetRunes("new ", "set ", "of ", 'r', 'u', 'n', 'e', 's')
-    fmt.Printf("API result runes as string: %q\n", api.Result().String())
-    fmt.Printf("API result runes: %q\n", api.Result().Runes())
-    fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
+    api.SetString("new ")
+    api.AddString("set ")
+    api.AddString("of ")
+    api.AddRunes('r', 'u', 'n', 'e', 's')
+    fmt.Printf("API result runes as string: %q\n", api.String())
+    fmt.Printf("API result runes: %q\n", api.Runes())
+    fmt.Printf("API third rune: %q\n", api.Rune(2))
 
-    result.AddTokens(tokenize.Token{
-        Runes: []rune("demo 1"),
+    api.AddTokens(tokenize.Token{
         Type:  42,
         Value: "towel"})
-    result.AddTokens(tokenize.Token{
-        Runes: []rune("demo 2"),
+    api.AddTokens(tokenize.Token{
         Type:  73,
         Value: "Zaphod"})
-    fmt.Printf("API result tokens: %v\n", api.Result().Tokens())
-    fmt.Printf("API second result token: %v\n", api.Result().Token(1))
+    fmt.Printf("API result tokens: %v\n", api.Tokens())
+    fmt.Printf("API second result token: %v\n", api.Token(1))
 
     // Output:
     // API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
@@ -84,17 +84,17 @@ func ExampleAPI_Reset() {
     api.Accept()
     api.NextRune()
     api.Accept()
-    fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
     // Reset clears the results, but keeps the cursor position.
     api.Reset()
-    fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
     api.NextRune()
     api.Accept()
     api.NextRune()
     api.Accept()
-    fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
+    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
 
     // Output:
     // API results: "Ve" at line 1, column 3
@@ -104,14 +104,16 @@ func ExampleAPI_Reset() {
 
 func ExampleAPI_Fork() {
     // This custom Handler checks for input 'a', 'b' or 'c'.
-    abcHandler := func(t tokenize.API) bool {
+    abcHandler := func(t *tokenize.API) bool {
         a := tokenize.A
         for _, r := range []rune{'a', 'b', 'c'} {
             child := t.Fork() // fork, so we won't change parent t
-            if a.Rune(r)(child) {
-                child.Merge() // accept results into parent t
+            if a.Rune(r)(t) {
+                t.Merge(child)   // accept results into parent of child
+                t.Dispose(child) // return to the parent level
                 return true // and report a successful match
             }
+            t.Dispose(child) // return to the parent level
         }
         // If we get here, then no match was found. Return false to communicate
         // this to the caller.
@@ -141,25 +143,27 @@ func ExampleAPI_Fork() {
 }
 
 func ExampleAPI_Merge() {
-    tokenHandler := func(t tokenize.API) bool {
+    tokenHandler := func(t *tokenize.API) bool {
         child1 := t.Fork()
-        child1.NextRune() // reads 'H'
-        child1.Accept()
-        child1.NextRune() // reads 'i'
-        child1.Accept()
+        t.NextRune() // reads 'H'
+        t.Accept()
+        t.NextRune() // reads 'i'
+        t.Accept()
 
-        child2 := child1.Fork()
-        child2.NextRune() // reads ' '
-        child2.Accept()
-        child2.NextRune() // reads 'd'
-        child2.Accept()
+        child2 := t.Fork()
+        t.NextRune() // reads ' '
+        t.Accept()
+        t.NextRune() // reads 'm'
+        t.Accept()
+        t.Dispose(child2)
 
-        child1.Merge() // We merge child1, which has read 'H' and 'i' only.
+        t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
+        t.Dispose(child1) // and clean up child1 to return to the parent
         return true
     }
 
     result, _ := tokenize.New(tokenHandler)("Hi mister X!")
-    fmt.Println(result)
+    fmt.Println(result.String())
 
     // Output:
     // Hi
@@ -170,75 +174,157 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
 
     // Fork a few levels.
     child1 := api.Fork()
-    child2 := child1.Fork()
-    child3 := child2.Fork()
-    child4 := child3.Fork()
+    child2 := api.Fork()
+    child3 := api.Fork()
+    child4 := api.Fork()
 
-    // Read some data from child4.
-    r, _ := child4.NextRune()
-    child4.Accept()
+    // Read a rune 'a' from child4.
+    r, _ := api.NextRune()
     AssertEqual(t, 'a', r, "child4 rune 1")
+    api.Accept()
+    AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
 
-    r, _ = child4.NextRune()
-    child4.Accept()
+    // Read another rune 'b' from child4.
+    r, _ = api.NextRune()
     AssertEqual(t, 'b', r, "child4 rune 2")
+    api.Accept()
+    AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
 
-    // Merge it to child3.
-    child4.Merge()
+    // Merge "ab" from child4 to child3.
+    api.Merge(child4)
+    AssertEqual(t, "", api.String(), "child4 runes after first merge")
 
     // Read some more from child4.
-    r, _ = child4.NextRune()
-    child4.Accept()
+    r, _ = api.NextRune()
     AssertEqual(t, 'c', r, "child4 rune 3")
-    AssertEqual(t, "line 1, column 4", child4.Result().Cursor().String(), "cursor child4 rune 3")
+    api.Accept()
+    AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
+    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
 
-    AssertEqual(t, "line 1, column 3", child3.Result().Cursor().String(), "cursor child3 rune 3, before merge of child 4")
+    // Merge "c" from child4 to child3.
+    api.Merge(child4)
 
-    // Again, merge it to child3.
-    child4.Merge()
-    AssertEqual(t, "line 1, column 4", child3.Result().Cursor().String(), "cursor child3 rune 3, after merge of child 4")
+    // And dispose of child4, making child3 the active stack level.
+    api.Dispose(child4)
+
+    // Child3 should now have the combined results "abc" from child4's work.
+    AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
+    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
 
     // Now read some data from child3.
-    r, _ = child3.NextRune()
-    child3.Accept()
-    r, _ = child3.NextRune()
-    child3.Accept()
-    r, _ = child3.NextRune()
-    child3.Accept()
-    AssertEqual(t, 'f', r, "child3 rune 6")
+    r, _ = api.NextRune()
+    AssertEqual(t, 'd', r, "child3 rune 4")
+    api.Accept()
 
-    AssertEqual(t, "abcdef", child3.Result().String(), "child3 total result after rune 6")
+    r, _ = api.NextRune()
+    AssertEqual(t, 'e', r, "child3 rune 5")
+    api.Accept()
+
+    r, _ = api.NextRune()
+    AssertEqual(t, 'f', r, "child3 rune 6")
+    api.Accept()
+
+    AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
 
     // Temporarily grow some new forks from here, but don't use their outcome.
-    child3sub1 := child3.Fork()
-    child3sub1.NextRune()
-    child3sub1.Accept()
-    child3sub1.NextRune()
-    child3sub1.Accept()
-    child3sub2 := child3sub1.Fork()
-    child3sub2.NextRune()
-    child3sub2.Accept()
-    child3sub2.Merge()
+    child3sub1 := api.Fork()
+    api.NextRune()
+    api.Accept()
+    api.NextRune()
+    api.Accept()
+    child3sub2 := api.Fork()
+    api.NextRune()
+    api.Accept()
+    api.Merge(child3sub2)   // do merge sub2 down to sub1
+    api.Dispose(child3sub2) // and dispose of sub2
+    api.Dispose(child3sub1) // but dispose of sub1 without merging
 
-    // Instead merge the pre-forking results from child3 to child2.
-    child3.Merge()
+    // Instead merge the results from before this forking detour from child3
+    // to child2 and dispose of child3.
+    api.Merge(child3)
+    api.Dispose(child3)
 
-    AssertEqual(t, "abcdef", child2.Result().String(), "child2 total result after merge of child3")
-    AssertEqual(t, "line 1, column 7", child2.Result().Cursor().String(), "cursor child2 after merge child3")
+    AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
+    AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
 
-    // Merge child2 to child1.
-    child2.Merge()
+    // Merge child2 to child1 and dispose of it.
+    api.Merge(child2)
+    api.Dispose(child2)
 
     // Merge child1 a few times to the top level api.
-    child1.Merge()
-    child1.Merge()
-    child1.Merge()
-    child1.Merge()
+    api.Merge(child1)
+    api.Merge(child1)
+    api.Merge(child1)
+    api.Merge(child1)
+
+    // And dispose of it.
+    api.Dispose(child1)
 
     // Read some data from the top level api.
     r, _ = api.NextRune()
    api.Accept()
 
-    AssertEqual(t, "abcdefg", api.Result().String(), "api string end result")
-    AssertEqual(t, "line 1, column 8", api.Result().Cursor().String(), "api cursor end result")
+    AssertEqual(t, "abcdefg", api.String(), "api string end result")
+    AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
 }
+
+func TestClearRunes(t *testing.T) {
+    api := tokenize.NewAPI("Laphroaig")
+    api.NextRune()   // Read 'L'
+    api.Accept()     // Add to runes
+    api.NextRune()   // Read 'a'
+    api.Accept()     // Add to runes
+    api.ClearRunes() // Clear the runes, giving us a fresh start.
+    api.NextRune()   // Read 'p'
+    api.Accept()     // Add to runes
+    api.NextRune()   // Read 'h'
+    api.Accept()     // Add to runes
+
+    AssertEqual(t, "ph", api.String(), "api string end result")
+}
+
+func TestMergeScenariosForTokens(t *testing.T) {
+    api := tokenize.NewAPI("")
+
+    token1 := tokenize.Token{Value: 1}
+    token2 := tokenize.Token{Value: 2}
+    token3 := tokenize.Token{Value: 3}
+    token4 := tokenize.Token{Value: 4}
+
+    api.SetTokens(token1)
+    tokens := api.Tokens()
+    AssertEqual(t, 1, len(tokens), "Tokens 1")
+
+    child := api.Fork()
+
+    tokens = api.Tokens()
+    AssertEqual(t, 0, len(tokens), "Tokens 2")
+
+    api.AddTokens(token2)
+
+    // Here we can merge by expanding the token slice on the parent,
+    // because the end of the parent slice and the start of the child
+    // slice align.
+    api.Merge(child)
+    api.Dispose(child)
+
+    tokens = api.Tokens()
+    AssertEqual(t, 2, len(tokens), "Tokens 3")
+
+    child = api.Fork()
+    api.AddTokens(token3)
+    api.Reset()
+    api.AddTokens(token4)
+
+    // Here the merge means that token4 will be copied to the end of
+    // the token slice of the parent, since there's a gap at the place
+    // where token3 used to be.
+    api.Merge(child)
+    api.Dispose(child)
+
+    tokens = api.Tokens()
+    AssertEqual(t, 3, len(tokens), "Tokens 4")
+    AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
+    AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
+    AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
+}
@@ -6,7 +6,7 @@ import (
     "regexp"
     "testing"
 
-    "git.makaay.nl/mauricem/go-parsekit/tokenize"
+    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@@ -110,9 +110,6 @@ func AssertTokenMaker(t *testing.T, test TokenMakerT) {
             if expected.Type != actual.Type {
                 t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
             }
-            if string(expected.Runes) != string(actual.Runes) {
-                t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
-            }
             if expected.Value != actual.Value {
                 t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
             }
@@ -1,4 +1,4 @@
-package tokenize2
+package tokenize
 
 import (
     "strings"
@@ -7,11 +7,11 @@ package tokenize
 // A Handler function gets an API as its input and returns a boolean to
 // indicate whether or not it found a match on the input. The API is used
 // for retrieving input data to match against and for reporting back results.
-type Handler func(t API) bool
+type Handler func(t *API) bool
 
 // Match is syntactic sugar that allows you to write a construction like
 // NewTokenizer(handler).Execute(input) as handler.Match(input).
-func (handler Handler) Match(input interface{}) (*Result, error) {
+func (handler Handler) Match(input interface{}) (*API, error) {
     tokenizer := New(handler)
     return tokenizer(input)
 }
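
Every Handler in this commit migrates mechanically from the old value signature func(t API) bool to func(t *API) bool. A self-contained sketch of a custom handler in the new style, modelled on MatchRuneByCallback further down:

    package main

    import (
        "fmt"
        "unicode"

        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    // matchUpper matches a single upper case rune.
    // Before this commit, its signature would have been func(t tokenize.API) bool.
    var matchUpper tokenize.Handler = func(t *tokenize.API) bool {
        r, err := t.NextRune()
        if err == nil && unicode.IsUpper(r) {
            t.Accept()
            return true
        }
        return false
    }

    func main() {
        result, err := matchUpper.Match("Go")
        if err == nil {
            fmt.Println(result.String()) // "G"
        }
    }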
@@ -4,7 +4,7 @@ import (
     "fmt"
     "testing"
 
-    "git.makaay.nl/mauricem/go-parsekit/tokenize"
+    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
 )
 
 func TestSyntacticSugar(t *testing.T) {
@@ -230,7 +230,7 @@ var A = struct {
     Lower:        MatchUnicodeLower(),
     Upper:        MatchUnicodeUpper(),
     HexDigit:     MatchHexDigit(),
-    Octet:        MatchOctet(false),
+    Octet:        MatchOctet(true),
     IPv4:         MatchIPv4(true),
     IPv4CIDRMask: MatchIPv4CIDRMask(true),
     IPv4Netmask:  MatchIPv4Netmask(true),
@@ -306,7 +306,7 @@ var T = struct {
     Float64    func(interface{}, Handler) Handler
     Boolean    func(interface{}, Handler) Handler
     ByValue    func(toktype interface{}, handler Handler, value interface{}) Handler
-    ByCallback func(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler
+    ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler
     Group      func(interface{}, Handler) Handler
 }{
     Str: MakeStrLiteralToken,
@@ -405,9 +405,9 @@ func MatchUnicodeSpace() Handler {
 // Note that the callback function matches the signature of the unicode.Is* functions,
 // so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
 func MatchRuneByCallback(callback func(rune) bool) Handler {
-    return func(t API) bool {
-        input, err := t.NextRune()
-        if err == nil && callback(input) {
+    return func(t *API) bool {
+        r, err := t.NextRune()
+        if err == nil && callback(r) {
             t.Accept()
             return true
         }
@@ -422,9 +422,9 @@ func MatchEndOfLine() Handler {
 
 // MatchStr creates a Handler that matches the input against the provided string.
 func MatchStr(expected string) Handler {
-    var handlers = []Handler{}
+    var handlers = make([]Handler, 0, len(expected))
     for _, r := range expected {
         handlers = append(handlers, MatchRune(r))
     }
     return MatchSeq(handlers...)
 }
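
MatchStr preallocates capacity using len(expected), which counts bytes rather than runes; that is why the loop still appends instead of indexing, since ranging over a Go string yields byte offsets and multi-byte runes would leave gaps in a byte-indexed slice. A quick demonstration:

    package main

    import "fmt"

    func main() {
        for i, r := range "¡ök" {
            fmt.Printf("byte offset %d -> %q\n", i, r)
        }
        // Output:
        // byte offset 0 -> '¡'
        // byte offset 2 -> 'ö'
        // byte offset 4 -> 'k'
    }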
@@ -453,16 +453,20 @@ func MatchOptional(handler Handler) Handler {
 // applied in their exact order. Only if all Handlers apply, the sequence
 // reports a successful match.
 func MatchSeq(handlers ...Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         child := t.Fork()
         for _, handler := range handlers {
-            subchild := child.Fork()
-            if !handler(subchild) {
+            subchild := t.Fork()
+            if !handler(t) {
+                t.Dispose(subchild)
+                t.Dispose(child)
                 return false
             }
-            subchild.Merge()
+            t.Merge(subchild)
+            t.Dispose(subchild)
         }
-        child.Merge()
+        t.Merge(child)
+        t.Dispose(child)
         return true
     }
 }
@@ -471,14 +475,17 @@ func MatchSeq(handlers ...Handler) Handler {
 // can be applied. They are applied in their provided order. The first Handler
 // that applies is used for reporting back a match.
 func MatchAny(handlers ...Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         for _, handler := range handlers {
             child := t.Fork()
-            if handler(child) {
-                child.Merge()
+            if handler(t) {
+                t.Merge(child)
+                t.Dispose(child)
                 return true
             }
+            t.Dispose(child) // TODO switch to Reset() and move forking outside the loop?
         }
         return false
     }
 }
@@ -487,10 +494,13 @@ func MatchAny(handlers ...Handler) Handler {
 // the current input. If it does, then a failed match will be reported. If it
 // does not, then the next rune from the input will be reported as a match.
 func MatchNot(handler Handler) Handler {
-    return func(t API) bool {
-        if handler(t.Fork()) {
+    return func(t *API) bool {
+        child := t.Fork()
+        if handler(t) {
+            t.Dispose(child)
             return false
         }
+        t.Dispose(child)
         _, err := t.NextRune()
         if err == nil {
             t.Accept()
@@ -568,28 +578,30 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
     if max >= 0 && min > max {
         callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
     }
-    return func(t API) bool {
+    return func(t *API) bool {
         total := 0
 
         // Check for the minimum required amount of matches.
+        child := t.Fork()
         for total < min {
             total++
-            child := t.Fork()
-            if !handler(child) {
+            if !handler(t) {
+                t.Dispose(child)
                 return false
             }
-            child.Merge()
         }
 
         // No specified max: include the rest of the available matches.
         // Specified max: include the rest of the available matches, up to the max.
+        //child.Merge()
         for max < 0 || total < max {
             total++
-            child := t.Fork()
-            if !handler(child) {
+            if !handler(t) {
                 break
             }
-            child.Merge()
         }
+        t.Merge(child)
+        t.Dispose(child)
         return true
     }
 }
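
A usage sketch for the min/max matcher (MatchOctet() below builds its digit matcher exactly this way); the Match() helper comes from the Handler changes earlier in this diff, and the exact error behaviour on a failed match is assumed:

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    func main() {
        // Match at least 1 and at most 3 digits.
        max3Digits := tokenize.MatchMinMax(1, 3, tokenize.MatchDigit())
        result, err := max3Digits.Match("2021")
        if err == nil {
            fmt.Println(result.String()) // "202": matching stops at the max of 3
        }
    }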
@@ -607,10 +619,13 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
 // applied. If the handler applies, but the except Handler as well, then the match
 // as a whole will be treated as a mismatch.
 func MatchExcept(handler Handler, except Handler) Handler {
-    return func(t API) bool {
-        if except(t.Fork()) {
+    return func(t *API) bool {
+        child := t.Fork()
+        if except(t) {
+            t.Dispose(child)
             return false
         }
+        t.Dispose(child)
         return handler(t)
     }
 }
|
@ -620,11 +635,12 @@ func MatchExcept(handler Handler, except Handler) Handler {
|
|||
// When both handlers match, the match for the handler is accepted and the match
|
||||
// for the lookAhead handler is ignored.
|
||||
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
|
||||
return func(t API) bool {
|
||||
return func(t *API) bool {
|
||||
if handler(t) {
|
||||
child := t.Fork()
|
||||
if handler(child) && lookAhead(child.Fork()) {
|
||||
child.Merge()
|
||||
return true
|
||||
result := lookAhead(t)
|
||||
t.Dispose(child)
|
||||
return result
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@@ -635,11 +651,12 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
 // If the handler matches and the lookAhead handler doesn't, then the match for
 // the handler is accepted.
 func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
-    return func(t API) bool {
-        child := t.Fork()
-        if handler(child) && !lookAhead(child.Fork()) {
-            child.Merge()
-            return true
+    return func(t *API) bool {
+        if handler(t) {
+            child := t.Fork()
+            result := !lookAhead(t)
+            t.Dispose(child)
+            return result
         }
         return false
     }
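
A hedged example of the two lookahead combinators, built only from constructors that appear in this diff (MatchDigit, MatchNotFollowedBy); it matches a digit only when no digit follows, and assumes Match() reports a mismatch through its error return:

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    func main() {
        lastDigit := tokenize.MatchNotFollowedBy(tokenize.MatchDigit(), tokenize.MatchDigit())
        if result, err := lastDigit.Match("7x"); err == nil {
            fmt.Println(result.String()) // "7", since 'x' is not a digit
        }
        if _, err := lastDigit.Match("77"); err != nil {
            fmt.Println("no match: a digit follows") // the lookahead rejects this
        }
    }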
@@ -654,14 +671,14 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
 //
 // Without flushing the input, the input reader will allocate memory
 // during the parsing process, eventually enough to hold the full input
-// in memory. By wrapping Handlers with DoFlushInput, you can tell parsekit
+// in memory. By wrapping Handlers with an input flusher, you can tell parsekit
 // that the accumulated input so far will no longer be needed, allowing
 // this input to be flushed from memory.
 //
 // Rule of thumb is: only use it when you have to actually fix a memory
 // hogging issue for your use case.
 func MakeInputFlusher(handler Handler) Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         if handler(t) {
             t.FlushInput()
             return true
@@ -689,11 +706,12 @@ func MatchIntegerBetween(min int64, max int64) Handler {
         callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
     }
     digits := MatchSigned(MatchDigits())
-    return func(t API) bool {
+
+    return func(t *API) bool {
         if !digits(t) {
             return false
         }
-        value, _ := strconv.ParseInt(t.Result().String(), 10, 64)
+        value, _ := strconv.ParseInt(t.String(), 10, 64)
         if value < min || value > max {
             return false
         }
@@ -705,9 +723,10 @@ func MatchIntegerBetween(min int64, max int64) Handler {
 // has been reached. This Handler will never produce output. It only reports
 // a successful or a failing match through its boolean return value.
 func MatchEndOfFile() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         child := t.Fork()
-        _, err := child.NextRune()
+        _, err := t.NextRune()
+        t.Dispose(child)
         return err == io.EOF
     }
 }
@@ -723,7 +742,7 @@ func MatchUntilEndOfLine() Handler {
 // read from the input. Invalid runes on the input are replaced with the UTF8
 // replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
 func MatchAnyRune() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         _, err := t.NextRune()
         if err == nil {
             t.Accept()
@@ -736,7 +755,7 @@ func MatchAnyRune() Handler {
 // MatchValidRune creates a Handler function that checks if a valid
 // UTF8 rune can be read from the input.
 func MatchValidRune() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         r, err := t.NextRune()
         if err == nil && r != utf8.RuneError {
             t.Accept()
@@ -749,7 +768,7 @@ func MatchValidRune() Handler {
 // MatchInvalidRune creates a Handler function that checks if an invalid
 // UTF8 rune can be read from the input.
 func MatchInvalidRune() Handler {
-    return func(t API) bool {
+    return func(t *API) bool {
         r, err := t.NextRune()
         if err == nil && r == utf8.RuneError {
             t.Accept()
@@ -860,20 +879,20 @@ func MatchHexDigit() Handler {
 // stripped from the octet.
 func MatchOctet(normalize bool) Handler {
     max3Digits := MatchMinMax(1, 3, MatchDigit())
-    return func(t API) bool {
+    return func(t *API) bool {
         if !max3Digits(t) {
             return false
         }
-        value, _ := strconv.ParseInt(t.Result().String(), 10, 16)
+        value, _ := strconv.ParseInt(t.String(), 10, 16)
         if value > 255 {
             return false
         }
         if normalize {
-            runes := t.Result().Runes()
+            runes := t.Runes()
             for len(runes) > 1 && runes[0] == '0' {
                 runes = runes[1:]
             }
-            t.Result().SetRunes(runes)
+            t.SetRunes(runes...)
         }
         return true
     }
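
Usage sketch for the normalizing octet matcher, which the A.Octet shorthand now enables (see the MatchOctet(true) change earlier in this diff); leading zeroes are stripped via SetRunes():

    package main

    import (
        "fmt"

        "git.makaay.nl/mauricem/go-parsekit/tokenize"
    )

    func main() {
        result, err := tokenize.MatchOctet(true).Match("007")
        if err == nil {
            fmt.Println(result.String()) // "7": leading zeroes are stripped
        }
    }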
@@ -909,20 +928,19 @@ func MatchIPv4Netmask(normalize bool) Handler {
	dot := MatchRune('.')
	netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)

	return func(t API) bool {
	return func(t *API) bool {
		if !netmask(t) {
			return false
		}

		// Check if the mask is provided in canonical form (ones followed by zeroes).
		r := t.Result()
		mask := net.IPv4Mask(r.Value(0).(byte), r.Value(1).(byte), r.Value(2).(byte), r.Value(3).(byte))
		// Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes).
		mask := net.IPv4Mask(t.TokenValue(0).(byte), t.TokenValue(1).(byte), t.TokenValue(2).(byte), t.TokenValue(3).(byte))
		ones, bits := mask.Size()
		if ones == 0 && bits == 0 {
			return false
		}

		r.ClearTokens()
		t.ClearTokens()
		return true
	}
}
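The canonical-form check above leans on the standard library: net.IPMask.Size() returns (0, 0) when the mask is not a contiguous run of 1-bits followed by 0-bits. A small illustration (editor's sketch):

	ones, bits := net.IPv4Mask(255, 255, 128, 0).Size() // ones=17, bits=32: canonical mask
	zero, _ := net.IPv4Mask(255, 255, 132, 0).Size()    // zero=0: 132 breaks the run of ones
	_, _, _ = ones, bits, zero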
@@ -942,7 +960,7 @@ func MatchIPv4Net(normalize bool) Handler {
		MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
	ipnet := MatchSeq(ip, slash, mask)

	return func(t API) bool {
	return func(t *API) bool {
		if !ipnet(t) {
			return false
		}
@@ -951,19 +969,18 @@ func MatchIPv4Net(normalize bool) Handler {
			return true
		}

		r := t.Result()
		maskToken := r.Token(1)
		maskToken := t.Token(1)
		if maskToken.Type == "cidr" {
			r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), r.Value(1).(uint8)))
			t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), t.TokenValue(1).(uint8)))
		} else {
			o := strings.Split(r.Value(1).(string), ".")
			o := strings.Split(t.TokenValue(1).(string), ".")
			b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) }
			mask := net.IPv4Mask(b(0), b(1), b(2), b(3))
			bits, _ := mask.Size()
			r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), bits))
			t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), bits))
		}

		r.ClearTokens()
		t.ClearTokens()
		return true
	}
}
@@ -975,7 +992,7 @@ func MatchIPv6(normalize bool) Handler {
	colon := MatchRune(':')
	empty := MatchSeq(colon, colon)

	return func(t API) bool {
	return func(t *API) bool {
		nrOfHextets := 0
		for nrOfHextets < 8 {
			if hextet(t) {
@@ -992,13 +1009,13 @@ func MatchIPv6(normalize bool) Handler {
		}

		// Invalid IPv6, when net.ParseIP() cannot handle it.
		parsed := net.ParseIP(t.Result().String())
		parsed := net.ParseIP(t.String())
		if parsed == nil {
			return false
		}

		if normalize {
			t.Result().SetRunes(parsed.String())
			t.SetString(parsed.String())
		}
		return true
	}
@@ -1017,13 +1034,12 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
		return mask
	}

	return func(t API) bool {
	return func(t *API) bool {
		if !mask(t) {
			return false
		}
		r := t.Result()
		bits, _ := strconv.Atoi(r.String())
		t.Result().SetRunes(fmt.Sprintf("%d", bits))
		bits, _ := strconv.Atoi(t.String())
		t.SetString(fmt.Sprintf("%d", bits))
		return true
	}
}
@@ -1057,13 +1073,15 @@ func MatchIPv6Net(normalize bool) Handler {
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler Handler) Handler {
	return func(t API) bool {
	return func(t *API) bool {
		child := t.Fork()
		if handler(child) {
			child.Reset()
			child.Merge()
		if handler(t) {
			t.Reset()
			t.Merge(child)
			t.Dispose(child)
			return true
		}
		t.Dispose(child)
		return false
	}
}
@@ -1137,14 +1155,16 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
// modified string on output. The return value of the modfunc will replace the
// resulting output.
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
	return func(t API) bool {
	return func(t *API) bool {
		child := t.Fork()
		if handler(child) {
			s := modfunc(child.Result().String())
			child.Result().SetRunes(s)
			child.Merge()
		if handler(t) {
			s := modfunc(t.String())
			t.SetString(s)
			t.Merge(child)
			t.Dispose(child)
			return true
		}
		t.Dispose(child)
		return false
	}
}
@@ -1155,8 +1175,8 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
// escape sequence like "\n" is kept as-is (a backslash character, followed by
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
		literal := t.Result().String()
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
		literal := t.String()
		return literal
	})
}
@@ -1166,9 +1186,9 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
// representation of the read Runes. This string is interpreted, meaning that an
// escape sequence like "\n" is translated to an actual newline control character.
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
		// TODO ERROR HANDLING
		interpreted, _ := interpretString(t.Result().String())
		interpreted, _ := interpretString(t.String())
		return interpreted
	})
}
@@ -1190,9 +1210,9 @@ func interpretString(str string) (string, error) {
// Result, for which the Token.Value is set to a Rune-representation
// of the read Rune.
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
		// TODO ERROR HANDLING --- not a 1 rune input
		return t.Result().Rune(0)
		return t.Rune(0)
	})
}

@@ -1200,9 +1220,9 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a Byte-representation
// of the read Rune.
func MakeByteToken(toktype interface{}, handler Handler) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
		// TODO ERROR HANDLING --- not a 1 byte input
		return byte(t.Result().Rune(0))
		return byte(t.Rune(0))
	})
}

@@ -1406,8 +1426,8 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
}

func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
		value, err := convert(t.Result().String())
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
		value, err := convert(t.String())
		if err != nil {
			// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
			panic(fmt.Sprintf("%s token invalid (%s)", name, err))
@@ -1419,17 +1439,17 @@ func makeStrconvToken(name string, toktype interface{}, handler Handler, convert
// MakeTokenByValue creates a Handler that will add a static Token value
// to the Result.
func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler {
	return MakeTokenByCallback(toktype, handler, func(t API) interface{} { return value })
	return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value })
}

// MakeTokenByCallback creates a Handler that will add a Token to the
// Result, for which the Token.Value is to be generated by the provided
// makeValue() callback function. The function gets the current API as
// its input and must return the token value.
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler {
	return func(t API) bool {
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler {
	return func(t *API) bool {
		child := t.Fork()
		if handler(child) {
		if handler(t) {
			// The token is not added to the child here. The child might have produced its own
			// tokens and we want those to come after the token for the current parsing level.
			// By adding the token to the input API and then merging the child tokens, the order
@@ -1437,12 +1457,14 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
			// e.g. when a parsing hierarchy looks like ("date" ("year", "month" "day")), the
			// tokens will end up in the order "date", "year", "month", "day". When we'd add the
			// token to the child here, the order would have been "year", "month", "day", "date".
			token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
			t.Result().AddTokens(token)
			child.Merge()
			token := Token{Type: toktype, Value: makeValue(t)}
			t.AddTokens(token)
			t.Merge(child)
			t.Dispose(child)

			return true
		}
		t.Dispose(child)
		return false
	}
}
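The ordering rule from the comments above can be made concrete with the token makers from this file (editor's sketch; the date grammar itself is hypothetical):

	// A "date" token wrapping "year", "month" and "day" tokens. Because the
	// parent token is added before the child tokens are merged, tokenizing
	// "2019-06-24" yields tokens in the order: date, year, month, day.
	dash := MatchRune('-')
	year := MakeStrLiteralToken("year", MatchMinMax(4, 4, MatchDigit()))
	month := MakeStrLiteralToken("month", MatchMinMax(2, 2, MatchDigit()))
	day := MakeStrLiteralToken("day", MatchMinMax(2, 2, MatchDigit()))
	date := MakeStrLiteralToken("date", MatchSeq(year, dash, month, dash, day))
	_ = date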
@@ -1450,15 +1472,18 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
	return func(t API) bool {
	return func(t *API) bool {
		child := t.Fork()
		if handler(child) {
			result := child.Result()
			token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
			result.SetTokens(token)
			child.Merge()
		if handler(t) {
			tokens := t.Tokens()
			tokensCopy := make([]Token, len(tokens))
			copy(tokensCopy, tokens)
			t.SetTokens(Token{Type: toktype, Value: tokensCopy})
			t.Merge(child)
			t.Dispose(child)
			return true
		}
		t.Dispose(child)
		return false
	}
}
@@ -4,22 +4,32 @@ import (
	"fmt"
	"testing"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func TestCombinatorsTempDebug(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// {"024", a.IPv4CIDRMask, true, "24"},
		// {"024", a.Octet, true, "24"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
	})
}

func TestCombinators(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"abc", c.Not(a.Rune('b')), true, "a"},
		{"bcd", c.Not(a.Rune('b')), false, ""},
		{"bcd", c.Not(a.Rune('b')), false, ""},
		{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Not(a.Rune('b')), false, ""},
		{"abc not", c.Not(a.Rune('b')), true, "a"},
		{"bcd not", c.Not(a.Rune('b')), false, ""},
		{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
@@ -53,6 +63,7 @@ func TestCombinators(t *testing.T) {
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
		{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
@@ -62,8 +73,20 @@ func TestCombinators(t *testing.T) {
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
		{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
		{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
		{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
		{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
		{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
	})
}
@@ -110,8 +133,10 @@ func TestAtoms(t *testing.T) {
		{"\xbc with AnyRune", a.AnyRune, true, "�"},
		{"", a.AnyRune, false, ""},
		{"⌘", a.ValidRune, true, "⌘"},
		{"\xbc with ValidRune", a.ValidRune, false, "�"},
		{"\xbc with ValidRune", a.ValidRune, false, ""},
		{"", a.ValidRune, false, ""},
		{"\xbc with InvalidRune", a.InvalidRune, true, "�"},
		{"ok with InvalidRune", a.InvalidRune, false, ""},
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
@@ -225,38 +250,73 @@ func TestAtoms(t *testing.T) {
		{"0", a.IntegerBetween(-10, 10), true, "0"},
		{"10", a.IntegerBetween(-10, 10), true, "10"},
		{"11", a.IntegerBetween(0, 10), false, ""},
		{"fifteen", a.IntegerBetween(0, 10), false, ""},
	})
}

func TestIPv4Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// Not normalized octet.
		{"0X", tokenize.MatchOctet(false), true, "0"},
		{"00X", tokenize.MatchOctet(false), true, "00"},
		{"000X", tokenize.MatchOctet(false), true, "000"},
		{"10X", tokenize.MatchOctet(false), true, "10"},
		{"010X", tokenize.MatchOctet(false), true, "010"},
		{"255123", tokenize.MatchOctet(false), true, "255"},
		{"256123", tokenize.MatchOctet(false), false, ""},
		{"300", tokenize.MatchOctet(false), false, ""},

		// Octet.
		{"0", tokenize.MatchOctet(false), true, "0"},
		{"02", tokenize.MatchOctet(false), true, "02"},
		{"003", tokenize.MatchOctet(false), true, "003"},
		{"256", tokenize.MatchOctet(false), false, ""},
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "00"},
		{"000X", a.Octet, true, "000"},
		{"00X", a.Octet, true, "0"},
		{"000X", a.Octet, true, "0"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "010"},
		{"010X", a.Octet, true, "10"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},

		// IPv4 address.
		{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
		{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},

		// IPv4 CIDR netmask.
		{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
		{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
		{"0", a.IPv4CIDRMask, true, "0"},
		{"00", a.IPv4CIDRMask, true, "0"},
		{"000", a.IPv4CIDRMask, true, "0"},
		{"32", a.IPv4CIDRMask, true, "32"},
		{"032", a.IPv4CIDRMask, true, "32"},
		{"33", a.IPv4CIDRMask, false, ""},

		// IPv4 netmask in dotted quad format.
		{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
		{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
		{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)

		// IPv4 address + CIDR or dotted quad netmask.
		{"192.168.6.123", a.IPv4Net, false, ""},
		{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
		{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
		{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
		{"10.0.0.10/16.0.0.0", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
		{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
	})
}
@@ -292,7 +352,10 @@ func TestIPv6Atoms(t *testing.T) {
func TestModifiers(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"missed me!", m.Drop(a.Rune('w')), false, ""},
		{"where are you?", m.Drop(a.Rune('w')), true, ""},
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
@@ -300,6 +363,7 @@ func TestModifiers(t *testing.T) {
		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
		{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	})
@@ -323,64 +387,99 @@ func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
		{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},

		// I don't check the returned error here, but it's good enough to see that the parsing
		// stopped after the illegal \g escape sequence.
		{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Runes: []rune("R"), Value: byte('R')},
			{Type: "bar", Runes: []rune("O"), Value: byte('O')},
			{Type: "bar", Runes: []rune("C"), Value: byte('C')},
			{Type: "bar", Runes: []rune("K"), Value: byte('K')},
			{Type: "bar", Runes: []rune("S"), Value: byte('S')},
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},

		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},

		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},

		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},

		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},

		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Runes: []rune("1"), Value: true},
			{Type: "P", Runes: []rune("t"), Value: true},
			{Type: "P", Runes: []rune("T"), Value: true},
			{Type: "P", Runes: []rune("true"), Value: true},
			{Type: "P", Runes: []rune("TRUE"), Value: true},
			{Type: "P", Runes: []rune("True"), Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},

		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Runes: []rune("0"), Value: false},
			{Type: "P", Runes: []rune("f"), Value: false},
			{Type: "P", Runes: []rune("F"), Value: false},
			{Type: "P", Runes: []rune("false"), Value: false},
			{Type: "P", Runes: []rune("FALSE"), Value: false},
			{Type: "P", Runes: []rune("False"), Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},

		{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
	})
}
func TestTokenGroup_Match(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))

	api, err := tokenizer("xxxxx")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Length of tokens slice")
	contained := tokens[0].Value.([]tokenize.Token)
	AssertEqual(t, 3, len(contained), "Length of contained tokens")
	AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
	AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
	AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
}

func TestTokenGroup_Mismatch(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	tokenizer := tokenize.New(tok.Group("Group",
		c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())

	api, err := tokenizer("12345")
	AssertTrue(t, err == nil, "Tokenizer result")
	tokens := api.Tokens()
	AssertEqual(t, 0, len(tokens), "Length of tokens slice")
}

// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
@@ -1,155 +0,0 @@
package tokenize

import (
	"fmt"
)

// Result is a struct that is used for holding tokenizer results as produced
// by a tokenize.Handler. It also provides the API that Handlers and Parsers
// can use to store and retrieve the results.
type Result struct {
	runes  []rune  // runes as added to the result by tokenize.Handler functions
	tokens []Token // Tokens as added to the result by tokenize.Handler functions
	cursor Cursor  // current read cursor position, relative to the start of the file
	offset int     // current rune offset relative to the Reader's sliding window
	err    error   // can be used by a Handler to report a specific issue with the input
}

// Token defines a lexical token as produced by tokenize.Handlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
	Runes []rune      // the runes that make up the token
	Type  interface{} // optional token type, can be any type that a parser author sees fit
	Value interface{} // optional token value, of any type as well
}

func (t Token) String() string {
	tokenType := ""
	if t.Type != nil {
		tokenType = fmt.Sprintf("%v", t.Type)
	}

	value := ""
	if t.Value != nil {
		switch t.Value.(type) {
		case []*Token:
			return fmt.Sprintf("%v%v", tokenType, t.Value)
		case string:
			value = fmt.Sprintf("%q", t.Value)
		case rune:
			value = fmt.Sprintf("%v", t.Value)
		case bool:
			value = fmt.Sprintf("%v", t.Value)
		default:
			value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
		}
	}

	return fmt.Sprintf("%v(%s)", tokenType, value)
}

// newResult initializes an empty Result struct.
func newResult() Result {
	return Result{}
}

// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
	r.runes = []rune{}
}

// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s ...interface{}) {
	r.ClearRunes()
	r.addRunes("SetRunes", s...)
}

// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(set ...interface{}) {
	r.addRunes("AddRunes", set...)
}

func (r *Result) addRunes(name string, set ...interface{}) {
	for _, s := range set {
		switch s := s.(type) {
		case string:
			r.runes = append(r.runes, []rune(s)...)
		case []rune:
			r.runes = append(r.runes, s...)
		case rune:
			r.runes = append(r.runes, s)
		default:
			callerPanic(name, "tokenize.Result.{name}(): unsupported type '%T' used at {caller}", s)
		}
	}
}

// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
	return r.runes
}

// Rune retrieves a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
	return r.runes[idx]
}

// String returns the Runes from the Result as a string.
func (r *Result) String() string {
	return string(r.runes)
}

// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
	r.tokens = []Token{}
}

// SetTokens replaces the Tokens from the Result with the provided tokens.
func (r *Result) SetTokens(tokens ...Token) {
	r.tokens = tokens
}

// AddTokens is used to add Tokens to the Result.
func (r *Result) AddTokens(tokens ...Token) {
	r.tokens = append(r.tokens, tokens...)
}

// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []Token {
	return r.tokens
}

// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) Token {
	return r.tokens[idx]
}

// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
	values := make([]interface{}, len(r.tokens))
	for i, tok := range r.tokens {
		values[i] = tok.Value
	}
	return values
}

// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
	return r.tokens[idx].Value
}

// Cursor retrieves the read cursor from the Result. This is the first
// cursor position after the runes that were read and accepted by the Handler.
func (r *Result) Cursor() Cursor {
	return r.cursor
}
@@ -1,58 +0,0 @@
package tokenize_test

import (
	"fmt"
	"strings"
	"testing"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func ExampleToken() {
	t0 := tokenize.Token{}

	t1 := tokenize.Token{
		Type:  "Number",
		Value: 224,
	}

	const TName = 1

	t2 := tokenize.Token{
		Type:  TName,
		Value: "John",
	}

	t3 := tokenize.Token{
		Value: 42,
	}

	fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)

	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}

func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("Testing"))
	i.Result().SetRunes("string")
	AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
	i.Result().SetRunes([]rune("rune slice"))
	AssertEqual(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
	i.Result().SetRunes('X')
	AssertEqual(t, "X", string(i.Result().String()), "i.Result() with rune input")
}

func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := tokenize.NewAPI(strings.NewReader("Testing"))
			i.Result().SetRunes(1234567)
		},
		Regexp: true,
		Expect: `tokenize\.Result\.SetRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
	})
}
@@ -1,4 +1,4 @@
package tokenize2
package tokenize

import (
	"fmt"
@@ -1,9 +1,9 @@
package tokenize2_test
package tokenize_test

import (
	"fmt"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func ExampleToken_String() {
@@ -9,7 +9,7 @@ import (
// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// Result struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*Result, error)
type Func func(input interface{}) (*API, error)

// New instantiates a new tokenizer.
//
@@ -28,7 +28,7 @@ type Func func(input interface{}) (*Result, error)
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*Result, error) {
	return func(input interface{}) (*API, error) {
		api := NewAPI(input)
		ok := tokenHandler(api)

@@ -36,6 +36,6 @@ func New(tokenHandler Handler) Func {
		err := fmt.Errorf("mismatch at %s", Cursor{})
		return nil, err
	}
	return api.Result(), nil
	return api, nil
}
}
@@ -7,7 +7,7 @@ import (
	"testing"
	"unicode/utf8"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// TODO For error handling, it would be really cool if for example the
@@ -55,7 +55,7 @@ func ExampleNew() {

func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	api := makeTokenizeAPI()
	r, _ := (&api).NextRune()
	r, _ := api.NextRune()
	AssertEqual(t, 'T', r, "first rune")
}

@@ -67,7 +67,7 @@ func TestInputCanAcceptRunesFromReader(t *testing.T) {
	i.Accept()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
	AssertEqual(t, "Tes", i.String(), "i.String()")
}

func TestCallingNextRuneTwice_Panics(t *testing.T) {
@@ -78,52 +78,92 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
			i.NextRune()
		},
		Regexp: true,
		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
			`without a prior call to Accept\(\)`,
	})
}

func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	input := makeTokenizeAPI()
	api := makeTokenizeAPI()
	AssertPanic(t, PanicT{
		Function: (&input).Accept,
		Function: api.Accept,
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
			`without first calling NextRune\(\)`,
	})
}

func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
	api := tokenize.NewAPI("")
	AssertPanic(t, PanicT{
		Function: func() {
			api.NextRune()
			api.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
			`, but the prior call to NextRune\(\) failed`,
	})
}

func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Merge()
			i.Merge(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			f := i.Fork()
			i.NextRune()
			f.Merge()
			child := i.Fork()
			i.Fork()
			i.Merge(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			f := i.Fork()
			g := f.Fork()
			i.Fork()
			g.Merge()
			i.Dispose(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Dispose(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Fork()
			g := i.Fork()
			i.Fork()
			i.Merge(g)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestForkingInput_ClearsLastRune(t *testing.T) {
@@ -135,26 +175,26 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
			i.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

func TestAccept_UpdatesCursor(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
	i.NextRune() // read "\n", cursor ends up at start of new line
	i.Accept()
	AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}

func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
@@ -167,16 +207,17 @@ func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
}
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	f := i.Fork()
	f.NextRune()
	f.Accept()
	r, err := f.NextRune()
	child := i.Fork()
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	r, err = i.NextRune()
	i.Dispose(child) // brings the read offset back to the start
	r, err = i.NextRune() // so here we should see the same rune
	AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
	AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}

func makeTokenizeAPI() tokenize.API {
func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}
@@ -5,33 +5,33 @@ import (
)

func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// TODO FIXME Speed change
	// Create input, accept the first rune.
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
	AssertEqual(t, "T", i.String(), "accepted rune in input")
	// Fork
	f := i.Fork()
	AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
	AssertEqual(t, 1, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, f.state.stack[f.stackLevel].offset, "child offset")
	child := i.Fork()
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "child offset")
	// Accept two runes via fork.
	f.NextRune()
	f.Accept() // e
	f.NextRune()
	f.Accept() // s
	AssertEqual(t, "es", f.Result().String(), "result runes in fork")
	AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
	AssertEqual(t, 3, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, f.state.stack[f.stackLevel].offset, "child offset")
	i.NextRune()
	i.Accept() // e
	i.NextRune()
	i.Accept() // s
	AssertEqual(t, "es", i.String(), "result runes in fork")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "child offset")
	// Merge fork back into parent
	f.Merge()
	AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.state.stack[i.stackLevel].offset, "parent offset")
	i.Merge(child)
	i.Dispose(child)
	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}

func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@@ -39,86 +39,83 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	f1.NextRune()
	f1.Accept()
	f2 := f1.Fork()
	f2.NextRune()
	f2.Accept()
	// TODO FIXME Speed changes
	// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
	// AssertEqual(t, 1, i.result.offset, "i.offset A")
	// AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
	// AssertEqual(t, 2, f1.result.offset, "f1.offset A")
	// AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
	// AssertEqual(t, 3, f2.result.offset, "f2.offset A")
	// f2.Merge()
	// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
	// AssertEqual(t, 1, i.result.offset, "i.offset B")
	// AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
	// AssertEqual(t, 3, f1.result.offset, "f1.offset B")
	// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
	// AssertEqual(t, 3, f2.result.offset, "f2.offset B")
	// f1.Merge()
	// AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
	// AssertEqual(t, 3, i.result.offset, "i.offset C")
	// AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
	// AssertEqual(t, 3, f1.result.offset, "f1.offset C")
	// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
	// AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}

func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
	i := NewAPI("Testing")
	f1 := i.Fork()
	f2 := f1.Fork()
	//f3 := f2.Fork()
	f2.Fork()
	f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
	//f5 := f4.Fork()
	f4.Fork()
	// TODO FIXME Speed changes
	// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	// AssertEqual(t, true, i.child == &f1, "i.child == f1")
	// AssertEqual(t, true, f1.parent == &i, "f1.parent == i")
	// AssertEqual(t, true, f1.child == &f4, "f1.child == f4")
	// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	// AssertEqual(t, true, f4.parent == &f1, "f4.parent == f1")
	// AssertEqual(t, true, f4.child == &f5, "f4.child == f5")
	// AssertEqual(t, true, f5.parent == &f4, "f5.parent == f4")
	// AssertEqual(t, true, f5.child == nil, "f5.child == nil")

	i.NextRune()

	// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
	// AssertEqual(t, true, i.child == nil, "i.child == nil")
	// AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
	// AssertEqual(t, true, f1.child == nil, "f1.child == nil")
	// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
	// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
	// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
	// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
	// AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
	// AssertEqual(t, true, f4.child == nil, "f4.child == nil")
	// AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
	// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
	i.Accept()
	f2 := i.Fork()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "s", i.String(), "f2 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
	i.Merge(f2)
	i.Dispose(f2)
	AssertEqual(t, "es", i.String(), "f1 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
	i.Merge(f1)
	i.Dispose(f1)
	AssertEqual(t, "Tes", i.String(), "top-level API String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
}

func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	// TODO FIXME Speed changes
	i := NewAPI("Testing")
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	// AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
	i.Accept()
	// AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
	// AssertEqual(t, 1, i.result.offset, "API.result.offset")
	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}

func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Flushing without any read data is okay. FlushInput() will return
	// false in this case, and nothing else happens.
	AssertTrue(t, api.FlushInput() == false, "flush input at start")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")

	// Read offset is now zero, but reading should continue after "co".
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertEqual(t, "cool", api.String(), "end result")
}

func TestInputFlusherWrapper(t *testing.T) {
	runeA := A.Rune('a')
	flushB := C.FlushInput(A.Rune('b'))
	api := NewAPI("abaab")
	runeA(api)
	AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
	AssertEqual(t, "a", api.String(), "runes after 1 read")
	flushB(api)
	AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
	AssertEqual(t, "ab", api.String(), "runes after 2 reads")
	runeA(api)
	AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
	AssertEqual(t, "aba", api.String(), "runes after 3 reads")
	runeA(api)
	AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
	AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
	flushB(api)
	AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
	AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
}

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
374 tokenize2/api.go
@@ -1,374 +0,0 @@
package tokenize2

import (
	"git.makaay.nl/mauricem/go-parsekit/read"
)

// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when not needed anymore (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
|
||||
// FORKING OPERATION FOR EASY LOOKEAHEAD SUPPORT:
|
||||
//
|
||||
// Sometimes, we must be able to perform a lookahead, which might either
|
||||
// succeed or fail. In case of a failing lookahead, the state of the
|
||||
// API must be brought back to the original state, so we can try
|
||||
// a different route.
|
||||
//
|
||||
// The way in which this is supported, is by forking an API struct by
|
||||
// calling method Fork(). This will return a forked child API, with
|
||||
// empty result data, but using the same read cursor position as the
|
||||
// forked parent.
|
||||
//
|
||||
// After forking, the same interface as described for BASIC OPERATION can be
|
||||
// used to fill the results. When the lookahead was successful, then
|
||||
// Merge() can be called on the forked child to append the child's results
|
||||
// to the parent's results, and to move the read cursor position to that
|
||||
// of the child.
|
||||
//
|
||||
// When the lookahead was unsuccessful, then the forked child API can
|
||||
// disposed by calling Dispose() on the forked child. This is not mandatory.
|
||||
// Garbage collection will take care of this automatically.
|
||||
// The parent API was never modified, so it can safely be used after disposal
|
||||
// as if the lookahead never happened.
|
||||
//
|
||||
// Opinionized note:
|
||||
// Many tokenizers/parsers take a different approach on lookaheads by using
|
||||
// peeks and by moving the read cursor position back and forth, or by putting
|
||||
// read input back on the input stream. That often leads to code that is
|
||||
// efficient, however, in my opinion, not very intuitive to read. It can also
|
||||
// be tedious to get the cursor position back at the correct position, which
|
||||
// can lead to hard to track bugs. I much prefer this forking method, since
|
||||
// no bookkeeping has to be implemented when implementing a parser.
|
||||
type API struct {
	reader      *read.Buffer // the input data reader
	lastRune    rune         // the rune as retrieved by the last NextRune() call
	lastRuneErr error        // the error for the last NextRune() call
	runeRead    bool         // whether or not a rune was read using NextRune()
	runes       []rune       // the rune stack
	tokens      []Token      // the token stack
	stackFrames []stackFrame // the stack frames, containing stack level-specific data
	stackLevel  int          // the current stack level
	stackFrame  *stackFrame  // the current stack frame
}

type stackFrame struct {
	offset     int // current rune offset relative to the Reader's sliding window
	runeStart  int
	runeEnd    int
	tokenStart int
	tokenEnd   int
	cursor     Cursor

	// TODO
	err error // can be used by a Handler to report a specific issue with the input
}

const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10

// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
	api := &API{
		reader:      read.New(input),
		runes:       make([]rune, 0, initialRuneDepth),
		tokens:      make([]Token, 0, initialTokenDepth),
		stackFrames: make([]stackFrame, 1, initialStackDepth),
	}
	api.stackFrame = &api.stackFrames[0]

	return api
}
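
// exampleBasicOperation is not part of the original file; it is a minimal
// sketch of the NextRune()/Accept() read loop described in the API
// documentation above, assuming that NextRune() returns a non-nil error
// once the end of the input is reached.
func exampleBasicOperation() {
	api := NewAPI("abc")
	for {
		r, err := api.NextRune()
		if err != nil {
			break // no more input to read
		}
		_ = r        // a real Handler would inspect r here
		api.Accept() // append the rune to the results, move the cursor forward
	}
	// api.String() now returns "abc".
}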

// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
	if i.runeRead {
		callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
			"without a prior call to Accept()")
	}

	readRune, err := i.reader.RuneAt(i.stackFrame.offset)
	i.lastRune = readRune
	i.lastRuneErr = err
	i.runeRead = true

	return readRune, err
}

// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
	if !i.runeRead {
		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
			"without first calling NextRune()")
	} else if i.lastRuneErr != nil {
		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
			"but the prior call to NextRune() failed")
	}

	i.runes = append(i.runes, i.lastRune)
	i.stackFrame.runeEnd++
	i.stackFrame.cursor.moveByRune(i.lastRune)
	i.stackFrame.offset++
	i.runeRead = false
}

// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the Handler was successful and you want to add the results
// to the parent API, you can call Merge() on the forked child.
// This will add the results to the results of the parent (runes, tokens).
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed of by calling Dispose() on the forked child. This is not mandatory.
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
	newStackLevel := i.stackLevel + 1
	newStackSize := newStackLevel + 1

	// Grow the stack frames capacity when needed.
	if cap(i.stackFrames) < newStackSize {
		newFrames := make([]stackFrame, newStackSize, newStackSize*2)
		copy(newFrames, i.stackFrames)
		i.stackFrames = newFrames
	} else {
		i.stackFrames = i.stackFrames[0:newStackSize]
	}

	parent := i.stackFrame
	i.stackLevel++
	i.stackFrame = &i.stackFrames[i.stackLevel]
	*i.stackFrame = *parent
	i.stackFrame.runeStart = parent.runeEnd
	i.stackFrame.tokenStart = parent.tokenEnd
	i.runeRead = false

	return i.stackLevel
}
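
// exampleLookahead is not part of the original file; it sketches the
// Fork()/Merge()/Dispose() lookahead pattern that the documentation above
// describes: try to match the literal input "ab" and leave the parent
// level untouched when the match fails.
func exampleLookahead(i *API) bool {
	child := i.Fork() // work on a forked child; the parent stays as-is
	for _, want := range []rune{'a', 'b'} {
		r, err := i.NextRune()
		if err != nil || r != want {
			i.Dispose(child) // lookahead failed, drop the child's work
			return false
		}
		i.Accept()
	}
	i.Merge(child)   // lookahead succeeded, hand the results to the parent
	i.Dispose(child) // and return to the parent stack level
	return true
}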

// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset so the child can
// immediately be reused for performing another match. This means that all
// Result data are cleared, but the read cursor position is kept at its
// current position. This allows a child to feed results in chunks to its
// parent.
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (i *API) Merge(stackLevel int) {
	if stackLevel == 0 {
		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on the top-level API stack level 0")
	}
	if stackLevel != i.stackLevel {
		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on API stack level %d, but the current stack level is %d "+
			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
	}

	parent := &i.stackFrames[stackLevel-1]

	if parent.runeEnd == i.stackFrame.runeStart {
		// The end of the parent slice aligns with the start of the child slice.
		// Because of this, to merge, the parent slice can simply be expanded
		// to include the child slice.
		// parent: |----------|
		// child:             |------|
		// After merge operation:
		// parent: |-----------------|
		// child:                    |---> continue reading from here
		parent.runeEnd = i.stackFrame.runeEnd
		i.stackFrame.runeStart = i.stackFrame.runeEnd
	} else {
		// The end of the parent slice does not align with the start of the
		// child slice. The child slice has to be copied onto the end of
		// the parent slice.
		// parent: |----------|
		// child:                |------|
		// After merge operation:
		// parent: |-----------------|
		// child:                    |---> continue reading from here
		i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
		parent.runeEnd = len(i.runes)
		i.stackFrame.runeStart = parent.runeEnd
		i.stackFrame.runeEnd = parent.runeEnd
	}

	// The same logic applies to tokens.
	if parent.tokenEnd == i.stackFrame.tokenStart {
		parent.tokenEnd = i.stackFrame.tokenEnd
		i.stackFrame.tokenStart = i.stackFrame.tokenEnd
	} else {
		i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
		parent.tokenEnd = len(i.tokens)
		i.stackFrame.tokenStart = parent.tokenEnd
		i.stackFrame.tokenEnd = parent.tokenEnd
	}

	parent.offset = i.stackFrame.offset
	parent.cursor = i.stackFrame.cursor

	i.stackFrame.err = nil
	i.runeRead = false
}
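
// Illustration (not part of the original file) of the two merge scenarios
// above, assuming the parent currently covers runes[0:2]:
//
//   aligned:     the child covers runes[2:4], so parent.runeEnd (2) equals
//                child.runeStart (2) and the parent is simply expanded to
//                runes[0:4] without copying anything.
//
//   not aligned: the child covers e.g. runes[3:4] (such a gap can appear
//                when a child merges, resets and then collects new results),
//                so runes[3:4] must first be copied onto the parent's end
//                before parent.runeEnd can be moved forward.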

func (i *API) Dispose(stackLevel int) {
	if stackLevel == 0 {
		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on the top-level API stack level 0")
	}
	if stackLevel != i.stackLevel {
		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on API stack level %d, but the current stack level is %d "+
			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
	}

	i.runeRead = false
	i.stackLevel = stackLevel - 1
	i.stackFrames = i.stackFrames[:stackLevel]
	i.stackFrame = &i.stackFrames[stackLevel-1]
	i.runes = i.runes[0:i.stackFrame.runeEnd]
	i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}

func (i *API) Reset() {
	i.runeRead = false
	i.stackFrame.runeStart = i.stackFrame.runeEnd
	i.stackFrame.tokenStart = i.stackFrame.tokenEnd
	i.stackFrame.err = nil
}

// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own Handler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
	// result := &(i.state.stack[i.stackLevel])
	if i.stackFrame.offset > 0 {
		i.reader.Flush(i.stackFrame.offset)
		i.stackFrame.offset = 0
		return true
	}
	return false
}
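
// exampleFlushLoop is not part of the original file; it sketches how a
// long-running tokenizing loop could combine Reset() and FlushInput() after
// fully processing each chunk, so the read.Buffer can release input that is
// no longer needed (matchChunk is an assumed, hypothetical Handler).
func exampleFlushLoop(i *API, matchChunk Handler) {
	for matchChunk(i) {
		// ... process i.Runes() and i.Tokens() for this chunk ...
		i.Reset()      // done with these results, keep the cursor position
		i.FlushInput() // drop the already processed input from the buffer
	}
}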

func (i *API) String() string {
	return string(i.Runes())
}

func (i *API) Runes() []rune {
	return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}

func (i *API) Rune(offset int) rune {
	return i.runes[i.stackFrame.runeStart+offset]
}

func (i *API) ClearRunes() {
	i.runes = i.runes[:i.stackFrame.runeStart]
	i.stackFrame.runeEnd = i.stackFrame.runeStart
}

func (i *API) SetRunes(runes ...rune) {
	i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
	i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
}

func (i *API) AddRunes(runes ...rune) {
	i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
	i.stackFrame.runeEnd += len(runes)
}

func (i *API) AddString(s string) {
	i.AddRunes([]rune(s)...)
}

func (i *API) SetString(s string) {
	i.SetRunes([]rune(s)...)
}

func (i *API) Cursor() Cursor {
	return i.stackFrame.cursor
}

func (i *API) Tokens() []Token {
	return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}

func (i *API) Token(offset int) Token {
	return i.tokens[i.stackFrame.tokenStart+offset]
}

func (i *API) TokenValue(offset int) interface{} {
	return i.tokens[i.stackFrame.tokenStart+offset].Value
}

func (i *API) ClearTokens() {
	i.tokens = i.tokens[:i.stackFrame.tokenStart]
	i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}

func (i *API) SetTokens(tokens ...Token) {
	i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
	i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
}

func (i *API) AddTokens(tokens ...Token) {
	i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
	i.stackFrame.tokenEnd += len(tokens)
}
@@ -1,330 +0,0 @@
package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func ExampleNewAPI() {
	tokenize.NewAPI("The input that the API will handle")

	// Output:
}

func ExampleAPI_NextRune() {
	api := tokenize.NewAPI("The input that the API will handle")
	r, err := api.NextRune()
	fmt.Printf("Rune read from input: %c\n", r)
	fmt.Printf("The error: %v\n", err)
	fmt.Printf("API results: %q\n", api.String())

	// Output:
	// Rune read from input: T
	// The error: <nil>
	// API results: ""
}

func ExampleAPI_Accept() {
	api := tokenize.NewAPI("The input that the API will handle")
	api.NextRune() // reads 'T'
	api.Accept()   // adds 'T' to the API results
	api.NextRune() // reads 'h'
	api.Accept()   // adds 'h' to the API results
	api.NextRune() // reads 'e', but it is not added to the API results

	fmt.Printf("API results: %q\n", api.String())

	// Output:
	// API results: "Th"
}

func ExampleAPI_modifyingResults() {
	api := tokenize.NewAPI("")

	api.AddString("Some runes")
	api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
	api.AddRunes(' ', 'i', 'n', ' ')
	api.AddString("various ways")
	fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
	fmt.Printf("API result runes as string: %q\n", api.String())

	api.SetString("new ")
	api.AddString("set ")
	api.AddString("of ")
	api.AddRunes('r', 'u', 'n', 'e', 's')
	fmt.Printf("API result runes as string: %q\n", api.String())
	fmt.Printf("API result runes: %q\n", api.Runes())
	fmt.Printf("API third rune: %q\n", api.Rune(2))

	api.AddTokens(tokenize.Token{
		Type:  42,
		Value: "towel"})
	api.AddTokens(tokenize.Token{
		Type:  73,
		Value: "Zaphod"})
	fmt.Printf("API result tokens: %v\n", api.Tokens())
	fmt.Printf("API second result token: %v\n", api.Token(1))

	// Output:
	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
	// API result runes as string: "Some runes added in various ways"
	// API result runes as string: "new set of runes"
	// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
	// API third rune: 'w'
	// API result tokens: [42("towel") 73("Zaphod")]
	// API second result token: 73("Zaphod")
}

func ExampleAPI_Reset() {
	api := tokenize.NewAPI("Very important input!")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	// Reset clears the results, but keeps the cursor position.
	api.Reset()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	// Output:
	// API results: "Ve" at line 1, column 3
	// API results: "" at line 1, column 3
	// API results: "ry" at line 1, column 5
}

func ExampleAPI_Fork() {
	// This custom Handler checks for input 'a', 'b' or 'c'.
	abcHandler := func(t *tokenize.API) bool {
		a := tokenize.A
		for _, r := range []rune{'a', 'b', 'c'} {
			child := t.Fork() // fork, so we won't change parent t
			if a.Rune(r)(t) {
				t.Merge(child)   // accept the child's results into the parent
				t.Dispose(child) // return to the parent level
				return true      // and report a successful match
			}
			t.Dispose(child) // return to the parent level
		}
		// If we get here, then no match was found. Return false to communicate
		// this to the caller.
		return false
	}

	// Note: a custom Handler is normally not what you need.
	// You can make use of the parser/combinator tooling to make the
	// implementation a lot simpler and to take care of forking at
	// the appropriate places. The handler from above can be replaced with:
	simpler := tokenize.A.RuneRange('a', 'c')

	result, err := tokenize.New(abcHandler)("another test")
	fmt.Println(result, err)
	result, err = tokenize.New(simpler)("curious")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("bang on!")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("not a match")
	fmt.Println(result, err)

	// Output:
	// a <nil>
	// c <nil>
	// b <nil>
	// <nil> mismatch at start of file
}

func ExampleAPI_Merge() {
	tokenHandler := func(t *tokenize.API) bool {
		child1 := t.Fork()
		t.NextRune() // reads 'H'
		t.Accept()
		t.NextRune() // reads 'i'
		t.Accept()

		child2 := t.Fork()
		t.NextRune() // reads ' '
		t.Accept()
		t.NextRune() // reads 'm'
		t.Accept()
		t.Dispose(child2)

		t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
		t.Dispose(child1) // and clean up child1 to return to the parent
		return true
	}

	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
	fmt.Println(result.String())

	// Output:
	// Hi
}

func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
	api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

	// Fork a few levels.
	child1 := api.Fork()
	child2 := api.Fork()
	child3 := api.Fork()
	child4 := api.Fork()

	// Read a rune 'a' from child4.
	r, _ := api.NextRune()
	AssertEqual(t, 'a', r, "child4 rune 1")
	api.Accept()
	AssertEqual(t, "a", api.String(), "child4 runes after rune 1")

	// Read another rune 'b' from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'b', r, "child4 rune 2")
	api.Accept()
	AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")

	// Merge "ab" from child4 to child3.
	api.Merge(child4)
	AssertEqual(t, "", api.String(), "child4 runes after first merge")

	// Read some more from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'c', r, "child4 rune 3")
	api.Accept()
	AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")

	// Merge "c" from child4 to child3.
	api.Merge(child4)

	// And dispose of child4, making child3 the active stack level.
	api.Dispose(child4)

	// Child3 should now have the combined results "abc" from child4's work.
	AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")

	// Now read some data from child3.
	r, _ = api.NextRune()
	AssertEqual(t, 'd', r, "child3 rune 4")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'e', r, "child3 rune 5")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'f', r, "child3 rune 6")
	api.Accept()

	AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")

	// Temporarily create some new forks from here, but don't use their outcome.
	child3sub1 := api.Fork()
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	child3sub2 := api.Fork()
	api.NextRune()
	api.Accept()
	api.Merge(child3sub2)   // do merge sub2 down to sub1
	api.Dispose(child3sub2) // and dispose of sub2
	api.Dispose(child3sub1) // but dispose of sub1 without merging

	// Instead, merge the results from before this forking segue from child3
	// to child2 and dispose of child3.
	api.Merge(child3)
	api.Dispose(child3)

	AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
	AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")

	// Merge child2 to child1 and dispose of it.
	api.Merge(child2)
	api.Dispose(child2)

	// Merge child1 a few times to the top level api.
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)

	// And dispose of it.
	api.Dispose(child1)

	// Read some data from the top level api.
	r, _ = api.NextRune()
	api.Accept()

	AssertEqual(t, "abcdefg", api.String(), "api string end result")
	AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}

func TestClearRunes(t *testing.T) {
	api := tokenize.NewAPI("Laphroaig")
	api.NextRune()   // Read 'L'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'a'
	api.Accept()     // Add to runes
	api.ClearRunes() // Clear the runes, giving us a fresh start.
	api.NextRune()   // Read 'p'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'h'
	api.Accept()     // Add to runes

	AssertEqual(t, "ph", api.String(), "api string end result")
}

func TestMergeScenariosForTokens(t *testing.T) {
	api := tokenize.NewAPI("")

	token1 := tokenize.Token{Value: 1}
	token2 := tokenize.Token{Value: 2}
	token3 := tokenize.Token{Value: 3}
	token4 := tokenize.Token{Value: 4}

	api.SetTokens(token1)
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Tokens 1")

	child := api.Fork()

	tokens = api.Tokens()
	AssertEqual(t, 0, len(tokens), "Tokens 2")

	api.AddTokens(token2)

	// Here we can merge by expanding the token slice on the parent,
	// because the end of the parent slice and the start of the child
	// slice align.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 2, len(tokens), "Tokens 3")

	child = api.Fork()
	api.AddTokens(token3)
	api.Reset()
	api.AddTokens(token4)

	// Here the merge means that token4 will be copied to the end of
	// the token slice of the parent, since there's a gap at the place
	// where token3 used to be.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 3, len(tokens), "Tokens 4")
	AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
	AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
	AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}
@@ -1,118 +0,0 @@
package tokenize2_test

// This file contains some tools that are used for writing tests.

import (
	"regexp"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}

type PanicT struct {
	Function func()
	Regexp   bool
	Expect   string
}

func AssertPanics(t *testing.T, testSet []PanicT) {
	for _, test := range testSet {
		AssertPanic(t, test)
	}
}

func AssertPanic(t *testing.T, p PanicT) {
	defer func() {
		if r := recover(); r != nil {
			mismatch := false
			if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
				mismatch = true
			}
			if !p.Regexp && p.Expect != r.(string) {
				mismatch = true
			}
			if mismatch {
				t.Errorf(
					"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
					p.Expect, r)
			}
		} else {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
		}
	}()
	p.Function()
}

type HandlerT struct {
	Input     string
	Handler   tokenize.Handler
	MustMatch bool
	Expected  string
}

func AssertHandlers(t *testing.T, testSet []HandlerT) {
	for _, test := range testSet {
		AssertHandler(t, test)
	}
}

func AssertHandler(t *testing.T, test HandlerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if test.MustMatch {
		if err != nil {
			t.Errorf("Test %q failed with error: %s", test.Input, err)
		} else if output := result.String(); output != test.Expected {
			t.Errorf("Test %q failed: unexpected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
		}
	} else {
		if err == nil {
			t.Errorf("Test %q failed: should not match, but it did", test.Input)
		}
	}
}

type TokenMakerT struct {
	Input    string
	Handler  tokenize.Handler
	Expected []tokenize.Token
}

func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
	for _, test := range testSet {
		AssertTokenMaker(t, test)
	}
}

func AssertTokenMaker(t *testing.T, test TokenMakerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if err != nil {
		t.Errorf("Test %q failed with error: %s", test.Input, err)
	} else {
		if len(result.Tokens()) != len(test.Expected) {
			t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
		}
		for i, expected := range test.Expected {
			actual := result.Token(i)
			if expected.Type != actual.Type {
				t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
			}
			if expected.Value != actual.Value {
				t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
			}
		}
	}
}
@@ -1,33 +0,0 @@
package tokenize2

import (
	"fmt"
	"runtime"
	"strings"
)

func callerPanic(name, f string, data ...interface{}) {
	filepos := callerBefore(name)
	m := fmt.Sprintf(f, data...)
	m = strings.Replace(m, "{caller}", filepos, -1)
	m = strings.Replace(m, "{name}", name, -1)
	panic(m)
}

func callerBefore(name string) string {
	found := false
	for i := 1; ; i++ {
		pc, file, line, ok := runtime.Caller(i)
		if found {
			return fmt.Sprintf("%s:%d", file, line)
		}
		if !ok {
			return "unknown caller"
		}
		f := runtime.FuncForPC(pc)

		if strings.HasSuffix(f.Name(), "."+name) {
			found = true
		}
	}
}
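
// Not part of the original file: a sketch of the kind of message that
// callerPanic() produces. Calling NextRune() twice without an intermediate
// Accept() would, assuming the offending call sits at mytokenizer.go line 12
// (a hypothetical file), panic with a message like:
//
//   tokenize.API.NextRune(): NextRune() called at /path/to/mytokenizer.go:12
//   without a prior call to Accept()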
@@ -1,45 +0,0 @@
package tokenize2

import (
	"fmt"
	"unicode/utf8"
)

// Cursor represents the position of a cursor in various ways.
type Cursor struct {
	Byte   int // The cursor offset in bytes
	Rune   int // The cursor offset in UTF8 runes
	Column int // The column at which the cursor is (0-indexed)
	Line   int // The line at which the cursor is (0-indexed)
}

// String produces a string representation of the cursor position.
func (c Cursor) String() string {
	if c.Line == 0 && c.Column == 0 {
		return "start of file"
	}
	return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}

// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
	for _, r := range input {
		c.moveByRune(r)
	}
	return c
}

func (c *Cursor) moveByRune(r rune) *Cursor {
	c.Byte += utf8.RuneLen(r)
	c.Rune++
	if r == '\n' {
		c.Column = 0
		c.Line++
	} else {
		c.Column++
	}
	return c
}
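
// Worked example (not part of the original file): moving a fresh Cursor over
// the input "ab\nc" ends with Byte=4, Rune=4, Line=1 and Column=1 (both
// zero-indexed), which String() renders as "line 2, column 2".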
@@ -1,69 +0,0 @@
package tokenize2

import (
	"fmt"
	"testing"
)

func ExampleCursor_move() {
	c := Cursor{}
	fmt.Printf("after initialization : %s\n", c)
	fmt.Printf("after 'some words' : %s\n", c.move("some words"))
	fmt.Printf("after '\\n' : %s\n", c.move("\n"))
	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))

	// Output:
	// after initialization : start of file
	// after 'some words' : line 1, column 11
	// after '\n' : line 2, column 1
	// after '\r\nskip\nlines' : line 4, column 6
}

func ExampleCursor_String() {
	c := Cursor{}
	fmt.Println(c.String())

	c.move("\nfoobar")
	fmt.Println(c.String())

	// Output:
	// start of file
	// line 2, column 7
}

func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
	for _, test := range []struct {
		name   string
		input  []string
		byte   int
		rune   int
		line   int
		column int
	}{
		{"No input at all", []string{""}, 0, 0, 0, 0},
		{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
		{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
		{"One newline", []string{"\n"}, 1, 1, 1, 0},
		{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
		{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
	} {
		c := Cursor{}
		for _, s := range test.input {
			c.move(s)
		}
		if c.Byte != test.byte {
			t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
		}
		if c.Rune != test.rune {
			t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
		}
		if c.Line != test.line {
			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
		}
		if c.Column != test.column {
			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
		}
	}
}
@@ -1,53 +0,0 @@
package tokenize2

// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool

// Match is syntactic sugar that allows you to write a construction like
// New(handler)(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
	tokenizer := New(handler)
	return tokenizer(input)
}

// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
	return MatchAny(handler, otherHandler)
}

// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
	return MatchRep(n, handler)
}

// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
	return MatchSeq(handler, otherHandler)
}

// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
	return MatchSeparated(separator, handler)
}

// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
	return MatchOptional(handler)
}

// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
	return MatchExcept(handler, exceptHandler)
}
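
// exampleExcept is not part of the original file; it is a minimal sketch of
// the Except() sugar from above: match one or more runes, except for the
// exclamation mark, so matching stops right before the '!'.
func exampleExcept() {
	noExcl := C.OneOrMore(A.AnyRune.Except(A.Excl))
	result, _ := noExcl.Match("hello!")
	_ = result // result.String() would be "hello" here
}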
@@ -1,101 +0,0 @@
package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestSyntacticSugar(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
		{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
		{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
		{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
		{"cccccc", a.Rune('c').Optional(), true, "c"},
		{"dddddd", a.Rune('c').Optional(), true, ""},
		{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
		{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
		{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
	})
}

func ExampleHandler_Times() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))

	fmt.Println(phoneNumber.Match("0201234567"))
	// Output:
	// 0201234567 <nil>
}

func ExampleHandler_Then() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))

	fmt.Println(phoneNumber.Match("0208888888"))
	// Output:
	// 0208888888 <nil>
}

func ExampleHandler_Or() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))

	fmt.Println(phoneNumber.Match("+31209876543"))
	fmt.Println(phoneNumber.Match("0031209876543"))
	fmt.Println(phoneNumber.Match("0031020991234"))
	fmt.Println(phoneNumber.Match("0031201234"))
	// Output:
	// +31209876543 <nil>
	// 0031209876543 <nil>
	// <nil> mismatch at start of file
	// <nil> mismatch at start of file
}

func ExampleHandler_SeparatedBy() {
	a, t := tokenize.A, tokenize.T
	csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)

	r, _ := csv.Match("123,456,7,8,9")
	for i, token := range r.Tokens() {
		fmt.Printf("[%d] %v\n", i, token)
	}
	// Output:
	// [0] number((int)123)
	// [1] number((int)456)
	// [2] number((int)7)
	// [3] number((int)8)
	// [4] number((int)9)
}

func ExampleHandler_Optional() {
	c, a := tokenize.C, tokenize.A

	spanish := c.Seq(
		a.Rune('¿').Optional(),
		c.OneOrMore(a.AnyRune.Except(a.Question)),
		a.Rune('?').Optional())

	fmt.Println(spanish.Match("¿Habla español María?"))
	fmt.Println(spanish.Match("Sí, María habla español."))
	// Output:
	// ¿Habla español María? <nil>
	// Sí, María habla español. <nil>
}

func ExampleHandler_Match() {
	r, err := tokenize.A.IPv4.Match("001.002.003.004")
	fmt.Println(r, err)

	r, err = tokenize.A.IPv4.Match("1.2.3")
	fmt.Println(r, err)

	// Output:
	// 1.2.3.4 <nil>
	// <nil> mismatch at start of file
}
File diff suppressed because it is too large
@@ -1,512 +0,0 @@
package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestCombinatorsTempDebug(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// {"024", a.IPv4CIDRMask, true, "24"},
		// {"024", a.Octet, true, "24"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
	})
}

func TestCombinators(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"", c.Not(a.Rune('b')), false, ""},
		{"abc not", c.Not(a.Rune('b')), true, "a"},
		{"bcd not", c.Not(a.Rune('b')), false, ""},
		{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
		{"", c.Max(4, a.Rune('b')), true, ""},
		{"X", c.Max(4, a.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
		{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(a.Rune('d')), false, ""},
		{"X", c.OneOrMore(a.Rune('d')), false, ""},
		{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
		{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(a.Rune('h')), true, ""},
		{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
		{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
		{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
		{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
		{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
		{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
		{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
		{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
		{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
		{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
	})
}

func TestCombinatorPanics(t *testing.T) {
	var c, a = tokenize.C, tokenize.A
	AssertPanics(t, []PanicT{
		{func() { a.RuneRange('z', 'a') }, true,
			`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
		{func() { c.MinMax(-1, 1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.MinMax(1, -1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { c.MinMax(10, 5, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
		{func() { c.Min(-10, a.Space) }, true,
			`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.Max(-42, a.Space) }, true,
			`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { a.IntegerBetween(10, -10) }, true,
			`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
	})
}

func TestAtoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"dd", a.RuneRange('b', 'e'), true, "d"},
		{"ee", a.RuneRange('b', 'e'), true, "e"},
		{"ff", a.RuneRange('b', 'e'), false, ""},
		{"Hello, world!", a.Str("Hello"), true, "Hello"},
		{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
		{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
		{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
		{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
		{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
		{"!X", a.Runes('+', '-', '*', '/'), false, ""},
		{"xxx", a.Rune('x'), true, "x"},
		{"x ", a.Rune(' '), false, ""},
		{"aa", a.RuneRange('b', 'e'), false, ""},
		{"bb", a.RuneRange('b', 'e'), true, "b"},
		{"cc", a.RuneRange('b', 'e'), true, "c"},
		{"", a.EndOfFile, true, ""},
		{"⌘", a.AnyRune, true, "⌘"},
		{"\xbc with AnyRune", a.AnyRune, true, "�"},
		{"", a.AnyRune, false, ""},
		{"⌘", a.ValidRune, true, "⌘"},
		{"\xbc with ValidRune", a.ValidRune, false, ""},
		{"", a.ValidRune, false, ""},
		{"\xbc with InvalidRune", a.InvalidRune, true, "�"},
		{"ok with InvalidRune", a.InvalidRune, false, ""},
		{" ", a.Space, true, " "},
		{"X", a.Space, false, ""},
		{"\t", a.Tab, true, "\t"},
		{"\r", a.CR, true, "\r"},
		{"\n", a.LF, true, "\n"},
		{"!", a.Excl, true, "!"},
		{"\"", a.DoubleQuote, true, "\""},
		{"#", a.Hash, true, "#"},
		{"$", a.Dollar, true, "$"},
		{"%", a.Percent, true, "%"},
		{"&", a.Amp, true, "&"},
		{"'", a.SingleQuote, true, "'"},
		{"(", a.LeftParen, true, "("},
		{"(", a.RoundOpen, true, "("},
		{")", a.RightParen, true, ")"},
		{")", a.RoundClose, true, ")"},
		{"*", a.Asterisk, true, "*"},
		{"*", a.Multiply, true, "*"},
		{"+", a.Plus, true, "+"},
		{"+", a.Add, true, "+"},
		{",", a.Comma, true, ","},
		{"-", a.Minus, true, "-"},
		{"-", a.Subtract, true, "-"},
		{".", a.Dot, true, "."},
		{"/", a.Slash, true, "/"},
		{"/", a.Divide, true, "/"},
		{":", a.Colon, true, ":"},
		{";", a.Semicolon, true, ";"},
		{"<", a.AngleOpen, true, "<"},
		{"<", a.LessThan, true, "<"},
		{"=", a.Equal, true, "="},
		{">", a.AngleClose, true, ">"},
		{">", a.GreaterThan, true, ">"},
		{"?", a.Question, true, "?"},
		{"@", a.At, true, "@"},
		{"[", a.SquareOpen, true, "["},
		{"\\", a.Backslash, true, "\\"},
		{"]", a.SquareClose, true, "]"},
		{"^", a.Caret, true, "^"},
		{"_", a.Underscore, true, "_"},
		{"`", a.Backquote, true, "`"},
		{"{", a.CurlyOpen, true, "{"},
		{"|", a.Pipe, true, "|"},
		{"}", a.CurlyClose, true, "}"},
		{"~", a.Tilde, true, "~"},
		{"\t \t \r\n", a.Blank, true, "\t"},
		{" \t \t \r\n", a.Blanks, true, " \t \t "},
		{"xxx", a.Whitespace, false, ""},
		{" ", a.Whitespace, true, " "},
		{"\t", a.Whitespace, true, "\t"},
		{"\n", a.Whitespace, true, "\n"},
		{"\r\n", a.Whitespace, true, "\r\n"},
		{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
		{"xxx", a.UnicodeSpace, false, ""},
		{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
		{"", a.EndOfLine, true, ""},
		{"\r\n", a.EndOfLine, true, "\r\n"},
		{"\n", a.EndOfLine, true, "\n"},
		{"0", a.Digit, true, "0"},
		{"1", a.Digit, true, "1"},
		{"2", a.Digit, true, "2"},
		{"3", a.Digit, true, "3"},
		{"4", a.Digit, true, "4"},
		{"5", a.Digit, true, "5"},
		{"6", a.Digit, true, "6"},
		{"7", a.Digit, true, "7"},
		{"8", a.Digit, true, "8"},
		{"9", a.Digit, true, "9"},
		{"X", a.Digit, false, ""},
		{"a", a.ASCIILower, true, "a"},
		{"z", a.ASCIILower, true, "z"},
		{"A", a.ASCIILower, false, ""},
		{"Z", a.ASCIILower, false, ""},
		{"A", a.ASCIIUpper, true, "A"},
		{"Z", a.ASCIIUpper, true, "Z"},
		{"a", a.ASCIIUpper, false, ""},
		{"z", a.ASCIIUpper, false, ""},
		{"1", a.Letter, false, ""},
		{"a", a.Letter, true, "a"},
		{"Ø", a.Letter, true, "Ø"},
		{"Ë", a.Lower, false, ""},
		{"ë", a.Lower, true, "ë"},
		{"ä", a.Upper, false, "ä"},
		{"Ä", a.Upper, true, "Ä"},
		{"0", a.HexDigit, true, "0"},
		{"9", a.HexDigit, true, "9"},
		{"a", a.HexDigit, true, "a"},
		{"f", a.HexDigit, true, "f"},
		{"A", a.HexDigit, true, "A"},
		{"F", a.HexDigit, true, "F"},
		{"g", a.HexDigit, false, "g"},
		{"G", a.HexDigit, false, "G"},
		{"0", a.Integer, true, "0"},
		{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
		{"1", a.Integer, true, "1"},
		{"-10X", a.Integer, false, ""},
		{"+10X", a.Integer, false, ""},
		{"-10X", a.Signed(a.Integer), true, "-10"},
		{"+10X", a.Signed(a.Integer), true, "+10"},
		{"+10.1X", a.Signed(a.Integer), true, "+10"},
		{"0X", a.Float, true, "0"},
		{"0X", a.Float, true, "0"},
		{"1X", a.Float, true, "1"},
		{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
		{"123.321X", a.Float, true, "123.321"},
		{"-3.14X", a.Float, false, ""},
		{"-3.14X", a.Signed(a.Float), true, "-3.14"},
		{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
		{"-11", a.IntegerBetween(-10, 10), false, "0"},
		{"-10", a.IntegerBetween(-10, 10), true, "-10"},
		{"0", a.IntegerBetween(-10, 10), true, "0"},
		{"10", a.IntegerBetween(-10, 10), true, "10"},
		{"11", a.IntegerBetween(0, 10), false, ""},
		{"fifteen", a.IntegerBetween(0, 10), false, ""},
	})
}

func TestIPv4Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// Not normalized octet.
		{"0X", tokenize.MatchOctet(false), true, "0"},
		{"00X", tokenize.MatchOctet(false), true, "00"},
		{"000X", tokenize.MatchOctet(false), true, "000"},
		{"10X", tokenize.MatchOctet(false), true, "10"},
		{"010X", tokenize.MatchOctet(false), true, "010"},
		{"255123", tokenize.MatchOctet(false), true, "255"},
		{"256123", tokenize.MatchOctet(false), false, ""},
		{"300", tokenize.MatchOctet(false), false, ""},

		// Octet.
		{"0", tokenize.MatchOctet(false), true, "0"},
		{"02", tokenize.MatchOctet(false), true, "02"},
		{"003", tokenize.MatchOctet(false), true, "003"},
		{"256", tokenize.MatchOctet(false), false, ""},
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "0"},
		{"000X", a.Octet, true, "0"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "10"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},

		// IPv4 address.
		{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
		{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},

		// IPv4 CIDR netmask.
		{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
		{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
		{"0", a.IPv4CIDRMask, true, "0"},
		{"00", a.IPv4CIDRMask, true, "0"},
		{"000", a.IPv4CIDRMask, true, "0"},
		{"32", a.IPv4CIDRMask, true, "32"},
		{"032", a.IPv4CIDRMask, true, "32"},
		{"33", a.IPv4CIDRMask, false, ""},

		// IPv4 netmask in dotted quad format.
		{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
		{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
		{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)

		// IPv4 address + CIDR or dotted quad netmask.
		{"192.168.6.123", a.IPv4Net, false, ""},
		{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
		{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
		{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
		{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
	})
}

func TestIPv6Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"", a.IPv6, false, ""},
		{"::", a.IPv6, true, "::"},
		{"1::", a.IPv6, true, "1::"},
		{"1::1", a.IPv6, true, "1::1"},
		{"::1", a.IPv6, true, "::1"},
		{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
		{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
		{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
		{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
		{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
		{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
		{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
		{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
		{"0", a.IPv6CIDRMask, true, "0"},
		{"128", a.IPv6CIDRMask, true, "128"},
		{"129", a.IPv6CIDRMask, false, ""},
		{"::1/128", a.IPv6Net, true, "::1/128"},
		{"::1/129", a.IPv6Net, false, ""},
		{"1.1.1.1/24", a.IPv6Net, false, ""},
		{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
		{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
	})
}

func TestModifiers(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"missed me!", m.Drop(a.Rune('w')), false, ""},
		{"where are you?", m.Drop(a.Rune('w')), true, ""},
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
		{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
		{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	})
}

// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called, unless the input is already validated to
// follow the correct pattern. Therefore, tokenmakers will panic when the
// input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
	var a, tok = tokenize.A, tokenize.T
	invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
	tokenizer := tokenize.New(invalid)
	AssertPanic(t, PanicT{
		func() { tokenizer("no") }, false,
		`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
	})
}

func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},

		// I don't check the returned error here, but it's good enough to see that the parsing
		// stopped after the illegal \g escape sequence.
		{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
|
||||
|
||||
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
|
||||
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
|
||||
{Type: "bar", Value: byte('R')},
|
||||
{Type: "bar", Value: byte('O')},
|
||||
{Type: "bar", Value: byte('C')},
|
||||
{Type: "bar", Value: byte('K')},
|
||||
{Type: "bar", Value: byte('S')},
|
||||
}},
|
||||
|
||||
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},
|
||||
|
||||
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
|
||||
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
|
||||
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
|
||||
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
|
||||
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
|
||||
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
|
||||
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
|
||||
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},
|
||||
|
||||
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
|
||||
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
|
||||
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
|
||||
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
|
||||
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
|
||||
|
||||
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
|
||||
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
|
||||
|
||||
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||
{Type: "P", Value: true},
|
||||
{Type: "P", Value: true},
|
||||
{Type: "P", Value: true},
|
||||
{Type: "P", Value: true},
|
||||
{Type: "P", Value: true},
|
||||
{Type: "P", Value: true},
|
||||
}},
|
||||
|
||||
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
|
||||
{Type: "P", Value: false},
|
||||
{Type: "P", Value: false},
|
||||
{Type: "P", Value: false},
|
||||
{Type: "P", Value: false},
|
||||
{Type: "P", Value: false},
|
||||
{Type: "P", Value: false},
|
||||
}},
|
||||
|
||||
{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
|
||||
})
|
||||
}
|
||||
|
||||
func TestTokenGroup_Match(t *testing.T) {
|
||||
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||
tokenizer := tokenize.New(tok.Group("Group",
|
||||
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
|
||||
|
||||
api, err := tokenizer("xxxxx")
|
||||
AssertTrue(t, err == nil, "Tokenizer result")
|
||||
tokens := api.Tokens()
|
||||
AssertEqual(t, 1, len(tokens), "Length of tokens slice")
|
||||
contained := tokens[0].Value.([]tokenize.Token)
|
||||
AssertEqual(t, 3, len(contained), "Length of contained tokens")
|
||||
AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
|
||||
AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
|
||||
AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
|
||||
}
|
||||
|
||||
func TestTokenGroup_Mismatch(t *testing.T) {
|
||||
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
|
||||
tokenizer := tokenize.New(tok.Group("Group",
|
||||
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
|
||||
|
||||
api, err := tokenizer("12345")
|
||||
AssertTrue(t, err == nil, "Tokenizer result")
|
||||
tokens := api.Tokens()
|
||||
AssertEqual(t, 0, len(tokens), "Length of tokens slice")
|
||||
}
|
||||
|
||||
// I know, this is hell, but that's the whole point for this test :->
|
||||
func TestCombination(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
demonic := c.Seq(
|
||||
c.Optional(a.SquareOpen),
|
||||
m.Trim(
|
||||
c.Seq(
|
||||
c.Optional(a.Blanks),
|
||||
c.Repeated(3, a.AngleClose),
|
||||
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
|
||||
return fmt.Sprintf("%d", len(s))
|
||||
}),
|
||||
m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
|
||||
m.ToUpper(c.Min(1, a.ASCIILower)),
|
||||
m.Drop(a.Excl),
|
||||
c.Repeated(3, a.AngleOpen),
|
||||
c.Optional(a.Blanks),
|
||||
),
|
||||
" \t",
|
||||
),
|
||||
c.Optional(a.SquareClose),
|
||||
)
|
||||
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
|
||||
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
|
||||
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
|
||||
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
|
||||
})
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
// Package tokenize provides tooling to build a tokenizer in
|
||||
// parser/combinator-style, used to feed data to the parser.
|
||||
package tokenize2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Func is the function signature as returned by New: a function that takes
|
||||
// any supported type of input, executes a tokenizer run and returns a
|
||||
// Result struct (possibly nil) and an error (possibly nil).
|
||||
type Func func(input interface{}) (*API, error)
|
||||
|
||||
// New instantiates a new tokenizer.
|
||||
//
|
||||
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
|
||||
// functions are used to move the state machine forward during tokenizing.
|
||||
// Using the New function, you can wrap a tokenize.Handler in a simple way,
|
||||
// making it possible to feed some input to the handler and retrieve the
|
||||
// tokenizing results.
|
||||
//
|
||||
// The startHandler argument points the tokenizer to the tokenize.Handler function
|
||||
// that must be executed at the start of the tokenizing process. From there on
|
||||
// other tokenize.Handler functions can be invoked recursively to implement the
|
||||
// tokenizing process.
|
||||
//
|
||||
// THis function returns a function that can be invoked to run the tokenizer
|
||||
// against the provided input data. For an overview of allowed inputs, take a
|
||||
// look at the documentation for parsekit.read.New().
|
||||
func New(tokenHandler Handler) Func {
|
||||
return func(input interface{}) (*API, error) {
|
||||
api := NewAPI(input)
|
||||
ok := tokenHandler(api)
|
||||
|
||||
if !ok {
|
||||
err := fmt.Errorf("mismatch at %s", Cursor{})
|
||||
return nil, err
|
||||
}
|
||||
return api, nil
|
||||
}
|
||||
}
|
|
@ -1,223 +0,0 @@
|
|||
package tokenize2_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
|
||||
)
|
||||
|
||||
// TODO For error handling, it would be really cool if for example the
|
||||
// 10.0.300.1/24 case would return an actual error stating that
|
||||
// 300 is not a valid octet for an IPv4 address.
|
||||
// Biggest thing to take care of here, is that errors should not stop
|
||||
// a Parser flow (since we might be trying to match different cases in
|
||||
// sequence), but a Parser flow should optionally be able to make use
|
||||
// of the actual error.
|
||||
// The same goes for a Tokenizer, since those can also make use of
|
||||
// optional matching using tokenize.C.Any(...) for example. If matching
|
||||
// for Any(IPv4, Digits), the example case should simply end up with 10
|
||||
// after the IPv4 mismatch.
|
||||
func ExampleNew() {
|
||||
// Build the tokenizer for ip/mask.
|
||||
var c, a, t = tokenize.C, tokenize.A, tokenize.T
|
||||
ip := t.Str("ip", a.IPv4)
|
||||
mask := t.Int8("mask", a.IPv4CIDRMask)
|
||||
cidr := c.Seq(ip, a.Slash, mask)
|
||||
tokenizer := tokenize.New(cidr)
|
||||
|
||||
for _, input := range []string{
|
||||
"000.000.000.000/000",
|
||||
"192.168.0.1/24",
|
||||
"255.255.255.255/32",
|
||||
"10.0.300.1/24",
|
||||
"not an IPv4 CIDR",
|
||||
} {
|
||||
// Execute returns a Result and an error, which is nil on success.
|
||||
result, err := tokenizer(input)
|
||||
|
||||
if err == nil {
|
||||
fmt.Printf("Result: %s\n", result.Tokens())
|
||||
} else {
|
||||
fmt.Printf("Error: %s\n", err)
|
||||
}
|
||||
}
|
||||
// Output:
|
||||
// Result: [ip("0.0.0.0") mask((int8)0)]
|
||||
// Result: [ip("192.168.0.1") mask((int8)24)]
|
||||
// Result: [ip("255.255.255.255") mask((int8)32)]
|
||||
// Error: mismatch at start of file
|
||||
// Error: mismatch at start of file
|
||||
}
|
||||
|
||||
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
|
||||
api := makeTokenizeAPI()
|
||||
r, _ := api.NextRune()
|
||||
AssertEqual(t, 'T', r, "first rune")
|
||||
}
|
||||
|
||||
func TestInputCanAcceptRunesFromReader(t *testing.T) {
|
||||
i := makeTokenizeAPI()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
AssertEqual(t, "Tes", i.String(), "i.String()")
|
||||
}
|
||||
|
||||
func TestCallingNextRuneTwice_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
i.NextRune()
|
||||
i.NextRune()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
|
||||
`without a prior call to Accept\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
|
||||
api := makeTokenizeAPI()
|
||||
AssertPanic(t, PanicT{
|
||||
Function: api.Accept,
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
|
||||
`without first calling NextRune\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
|
||||
api := tokenize.NewAPI("")
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
api.NextRune()
|
||||
api.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
|
||||
`, but the prior call to NextRune\(\) failed`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
i.Merge(0)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
|
||||
}
|
||||
|
||||
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
child := i.Fork()
|
||||
i.Fork()
|
||||
i.Merge(child)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
|
||||
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
|
||||
}
|
||||
|
||||
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
i.Dispose(0)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
|
||||
}
|
||||
|
||||
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
child := i.Fork()
|
||||
i.Fork()
|
||||
i.Dispose(child)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
|
||||
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
|
||||
}
|
||||
|
||||
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
i.Fork()
|
||||
g := i.Fork()
|
||||
i.Fork()
|
||||
i.Merge(g)
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
|
||||
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
|
||||
}
|
||||
|
||||
func TestForkingInput_ClearsLastRune(t *testing.T) {
|
||||
AssertPanic(t, PanicT{
|
||||
Function: func() {
|
||||
i := makeTokenizeAPI()
|
||||
i.NextRune()
|
||||
i.Fork()
|
||||
i.Accept()
|
||||
},
|
||||
Regexp: true,
|
||||
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
|
||||
})
|
||||
}
|
||||
|
||||
func TestAccept_UpdatesCursor(t *testing.T) {
|
||||
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
|
||||
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
|
||||
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
|
||||
i.NextRune() // read "\n", cursor ends up at start of new line
|
||||
i.Accept()
|
||||
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
|
||||
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
}
|
||||
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
|
||||
}
|
||||
|
||||
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
|
||||
i := tokenize.NewAPI(strings.NewReader("X"))
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
|
||||
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
|
||||
}
|
||||
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
|
||||
i := tokenize.NewAPI(strings.NewReader("X"))
|
||||
child := i.Fork()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
r, err := i.NextRune()
|
||||
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
|
||||
i.Dispose(child) // brings the read offset back to the start
|
||||
r, err = i.NextRune() // so here we should see the same rune
|
||||
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
|
||||
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
|
||||
}
|
||||
|
||||
func makeTokenizeAPI() *tokenize.API {
|
||||
return tokenize.NewAPI("Testing")
|
||||
}
|
|
@ -1,131 +0,0 @@
|
|||
package tokenize2
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
|
||||
// Create input, accept the first rune.
|
||||
i := NewAPI("Testing")
|
||||
i.NextRune()
|
||||
i.Accept() // T
|
||||
AssertEqual(t, "T", i.String(), "accepted rune in input")
|
||||
// Fork
|
||||
child := i.Fork()
|
||||
AssertEqual(t, 1, i.stackFrame.cursor.Byte, "parent cursor.Byte")
|
||||
AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
|
||||
AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
|
||||
AssertEqual(t, 1, i.stackFrame.offset, "child offset")
|
||||
// Accept two runes via fork.
|
||||
i.NextRune()
|
||||
i.Accept() // e
|
||||
i.NextRune()
|
||||
i.Accept() // s
|
||||
AssertEqual(t, "es", i.String(), "result runes in fork")
|
||||
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
|
||||
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
|
||||
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
|
||||
AssertEqual(t, 3, i.stackFrame.offset, "child offset")
|
||||
// Merge fork back into parent
|
||||
i.Merge(child)
|
||||
i.Dispose(child)
|
||||
AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
|
||||
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
|
||||
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
|
||||
}
|
||||
|
||||
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
|
||||
i := NewAPI("Testing")
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f1 := i.Fork()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
f2 := i.Fork()
|
||||
i.NextRune()
|
||||
i.Accept()
|
||||
AssertEqual(t, "s", i.String(), "f2 String()")
|
||||
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
|
||||
i.Merge(f2)
|
||||
i.Dispose(f2)
|
||||
AssertEqual(t, "es", i.String(), "f1 String()")
|
||||
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
|
||||
i.Merge(f1)
|
||||
i.Dispose(f1)
|
||||
AssertEqual(t, "Tes", i.String(), "top-level API String()")
|
||||
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
|
||||
}
|
||||
|
||||
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
|
||||
i := NewAPI("Testing")
|
||||
r, _ := i.NextRune()
|
||||
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
|
||||
AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
|
||||
AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
|
||||
i.Accept()
|
||||
AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
|
||||
AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
|
||||
r, _ = i.NextRune()
|
||||
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
|
||||
}
|
||||
|
||||
func TestFlushInput(t *testing.T) {
|
||||
api := NewAPI("cool")
|
||||
|
||||
// Flushing without any read data is okay. FlushInput() will return
|
||||
// false in this case, and nothing else happens.
|
||||
AssertTrue(t, api.FlushInput() == false, "flush input at start")
|
||||
|
||||
api.NextRune()
|
||||
api.Accept()
|
||||
api.NextRune()
|
||||
api.Accept()
|
||||
|
||||
AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
|
||||
AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")
|
||||
|
||||
AssertTrue(t, api.FlushInput() == false, "flush input after flush input")
|
||||
|
||||
// Read offset is now zero, but reading should continue after "co".
|
||||
api.NextRune()
|
||||
api.Accept()
|
||||
api.NextRune()
|
||||
api.Accept()
|
||||
|
||||
AssertEqual(t, "cool", api.String(), "end result")
|
||||
}
|
||||
|
||||
func TestInputFlusherWrapper(t *testing.T) {
|
||||
runeA := A.Rune('a')
|
||||
flushB := C.FlushInput(A.Rune('b'))
|
||||
api := NewAPI("abaab")
|
||||
runeA(api)
|
||||
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
|
||||
AssertEqual(t, "a", api.String(), "runes after 1 read")
|
||||
flushB(api)
|
||||
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
|
||||
AssertEqual(t, "ab", api.String(), "runes after 2 reads")
|
||||
runeA(api)
|
||||
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
|
||||
AssertEqual(t, "aba", api.String(), "runes after 3 reads")
|
||||
runeA(api)
|
||||
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
|
||||
AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
|
||||
flushB(api)
|
||||
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
|
||||
AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
|
||||
}
|
||||
|
||||
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
|
||||
if expected != actual {
|
||||
t.Errorf(
|
||||
"Unexpected value for %s:\nexpected: %q\nactual: %q",
|
||||
forWhat, expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func AssertTrue(t *testing.T, b bool, assertion string) {
|
||||
if !b {
|
||||
t.Errorf("Assertion %s is false", assertion)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue