Optimization round completed (for now :-). All tests successful.

This commit is contained in:
Maurice Makaay 2019-07-11 12:43:57 +00:00
parent 7598b62dd0
commit c532af67ca
36 changed files with 905 additions and 4335 deletions

View File

@ -77,7 +77,7 @@ var int64Token = tokenize.T.Int64(nil, bareInteger)
func (c *simpleCalculator) number(p *parse.API) {
if p.Accept(int64Token) {
c.Result += c.op * p.Result().Value(0).(int64)
c.Result += c.op * p.Result.Tokens[0].Value.(int64)
p.Handle(c.operatorOrEndOfFile)
} else {
p.Expected("integer number")

View File

@ -98,7 +98,7 @@ func (calc *calculator) expr(p *parse.API) {
var A = tokenize.A
if p.Handle(calc.term) {
for p.Accept(A.Add.Or(A.Subtract)) {
op := p.Result().Rune(0)
op := p.Result.Runes[0]
if !p.Handle(calc.term) {
return
}
@ -116,7 +116,7 @@ func (calc *calculator) term(p *parse.API) {
var A = tokenize.A
if p.Handle(calc.factor) {
for p.Accept(A.Multiply.Or(A.Divide)) {
op := p.Result().Rune(0)
op := p.Result.Runes[0]
if !p.Handle(calc.factor) {
return
}
@ -134,7 +134,7 @@ func (calc *calculator) factor(p *parse.API) {
p.Accept(A.Blanks)
switch {
case p.Accept(T.Float64(nil, A.Signed(A.Float))):
value := p.Result().Value(0).(float64)
value := p.Result.Tokens[0].Value.(float64)
calc.interpreter.pushValue(value)
case p.Accept(A.LeftParen):
if !p.Handle(calc.expr) {

View File

@ -116,7 +116,7 @@ func (h *helloparser1) name(p *parse.API) {
case p.Peek(a.Excl):
p.Handle(h.exclamation)
case p.Accept(a.AnyRune):
h.greetee += p.Result().String()
h.greetee += p.Result.String()
p.Handle(h.name)
default:
p.Expected("exclamation mark")

View File

@ -90,7 +90,7 @@ func (h *helloparser2) start(p *parse.API) {
return
}
if p.Accept(m.TrimSpace(c.OneOrMore(a.AnyRune.Except(a.Excl)))) {
h.greetee = p.Result().String()
h.greetee = p.Result.String()
if h.greetee == "" {
p.Error("the name cannot be empty")
return

View File

@ -22,7 +22,7 @@ func (l *Chunks) AddChopped(s string, chunkSize int) error {
parseChunks := parse.New(func(p *parse.API) {
for p.Accept(chunkOfRunes) {
*l = append(*l, p.Result().String())
*l = append(*l, p.Result.String())
}
})
return parseChunks(s)
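
For context, a minimal usage sketch for this helper; the exact output is inferred from the method's name and the chunking loop above, so treat it as an assumption:

var l Chunks
err := l.AddChopped("abcdef", 2)
// err should be nil, and l should now hold the 2-rune chunks ["ab" "cd" "ef"]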

View File

@ -16,14 +16,24 @@ import (
//
// • call other parse.Handler functions, the core of recursive-descent parsing (Handle)
type API struct {
tokenAPI tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
result *tokenize.Result // last tokenize.Handler result as produced by Accept() or Peek()
tokenAPI *tokenize.API // the tokenize.API, used for communicating with tokenize.Handler functions
Result TokenizeResult // a struct, holding the results of the last Peek() or Accept() call
sanityChecksEnabled bool // whether or not runtime sanity checks are enabled
loopCheck map[uintptr]bool // used for parser loop detection
err error // parse error, retrieved by Error(), using API methods is denied when set
stopped bool // a boolean set to true by Stop()
}
// TokenizeResult holds the results of the last Peek() or Accept() call.
type TokenizeResult struct {
Tokens []tokenize.Token // the resulting tokens from the last call to Peek() or Accept()
Runes []rune // the resulting runes from the last call to Peek() or Accept()
}
func (result *TokenizeResult) String() string {
return string(result.Runes)
}
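
With the Result() method gone, a parse.Handler reads matches straight from the new Result field; a minimal sketch, mirroring the updated examples further down in this commit:

parser := parse.New(func(p *parse.API) {
    if p.Accept(tokenize.A.AnyRune) {
        fmt.Println(p.Result.String()) // the matched runes as a string
        fmt.Println(p.Result.Runes)    // ... or as a []rune slice
        fmt.Println(p.Result.Tokens)   // tokens produced by the tokenize.Handler, if any
    }
})
_ = parser("x")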
// DisableSanityChecks disables the built-in parser implementation sanity checks,
// which detect parser implementation errors like loops and continuing to parse
// after an error or after invoking Stop().
@ -40,16 +50,13 @@ func (p *API) DisableSanityChecks() {
// If it does, then true will be returned, false otherwise. The read cursor
// will be kept at the same position, so the next call to Peek() or Accept()
// will start from the same cursor position.
//
// After calling this method, you can retrieve the produced tokenize.Result
// struct using the Result() method.
func (p *API) Peek(tokenHandler tokenize.Handler) bool {
p.result = nil
forkedAPI, ok := p.invokeHandler("Peek", tokenHandler)
if ok {
p.result = forkedAPI.Result()
p.tokenAPI.Reset()
p.Result.Tokens = p.tokenAPI.Tokens()
p.Result.Runes = p.tokenAPI.Runes()
}
p.tokenAPI.Dispose(forkedAPI)
return ok
}
@ -58,24 +65,31 @@ func (p *API) Peek(tokenHandler tokenize.Handler) bool {
// forward to beyond the match that was found. Otherwise false will be
// returned and the read cursor will stay at the same position.
//
// After calling this method, you can retrieve the tokenize.Result
// using the Result() method.
// After calling this method, you can retrieve the results via the Result field.
func (p *API) Accept(tokenHandler tokenize.Handler) bool {
p.result = nil
forkedAPI, ok := p.invokeHandler("Accept", tokenHandler)
if ok {
forkedAPI.Merge()
p.result = p.tokenAPI.Result()
// Keep track of the results.
p.Result.Tokens = p.tokenAPI.Tokens()
p.Result.Runes = p.tokenAPI.Runes()
// Merge to the parent level.
p.tokenAPI.Merge(forkedAPI)
p.tokenAPI.Dispose(forkedAPI)
// And flush the input reader buffer.
if p.tokenAPI.FlushInput() {
if p.sanityChecksEnabled {
p.initLoopCheck()
}
}
} else {
p.tokenAPI.Dispose(forkedAPI)
}
return ok
}
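
A sketch of the practical difference between the two calls, using the A.Rune atom that appears elsewhere in this diff:

if p.Peek(tokenize.A.Rune('a')) {  // p.Result is filled, read cursor unchanged
    p.Accept(tokenize.A.Rune('a')) // p.Result is filled, read cursor now past the 'a'
}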
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokenize.API, bool) {
func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (int, bool) {
if p.sanityChecksEnabled {
p.panicWhenStoppedOrInError(name)
p.checkForLoops(name)
@ -84,10 +98,9 @@ func (p *API) invokeHandler(name string, tokenHandler tokenize.Handler) (tokeniz
}
}
p.result = nil
p.tokenAPI.Reset()
child := p.tokenAPI.Fork()
ok := tokenHandler(child)
ok := tokenHandler(p.tokenAPI)
return child, ok
}
@ -138,20 +151,6 @@ func (p *API) checkForLoops(name string) {
p.loopCheck[filepos] = true
}
// Result returns the tokenize.Result struct, containing results as produced by the
// last Peek() or Accept() call.
//
// When Result() is called without first doing a Peek() or Accept(), then no
// result will be available and the method will panic.
func (p *API) Result() *tokenize.Result {
result := p.result
if p.result == nil {
callerPanic("Result", "parsekit.parse.API.{name}(): {name}() called "+
"at {caller} without calling API.Peek() or API.Accept() on beforehand")
}
return result
}
// Handle executes other parse.Handler functions from within the active
// parse.Handler function.
//
@ -215,7 +214,7 @@ func (p *API) Error(format string, data ...interface{}) {
// No call to p.panicWhenStoppedOrInError(), to allow a parser to
// set a different error message when needed.
message := fmt.Sprintf(format, data...)
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Result().Cursor())
p.err = fmt.Errorf("%s at %s", message, p.tokenAPI.Cursor())
}
// ExpectEndOfFile can be used to check if the input is at end of file.

View File

@ -5,8 +5,6 @@ package parse
import (
"regexp"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@ -56,39 +54,3 @@ func AssertPanic(t *testing.T, p PanicT) {
}()
p.Function()
}
type TokenMakerT struct {
Input string
Handler tokenize.Handler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
for _, test := range testSet {
AssertTokenMaker(t, test)
}
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
tokenizer := tokenize.New(test.Handler)
result, err := tokenizer(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {
if len(result.Tokens()) != len(test.Expected) {
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
}
for i, expected := range test.Expected {
actual := result.Token(i)
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if string(expected.Runes) != string(actual.Runes) {
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}
}
}
}

View File

@ -16,7 +16,7 @@ func ExampleNew_usingAcceptedRunes() {
parser := parse.New(func(p *parse.API) {
for p.Accept(a.AnyRune) {
matches = append(matches, p.Result().String())
matches = append(matches, p.Result.String())
}
p.ExpectEndOfFile()
})
@ -33,9 +33,9 @@ func ExampleNew_usingTokens() {
parser := parse.New(func(p *parse.API) {
if p.Accept(c.OneOrMore(tok.Rune("RUNE", a.AnyRune))) {
fmt.Printf("Runes accepted: %q\n", p.Result().String())
fmt.Printf("Runes accepted: %q\n", p.Result.String())
fmt.Printf("Tokens:\n")
for i, token := range p.Result().Tokens() {
for i, token := range p.Result.Tokens {
fmt.Printf("[%d] %s\n", i, token)
}
}
@ -46,10 +46,10 @@ func ExampleNew_usingTokens() {
// Output:
// Runes accepted: "¡ök!"
// Tokens:
// [0] RUNE(161)
// [1] RUNE(246)
// [2] RUNE(107)
// [3] RUNE(33)
// [0] RUNE('¡')
// [1] RUNE('ö')
// [2] RUNE('k')
// [3] RUNE('!')
}
func ExampleAPI_Expected() {
@ -71,7 +71,7 @@ func ExampleAPI_Accept_inIfStatement() {
if p.Accept(tokenize.A.StrNoCase("Yowza!")) {
// Result.String() returns a string containing all
// accepted runes that were matched against.
fmt.Println(p.Result().String())
fmt.Println(p.Result.String())
}
})
parser("YOWZA!")
@ -88,7 +88,7 @@ func ExampleAPI_Accept_inSwitchStatement() {
case p.Accept(tokenize.A.Rune('X')):
// NOOP, skip this rune
case p.Accept(tokenize.A.AnyRune):
result += p.Result().String()
result += p.Result.String()
default:
loop = false
}
@ -107,7 +107,7 @@ func ExampleAPI_Stop() {
parser := parse.New(func(p *parse.API) {
fmt.Printf("First word: ")
for p.Accept(c.Not(a.Space)) {
fmt.Printf("%s", p.Result())
fmt.Printf("%s", p.Result.String())
}
p.Stop()
})
@ -123,7 +123,7 @@ func ExampleAPI_Stop_notCalledAndNoInputPending() {
parser := parse.New(func(p *parse.API) {
fmt.Printf("Word: ")
for p.Accept(c.Not(a.Space)) {
fmt.Printf("%s", p.Result())
fmt.Printf("%s", p.Result.String())
}
fmt.Printf("\n")
})
@ -141,7 +141,7 @@ func ExampleAPI_Stop_notCalledButInputPending() {
parser := parse.New(func(p *parse.API) {
fmt.Printf("First word: ")
for p.Accept(c.Not(a.Space)) {
fmt.Printf("%s", p.Result())
fmt.Printf("%s", p.Result.String())
}
fmt.Printf("\n")
})
@ -161,7 +161,7 @@ func ExampleAPI_Peek() {
// This handler is able to handle serial numbers.
serialnrHandler := func(p *parse.API) {
if p.Accept(serialnr) {
fmt.Println(p.Result().String())
fmt.Println(p.Result.String())
}
}
@ -255,17 +255,6 @@ func TestGivenParserWithErrorSet_HandlePanics(t *testing.T) {
`at /.*/parse_test\.go:\d+: no calls allowed after API\.Error\(\)`})
}
func TestGivenParserWithoutCallToPeekOrAccept_ResultPanics(t *testing.T) {
p := parse.New(func(p *parse.API) {
p.Result()
})
parse.AssertPanic(t, parse.PanicT{
Function: func() { p("") },
Regexp: true,
Expect: `parsekit\.parse\.API\.Result\(\): Result\(\) called at ` +
`/.*/parse_test.go:\d+ without calling API.Peek\(\) or API.Accept\(\) on beforehand`})
}
func TestGivenParserWhichIsNotStopped_WithNoMoreInput_FallbackExpectEndOfFileKicksIn(t *testing.T) {
p := parse.New(func(p *parse.API) {})
err := p("")

View File

@ -25,7 +25,7 @@ import (
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
// retrieved using the method Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
@ -74,39 +74,40 @@ type API struct {
runeRead bool // whether or not a rune was read using NextRune()
runes []rune // the rune stack
tokens []Token // the token stack
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
}
type stackFrame struct {
offset int // current rune offset relative to the Reader's sliding window
runeStart int
runeEnd int
tokenStart int
tokenEnd int
stackLevel int // the stack level for this API object
state *apiState // shared API state data
cursor Cursor
// TODO
err error // can be used by a Handler to report a specific issue with the input
}
type apiState struct {
stack []Result // the stack, used for forking / merging the API.
top int // the index of the current top item in the stack
}
// initialStackDepth determines the initial stack depth for the API.
// A parser that requires a deeper stack is no problem; the API automatically
// grows the stack when forking beyond this default number of levels.
const initialAPIstackDepth = 10
const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10
// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) API {
stack := make([]Result, 1, initialAPIstackDepth)
state := apiState{
stack: stack,
}
return API{
runes: make([]rune, initialAPIstackDepth),
tokens: make([]Token, initialAPIstackDepth),
func NewAPI(input interface{}) *API {
api := &API{
reader: read.New(input),
state: &state,
runes: make([]rune, 0, initialRuneDepth),
tokens: make([]Token, 0, initialTokenDepth),
stackFrames: make([]stackFrame, 1, initialStackDepth),
}
api.stackFrame = &api.stackFrames[0]
return api
}
// NextRune returns the rune at the current read offset.
@ -120,25 +121,16 @@ func NewAPI(input interface{}) API {
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.stackLevel > i.state.top {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
if i.runeRead {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"without a prior call to Accept()")
}
readRune, err := i.reader.RuneAt(result.offset)
readRune, err := i.reader.RuneAt(i.stackFrame.offset)
i.lastRune = readRune
i.lastRuneErr = err
i.runeRead = true
i.DisposeChilds()
return readRune, err
}
@ -148,22 +140,31 @@ func (i *API) NextRune() (rune, error) {
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if i.stackLevel > i.state.top {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
}
result := &(i.state.stack[i.stackLevel])
// TODO can go after completing the code for performance.
//fmt.Println("STACK [", i.stackLevel, "] runes", len(i.runes), "/", cap(i.runes), "tokens", len(i.tokens), "/", cap(i.tokens))
if !i.runeRead {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} without first calling NextRune()")
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
"without first calling NextRune()")
} else if i.lastRuneErr != nil {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, but the prior call to NextRune() failed")
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
"but the prior call to NextRune() failed")
}
result.runes = append(result.runes, i.lastRune)
result.cursor.moveByRune(i.lastRune)
result.offset++
newRuneEnd := i.stackFrame.runeEnd + 1
// Grow the runes capacity when needed.
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
} else {
i.runes = i.runes[0:newRuneEnd]
}
i.runes[newRuneEnd-1] = i.lastRune
i.stackFrame.runeEnd++
i.stackFrame.cursor.moveByRune(i.lastRune)
i.stackFrame.offset++
i.runeRead = false
}
@ -184,44 +185,30 @@ func (i *API) Accept() {
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() API {
if i.stackLevel > i.state.top {
callerPanic("Fork", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
func (i *API) Fork() int {
newStackLevel := i.stackLevel + 1
newStackSize := newStackLevel + 1
// Grow the stack frames capacity when needed.
if cap(i.stackFrames) < newStackSize {
newFrames := make([]stackFrame, newStackSize, newStackSize*2)
copy(newFrames, i.stackFrames)
i.stackFrames = newFrames
} else {
i.stackFrames = i.stackFrames[0:newStackSize]
}
i.DisposeChilds()
result := &(i.state.stack[i.stackLevel])
// Grow the stack storage when needed.
newStackSize := i.stackLevel + 2
if cap(i.state.stack) < newStackSize {
newStack := make([]Result, newStackSize, newStackSize+initialAPIstackDepth)
copy(newStack, i.state.stack)
i.state.stack = newStack
}
i.state.stack = i.state.stack[0 : i.stackLevel+1]
// Create the new fork.
child := API{
state: i.state,
stackLevel: i.stackLevel + 1,
reader: i.reader,
}
childResult := Result{
cursor: result.cursor,
offset: result.offset,
}
i.state.stack = append(i.state.stack, childResult)
//i.state.stack[i.stackLevel+1] = childResult
// Invalidate parent's last read rune.
i.stackLevel++
i.runeRead = false
i.state.top = child.stackLevel
parent := i.stackFrame
return child
i.stackFrame = &i.stackFrames[i.stackLevel]
*i.stackFrame = *parent
i.stackFrame.runeStart = parent.runeEnd
i.stackFrame.tokenStart = parent.tokenEnd
return i.stackLevel
}
// Merge appends the results of a forked child API (runes, tokens) to the
@ -232,56 +219,68 @@ func (i *API) Fork() API {
// be reused for performing another match. This means that all Result data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
func (i *API) Merge() {
if i.stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} on the top-level API")
}
if i.stackLevel > i.state.top {
//
// Once the child is no longer needed, it can be disposed of using the
// method Dispose(), which returns the API to the parent stack level.
func (i *API) Merge(stackLevel int) {
if stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"using a non-active API fork (a parent was read, forked or merged, "+
"causing this fork to be invalidated)")
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
}
result := &(i.state.stack[i.stackLevel])
parentResult := &(i.state.stack[i.stackLevel-1])
parent := &i.stackFrames[stackLevel-1]
// // Grow parent rune storage when needed.
// newRuneSize := len(parentResult.runes) + len(result.runes)
// if cap(parentResult.runes) < newRuneSize {
// newRunes := make([]rune, len(parentResult.runes), 2*newRuneSize)
// copy(newRunes, parentResult.runes)
// parentResult.runes = newRunes
// //fmt.Println("Beefed up runes", i.stackLevel-1, newRuneSize*2)
// }
// The end of the parent slice aligns with the start of the child slice.
// Because of this, to merge, the parent slice can simply be expanded
// to include the child slice.
// parent: |----------|
// child:             |------|
// After the merge operation:
// parent: |-----------------|
// child:                    |---> continue reading from here
parent.runeEnd = i.stackFrame.runeEnd
i.stackFrame.runeStart = i.stackFrame.runeEnd
// // Grow parent token storage when needed.
// newTokenSize := len(parentResult.tokens) + len(result.tokens)
// if cap(parentResult.tokens) < newTokenSize {
// newTokens := make([]Token, len(parentResult.tokens), 2*newTokenSize)
// copy(newTokens, parentResult.tokens)
// parentResult.tokens = newTokens
// //fmt.Println("Beefed up tokens", i.stackLevel-1, newTokenSize*2)
// }
// The same logic applies to tokens.
parent.tokenEnd = i.stackFrame.tokenEnd
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
parentResult.runes = append(parentResult.runes, result.runes...)
parentResult.tokens = append(parentResult.tokens, result.tokens...)
parentResult.offset = result.offset
parentResult.cursor = result.cursor
i.DisposeChilds()
i.Reset()
parent.offset = i.stackFrame.offset
parent.cursor = i.stackFrame.cursor
i.stackFrame.err = nil
i.runeRead = false
}
func (i *API) DisposeChilds() {
i.state.stack = i.state.stack[:i.stackLevel+1]
i.state.top = i.stackLevel
func (i *API) Dispose(stackLevel int) {
if stackLevel == 0 {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
}
i.runeRead = false
i.stackLevel = stackLevel - 1
i.stackFrames = i.stackFrames[:stackLevel]
i.stackFrame = &i.stackFrames[stackLevel-1]
i.runes = i.runes[0:i.stackFrame.runeEnd]
i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}
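
Since Fork() now hands out a plain int stack level instead of a forked API value, every caller follows the same handshake; a sketch of the pattern used throughout this commit (someHandler stands in for an arbitrary Handler):

var someHandler Handler // placeholder for this sketch
wrapped := func(t *API) bool {
    child := t.Fork()   // push a new stack level
    if someHandler(t) { // handlers now work on t itself
        t.Merge(child)   // keep the child's results on the parent level
        t.Dispose(child) // pop back to the parent level
        return true
    }
    t.Dispose(child) // pop without keeping anything
    return false
}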
func (i *API) Reset() {
result := &(i.state.stack[i.stackLevel])
i.runeRead = false
result.runes = result.runes[:0]
result.tokens = result.tokens[:0]
result.err = nil
i.stackFrame.runeEnd = i.stackFrame.runeStart
i.stackFrame.tokenEnd = i.stackFrame.tokenStart
i.stackFrame.err = nil
}
// FlushInput flushes processed input data from the read.Buffer.
@ -291,18 +290,126 @@ func (i *API) Reset() {
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i API) FlushInput() bool {
result := &(i.state.stack[i.stackLevel])
if result.offset > 0 {
i.reader.Flush(result.offset)
result.offset = 0
func (i *API) FlushInput() bool {
// result := &(i.state.stack[i.stackLevel])
if i.stackFrame.offset > 0 {
i.reader.Flush(i.stackFrame.offset)
i.stackFrame.offset = 0
return true
}
return false
}
// Result returns the Result struct from the API. The returned struct
// can be used to retrieve and to modify result data.
func (i API) Result() *Result {
return &(i.state.stack[i.stackLevel])
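// String returns the accumulated output runes for the active stack level as a string.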
func (i *API) String() string {
return string(i.Runes())
}
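// Runes returns the accumulated output runes for the active stack level.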
func (i *API) Runes() []rune {
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}
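// Rune returns the accumulated output rune at the given offset for the active stack level.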
func (i *API) Rune(offset int) rune {
return i.runes[i.stackFrame.runeStart+offset]
}
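// ClearRunes drops the accumulated output runes for the active stack level.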
func (i *API) ClearRunes() {
i.runes = i.runes[:i.stackFrame.runeStart]
i.stackFrame.runeEnd = i.stackFrame.runeStart
}
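// SetRunes replaces the accumulated output runes for the active stack level.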
func (i *API) SetRunes(runes ...rune) {
// Grow the runes capacity when needed.
newRuneEnd := i.stackFrame.runeStart + len(runes)
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
} else {
i.runes = i.runes[0:newRuneEnd]
}
for offset, r := range runes {
i.runes[i.stackFrame.runeStart+offset] = r
}
i.stackFrame.runeEnd = newRuneEnd
}
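// AddRunes appends runes to the accumulated output for the active stack level.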
func (i *API) AddRunes(runes ...rune) {
// Grow the runes capacity when needed.
newRuneEnd := i.stackFrame.runeEnd + len(runes)
if cap(i.runes) < newRuneEnd {
newRunes := make([]rune, newRuneEnd, newRuneEnd*2)
copy(newRunes, i.runes)
i.runes = newRunes
} else {
i.runes = i.runes[0:newRuneEnd]
}
for offset, r := range runes {
i.runes[i.stackFrame.runeEnd+offset] = r
}
i.stackFrame.runeEnd = newRuneEnd
}
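// AddString appends the runes of a string to the accumulated output for the active stack level.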
func (i *API) AddString(s string) {
i.AddRunes([]rune(s)...)
}
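// SetString replaces the accumulated output with the runes of the provided string.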
func (i *API) SetString(s string) {
i.SetRunes([]rune(s)...)
}
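// Cursor returns the current read cursor position for the active stack level.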
func (i *API) Cursor() Cursor {
return i.stackFrame.cursor
}
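// Tokens returns the accumulated tokens for the active stack level.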
func (i *API) Tokens() []Token {
return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}
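// Token returns the accumulated token at the given offset for the active stack level.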
func (i *API) Token(offset int) Token {
return i.tokens[i.stackFrame.tokenStart+offset]
}
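// TokenValue returns the value of the accumulated token at the given offset.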
func (i *API) TokenValue(offset int) interface{} {
return i.tokens[i.stackFrame.tokenStart+offset].Value
}
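// ClearTokens drops the accumulated tokens for the active stack level.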
func (i *API) ClearTokens() {
i.tokens = i.tokens[:i.stackFrame.tokenStart]
i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}
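// SetTokens replaces the accumulated tokens for the active stack level.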
func (i *API) SetTokens(tokens ...Token) {
// Grow the tokens capacity when needed.
newTokenEnd := i.stackFrame.tokenStart + len(tokens)
if cap(i.tokens) < newTokenEnd {
newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
copy(newTokens, i.tokens) // preserve the tokens of lower stack levels
i.tokens = newTokens
} else {
i.tokens = i.tokens[0:newTokenEnd]
}
for offset, t := range tokens {
i.tokens[i.stackFrame.tokenStart+offset] = t
}
i.stackFrame.tokenEnd = newTokenEnd
}
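// AddTokens appends tokens to the accumulated output for the active stack level.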
func (i *API) AddTokens(tokens ...Token) {
// Grow the tokens capacity when needed.
newTokenEnd := i.stackFrame.tokenEnd + len(tokens)
if cap(i.tokens) < newTokenEnd {
newTokens := make([]Token, newTokenEnd, newTokenEnd*2)
copy(newTokens, i.tokens)
i.tokens = newTokens
} else {
i.tokens = i.tokens[0:newTokenEnd]
}
for offset, t := range tokens {
i.tokens[i.stackFrame.tokenEnd+offset] = t
}
i.stackFrame.tokenEnd = newTokenEnd
}

View File

@ -18,7 +18,7 @@ func ExampleAPI_NextRune() {
r, err := api.NextRune()
fmt.Printf("Rune read from input; %c\n", r)
fmt.Printf("The error: %v\n", err)
fmt.Printf("API results: %q\n", api.Result().String())
fmt.Printf("API results: %q\n", api.String())
// Output:
// Rune read from input; T
@ -34,38 +34,38 @@ func ExampleAPI_Accept() {
api.Accept() // adds 'h' to the API results
api.NextRune() // reads 'e', but it is not added to the API results
fmt.Printf("API results: %q\n", api.Result().String())
fmt.Printf("API results: %q\n", api.String())
// Output:
// API results: "Th"
}
func ExampleAPI_Result() {
func ExampleAPI_modifyingResults() {
api := tokenize.NewAPI("")
result := api.Result()
api.AddString("Some runes")
api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
api.AddRunes(' ', 'i', 'n', ' ')
api.AddString("various ways")
fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
fmt.Printf("API result runes as string: %q\n", api.String())
result.AddRunes("Some runes")
result.AddRunes([]rune{' ', 'a', 'd', 'd', 'e', 'd'})
result.AddRunes(' ', 'i', 'n', ' ', "various ways")
fmt.Printf("API result first 10 runes: %q\n", api.Result().Runes()[0:10])
fmt.Printf("API result runes as string: %q\n", api.Result().String())
api.SetString("new ")
api.AddString("set ")
api.AddString("of ")
api.AddRunes('r', 'u', 'n', 'e', 's')
fmt.Printf("API result runes as string: %q\n", api.String())
fmt.Printf("API result runes: %q\n", api.Runes())
fmt.Printf("API third rune: %q\n", api.Rune(2))
result.SetRunes("new ", "set ", "of ", 'r', 'u', 'n', 'e', 's')
fmt.Printf("API result runes as string: %q\n", api.Result().String())
fmt.Printf("API result runes: %q\n", api.Result().Runes())
fmt.Printf("API third rune: %q\n", api.Result().Rune(2))
result.AddTokens(tokenize.Token{
Runes: []rune("demo 1"),
api.AddTokens(tokenize.Token{
Type: 42,
Value: "towel"})
result.AddTokens(tokenize.Token{
Runes: []rune("demo 2"),
api.AddTokens(tokenize.Token{
Type: 73,
Value: "Zaphod"})
fmt.Printf("API result tokens: %v\n", api.Result().Tokens())
fmt.Printf("API second result token: %v\n", api.Result().Token(1))
fmt.Printf("API result tokens: %v\n", api.Tokens())
fmt.Printf("API second result token: %v\n", api.Token(1))
// Output:
// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
@ -84,17 +84,17 @@ func ExampleAPI_Reset() {
api.Accept()
api.NextRune()
api.Accept()
fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// Reset clears the results, but keeps the cursor position.
api.Reset()
fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
fmt.Printf("API results: %q at %s\n", api.Result().String(), api.Result().Cursor())
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// Output:
// API results: "Ve" at line 1, column 3
@ -104,14 +104,16 @@ func ExampleAPI_Reset() {
func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'.
abcHandler := func(t tokenize.API) bool {
abcHandler := func(t *tokenize.API) bool {
a := tokenize.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
if a.Rune(r)(child) {
child.Merge() // accept results into parent t
if a.Rune(r)(t) {
t.Merge(child) // accept results into parent of child
t.Dispose(child) // return to the parent level
return true // and report a successful match
}
t.Dispose(child) // return to the parent level
}
// If we get here, then no match was found. Return false to communicate
// this to the caller.
@ -141,25 +143,27 @@ func ExampleAPI_Fork() {
}
func ExampleAPI_Merge() {
tokenHandler := func(t tokenize.API) bool {
tokenHandler := func(t *tokenize.API) bool {
child1 := t.Fork()
child1.NextRune() // reads 'H'
child1.Accept()
child1.NextRune() // reads 'i'
child1.Accept()
t.NextRune() // reads 'H'
t.Accept()
t.NextRune() // reads 'i'
t.Accept()
child2 := child1.Fork()
child2.NextRune() // reads ' '
child2.Accept()
child2.NextRune() // reads 'd'
child2.Accept()
child2 := t.Fork()
t.NextRune() // reads ' '
t.Accept()
t.NextRune() // reads 'm'
t.Accept()
t.Dispose(child2)
child1.Merge() // We merge child1, which has read 'H' and 'i' only.
t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
t.Dispose(child1) // and clean up child1 to return to the parent
return true
}
result, _ := tokenize.New(tokenHandler)("Hi mister X!")
fmt.Println(result)
fmt.Println(result.String())
// Output:
// Hi
@ -170,75 +174,157 @@ func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// Fork a few levels.
child1 := api.Fork()
child2 := child1.Fork()
child3 := child2.Fork()
child4 := child3.Fork()
child2 := api.Fork()
child3 := api.Fork()
child4 := api.Fork()
// Read some data from child4.
r, _ := child4.NextRune()
child4.Accept()
// Read a rune 'a' from child4.
r, _ := api.NextRune()
AssertEqual(t, 'a', r, "child4 rune 1")
api.Accept()
AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
r, _ = child4.NextRune()
child4.Accept()
// Read another rune 'b' from child4.
r, _ = api.NextRune()
AssertEqual(t, 'b', r, "child4 rune 2")
api.Accept()
AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
// Merge it to child3.
child4.Merge()
// Merge "ab" from child4 to child3.
api.Merge(child4)
AssertEqual(t, "", api.String(), "child4 runes after first merge")
// Read some more from child4.
r, _ = child4.NextRune()
child4.Accept()
r, _ = api.NextRune()
AssertEqual(t, 'c', r, "child4 rune 3")
AssertEqual(t, "line 1, column 4", child4.Result().Cursor().String(), "cursor child4 rune 3")
api.Accept()
AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
AssertEqual(t, "line 1, column 3", child3.Result().Cursor().String(), "cursor child3 rune 3, before merge of child 4")
// Merge "c" from child4 to child3.
api.Merge(child4)
// Again, merge it to child3.
child4.Merge()
AssertEqual(t, "line 1, column 4", child3.Result().Cursor().String(), "cursor child3 rune 3, after merge of child 4")
// And dispose of child4, making child3 the active stack level.
api.Dispose(child4)
// Child3 should now have the combined results "abc" from child4's work.
AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3.
r, _ = child3.NextRune()
child3.Accept()
r, _ = child3.NextRune()
child3.Accept()
r, _ = child3.NextRune()
child3.Accept()
AssertEqual(t, 'f', r, "child3 rune 5")
r, _ = api.NextRune()
AssertEqual(t, 'd', r, "child3 rune 4")
api.Accept()
AssertEqual(t, "abcdef", child3.Result().String(), "child3 total result after rune 6")
r, _ = api.NextRune()
AssertEqual(t, 'e', r, "child3 rune 5")
api.Accept()
r, _ = api.NextRune()
AssertEqual(t, 'f', r, "child3 rune 6")
api.Accept()
AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
// Temporarily create some new forks from here, but don't use their outcome.
child3sub1 := child3.Fork()
child3sub1.NextRune()
child3sub1.Accept()
child3sub1.NextRune()
child3sub1.Accept()
child3sub2 := child3sub1.Fork()
child3sub2.NextRune()
child3sub2.Accept()
child3sub2.Merge()
child3sub1 := api.Fork()
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
child3sub2 := api.Fork()
api.NextRune()
api.Accept()
api.Merge(child3sub2) // do merge sub2 down to sub1
api.Dispose(child3sub2) // and dispose of sub2
api.Dispose(child3sub1) // but dispose of sub1 without merging
// Instead merge the pre-forking results from child3 to child2.
child3.Merge()
// Instead, merge the results from before this forking detour from child3
// to child2, and dispose of child3.
api.Merge(child3)
api.Dispose(child3)
AssertEqual(t, "abcdef", child2.Result().String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", child2.Result().Cursor().String(), "cursor child2 after merge child3")
AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
// Merge child2 to child1.
child2.Merge()
// Merge child2 to child1 and dispose of it.
api.Merge(child2)
api.Dispose(child2)
// Merge child1 a few times to the top level api.
child1.Merge()
child1.Merge()
child1.Merge()
child1.Merge()
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
// And dispose of it.
api.Dispose(child1)
// Read some data from the top level api.
r, _ = api.NextRune()
api.Accept()
AssertEqual(t, "abcdefg", api.Result().String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Result().Cursor().String(), "api cursor end result")
AssertEqual(t, "abcdefg", api.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}
func TestClearRunes(t *testing.T) {
api := tokenize.NewAPI("Laphroaig")
api.NextRune() // Read 'L'
api.Accept() // Add to runes
api.NextRune() // Read 'a'
api.Accept() // Add to runes
api.ClearRunes() // Clear the runes, giving us a fresh start.
api.NextRune() // Read 'p'
api.Accept() // Add to runes
api.NextRune() // Read 'h'
api.Accept() // Add to runes
AssertEqual(t, "ph", api.String(), "api string end result")
}
func TestMergeScenariosForTokens(t *testing.T) {
api := tokenize.NewAPI("")
token1 := tokenize.Token{Value: 1}
token2 := tokenize.Token{Value: 2}
token3 := tokenize.Token{Value: 3}
token4 := tokenize.Token{Value: 4}
api.SetTokens(token1)
tokens := api.Tokens()
AssertEqual(t, 1, len(tokens), "Tokens 1")
child := api.Fork()
tokens = api.Tokens()
AssertEqual(t, 0, len(tokens), "Tokens 2")
api.AddTokens(token2)
// Here we can merge by expanding the token slice on the parent,
// because the end of the parent slice and the start of the child
// slice align.
api.Merge(child)
api.Dispose(child)
tokens = api.Tokens()
AssertEqual(t, 2, len(tokens), "Tokens 3")
child = api.Fork()
api.AddTokens(token3)
api.Reset()
api.AddTokens(token4)
// Here the merge means that token4 will be copied to the end of
// the token slice of the parent, since there's a gap at the place
// where token3 used to be.
api.Merge(child)
api.Dispose(child)
tokens = api.Tokens()
AssertEqual(t, 3, len(tokens), "Tokens 4")
AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}

View File

@ -6,7 +6,7 @@ import (
"regexp"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
@ -110,9 +110,6 @@ func AssertTokenMaker(t *testing.T, test TokenMakerT) {
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if string(expected.Runes) != string(actual.Runes) {
t.Errorf("Unexpected Runes in result.Tokens[%d]:\nexpected: %q\nactual: %q", i, expected.Runes, actual.Runes)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}

View File

@ -1,4 +1,4 @@
package tokenize2
package tokenize
import (
"strings"

View File

@ -7,11 +7,11 @@ package tokenize
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t API) bool
type Handler func(t *API) bool
// Match is syntactic sugar that allows you to write a construction like
// New(handler)(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*Result, error) {
func (handler Handler) Match(input interface{}) (*API, error) {
tokenizer := New(handler)
return tokenizer(input)
}
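
A sketch of the sugar in action; note that after this commit the returned result is the *API itself:

api, err := tokenize.A.Str("Hello").Match("Hello, world!")
if err == nil {
    fmt.Println(api.String()) // Hello
}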

View File

@ -4,7 +4,7 @@ import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func TestSyntacticSugar(t *testing.T) {

View File

@ -230,7 +230,7 @@ var A = struct {
Lower: MatchUnicodeLower(),
Upper: MatchUnicodeUpper(),
HexDigit: MatchHexDigit(),
Octet: MatchOctet(false),
Octet: MatchOctet(true),
IPv4: MatchIPv4(true),
IPv4CIDRMask: MatchIPv4CIDRMask(true),
IPv4Netmask: MatchIPv4Netmask(true),
@ -306,7 +306,7 @@ var T = struct {
Float64 func(interface{}, Handler) Handler
Boolean func(interface{}, Handler) Handler
ByValue func(toktype interface{}, handler Handler, value interface{}) Handler
ByCallback func(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler
ByCallback func(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler
Group func(interface{}, Handler) Handler
}{
Str: MakeStrLiteralToken,
@ -405,9 +405,9 @@ func MatchUnicodeSpace() Handler {
// Note that the callback function matches the signature of the unicode.Is* functions,
// so those can be used. E.g. MatchRuneByCallback(unicode.IsLower).
func MatchRuneByCallback(callback func(rune) bool) Handler {
return func(t API) bool {
input, err := t.NextRune()
if err == nil && callback(input) {
return func(t *API) bool {
r, err := t.NextRune()
if err == nil && callback(r) {
t.Accept()
return true
}
@ -422,9 +422,9 @@ func MatchEndOfLine() Handler {
// MatchStr creates a Handler that matches the input against the provided string.
func MatchStr(expected string) Handler {
var handlers = []Handler{}
for _, r := range expected {
handlers = append(handlers, MatchRune(r))
var handlers = make([]Handler, len(expected))
for i, r := range expected {
handlers[i] = MatchRune(r)
}
return MatchSeq(handlers...)
}
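
In other words, a sketch of the equivalence:

// MatchStr("ab") builds the same matcher as:
handler := MatchSeq(MatchRune('a'), MatchRune('b'))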
@ -453,16 +453,20 @@ func MatchOptional(handler Handler) Handler {
// applied in their exact order. Only if all Handlers apply, the sequence
// reports successful match.
func MatchSeq(handlers ...Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
child := t.Fork()
for _, handler := range handlers {
subchild := child.Fork()
if !handler(subchild) {
subchild := t.Fork()
if !handler(t) {
t.Dispose(subchild)
t.Dispose(child)
return false
}
subchild.Merge()
t.Merge(subchild)
t.Dispose(subchild)
}
child.Merge()
t.Merge(child)
t.Dispose(child)
return true
}
}
@ -471,14 +475,17 @@ func MatchSeq(handlers ...Handler) Handler {
// can be applied. They are applied in their provided order. The first Handler
// that applies is used for reporting back a match.
func MatchAny(handlers ...Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
for _, handler := range handlers {
child := t.Fork()
if handler(child) {
child.Merge()
if handler(t) {
t.Merge(child)
t.Dispose(child)
return true
}
t.Dispose(child) // TODO switch to Reset() and move forking outside the loop?
}
return false
}
}
@ -487,10 +494,13 @@ func MatchAny(handlers ...Handler) Handler {
// the current input. If it does, then a failed match will be reported. If it
// does not, then the next rune from the input will be reported as a match.
func MatchNot(handler Handler) Handler {
return func(t API) bool {
if handler(t.Fork()) {
return func(t *API) bool {
child := t.Fork()
if handler(t) {
t.Dispose(child)
return false
}
t.Dispose(child)
_, err := t.NextRune()
if err == nil {
t.Accept()
@ -568,28 +578,30 @@ func matchMinMax(min int, max int, handler Handler, name string) Handler {
if max >= 0 && min > max {
callerPanic(name, "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
return func(t API) bool {
return func(t *API) bool {
total := 0
// Check for the minimum required amount of matches.
child := t.Fork()
for total < min {
total++
child := t.Fork()
if !handler(child) {
if !handler(t) {
t.Dispose(child)
return false
}
child.Merge()
}
// No specified max: include the rest of the available matches.
// Specified max: include the rest of the available matches, up to the max.
//child.Merge()
for max < 0 || total < max {
total++
child := t.Fork()
if !handler(child) {
if !handler(t) {
break
}
child.Merge()
}
t.Merge(child)
t.Dispose(child)
return true
}
}
@ -607,10 +619,13 @@ func MatchSeparated(separator Handler, separated Handler) Handler {
// applied. If the handler applies, but the except Handler as well, then the match
// as a whole will be treated as a mismatch.
func MatchExcept(handler Handler, except Handler) Handler {
return func(t API) bool {
if except(t.Fork()) {
return func(t *API) bool {
child := t.Fork()
if except(t) {
t.Dispose(child)
return false
}
t.Dispose(child)
return handler(t)
}
}
@ -620,11 +635,12 @@ func MatchExcept(handler Handler, except Handler) Handler {
// When both handlers match, the match for the handler is accepted and the match
// for the lookAhead handler is ignored.
func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
if handler(t) {
child := t.Fork()
if handler(child) && lookAhead(child.Fork()) {
child.Merge()
return true
result := lookAhead(t)
t.Dispose(child)
return result
}
return false
}
@ -635,11 +651,12 @@ func MatchFollowedBy(lookAhead Handler, handler Handler) Handler {
// If the handler matches and the lookAhead handler doesn't, then the match for
// the handler is accepted.
func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
if handler(t) {
child := t.Fork()
if handler(child) && !lookAhead(child.Fork()) {
child.Merge()
return true
result := !lookAhead(t)
t.Dispose(child)
return result
}
return false
}
@ -654,14 +671,14 @@ func MatchNotFollowedBy(lookAhead Handler, handler Handler) Handler {
//
// Without flushing the input, the input reader will allocate memory
// during the parsing process, eventually enough to hold the full input
// in memory. By wrapping Handlers with DoFlushInput, you can tell parsekit
// in memory. By wrapping Handlers with an input flusher, you can tell parsekit
// that the accumulated input so far will no longer be needed, allowing
// this input to be flushed from memory.
//
// Rule of thumb is: only use it when you have to actually fix a memory
// hogging issue for your use case.
func MakeInputFlusher(handler Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
if handler(t) {
t.FlushInput()
return true
@ -689,11 +706,12 @@ func MatchIntegerBetween(min int64, max int64) Handler {
callerPanic("MatchIntegerBetween", "Handler: {name} definition error at {caller}: max %d must not be < min %d", max, min)
}
digits := MatchSigned(MatchDigits())
return func(t API) bool {
return func(t *API) bool {
if !digits(t) {
return false
}
value, _ := strconv.ParseInt(t.Result().String(), 10, 64)
value, _ := strconv.ParseInt(t.String(), 10, 64)
if value < min || value > max {
return false
}
@ -705,9 +723,10 @@ func MatchIntegerBetween(min int64, max int64) Handler {
// has been reached. This Handler will never produce output. It only reports
// a successful or a failing match through its boolean return value.
func MatchEndOfFile() Handler {
return func(t API) bool {
return func(t *API) bool {
child := t.Fork()
_, err := child.NextRune()
_, err := t.NextRune()
t.Dispose(child)
return err == io.EOF
}
}
@ -723,7 +742,7 @@ func MatchUntilEndOfLine() Handler {
// read from the input. Invalid runes on the input are replaced with the UTF8
// replacement rune \uFFFD (i.e. utf8.RuneError), which displays as �.
func MatchAnyRune() Handler {
return func(t API) bool {
return func(t *API) bool {
_, err := t.NextRune()
if err == nil {
t.Accept()
@ -736,7 +755,7 @@ func MatchAnyRune() Handler {
// MatchValidRune creates a Handler function that checks if a valid
// UTF8 rune can be read from the input.
func MatchValidRune() Handler {
return func(t API) bool {
return func(t *API) bool {
r, err := t.NextRune()
if err == nil && r != utf8.RuneError {
t.Accept()
@ -749,7 +768,7 @@ func MatchValidRune() Handler {
// MatchInvalidRune creates a Handler function that checks if an invalid
// UTF8 rune can be read from the input.
func MatchInvalidRune() Handler {
return func(t API) bool {
return func(t *API) bool {
r, err := t.NextRune()
if err == nil && r == utf8.RuneError {
t.Accept()
@ -860,20 +879,20 @@ func MatchHexDigit() Handler {
// stripped from the octet.
func MatchOctet(normalize bool) Handler {
max3Digits := MatchMinMax(1, 3, MatchDigit())
return func(t API) bool {
return func(t *API) bool {
if !max3Digits(t) {
return false
}
value, _ := strconv.ParseInt(t.Result().String(), 10, 16)
value, _ := strconv.ParseInt(t.String(), 10, 16)
if value > 255 {
return false
}
if normalize {
runes := t.Result().Runes()
runes := t.Runes()
for len(runes) > 1 && runes[0] == '0' {
runes = runes[1:]
}
t.Result().SetRunes(runes)
t.SetRunes(runes...)
}
return true
}
@ -909,20 +928,19 @@ func MatchIPv4Netmask(normalize bool) Handler {
dot := MatchRune('.')
netmask := MatchSeq(octet, dot, octet, dot, octet, dot, octet)
return func(t API) bool {
return func(t *API) bool {
if !netmask(t) {
return false
}
// Check if the mask is provided in canonical form (ones followed by zeroes).
r := t.Result()
mask := net.IPv4Mask(r.Value(0).(byte), r.Value(1).(byte), r.Value(2).(byte), r.Value(3).(byte))
// Check if the mask is provided in canonical form (at the binary level, ones followed by zeroes).
mask := net.IPv4Mask(t.TokenValue(0).(byte), t.TokenValue(1).(byte), t.TokenValue(2).(byte), t.TokenValue(3).(byte))
ones, bits := mask.Size()
if ones == 0 && bits == 0 {
return false
}
r.ClearTokens()
t.ClearTokens()
return true
}
}
@ -942,7 +960,7 @@ func MatchIPv4Net(normalize bool) Handler {
MakeUint8Token("cidr", MatchIPv4CIDRMask(normalize)))
ipnet := MatchSeq(ip, slash, mask)
return func(t API) bool {
return func(t *API) bool {
if !ipnet(t) {
return false
}
@ -951,19 +969,18 @@ func MatchIPv4Net(normalize bool) Handler {
return true
}
r := t.Result()
maskToken := r.Token(1)
maskToken := t.Token(1)
if maskToken.Type == "cidr" {
r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), r.Value(1).(uint8)))
t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), t.TokenValue(1).(uint8)))
} else {
o := strings.Split(r.Value(1).(string), ".")
o := strings.Split(t.TokenValue(1).(string), ".")
b := func(idx int) byte { i, _ := strconv.Atoi(o[idx]); return byte(i) }
mask := net.IPv4Mask(b(0), b(1), b(2), b(3))
bits, _ := mask.Size()
r.SetRunes(fmt.Sprintf("%s/%d", r.Value(0), bits))
t.SetString(fmt.Sprintf("%s/%d", t.TokenValue(0), bits))
}
r.ClearTokens()
t.ClearTokens()
return true
}
}
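
The normalized form is exercised by the temporary debug test near the end of this commit; as a sketch:

api, err := tokenize.A.IPv4Net.Match("192.168.6.123/024")
if err == nil {
    fmt.Println(api.String()) // 192.168.6.123/24
}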
@ -975,7 +992,7 @@ func MatchIPv6(normalize bool) Handler {
colon := MatchRune(':')
empty := MatchSeq(colon, colon)
return func(t API) bool {
return func(t *API) bool {
nrOfHextets := 0
for nrOfHextets < 8 {
if hextet(t) {
@ -992,13 +1009,13 @@ func MatchIPv6(normalize bool) Handler {
}
// Invalid IPv6, when net.ParseIP() cannot handle it.
parsed := net.ParseIP(t.Result().String())
parsed := net.ParseIP(t.String())
if parsed == nil {
return false
}
if normalize {
t.Result().SetRunes(parsed.String())
t.SetString(parsed.String())
}
return true
}
@ -1017,13 +1034,12 @@ func matchCIDRMask(bits int64, normalize bool) Handler {
return mask
}
return func(t API) bool {
return func(t *API) bool {
if !mask(t) {
return false
}
r := t.Result()
bits, _ := strconv.Atoi(r.String())
t.Result().SetRunes(fmt.Sprintf("%d", bits))
bits, _ := strconv.Atoi(t.String())
t.SetString(fmt.Sprintf("%d", bits))
return true
}
}
@ -1057,13 +1073,15 @@ func MatchIPv6Net(normalize bool) Handler {
// string "bork" would not match against the second form, but " bork" would.
// In both cases, it would match the first form.
func ModifyDrop(handler Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
child := t.Fork()
if handler(child) {
child.Reset()
child.Merge()
if handler(t) {
t.Reset()
t.Merge(child)
t.Dispose(child)
return true
}
t.Dispose(child)
return false
}
}
@ -1137,14 +1155,16 @@ func ModifyReplace(handler Handler, replaceWith string) Handler {
// modified string on output. The return value of the modfunc will replace the
// resulting output.
func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
return func(t API) bool {
return func(t *API) bool {
child := t.Fork()
if handler(child) {
s := modfunc(child.Result().String())
child.Result().SetRunes(s)
child.Merge()
if handler(t) {
s := modfunc(t.String())
t.SetString(s)
t.Merge(child)
t.Dispose(child)
return true
}
t.Dispose(child)
return false
}
}
@ -1155,8 +1175,8 @@ func ModifyByCallback(handler Handler, modfunc func(string) string) Handler {
// escape sequence like "\n" is kept as-is (a backslash character, followed by
// an 'n'-character).
func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
literal := t.Result().String()
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
literal := t.String()
return literal
})
}
@ -1166,9 +1186,9 @@ func MakeStrLiteralToken(toktype interface{}, handler Handler) Handler {
// representation of the read Runes. This string is interpreted, meaning that an
// escape sequence like "\n" is translated to an actual newline control character
func MakeStrInterpretedToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING
interpreted, _ := interpretString(t.Result().String())
interpreted, _ := interpretString(t.String())
return interpreted
})
}
@ -1190,9 +1210,9 @@ func interpretString(str string) (string, error) {
// Result, for which the Token.Value is set to a Rune-representation
// of the read Rune.
func MakeRuneToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING --- not a 1 rune input
return t.Result().Rune(0)
return t.Rune(0)
})
}
@ -1200,9 +1220,9 @@ func MakeRuneToken(toktype interface{}, handler Handler) Handler {
// Result, for which the Token.Value is set to a Byte-representation
// of the read Rune.
func MakeByteToken(toktype interface{}, handler Handler) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
// TODO ERROR HANDLING --- not a 1 byte input
return byte(t.Result().Rune(0))
return byte(t.Rune(0))
})
}
@ -1406,8 +1426,8 @@ func MakeBooleanToken(toktype interface{}, handler Handler) Handler {
}
func makeStrconvToken(name string, toktype interface{}, handler Handler, convert func(s string) (interface{}, error)) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} {
value, err := convert(t.Result().String())
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} {
value, err := convert(t.String())
if err != nil {
// TODO meh, panic feels so bad here. Maybe just turn this case into "no match"?
panic(fmt.Sprintf("%s token invalid (%s)", name, err))
@ -1419,17 +1439,17 @@ func makeStrconvToken(name string, toktype interface{}, handler Handler, convert
// MakeTokenByValue creates a Handler that will add a static Token value
// to the Result.
func MakeTokenByValue(toktype interface{}, handler Handler, value interface{}) Handler {
return MakeTokenByCallback(toktype, handler, func(t API) interface{} { return value })
return MakeTokenByCallback(toktype, handler, func(t *API) interface{} { return value })
}
// MakeTokenByCallback creates a Handler that will add a Token to the
// Result, for which the Token.Value is to be generated by the provided
// makeValue() callback function. The function gets the current API as
// its input and must return the token value.
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t API) interface{}) Handler {
return func(t API) bool {
func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t *API) interface{}) Handler {
return func(t *API) bool {
child := t.Fork()
if handler(child) {
if handler(t) {
// The token is not added to the child here. The child might have produced its own
// tokens and we want those to come after the token for the current parsing level.
// By adding the token to the input API and then merging the child tokens, the order
@ -1437,12 +1457,14 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// e.g. when a parsing hierarchy looks like ("date" ("year", "month", "day")), the
// tokens will end up in the order "date", "year", "month", "day". When we'd add the
// token to the child here, the order would have been "year", "month", "day", "date".
token := Token{Type: toktype, Runes: child.Result().Runes(), Value: makeValue(child)}
t.Result().AddTokens(token)
child.Merge()
token := Token{Type: toktype, Value: makeValue(t)}
t.AddTokens(token)
t.Merge(child)
t.Dispose(child)
return true
}
t.Dispose(child)
return false
}
}
@ -1450,15 +1472,18 @@ func MakeTokenByCallback(toktype interface{}, handler Handler, makeValue func(t
// MakeTokenGroup checks if the provided handler matches the input. If yes, then it will
// take the tokens as produced by the handler and group them together in a single token.
func MakeTokenGroup(toktype interface{}, handler Handler) Handler {
return func(t API) bool {
return func(t *API) bool {
child := t.Fork()
if handler(child) {
result := child.Result()
token := Token{Type: toktype, Runes: result.Runes(), Value: result.Tokens()}
result.SetTokens(token)
child.Merge()
if handler(t) {
tokens := t.Tokens()
tokensCopy := make([]Token, len(tokens))
copy(tokensCopy, tokens)
t.SetTokens(Token{Type: toktype, Value: tokensCopy})
t.Merge(child)
t.Dispose(child)
return true
}
t.Dispose(child)
return false
}
}

View File

@ -4,22 +4,32 @@ import (
"fmt"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func TestCombinatorsTempDebug(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
// {"024", a.IPv4CIDRMask, true, "24"},
// {"024", a.Octet, true, "24"},
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
})
}
func TestCombinators(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{
{"abc", c.Not(a.Rune('b')), true, "a"},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"bcd", c.Not(a.Rune('b')), false, ""},
{"1010", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
{"2020", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
{"abc", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
{"bcd", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
{"cde", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
{"ababc", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
{"ababc", c.Repeated(5, a.Runes('a', 'b')), false, ""},
{"", c.Not(a.Rune('b')), false, ""},
{"abc not", c.Not(a.Rune('b')), true, "a"},
{"bcd not", c.Not(a.Rune('b')), false, ""},
{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
{"", c.Min(0, a.Rune('a')), true, ""},
{"a", c.Min(0, a.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
@ -53,6 +63,7 @@ func TestCombinators(t *testing.T) {
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
@ -62,8 +73,20 @@ func TestCombinators(t *testing.T) {
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
})
}
@ -110,8 +133,10 @@ func TestAtoms(t *testing.T) {
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
{"", a.AnyRune, false, ""},
{"⌘", a.ValidRune, true, "⌘"},
{"\xbc with ValidRune", a.ValidRune, false, "<EFBFBD>"},
{"\xbc with ValidRune", a.ValidRune, false, ""},
{"", a.ValidRune, false, ""},
{"\xbc with InvalidRune", a.InvalidRune, true, "<22>"},
{"ok with InvalidRune", a.InvalidRune, false, ""},
{" ", a.Space, true, " "},
{"X", a.Space, false, ""},
{"\t", a.Tab, true, "\t"},
@ -225,38 +250,73 @@ func TestAtoms(t *testing.T) {
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
{"fifteen", a.IntegerBetween(0, 10), false, ""},
})
}
func TestIPv4Atoms(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
// Non-normalized octet.
{"0X", tokenize.MatchOctet(false), true, "0"},
{"00X", tokenize.MatchOctet(false), true, "00"},
{"000X", tokenize.MatchOctet(false), true, "000"},
{"10X", tokenize.MatchOctet(false), true, "10"},
{"010X", tokenize.MatchOctet(false), true, "010"},
{"255123", tokenize.MatchOctet(false), true, "255"},
{"256123", tokenize.MatchOctet(false), false, ""},
{"300", tokenize.MatchOctet(false), false, ""},
// Octet.
{"0", tokenize.MatchOctet(false), true, "0"},
{"02", tokenize.MatchOctet(false), true, "02"},
{"003", tokenize.MatchOctet(false), true, "003"},
{"256", tokenize.MatchOctet(false), false, ""},
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "00"},
{"000X", a.Octet, true, "000"},
{"00X", a.Octet, true, "0"},
{"000X", a.Octet, true, "0"},
{"10X", a.Octet, true, "10"},
{"010X", a.Octet, true, "010"},
{"010X", a.Octet, true, "10"},
{"255123", a.Octet, true, "255"},
{"256123", a.Octet, false, ""},
{"300", a.Octet, false, ""},
// IPv4 address.
{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
{"256.255.255.255", a.IPv4, false, ""},
// IPv4 CIDR netmask.
{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
{"0", a.IPv4CIDRMask, true, "0"},
{"00", a.IPv4CIDRMask, true, "0"},
{"000", a.IPv4CIDRMask, true, "0"},
{"32", a.IPv4CIDRMask, true, "32"},
{"032", a.IPv4CIDRMask, true, "32"},
{"33", a.IPv4CIDRMask, false, ""},
// IPv4 netmask in dotted quad format.
{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
// IPv4 address + CIDR or dotted quad netmask.
{"192.168.6.123", a.IPv4Net, false, ""},
{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
{"10.0.0.10/16.0.0.0", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
})
}
@ -292,7 +352,10 @@ func TestIPv6Atoms(t *testing.T) {
func TestModifiers(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{
{"missed me!", m.Drop(a.Rune('w')), false, ""},
{"where are you?", m.Drop(a.Rune('w')), true, ""},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
@ -300,6 +363,7 @@ func TestModifiers(t *testing.T) {
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
})
@ -323,64 +387,99 @@ func TestTokenMakers(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
AssertTokenMakers(t, []TokenMakerT{
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
[]tokenize.Token{{Type: "A", Runes: []rune(""), Value: ""}}},
[]tokenize.Token{{Type: "A", Value: ""}}},
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
[]tokenize.Token{{Type: "B", Runes: []rune(`Ѝюج literal \string`), Value: `Ѝюج literal \string`}}},
[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
[]tokenize.Token{{Type: "C", Runes: []rune(`Ѝюجinterpreted \n string \u2318`), Value: "Ѝюجinterpreted \n string ⌘"}}},
[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Runes: []rune("Ø"), Value: byte('Ø')}}},
{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},
// I don't check the returned error here, but it's good enough to see that the parsing
// stopped after the illegal \g escape sequence.
{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
{Type: "bar", Runes: []rune("R"), Value: byte('R')},
{Type: "bar", Runes: []rune("O"), Value: byte('O')},
{Type: "bar", Runes: []rune("C"), Value: byte('C')},
{Type: "bar", Runes: []rune("K"), Value: byte('K')},
{Type: "bar", Runes: []rune("S"), Value: byte('S')},
{Type: "bar", Value: byte('R')},
{Type: "bar", Value: byte('O')},
{Type: "bar", Value: byte('C')},
{Type: "bar", Value: byte('K')},
{Type: "bar", Value: byte('S')},
}},
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Runes: []rune("Ø"), Value: rune('Ø')}}},
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Runes: []rune("2147483647"), Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Runes: []rune("-2147483647"), Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Runes: []rune("127"), Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Runes: []rune("-127"), Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Runes: []rune("32767"), Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Runes: []rune("-32767"), Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Runes: []rune("2147483647"), Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Runes: []rune("-2147483647"), Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Runes: []rune("-9223372036854775807"), Value: int64(-9223372036854775807)}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Runes: []rune("4294967295"), Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Runes: []rune("255"), Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Runes: []rune("65535"), Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Runes: []rune("4294967295"), Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Runes: []rune("18446744073709551615"), Value: uint64(18446744073709551615)}}},
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Runes: []rune("3.1415"), Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Runes: []rune("24.19287"), Value: float64(24.19287)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Runes: []rune("1"), Value: true},
{Type: "P", Runes: []rune("t"), Value: true},
{Type: "P", Runes: []rune("T"), Value: true},
{Type: "P", Runes: []rune("true"), Value: true},
{Type: "P", Runes: []rune("TRUE"), Value: true},
{Type: "P", Runes: []rune("True"), Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
}},
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Runes: []rune("0"), Value: false},
{Type: "P", Runes: []rune("f"), Value: false},
{Type: "P", Runes: []rune("F"), Value: false},
{Type: "P", Runes: []rune("false"), Value: false},
{Type: "P", Runes: []rune("FALSE"), Value: false},
{Type: "P", Runes: []rune("False"), Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
}},
{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
})
}
func TestTokenGroup_Match(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
tokenizer := tokenize.New(tok.Group("Group",
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
api, err := tokenizer("xxxxx")
AssertTrue(t, err == nil, "Tokenizer result")
tokens := api.Tokens()
AssertEqual(t, 1, len(tokens), "Length of tokens slice")
contained := tokens[0].Value.([]tokenize.Token)
AssertEqual(t, 3, len(contained), "Length of contained tokens")
AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
}
func TestTokenGroup_Mismatch(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
tokenizer := tokenize.New(tok.Group("Group",
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
api, err := tokenizer("12345")
AssertTrue(t, err == nil, "Tokenizer result")
tokens := api.Tokens()
AssertEqual(t, 0, len(tokens), "Length of tokens slice")
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M

View File

@ -1,155 +0,0 @@
package tokenize
import (
"fmt"
)
// Result is a struct that is used for holding tokenizer results as produced
// by a tokenize.Handler. It also provides the API that Handlers and Parsers
// can use to store and retrieve the results.
type Result struct {
runes []rune // runes as added to the result by tokenize.Handler functions
tokens []Token // Tokens as added to the result by tokenize.Handler functions
cursor Cursor // current read cursor position, relative to the start of the file
offset int // current rune offset relative to the Reader's sliding window
err error // can be used by a Handler to report a specific issue with the input
}
// Token defines a lexical token as produced by tokenize.Handlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
Runes []rune // the runes that make up the token
Type interface{} // optional token type, can be any type that a parser author sees fit
Value interface{} // optional token value, of any type as well
}
func (t Token) String() string {
tokenType := ""
if t.Type != nil {
tokenType = fmt.Sprintf("%v", t.Type)
}
value := ""
if t.Value != nil {
switch t.Value.(type) {
case []*Token:
return fmt.Sprintf("%v%v", tokenType, t.Value)
case string:
value = fmt.Sprintf("%q", t.Value)
case rune:
value = fmt.Sprintf("%v", t.Value)
case bool:
value = fmt.Sprintf("%v", t.Value)
default:
value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
}
}
return fmt.Sprintf("%v(%s)", tokenType, value)
}
// newResult initializes an empty Result struct.
func newResult() Result {
return Result{}
}
// ClearRunes clears the runes in the Result.
func (r *Result) ClearRunes() {
r.runes = []rune{}
}
// SetRunes replaces the Runes from the Result with the provided input.
func (r *Result) SetRunes(s ...interface{}) {
r.ClearRunes()
r.addRunes("SetRunes", s...)
}
// AddRunes is used to add runes to the Result.
func (r *Result) AddRunes(set ...interface{}) {
r.addRunes("AddRunes", set...)
}
func (r *Result) addRunes(name string, set ...interface{}) {
for _, s := range set {
switch s := s.(type) {
case string:
r.runes = append(r.runes, []rune(s)...)
case []rune:
r.runes = append(r.runes, s...)
case rune:
r.runes = append(r.runes, s)
default:
callerPanic(name, "tokenize.Result.{name}(): unsupported type '%T' used at {caller}", s)
}
}
}
// Runes retrieves the Runes from the Result.
func (r *Result) Runes() []rune {
return r.runes
}
// Rune retrieves a single rune from the Result at the specified index.
func (r *Result) Rune(idx int) rune {
return r.runes[idx]
}
// String returns the Runes from the Result as a string.
func (r *Result) String() string {
return string(r.runes)
}
// ClearTokens clears the tokens in the Result.
func (r *Result) ClearTokens() {
r.tokens = []Token{}
}
// SetTokens replaces the Tokens from the Result with the provided tokens.
func (r *Result) SetTokens(tokens ...Token) {
r.tokens = tokens
}
// AddTokens is used to add Tokens to the Result.
func (r *Result) AddTokens(tokens ...Token) {
r.tokens = append(r.tokens, tokens...)
}
// Tokens retrieves the Tokens from the Result.
func (r *Result) Tokens() []Token {
return r.tokens
}
// Token retrieves a single Token from the Result at the specified index.
func (r *Result) Token(idx int) Token {
return r.tokens[idx]
}
// Values retrieves a slice containing only the Values for the Result Tokens.
func (r *Result) Values() []interface{} {
values := make([]interface{}, len(r.tokens))
for i, tok := range r.tokens {
values[i] = tok.Value
}
return values
}
// Value retrieves a single Value from the Result Token at the specified index.
func (r *Result) Value(idx int) interface{} {
return r.tokens[idx].Value
}
// Cursor retrieves the read cursor from the Result. This is the first
// cursor position after the runes that were read and accepted by the Handler.
func (r *Result) Cursor() Cursor {
return r.cursor
}

View File

@ -1,58 +0,0 @@
package tokenize_test
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleToken() {
t0 := tokenize.Token{}
t1 := tokenize.Token{
Type: "Number",
Value: 224,
}
const TName = 1
t2 := tokenize.Token{
Type: TName,
Value: "John",
}
t3 := tokenize.Token{
Value: 42,
}
fmt.Printf("%s\n%s\n%s\n%s\n", t0, t1, t2, t3)
// Result: [ip("0.0.0.0") mask((int8)0)]
// Result: [ip("192.168.0.1") mask((int8)24)]
// Result: [ip("255.255.255.255") mask((int8)32)]
// Error: mismatch at start of file
// Error: mismatch at start of file
}
func TestSetResult_AcceptsVariousTypesAsInput(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("Testing"))
i.Result().SetRunes("string")
AssertEqual(t, "string", string(i.Result().String()), "i.Result() with string input")
i.Result().SetRunes([]rune("rune slice"))
AssertEqual(t, "rune slice", string(i.Result().String()), "i.Result() with rune slice input")
i.Result().SetRunes('X')
AssertEqual(t, "X", string(i.Result().String()), "i.Result() with rune input")
}
func TestSetResult_PanicsOnUnhandledInput(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := tokenize.NewAPI(strings.NewReader("Testing"))
i.Result().SetRunes(1234567)
},
Regexp: true,
Expect: `tokenize\.Result\.SetRunes\(\): unsupported type 'int' used at /.*/result_test.go:\d+`,
})
}

View File

@ -1,4 +1,4 @@
package tokenize2
package tokenize
import (
"fmt"

View File

@ -1,9 +1,9 @@
package tokenize2_test
package tokenize_test
import (
"fmt"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
func ExampleToken_String() {

View File

@ -9,7 +9,7 @@ import (
// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// API struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*Result, error)
type Func func(input interface{}) (*API, error)
// New instantiates a new tokenizer.
//
@ -28,7 +28,7 @@ type Func func(input interface{}) (*Result, error)
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
return func(input interface{}) (*Result, error) {
return func(input interface{}) (*API, error) {
api := NewAPI(input)
ok := tokenHandler(api)
@ -36,6 +36,6 @@ func New(tokenHandler Handler) Func {
err := fmt.Errorf("mismatch at %s", Cursor{})
return nil, err
}
return api.Result(), nil
return api, nil
}
}
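A minimal usage sketch for the new signature, assuming the package-level C and A shorthands; the returned API gives direct access to the accepted runes:

	wordTok := New(C.OneOrMore(A.Letter))
	api, err := wordTok("hello world")
	if err == nil {
		fmt.Println(api.String()) // prints "hello"; the space ends the match
	}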

View File

@ -7,7 +7,7 @@ import (
"testing"
"unicode/utf8"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// TODO For error handling, it would be really cool if for example the
@ -55,7 +55,7 @@ func ExampleNew() {
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
api := makeTokenizeAPI()
r, _ := (&api).NextRune()
r, _ := api.NextRune()
AssertEqual(t, 'T', r, "first rune")
}
@ -67,7 +67,7 @@ func TestInputCanAcceptRunesFromReader(t *testing.T) {
i.Accept()
i.NextRune()
i.Accept()
AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
AssertEqual(t, "Tes", i.String(), "i.String()")
}
func TestCallingNextRuneTwice_Panics(t *testing.T) {
@ -78,52 +78,92 @@ func TestCallingNextRuneTwice_Panics(t *testing.T) {
i.NextRune()
},
Regexp: true,
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*/tokenizer_test\.go:\d+ without a prior call to Accept\(\)`,
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
`without a prior call to Accept\(\)`,
})
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
input := makeTokenizeAPI()
api := makeTokenizeAPI()
AssertPanic(t, PanicT{
Function: (&input).Accept,
Function: api.Accept,
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*/assertions_test\.go:\d+ without first calling NextRune()`,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
`without first calling NextRune\(\)`,
})
}
func TestCallingMergeOnNonForkedChild_Panics(t *testing.T) {
func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
api := tokenize.NewAPI("")
AssertPanic(t, PanicT{
Function: func() {
api.NextRune()
api.Accept()
},
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
`, but the prior call to NextRune\(\) failed`,
})
}
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Merge()
i.Merge(0)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingNextRuneOnForkedParent_DetachesForkedChild(t *testing.T) {
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
f := i.Fork()
i.NextRune()
f.Merge()
child := i.Fork()
i.Fork()
i.Merge(child)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingForkOnForkedParent_DetachesForkedChild(t *testing.T) {
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
f := i.Fork()
g := f.Fork()
i.Fork()
g.Merge()
i.Dispose(0)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ using a non-active API fork.*`})
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
child := i.Fork()
i.Fork()
i.Dispose(child)
},
Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Fork()
g := i.Fork()
i.Fork()
i.Merge(g)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
@ -135,26 +175,26 @@ func TestForkingInput_ClearsLastRune(t *testing.T) {
i.Accept()
},
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /hom.*/tokenizer_test\.go:\d+ without first calling NextRune\(\)`,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Result().Cursor().String(), "cursor 1")
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.Result().Cursor().String(), "cursor 2")
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.Result().Cursor().String(), "cursor 3")
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.Result().Cursor().String(), "cursor 4")
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
@ -167,16 +207,17 @@ func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
}
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("X"))
f := i.Fork()
f.NextRune()
f.Accept()
r, err := f.NextRune()
child := i.Fork()
i.NextRune()
i.Accept()
r, err := i.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
r, err = i.NextRune()
i.Dispose(child) // brings the read offset back to the start
r, err = i.NextRune() // so here we should see the same rune
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func makeTokenizeAPI() tokenize.API {
func makeTokenizeAPI() *tokenize.API {
return tokenize.NewAPI("Testing")
}

View File

@ -5,33 +5,33 @@ import (
)
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// TODO FIXME Speed change
// Create input, accept the first rune.
i := NewAPI("Testing")
i.NextRune()
i.Accept() // T
AssertEqual(t, "T", i.Result().String(), "accepted rune in input")
AssertEqual(t, "T", i.String(), "accepted rune in input")
// Fork
f := i.Fork()
AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
AssertEqual(t, 1, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
AssertEqual(t, 1, f.state.stack[f.stackLevel].offset, "child offset")
child := i.Fork()
AssertEqual(t, 1, i.stackFrame.cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// Accept two runes via fork.
f.NextRune()
f.Accept() // e
f.NextRune()
f.Accept() // s
AssertEqual(t, "es", f.Result().String(), "result runes in fork")
AssertEqual(t, 1, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.state.stack[i.stackLevel].offset, "parent offset")
AssertEqual(t, 3, f.state.stack[f.stackLevel].cursor.Byte, "child cursor.Byte")
AssertEqual(t, 3, f.state.stack[f.stackLevel].offset, "child offset")
i.NextRune()
i.Accept() // e
i.NextRune()
i.Accept() // s
AssertEqual(t, "es", i.String(), "result runes in fork")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// Merge fork back into parent
f.Merge()
AssertEqual(t, "Tes", i.Result().String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.state.stack[i.stackLevel].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 3, i.state.stack[i.stackLevel].offset, "parent offset")
i.Merge(child)
i.Dispose(child)
AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
@ -39,86 +39,83 @@ func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult
i.NextRune()
i.Accept()
f1 := i.Fork()
f1.NextRune()
f1.Accept()
f2 := f1.Fork()
f2.NextRune()
f2.Accept()
// TODO FIXME Speed changes
// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 1, i.result.offset, "i.offset A")
// AssertEqual(t, "e", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 2, f1.result.offset, "f1.offset A")
// AssertEqual(t, "s", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset A")
// f2.Merge()
// AssertEqual(t, "T", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 1, i.result.offset, "i.offset B")
// AssertEqual(t, "es", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 3, f1.result.offset, "f1.offset B")
// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset B")
// f1.Merge()
// AssertEqual(t, "Tes", i.Result().String(), "i.Result().String()")
// AssertEqual(t, 3, i.result.offset, "i.offset C")
// AssertEqual(t, "", f1.Result().String(), "f1.Result().String()")
// AssertEqual(t, 3, f1.result.offset, "f1.offset C")
// AssertEqual(t, "", f2.Result().String(), "f2.Result().String()")
// AssertEqual(t, 3, f2.result.offset, "f2.offset C")
}
func TestGivenMultipleLevelsOfForks_WhenReturningToRootInput_ForksAreDetached(t *testing.T) {
i := NewAPI("Testing")
f1 := i.Fork()
f2 := f1.Fork()
//f3 := f2.Fork()
f2.Fork()
f4 := f1.Fork() // secret subtest: this Fork() detaches both forks f2 and f3
//f5 := f4.Fork()
f4.Fork()
// TODO FIXME Speed changes
// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
// AssertEqual(t, true, i.child == &f1, "i.child == f1")
// AssertEqual(t, true, f1.parent == &i, "f1.parent == i")
// AssertEqual(t, true, f1.child == &f4, "f1.child == f4")
// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
// AssertEqual(t, true, f4.parent == &f1, "f4.parent == f1")
// AssertEqual(t, true, f4.child == &f5, "f4.child == f5")
// AssertEqual(t, true, f5.parent == &f4, "f5.parent == f4")
// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
i.NextRune()
// AssertEqual(t, true, i.parent == nil, "i.parent == nil")
// AssertEqual(t, true, i.child == nil, "i.child == nil")
// AssertEqual(t, true, f1.parent == nil, "f1.parent == nil")
// AssertEqual(t, true, f1.child == nil, "f1.child == nil")
// AssertEqual(t, true, f2.child == nil, "f2.child == nil")
// AssertEqual(t, true, f2.parent == nil, "f2.parent == nil")
// AssertEqual(t, true, f3.child == nil, "f3.child == nil")
// AssertEqual(t, true, f3.parent == nil, "f3.parent == nil")
// AssertEqual(t, true, f4.parent == nil, "f4.parent == nil")
// AssertEqual(t, true, f4.child == nil, "f4.child == nil")
// AssertEqual(t, true, f5.parent == nil, "f5.parent == nil")
// AssertEqual(t, true, f5.child == nil, "f5.child == nil")
i.Accept()
f2 := i.Fork()
i.NextRune()
i.Accept()
AssertEqual(t, "s", i.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
i.Merge(f2)
i.Dispose(f2)
AssertEqual(t, "es", i.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
i.Merge(f1)
i.Dispose(f1)
AssertEqual(t, "Tes", i.String(), "top-level API String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
// TODO FIXME Speed changes
i := NewAPI("Testing")
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
// AssertTrue(t, i.result.lastRune != nil, "API.result.lastRune after NextRune() is not nil")
AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
i.Accept()
// AssertTrue(t, i.result.lastRune == nil, "API.result.lastRune after Accept() is nil")
// AssertEqual(t, 1, i.result.offset, "API.result.offset")
AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
func TestFlushInput(t *testing.T) {
api := NewAPI("cool")
// Flushing without any read data is okay. FlushInput() will return
// false in this case, and nothing else happens.
AssertTrue(t, api.FlushInput() == false, "flush input at start")
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")
AssertTrue(t, api.FlushInput() == false, "flush input after flush input")
// Read offset is now zero, but reading should continue after "co".
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
AssertEqual(t, "cool", api.String(), "end result")
}
func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a')
flushB := C.FlushInput(A.Rune('b'))
api := NewAPI("abaab")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", api.String(), "runes after 1 read")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", api.String(), "runes after 2 reads")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", api.String(), "runes after 3 reads")
runeA(api)
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
}
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(

View File

@ -1,374 +0,0 @@
package tokenize2
import (
"git.makaay.nl/mauricem/go-parsekit/read"
)
// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when not needed anymore (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// API must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported, is by forking an API struct by
// calling method Fork(). This will return a forked child API, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed of by calling Dispose() on the forked child. This is not mandatory.
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach to lookaheads: they use
// peeks, move the read cursor position back and forth, or put read input
// back on the input stream. That often leads to code that is efficient,
// but in my opinion not very intuitive to read. It can also be tedious
// to get the cursor back to the correct position, which can lead to
// hard-to-track bugs. I much prefer this forking method, since no extra
// bookkeeping has to be implemented when writing a parser.
type API struct {
reader *read.Buffer // the input data reader
lastRune rune // the rune as retrieved by the last NextRune() call
lastRuneErr error // the error for the last NextRune() call
runeRead bool // whether or not a rune was read using NextRune()
runes []rune // the rune stack
tokens []Token // the token stack
stackFrames []stackFrame // the stack frames, containing stack level-specific data
stackLevel int // the current stack level
stackFrame *stackFrame // the current stack frame
}
type stackFrame struct {
offset int // current rune offset relative to the Reader's sliding window
runeStart int
runeEnd int
tokenStart int
tokenEnd int
cursor Cursor
// TODO
err error // can be used by a Handler to report a specific issue with the input
}
const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10
// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
api := &API{
reader: read.New(input),
runes: make([]rune, 0, initialRuneDepth),
tokens: make([]Token, 0, initialTokenDepth),
stackFrames: make([]stackFrame, 1, initialStackDepth),
}
api.stackFrame = &api.stackFrames[0]
return api
}
// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
if i.runeRead {
callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
"without a prior call to Accept()")
}
readRune, err := i.reader.RuneAt(i.stackFrame.offset)
i.lastRune = readRune
i.lastRuneErr = err
i.runeRead = true
return readRune, err
}
// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such case will result in a panic.
func (i *API) Accept() {
if !i.runeRead {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
"without first calling NextRune()")
} else if i.lastRuneErr != nil {
callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
"but the prior call to NextRune() failed")
}
i.runes = append(i.runes, i.lastRune)
i.stackFrame.runeEnd++
i.stackFrame.cursor.moveByRune(i.lastRune)
i.stackFrame.offset++
i.runeRead = false
}
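A sketch of the intended NextRune()/Accept() call pattern inside a Handler (the space check is only an illustrative stop condition):

	for {
		r, err := i.NextRune()
		if err != nil || r == ' ' {
			break // stop at EOF, a read error or a space; no Accept() here
		}
		i.Accept() // add the rune to the results and move the cursor forward
	}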
// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing by the Handler was successful and you want to add the results
// to the parent API, call Merge() with the stack level handle as returned by Fork().
// This will add the results of the child (runes, tokens) to those of the parent.
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed of by calling Dispose() on the forked child. This is not mandatory.
// Garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
newStackLevel := i.stackLevel + 1
newStackSize := newStackLevel + 1
// Grow the stack frames capacity when needed.
if cap(i.stackFrames) < newStackSize {
newFrames := make([]stackFrame, newStackSize, newStackSize*2)
copy(newFrames, i.stackFrames)
i.stackFrames = newFrames
} else {
i.stackFrames = i.stackFrames[0:newStackSize]
}
parent := i.stackFrame
i.stackLevel++
i.stackFrame = &i.stackFrames[i.stackLevel]
*i.stackFrame = *parent
i.stackFrame.runeStart = parent.runeEnd
i.stackFrame.tokenStart = parent.tokenEnd
i.runeRead = false
return i.stackLevel
}
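A sketch of the basic lookahead pattern using the integer stack level handle that Fork() returns (someHandler is illustrative):

	child := i.Fork()  // start a speculative read on a new stack level
	if someHandler(i) {
		i.Merge(child) // keep the runes/tokens and the moved read cursor
	}
	i.Dispose(child)   // always drop back to the parent stack level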
// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset, so the child can
// immediately be reused for performing another match. This means that all result data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the API to the parent stack level.
func (i *API) Merge(stackLevel int) {
if stackLevel == 0 {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
}
parent := &i.stackFrames[stackLevel-1]
if parent.runeEnd == i.stackFrame.runeStart {
// The end of the parent slice aligns with the start of the child slice.
// Because of this, to merge the parent slice can simply be expanded
// to include the child slice.
// parent : |----------|
// child: |------|
// After merge operation:
// parent: |-----------------|
// child: |---> continue reading from here
parent.runeEnd = i.stackFrame.runeEnd
i.stackFrame.runeStart = i.stackFrame.runeEnd
} else {
// The end of the parent slice does not align with the start of the
// child slice. The child slice has to be copied onto the end of
// the parent slice.
// parent : |----------|
// child: |------|
// After merge operation:
// parent: |-----------------|
// child: |---> continue reading from here
i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
parent.runeEnd = len(i.runes)
i.stackFrame.runeStart = parent.runeEnd
i.stackFrame.runeEnd = parent.runeEnd
}
// The same logic applies to tokens.
if parent.tokenEnd == i.stackFrame.tokenStart {
parent.tokenEnd = i.stackFrame.tokenEnd
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
} else {
i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
parent.tokenEnd = len(i.tokens)
i.stackFrame.tokenStart = parent.tokenEnd
i.stackFrame.tokenEnd = parent.tokenEnd
}
parent.offset = i.stackFrame.offset
parent.cursor = i.stackFrame.cursor
i.stackFrame.err = nil
i.runeRead = false
}
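Because Merge() clears the child's result window but keeps its read cursor, a child can feed its results to the parent in chunks; a sketch (chunkHandler is illustrative):

	child := i.Fork()
	for chunkHandler(i) { // each iteration accepts one chunk of runes
		i.Merge(child)    // hand the chunk to the parent, then keep reading
	}
	i.Dispose(child)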
func (i *API) Dispose(stackLevel int) {
if stackLevel == 0 {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on the top-level API stack level 0")
}
if stackLevel != i.stackLevel {
callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
"on API stack level %d, but the current stack level is %d "+
"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
}
i.runeRead = false
i.stackLevel = stackLevel - 1
i.stackFrames = i.stackFrames[:stackLevel]
i.stackFrame = &i.stackFrames[stackLevel-1]
i.runes = i.runes[0:i.stackFrame.runeEnd]
i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}
func (i *API) Reset() {
i.runeRead = false
i.stackFrame.runeStart = i.stackFrame.runeEnd
i.stackFrame.tokenStart = i.stackFrame.tokenEnd
i.stackFrame.err = nil
}
// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own TokenHandler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
// result := &(i.state.stack[i.stackLevel])
if i.stackFrame.offset > 0 {
i.reader.Flush(i.stackFrame.offset)
i.stackFrame.offset = 0
return true
}
return false
}
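A sketch of the intended use on long input streams (recordHandler is illustrative); flushing after each accepted record keeps the read buffer small. The C.FlushInput() combinator seen in the tests wraps this same pattern:

	for recordHandler(i) {
		i.FlushInput() // drop the processed runes from the read.Buffer
	}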
func (i *API) String() string {
return string(i.Runes())
}
func (i *API) Runes() []rune {
return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}
func (i *API) Rune(offset int) rune {
return i.runes[i.stackFrame.runeStart+offset]
}
func (i *API) ClearRunes() {
i.runes = i.runes[:i.stackFrame.runeStart]
i.stackFrame.runeEnd = i.stackFrame.runeStart
}
func (i *API) SetRunes(runes ...rune) {
i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
}
func (i *API) AddRunes(runes ...rune) {
i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
i.stackFrame.runeEnd += len(runes)
}
func (i *API) AddString(s string) {
i.AddRunes([]rune(s)...)
}
func (i *API) SetString(s string) {
i.SetRunes([]rune(s)...)
}
func (i *API) Cursor() Cursor {
return i.stackFrame.cursor
}
func (i *API) Tokens() []Token {
return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}
func (i *API) Token(offset int) Token {
return i.tokens[i.stackFrame.tokenStart+offset]
}
func (i *API) TokenValue(offset int) interface{} {
return i.tokens[i.stackFrame.tokenStart+offset].Value
}
func (i *API) ClearTokens() {
i.tokens = i.tokens[:i.stackFrame.tokenStart]
i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}
func (i *API) SetTokens(tokens ...Token) {
i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
}
func (i *API) AddTokens(tokens ...Token) {
i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
i.stackFrame.tokenEnd += len(tokens)
}

View File

@ -1,330 +0,0 @@
package tokenize2_test
import (
"fmt"
"testing"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
func ExampleNewAPI() {
tokenize.NewAPI("The input that the API will handle")
// Output:
}
func ExampleAPI_NextRune() {
api := tokenize.NewAPI("The input that the API will handle")
r, err := api.NextRune()
fmt.Printf("Rune read from input; %c\n", r)
fmt.Printf("The error: %v\n", err)
fmt.Printf("API results: %q\n", api.String())
// Output:
// Rune read from input; T
// The error: <nil>
// API results: ""
}
func ExampleAPI_Accept() {
api := tokenize.NewAPI("The input that the API will handle")
api.NextRune() // reads 'T'
api.Accept() // adds 'T' to the API results
api.NextRune() // reads 'h'
api.Accept() // adds 'h' to the API results
api.NextRune() // reads 'e', but it is not added to the API results
fmt.Printf("API results: %q\n", api.String())
// Output:
// API results: "Th"
}
func ExampleAPI_modifyingResults() {
api := tokenize.NewAPI("")
api.AddString("Some runes")
api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
api.AddRunes(' ', 'i', 'n', ' ')
api.AddString("various ways")
fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
fmt.Printf("API result runes as string: %q\n", api.String())
api.SetString("new ")
api.AddString("set ")
api.AddString("of ")
api.AddRunes('r', 'u', 'n', 'e', 's')
fmt.Printf("API result runes as string: %q\n", api.String())
fmt.Printf("API result runes: %q\n", api.Runes())
fmt.Printf("API third rune: %q\n", api.Rune(2))
api.AddTokens(tokenize.Token{
Type: 42,
Value: "towel"})
api.AddTokens(tokenize.Token{
Type: 73,
Value: "Zaphod"})
fmt.Printf("API result tokens: %v\n", api.Tokens())
fmt.Printf("API second result token: %v\n", api.Token(1))
// Output:
// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
// API result runes as string: "Some runes added in various ways"
// API result runes as string: "new set of runes"
// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
// API third rune: 'w'
// API result tokens: [42("towel") 73("Zaphod")]
// API second result token: 73("Zaphod")
}
func ExampleAPI_Reset() {
api := tokenize.NewAPI("Very important input!")
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// Reset clears the results, but keeps the cursor position.
api.Reset()
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())
// Output:
// API results: "Ve" at line 1, column 3
// API results: "" at line 1, column 3
// API results: "ry" at line 1, column 5
}
func ExampleAPI_Fork() {
// This custom Handler checks for input 'a', 'b' or 'c'.
abcHandler := func(t *tokenize.API) bool {
a := tokenize.A
for _, r := range []rune{'a', 'b', 'c'} {
child := t.Fork() // fork, so we won't change parent t
if a.Rune(r)(t) {
t.Merge(child) // accept results into parent of child
t.Dispose(child) // return to the parent level
return true // and report a successful match
}
t.Dispose(child) // return to the parent level
}
// If we get here, then no match was found. Return false to communicate
// this to the caller.
return false
}
// Note: a custom Handler is normally not what you need.
// You can make use of the parser/combinator tooling to make the
// implementation a lot simpler and to take care of forking at
// the appropriate places. The handler from above can be replaced with:
simpler := tokenize.A.RuneRange('a', 'c')
result, err := tokenize.New(abcHandler)("another test")
fmt.Println(result, err)
result, err = tokenize.New(simpler)("curious")
fmt.Println(result, err)
result, err = tokenize.New(abcHandler)("bang on!")
fmt.Println(result, err)
result, err = tokenize.New(abcHandler)("not a match")
fmt.Println(result, err)
// Output:
// a <nil>
// c <nil>
// b <nil>
// <nil> mismatch at start of file
}
func ExampleAPI_Merge() {
tokenHandler := func(t *tokenize.API) bool {
child1 := t.Fork()
t.NextRune() // reads 'H'
t.Accept()
t.NextRune() // reads 'i'
t.Accept()
child2 := t.Fork()
t.NextRune() // reads ' '
t.Accept()
t.NextRune() // reads 'm'
t.Accept()
t.Dispose(child2)
t.Merge(child1) // We merge child1, which has read 'H' and 'i' only.
t.Dispose(child1) // and clean up child1 to return to the parent
return true
}
result, _ := tokenize.New(tokenHandler)("Hi mister X!")
fmt.Println(result.String())
// Output:
// Hi
}
func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
// Fork a few levels.
child1 := api.Fork()
child2 := api.Fork()
child3 := api.Fork()
child4 := api.Fork()
// Read a rune 'a' from child4.
r, _ := api.NextRune()
AssertEqual(t, 'a', r, "child4 rune 1")
api.Accept()
AssertEqual(t, "a", api.String(), "child4 runes after rune 1")
// Read another rune 'b' from child4.
r, _ = api.NextRune()
AssertEqual(t, 'b', r, "child4 rune 2")
api.Accept()
AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")
// Merge "ab" from child4 to child3.
api.Merge(child4)
AssertEqual(t, "", api.String(), "child4 runes after first merge")
// Read some more from child4.
r, _ = api.NextRune()
AssertEqual(t, 'c', r, "child4 rune 3")
api.Accept()
AssertEqual(t, "c", api.String(), "child4 runes after rune 1")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")
// Merge "c" from child4 to child3.
api.Merge(child4)
// And dispose of child4, making child3 the active stack level.
api.Dispose(child4)
// Child3 should now have the combined results "abc" from child4's work.
AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")
// Now read some data from child3.
r, _ = api.NextRune()
AssertEqual(t, 'd', r, "child3 rune 5")
api.Accept()
r, _ = api.NextRune()
AssertEqual(t, 'e', r, "child3 rune 5")
api.Accept()
r, _ = api.NextRune()
AssertEqual(t, 'f', r, "child3 rune 5")
api.Accept()
AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")
// Temporarily create some new forks from here, but don't use their outcome.
child3sub1 := api.Fork()
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
child3sub2 := api.Fork()
api.NextRune()
api.Accept()
api.Merge(child3sub2) // do merge sub2 down to sub1
api.Dispose(child3sub2) // and dispose of sub2
api.Dispose(child3sub1) // but dispose of sub1 without merging
// Instead, merge the results from before this forking detour from child3 to child2
// and dispose of it.
api.Merge(child3)
api.Dispose(child3)
AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")
// Merge child2 to child1 and dispose of it.
api.Merge(child2)
api.Dispose(child2)
// Merge child1 a few times to the top level api.
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
api.Merge(child1)
// And dispose of it.
api.Dispose(child1)
// Read some data from the top level api.
r, _ = api.NextRune()
api.Accept()
AssertEqual(t, "abcdefg", api.String(), "api string end result")
AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}
func TestClearRunes(t *testing.T) {
api := tokenize.NewAPI("Laphroaig")
api.NextRune() // Read 'L'
api.Accept() // Add to runes
api.NextRune() // Read 'a'
api.Accept() // Add to runes
api.ClearRunes() // Clear the runes, giving us a fresh start.
api.NextRune() // Read 'p'
api.Accept() // Add to runes
api.NextRune() // Read 'h'
api.Accept() // Add to runes
AssertEqual(t, "ph", api.String(), "api string end result")
}
func TestMergeScenariosForTokens(t *testing.T) {
api := tokenize.NewAPI("")
token1 := tokenize.Token{Value: 1}
token2 := tokenize.Token{Value: 2}
token3 := tokenize.Token{Value: 3}
token4 := tokenize.Token{Value: 4}
api.SetTokens(token1)
tokens := api.Tokens()
AssertEqual(t, 1, len(tokens), "Tokens 1")
child := api.Fork()
tokens = api.Tokens()
AssertEqual(t, 0, len(tokens), "Tokens 2")
api.AddTokens(token2)
// Here we can merge by expanding the token slice on the parent,
// because the end of the parent slice and the start of the child
// slice align.
api.Merge(child)
api.Dispose(child)
tokens = api.Tokens()
AssertEqual(t, 2, len(tokens), "Tokens 3")
child = api.Fork()
api.AddTokens(token3)
api.Reset()
api.AddTokens(token4)
// Here the merge means that token4 will be copied to the end of
// the token slice of the parent, since there's a gap at the place
// where token3 used to be.
api.Merge(child)
api.Dispose(child)
tokens = api.Tokens()
AssertEqual(t, 3, len(tokens), "Tokens 4")
AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}

View File

@ -1,118 +0,0 @@
package tokenize2_test
// This file contains some tools that are used for writing tests.
import (
"regexp"
"testing"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
}
}
type PanicT struct {
Function func()
Regexp bool
Expect string
}
func AssertPanics(t *testing.T, testSet []PanicT) {
for _, test := range testSet {
AssertPanic(t, test)
}
}
func AssertPanic(t *testing.T, p PanicT) {
defer func() {
if r := recover(); r != nil {
mismatch := false
if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
mismatch = true
}
if !p.Regexp && p.Expect != r.(string) {
mismatch = true
}
if mismatch {
t.Errorf(
"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
p.Expect, r)
}
} else {
t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
}
}()
p.Function()
}
type HandlerT struct {
Input string
Handler tokenize.Handler
MustMatch bool
Expected string
}
func AssertHandlers(t *testing.T, testSet []HandlerT) {
for _, test := range testSet {
AssertHandler(t, test)
}
}
func AssertHandler(t *testing.T, test HandlerT) {
result, err := tokenize.New(test.Handler)(test.Input)
if test.MustMatch {
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else if output := result.String(); output != test.Expected {
t.Errorf("Test %q failed: not expected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
}
} else {
if err == nil {
t.Errorf("Test %q failed: should not match, but it did", test.Input)
}
}
}
type TokenMakerT struct {
Input string
Handler tokenize.Handler
Expected []tokenize.Token
}
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
for _, test := range testSet {
AssertTokenMaker(t, test)
}
}
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
result, err := tokenize.New(test.Handler)(test.Input)
if err != nil {
t.Errorf("Test %q failed with error: %s", test.Input, err)
} else {
if len(result.Tokens()) != len(test.Expected) {
t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
}
for i, expected := range test.Expected {
actual := result.Token(i)
if expected.Type != actual.Type {
t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
}
if expected.Value != actual.Value {
t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
}
}
}
}

View File

@ -1,33 +0,0 @@
package tokenize2
import (
"fmt"
"runtime"
"strings"
)
func callerPanic(name, f string, data ...interface{}) {
filepos := callerBefore(name)
m := fmt.Sprintf(f, data...)
m = strings.Replace(m, "{caller}", filepos, -1)
m = strings.Replace(m, "{name}", name, -1)
panic(m)
}
func callerBefore(name string) string {
found := false
for i := 1; ; i++ {
pc, file, line, ok := runtime.Caller(i)
if found {
return fmt.Sprintf("%s:%d", file, line)
}
if !ok {
return "unknown caller"
}
f := runtime.FuncForPC(pc)
if strings.HasSuffix(f.Name(), "."+name) {
found = true
}
}
}
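// exampleCallerPanic is a minimal sketch added for illustration only (it is
// not part of the original file): it shows how API methods are expected to
// use callerPanic(). The {name} and {caller} placeholders in the message are
// replaced with the method name and the file:line position of the calling
// code, as resolved by callerBefore().
func exampleCallerPanic() {
	callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} without a prior call to Accept()")
}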

View File

@ -1,45 +0,0 @@
package tokenize2
import (
"fmt"
"unicode/utf8"
)
// Cursor represents the position of a cursor in various ways.
type Cursor struct {
Byte int // The cursor offset in bytes
Rune int // The cursor offset in UTF8 runes
Column int // The column at which the cursor is (0-indexed)
Line int // The line at which the cursor is (0-indexed)
}
// String produces a string representation of the cursor position.
func (c Cursor) String() string {
if c.Line == 0 && c.Column == 0 {
return fmt.Sprintf("start of file")
}
return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}
// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
for _, r := range input {
c.moveByRune(r)
}
return c
}
func (c *Cursor) moveByRune(r rune) *Cursor {
c.Byte += utf8.RuneLen(r)
c.Rune++
if r == '\n' {
c.Column = 0
c.Line++
} else {
c.Column++
}
return c
}

View File

@ -1,69 +0,0 @@
package tokenize2
import (
"fmt"
"testing"
)
func ExampleCursor_move() {
c := Cursor{}
fmt.Printf("after initialization : %s\n", c)
fmt.Printf("after 'some words' : %s\n", c.move("some words"))
fmt.Printf("after '\\n' : %s\n", c.move("\n"))
fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))
// Output:
// after initialization : start of file
// after 'some words' : line 1, column 11
// after '\n' : line 2, column 1
// after '\r\nskip\nlines' : line 4, column 6
}
func ExampleCursor_String() {
c := Cursor{}
fmt.Println(c.String())
c.move("\nfoobar")
fmt.Println(c.String())
// Output:
// start of file
// line 2, column 7
}
func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
for _, test := range []struct {
name string
input []string
byte int
rune int
line int
column int
}{
{"No input at all", []string{""}, 0, 0, 0, 0},
{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
{"One newline", []string{"\n"}, 1, 1, 1, 0},
{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
} {
c := Cursor{}
for _, s := range test.input {
c.move(s)
}
if c.Byte != test.byte {
t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
}
if c.Rune != test.rune {
t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
}
if c.Line != test.line {
t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
}
if c.Column != test.column {
t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
}
}
}

View File

@ -1,53 +0,0 @@
package tokenize2
// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool
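// matchLowerX is a minimal sketch of a hand-written Handler, added here for
// illustration only: it matches a single 'x' rune, using the same fork /
// accept / merge / dispose pattern as the other examples in this commit.
// In practice, the predefined tokenize.A.Rune('x') does the same thing.
func matchLowerX(t *API) bool {
	child := t.Fork()
	if r, err := t.NextRune(); err == nil && r == 'x' {
		t.Accept()
		t.Merge(child)
		t.Dispose(child)
		return true
	}
	t.Dispose(child)
	return false
}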
// Match is syntactic sugar that allows you to write a construction like
// NewTokenizer(handler).Execute(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
tokenizer := New(handler)
return tokenizer(input)
}
// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
return MatchAny(handler, otherHandler)
}
// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
return MatchRep(n, handler)
}
// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
return MatchSeq(handler, otherHandler)
}
// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(handler, separator) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
return MatchSeparated(separator, handler)
}
// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
return MatchOptional(handler)
}
// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
return MatchExcept(handler, exceptHandler)
}

View File

@ -1,101 +0,0 @@
package tokenize2_test
import (
"fmt"
"testing"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
func TestSyntacticSugar(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
{"cccccc", a.Rune('c').Optional(), true, "c"},
{"dddddd", a.Rune('c').Optional(), true, ""},
{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
{"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
})
}
func ExampleHandler_Times() {
c, a := tokenize.C, tokenize.A
phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))
fmt.Println(phoneNumber.Match("0201234567"))
// Output:
// 0201234567 <nil>
}
func ExampleHandler_Then() {
c, a := tokenize.C, tokenize.A
phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))
fmt.Println(phoneNumber.Match("0208888888"))
// Output:
// 0208888888 <nil>
}
func ExampleHandler_Or() {
c, a := tokenize.C, tokenize.A
phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))
fmt.Println(phoneNumber.Match("+31209876543"))
fmt.Println(phoneNumber.Match("0031209876543"))
fmt.Println(phoneNumber.Match("0031020991234"))
fmt.Println(phoneNumber.Match("0031201234"))
// Output:
// +31209876543 <nil>
// 0031209876543 <nil>
// <nil> mismatch at start of file
// <nil> mismatch at start of file
}
func ExampleHandler_SeparatedBy() {
a, t := tokenize.A, tokenize.T
csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)
r, _ := csv.Match("123,456,7,8,9")
for i, token := range r.Tokens() {
fmt.Printf("[%d] %v\n", i, token)
}
// Output:
// [0] number((int)123)
// [1] number((int)456)
// [2] number((int)7)
// [3] number((int)8)
// [4] number((int)9)
}
func ExampleHandler_Optional() {
c, a := tokenize.C, tokenize.A
spanish := c.Seq(
a.Rune('¿').Optional(),
c.OneOrMore(a.AnyRune.Except(a.Question)),
a.Rune('?').Optional())
fmt.Println(spanish.Match("¿Habla español María?"))
fmt.Println(spanish.Match("Sí, María habla español."))
// Output:
// ¿Habla español María? <nil>
// Sí, María habla español. <nil>
}
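// ExampleHandler_Except is a hedged sketch, added for illustration: it shows
// handler.Except(), which matches the base handler only when the except
// handler does not match at the same input position.
func ExampleHandler_Except() {
	c, a := tokenize.C, tokenize.A
	upToQuestionMark := c.OneOrMore(a.AnyRune.Except(a.Question))
	fmt.Println(upToQuestionMark.Match("Habla español?"))
	// Output:
	// Habla español <nil>
}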
func ExampleHandler_Match() {
r, err := tokenize.A.IPv4.Match("001.002.003.004")
fmt.Println(r, err)
r, err = tokenize.A.IPv4.Match("1.2.3")
fmt.Println(r, err)
// Output:
// 1.2.3.4 <nil>
// <nil> mismatch at start of file
}

File diff suppressed because it is too large

View File

@ -1,512 +0,0 @@
package tokenize2_test
import (
"fmt"
"testing"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
func TestCombinatorsTempDebug(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
// {"024", a.IPv4CIDRMask, true, "24"},
// {"024", a.Octet, true, "24"},
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
})
}
func TestCombinators(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{
{"", c.Not(a.Rune('b')), false, ""},
{"abc not", c.Not(a.Rune('b')), true, "a"},
{"bcd not", c.Not(a.Rune('b')), false, ""},
{"aaaxxxb", c.OneOrMore(c.Not(a.Rune('b'))), true, "aaaxxx"},
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
{"", c.Min(0, a.Rune('a')), true, ""},
{"a", c.Min(0, a.Rune('a')), true, "a"},
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
{"", c.Max(4, a.Rune('b')), true, ""},
{"X", c.Max(4, a.Rune('b')), true, ""},
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
{"", c.OneOrMore(a.Rune('d')), false, ""},
{"X", c.OneOrMore(a.Rune('d')), false, ""},
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
{"ghijkl", c.Optional(a.Rune('h')), true, ""},
{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
{" a", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "a"},
{"a ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, "a"},
{" a ", m.TrimSpace(c.OneOrMore(a.AnyRune)), true, "a"},
{"ab", c.FollowedBy(a.Rune('b'), a.Rune('a')), true, "a"},
{"ba", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
{"aa", c.FollowedBy(a.Rune('b'), a.Rune('a')), false, ""},
{"aaabbbcccddd", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), true, "aaabbbccc"},
{"aaabbbcccxxx", c.FollowedBy(c.OneOrMore(a.Rune('d')), c.OneOrMore(a.Rune('a')).Then(c.OneOrMore(c.Not(a.Rune('d'))))), false, ""},
{"xy", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
{"yx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
{"xx", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), true, "x"},
{"xa", c.NotFollowedBy(a.Rune('a'), a.Rune('x')), false, ""},
{"xxxyyyzzzaaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), false, ""},
{"xxxyyyzzzbaa", c.NotFollowedBy(a.Rune('a'), c.OneOrMore(a.Runes('x', 'y', 'z'))), true, "xxxyyyzzz"},
})
}
func TestCombinatorPanics(t *testing.T) {
var c, a = tokenize.C, tokenize.A
AssertPanics(t, []PanicT{
{func() { a.RuneRange('z', 'a') }, true,
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
{func() { c.MinMax(-1, 1, a.Space) }, true,
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.MinMax(1, -1, a.Space) }, true,
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { c.MinMax(10, 5, a.Space) }, true,
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
{func() { c.Min(-10, a.Space) }, true,
`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
{func() { c.Max(-42, a.Space) }, true,
`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
{func() { a.IntegerBetween(10, -10) }, true,
`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
})
}
func TestAtoms(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
{"dd", a.RuneRange('b', 'e'), true, "d"},
{"ee", a.RuneRange('b', 'e'), true, "e"},
{"ff", a.RuneRange('b', 'e'), false, ""},
{"Hello, world!", a.Str("Hello"), true, "Hello"},
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
{"xxx", a.Rune('x'), true, "x"},
{"x ", a.Rune(' '), false, ""},
{"aa", a.RuneRange('b', 'e'), false, ""},
{"bb", a.RuneRange('b', 'e'), true, "b"},
{"cc", a.RuneRange('b', 'e'), true, "c"},
{"", a.EndOfFile, true, ""},
{"⌘", a.AnyRune, true, "⌘"},
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
{"", a.AnyRune, false, ""},
{"⌘", a.ValidRune, true, "⌘"},
{"\xbc with ValidRune", a.ValidRune, false, ""},
{"", a.ValidRune, false, ""},
{"\xbc with InvalidRune", a.InvalidRune, true, "<22>"},
{"ok with InvalidRune", a.InvalidRune, false, ""},
{" ", a.Space, true, " "},
{"X", a.Space, false, ""},
{"\t", a.Tab, true, "\t"},
{"\r", a.CR, true, "\r"},
{"\n", a.LF, true, "\n"},
{"!", a.Excl, true, "!"},
{"\"", a.DoubleQuote, true, "\""},
{"#", a.Hash, true, "#"},
{"$", a.Dollar, true, "$"},
{"%", a.Percent, true, "%"},
{"&", a.Amp, true, "&"},
{"'", a.SingleQuote, true, "'"},
{"(", a.LeftParen, true, "("},
{"(", a.RoundOpen, true, "("},
{")", a.RightParen, true, ")"},
{")", a.RoundClose, true, ")"},
{"*", a.Asterisk, true, "*"},
{"*", a.Multiply, true, "*"},
{"+", a.Plus, true, "+"},
{"+", a.Add, true, "+"},
{",", a.Comma, true, ","},
{"-", a.Minus, true, "-"},
{"-", a.Subtract, true, "-"},
{".", a.Dot, true, "."},
{"/", a.Slash, true, "/"},
{"/", a.Divide, true, "/"},
{":", a.Colon, true, ":"},
{";", a.Semicolon, true, ";"},
{"<", a.AngleOpen, true, "<"},
{"<", a.LessThan, true, "<"},
{"=", a.Equal, true, "="},
{">", a.AngleClose, true, ">"},
{">", a.GreaterThan, true, ">"},
{"?", a.Question, true, "?"},
{"@", a.At, true, "@"},
{"[", a.SquareOpen, true, "["},
{"\\", a.Backslash, true, "\\"},
{"]", a.SquareClose, true, "]"},
{"^", a.Caret, true, "^"},
{"_", a.Underscore, true, "_"},
{"`", a.Backquote, true, "`"},
{"{", a.CurlyOpen, true, "{"},
{"|", a.Pipe, true, "|"},
{"}", a.CurlyClose, true, "}"},
{"~", a.Tilde, true, "~"},
{"\t \t \r\n", a.Blank, true, "\t"},
{" \t \t \r\n", a.Blanks, true, " \t \t "},
{"xxx", a.Whitespace, false, ""},
{" ", a.Whitespace, true, " "},
{"\t", a.Whitespace, true, "\t"},
{"\n", a.Whitespace, true, "\n"},
{"\r\n", a.Whitespace, true, "\r\n"},
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
{"xxx", a.UnicodeSpace, false, ""},
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
{"", a.EndOfLine, true, ""},
{"\r\n", a.EndOfLine, true, "\r\n"},
{"\n", a.EndOfLine, true, "\n"},
{"0", a.Digit, true, "0"},
{"1", a.Digit, true, "1"},
{"2", a.Digit, true, "2"},
{"3", a.Digit, true, "3"},
{"4", a.Digit, true, "4"},
{"5", a.Digit, true, "5"},
{"6", a.Digit, true, "6"},
{"7", a.Digit, true, "7"},
{"8", a.Digit, true, "8"},
{"9", a.Digit, true, "9"},
{"X", a.Digit, false, ""},
{"a", a.ASCIILower, true, "a"},
{"z", a.ASCIILower, true, "z"},
{"A", a.ASCIILower, false, ""},
{"Z", a.ASCIILower, false, ""},
{"A", a.ASCIIUpper, true, "A"},
{"Z", a.ASCIIUpper, true, "Z"},
{"a", a.ASCIIUpper, false, ""},
{"z", a.ASCIIUpper, false, ""},
{"1", a.Letter, false, ""},
{"a", a.Letter, true, "a"},
{"Ø", a.Letter, true, "Ø"},
{"Ë", a.Lower, false, ""},
{"ë", a.Lower, true, "ë"},
{"ä", a.Upper, false, "ä"},
{"Ä", a.Upper, true, "Ä"},
{"0", a.HexDigit, true, "0"},
{"9", a.HexDigit, true, "9"},
{"a", a.HexDigit, true, "a"},
{"f", a.HexDigit, true, "f"},
{"A", a.HexDigit, true, "A"},
{"F", a.HexDigit, true, "F"},
{"g", a.HexDigit, false, "g"},
{"G", a.HexDigit, false, "G"},
{"0", a.Integer, true, "0"},
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
{"1", a.Integer, true, "1"},
{"-10X", a.Integer, false, ""},
{"+10X", a.Integer, false, ""},
{"-10X", a.Signed(a.Integer), true, "-10"},
{"+10X", a.Signed(a.Integer), true, "+10"},
{"+10.1X", a.Signed(a.Integer), true, "+10"},
{"0X", a.Float, true, "0"},
{"0X", a.Float, true, "0"},
{"1X", a.Float, true, "1"},
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
{"123.321X", a.Float, true, "123.321"},
{"-3.14X", a.Float, false, ""},
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
{"-11", a.IntegerBetween(-10, 10), false, "0"},
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
{"0", a.IntegerBetween(-10, 10), true, "0"},
{"10", a.IntegerBetween(-10, 10), true, "10"},
{"11", a.IntegerBetween(0, 10), false, ""},
{"fifteen", a.IntegerBetween(0, 10), false, ""},
})
}
func TestIPv4Atoms(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
// Not normalized octet.
{"0X", tokenize.MatchOctet(false), true, "0"},
{"00X", tokenize.MatchOctet(false), true, "00"},
{"000X", tokenize.MatchOctet(false), true, "000"},
{"10X", tokenize.MatchOctet(false), true, "10"},
{"010X", tokenize.MatchOctet(false), true, "010"},
{"255123", tokenize.MatchOctet(false), true, "255"},
{"256123", tokenize.MatchOctet(false), false, ""},
{"300", tokenize.MatchOctet(false), false, ""},
// Octet.
{"0", tokenize.MatchOctet(false), true, "0"},
{"02", tokenize.MatchOctet(false), true, "02"},
{"003", tokenize.MatchOctet(false), true, "003"},
{"256", tokenize.MatchOctet(false), false, ""},
{"0X", a.Octet, true, "0"},
{"00X", a.Octet, true, "0"},
{"000X", a.Octet, true, "0"},
{"10X", a.Octet, true, "10"},
{"010X", a.Octet, true, "10"},
{"255123", a.Octet, true, "255"},
{"256123", a.Octet, false, ""},
{"300", a.Octet, false, ""},
// IPv4 address.
{"0.0.0.0", tokenize.MatchIPv4(false), true, "0.0.0.0"},
{"010.0.255.01", tokenize.MatchIPv4(false), true, "010.0.255.01"},
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
{"256.255.255.255", a.IPv4, false, ""},
// IPv4 CIDR netmask.
{"0", tokenize.MatchIPv4CIDRMask(false), true, "0"},
{"000", tokenize.MatchIPv4CIDRMask(false), true, "000"},
{"0", a.IPv4CIDRMask, true, "0"},
{"00", a.IPv4CIDRMask, true, "0"},
{"000", a.IPv4CIDRMask, true, "0"},
{"32", a.IPv4CIDRMask, true, "32"},
{"032", a.IPv4CIDRMask, true, "32"},
{"33", a.IPv4CIDRMask, false, ""},
// IPv4 netmask in dotted quad format.
{"0.0.0.0", tokenize.MatchIPv4Netmask(false), true, "0.0.0.0"},
{"255.128.000.000", tokenize.MatchIPv4Netmask(false), true, "255.128.000.000"},
{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
// IPv4 address + CIDR or dotted quad netmask.
{"192.168.6.123", a.IPv4Net, false, ""},
{"192.168.6.123/24", tokenize.MatchIPv4Net(false), true, "192.168.6.123/24"},
{"001.002.003.004/016", tokenize.MatchIPv4Net(false), true, "001.002.003.004/016"},
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
})
}
func TestIPv6Atoms(t *testing.T) {
var a = tokenize.A
AssertHandlers(t, []HandlerT{
{"", a.IPv6, false, ""},
{"::", a.IPv6, true, "::"},
{"1::", a.IPv6, true, "1::"},
{"1::1", a.IPv6, true, "1::1"},
{"::1", a.IPv6, true, "::1"},
{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
{"0", a.IPv6CIDRMask, true, "0"},
{"128", a.IPv6CIDRMask, true, "128"},
{"129", a.IPv6CIDRMask, false, ""},
{"::1/128", a.IPv6Net, true, "::1/128"},
{"::1/129", a.IPv6Net, false, ""},
{"1.1.1.1/24", a.IPv6Net, false, ""},
{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
})
}
func TestModifiers(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
AssertHandlers(t, []HandlerT{
{"missed me!", m.Drop(a.Rune('w')), false, ""},
{"where are you?", m.Drop(a.Rune('w')), true, ""},
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
{"abcdefghijk", m.ByCallback(a.Str("xyz"), func(s string) string { return "X" }), false, ""},
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
})
}
// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called unless the input was already validated to
// follow the correct pattern. Therefore, TokenMakers will panic when the
// input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
var a, tok = tokenize.A, tokenize.T
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
tokenizer := tokenize.New(invalid)
AssertPanic(t, PanicT{
func() { tokenizer("no") }, false,
`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
})
}
func TestTokenMakers(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
AssertTokenMakers(t, []TokenMakerT{
{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
[]tokenize.Token{{Type: "A", Value: ""}}},
{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},
{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},
{`\uD801 invalid rune`, tok.StrInterpreted("D", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "D", Value: "� invalid rune"}}},
// I don't check the returned error here, but it's good enough to see that the parsing
// stopped after the illegal \g escape sequence.
{`invalid \g escape`, tok.StrInterpreted("E", c.OneOrMore(a.AnyRune)), []tokenize.Token{{Type: "E", Value: "invalid "}}},
{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
{Type: "bar", Value: byte('R')},
{Type: "bar", Value: byte('O')},
{Type: "bar", Value: byte('C')},
{Type: "bar", Value: byte('K')},
{Type: "bar", Value: byte('S')},
}},
{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},
{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},
{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},
{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},
{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
{Type: "P", Value: true},
}},
{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
{Type: "P", Value: false},
}},
{`anything`, tok.ByValue("Q", c.OneOrMore(a.AnyRune), "Kaboom!"), []tokenize.Token{{Type: "Q", Value: "Kaboom!"}}},
})
}
func TestTokenGroup_Match(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
tokenizer := tokenize.New(tok.Group("Group",
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))))
api, err := tokenizer("xxxxx")
AssertTrue(t, err == nil, "Tokenizer result")
tokens := api.Tokens()
AssertEqual(t, 1, len(tokens), "Length of tokens slice")
contained := tokens[0].Value.([]tokenize.Token)
AssertEqual(t, 3, len(contained), "Length of contained tokens")
AssertEqual(t, 1, contained[0].Type.(int), "Value of contained Token 1")
AssertEqual(t, 2, contained[1].Type.(int), "Value of contained Token 2")
AssertEqual(t, 3, contained[2].Type.(int), "Value of contained Token 3")
}
func TestTokenGroup_Mismatch(t *testing.T) {
var c, a, tok = tokenize.C, tokenize.A, tokenize.T
tokenizer := tokenize.New(tok.Group("Group",
c.Seq(tok.Rune(1, a.Letter), tok.Rune(2, a.Letter), tok.Rune(3, a.Letter))).Optional())
api, err := tokenizer("12345")
AssertTrue(t, err == nil, "Tokenizer result")
tokens := api.Tokens()
AssertEqual(t, 0, len(tokens), "Length of tokens slice")
}
// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
var c, a, m = tokenize.C, tokenize.A, tokenize.M
demonic := c.Seq(
c.Optional(a.SquareOpen),
m.Trim(
c.Seq(
c.Optional(a.Blanks),
c.Repeated(3, a.AngleClose),
m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
return fmt.Sprintf("%d", len(s))
}),
m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
m.ToUpper(c.Min(1, a.ASCIILower)),
m.Drop(a.Excl),
c.Repeated(3, a.AngleOpen),
c.Optional(a.Blanks),
),
" \t",
),
c.Optional(a.SquareClose),
)
AssertHandlers(t, []HandlerT{
{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
})
}

View File

@ -1,41 +0,0 @@
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize2
import (
"fmt"
)
// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// Result struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*API, error)
// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler function
// that must be executed at the start of the tokenizing process. From there on
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
return func(input interface{}) (*API, error) {
api := NewAPI(input)
ok := tokenHandler(api)
if !ok {
err := fmt.Errorf("mismatch at %s", Cursor{})
return nil, err
}
return api, nil
}
}

View File

@ -1,223 +0,0 @@
package tokenize2_test
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)
// TODO For error handling, it would be really cool if for example the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
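// A hedged sketch of the optional matching behavior described above, added
// for illustration: the IPv4 handler mismatches on the invalid octet 300,
// after which the Digits alternative picks up the leading "10".
func ExampleNew_optionalMatching() {
	c, a := tokenize.C, tokenize.A
	tokenizer := tokenize.New(c.Any(a.IPv4, a.Digits))
	result, err := tokenizer("10.0.300.1/24")
	fmt.Println(result, err)
	// Output:
	// 10 <nil>
}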
func ExampleNew() {
// Build the tokenizer for ip/mask.
var c, a, t = tokenize.C, tokenize.A, tokenize.T
ip := t.Str("ip", a.IPv4)
mask := t.Int8("mask", a.IPv4CIDRMask)
cidr := c.Seq(ip, a.Slash, mask)
tokenizer := tokenize.New(cidr)
for _, input := range []string{
"000.000.000.000/000",
"192.168.0.1/24",
"255.255.255.255/32",
"10.0.300.1/24",
"not an IPv4 CIDR",
} {
// Execute returns a Result and an error, which is nil on success.
result, err := tokenizer(input)
if err == nil {
fmt.Printf("Result: %s\n", result.Tokens())
} else {
fmt.Printf("Error: %s\n", err)
}
}
// Output:
// Result: [ip("0.0.0.0") mask((int8)0)]
// Result: [ip("192.168.0.1") mask((int8)24)]
// Result: [ip("255.255.255.255") mask((int8)32)]
// Error: mismatch at start of file
// Error: mismatch at start of file
}
func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
api := makeTokenizeAPI()
r, _ := api.NextRune()
AssertEqual(t, 'T', r, "first rune")
}
func TestInputCanAcceptRunesFromReader(t *testing.T) {
i := makeTokenizeAPI()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
i.NextRune()
i.Accept()
AssertEqual(t, "Tes", i.String(), "i.String()")
}
func TestCallingNextRuneTwice_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.NextRune()
i.NextRune()
},
Regexp: true,
Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
`without a prior call to Accept\(\)`,
})
}
func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
api := makeTokenizeAPI()
AssertPanic(t, PanicT{
Function: api.Accept,
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
`without first calling NextRune\(\)`,
})
}
func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
api := tokenize.NewAPI("")
AssertPanic(t, PanicT{
Function: func() {
api.NextRune()
api.Accept()
},
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
`, but the prior call to NextRune\(\) failed`,
})
}
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Merge(0)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
child := i.Fork()
i.Fork()
i.Merge(child)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Dispose(0)
},
Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
child := i.Fork()
i.Fork()
i.Dispose(child)
},
Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Fork()
g := i.Fork()
i.Fork()
i.Merge(g)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestForkingInput_ClearsLastRune(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.NextRune()
i.Fork()
i.Accept()
},
Regexp: true,
Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
})
}
func TestAccept_UpdatesCursor(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
for j := 0; j < 6; j++ { // read "input\r", cursor end up at "\n"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
i.NextRune() // read "\n", cursor ends up at start of new line
i.Accept()
AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor end up at "i"
i.NextRune()
i.Accept()
}
AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}
func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("X"))
i.NextRune()
i.Accept()
r, err := i.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
i := tokenize.NewAPI(strings.NewReader("X"))
child := i.Fork()
i.NextRune()
i.Accept()
r, err := i.NextRune()
AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
i.Dispose(child) // brings the read offset back to the start
r, err = i.NextRune() // so here we should see the same rune
AssertEqual(t, 'X', r, "returned rune from 2nd NextRune()")
AssertEqual(t, true, err == nil, "returned error from 2nd NextRune()")
}
func makeTokenizeAPI() *tokenize.API {
return tokenize.NewAPI("Testing")
}

View File

@ -1,131 +0,0 @@
package tokenize2
import (
"testing"
)
func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
// Create input, accept the first rune.
i := NewAPI("Testing")
i.NextRune()
i.Accept() // T
AssertEqual(t, "T", i.String(), "accepted rune in input")
// Fork
child := i.Fork()
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
AssertEqual(t, 1, i.stackFrame.offset, "child offset")
// Accept two runes via fork.
i.NextRune()
i.Accept() // e
i.NextRune()
i.Accept() // s
AssertEqual(t, "es", i.String(), "result runes in fork")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
AssertEqual(t, 3, i.stackFrame.offset, "child offset")
// Merge fork back into parent
i.Merge(child)
i.Dispose(child)
AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}
func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
i := NewAPI("Testing")
i.NextRune()
i.Accept()
f1 := i.Fork()
i.NextRune()
i.Accept()
f2 := i.Fork()
i.NextRune()
i.Accept()
AssertEqual(t, "s", i.String(), "f2 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
i.Merge(f2)
i.Dispose(f2)
AssertEqual(t, "es", i.String(), "f1 String()")
AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
i.Merge(f1)
i.Dispose(f1)
AssertEqual(t, "Tes", i.String(), "top-level API String()")
AssertEqual(t, 3, i.stackFrame.offset, "top-level API offset")
}
func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
i := NewAPI("Testing")
r, _ := i.NextRune()
AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
i.Accept()
AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
r, _ = i.NextRune()
AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}
func TestFlushInput(t *testing.T) {
api := NewAPI("cool")
// Flushing without any read data is okay. FlushInput() will return
// false in this case, and nothing else happens.
AssertTrue(t, api.FlushInput() == false, "flush input at start")
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")
AssertTrue(t, api.FlushInput() == false, "flush input after flush input")
// Read offset is now zero, but reading should continue after "co".
api.NextRune()
api.Accept()
api.NextRune()
api.Accept()
AssertEqual(t, "cool", api.String(), "end result")
}
func TestInputFlusherWrapper(t *testing.T) {
runeA := A.Rune('a')
flushB := C.FlushInput(A.Rune('b'))
api := NewAPI("abaab")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 1 read")
AssertEqual(t, "a", api.String(), "runes after 1 read")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 2 reads + input flush")
AssertEqual(t, "ab", api.String(), "runes after 2 reads")
runeA(api)
AssertEqual(t, 1, api.stackFrame.offset, "offset after 3 reads")
AssertEqual(t, "aba", api.String(), "runes after 3 reads")
runeA(api)
AssertEqual(t, 2, api.stackFrame.offset, "offset after 4 reads")
AssertEqual(t, "abaa", api.String(), "runes after 4 reads")
flushB(api)
AssertEqual(t, 0, api.stackFrame.offset, "offset after 5 reads + input flush")
AssertEqual(t, "abaab", api.String(), "runes after 5 reads")
}
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
if expected != actual {
t.Errorf(
"Unexpected value for %s:\nexpected: %q\nactual: %q",
forWhat, expected, actual)
}
}
func AssertTrue(t *testing.T, b bool, assertion string) {
if !b {
t.Errorf("Assertion %s is false", assertion)
}
}