New implementation for performance.

parent 7795588fe6
commit 48d7fda9f8

@@ -0,0 +1,374 @@

package tokenize2

import (
	"git.makaay.nl/mauricem/go-parsekit/read"
)

// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when it is no longer needed (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// API must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported, is by forking an API struct by
// calling method Fork(). This will return a forked child API, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory,
// since garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach on lookaheads by using
// peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient, however, in my opinion, not very intuitive to read. It can also
// be tedious to get the cursor back at the correct position, which
// can lead to hard-to-track bugs. I much prefer this forking method, since
// no bookkeeping has to be implemented when writing a parser.
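//
// A minimal usage sketch combining the basic and forking operations (all
// identifiers are from this package; the input and the matched rune are
// arbitrary examples):
//
//	api := NewAPI("abc")
//	child := api.Fork()        // fork before performing the lookahead
//	if r, err := api.NextRune(); err == nil && r == 'a' {
//		api.Accept()           // keep the rune in the child's results
//		api.Merge(child)       // lookahead succeeded: move results to the parent
//	}
//	api.Dispose(child)         // make the parent the active level again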
type API struct {
	reader      *read.Buffer // the input data reader
	lastRune    rune         // the rune as retrieved by the last NextRune() call
	lastRuneErr error        // the error for the last NextRune() call
	runeRead    bool         // whether or not a rune was read using NextRune()
	runes       []rune       // the rune stack
	tokens      []Token      // the token stack
	stackFrames []stackFrame // the stack frames, containing stack level-specific data
	stackLevel  int          // the current stack level
	stackFrame  *stackFrame  // the current stack frame
}

type stackFrame struct {
	offset     int // current rune offset relative to the Reader's sliding window
	runeStart  int // the start of this frame's runes within the API's rune stack
	runeEnd    int // the end of this frame's runes within the API's rune stack
	tokenStart int // the start of this frame's tokens within the API's token stack
	tokenEnd   int // the end of this frame's tokens within the API's token stack
	cursor     Cursor

	// TODO
	err error // can be used by a Handler to report a specific issue with the input
}

const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10

// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
	api := &API{
		reader:      read.New(input),
		runes:       make([]rune, 0, initialRuneDepth),
		tokens:      make([]Token, 0, initialTokenDepth),
		stackFrames: make([]stackFrame, 1, initialStackDepth),
	}
	api.stackFrame = &api.stackFrames[0]

	return api
}

// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune, it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing the correct sequencing of API method calls.
func (i *API) NextRune() (rune, error) {
	if i.runeRead {
		callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
			"without a prior call to Accept()")
	}

	readRune, err := i.reader.RuneAt(i.stackFrame.offset)
	i.lastRune = readRune
	i.lastRuneErr = err
	i.runeRead = true

	return readRune, err
}

// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such a case will result in a panic.
func (i *API) Accept() {
	if !i.runeRead {
		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
			"without first calling NextRune()")
	} else if i.lastRuneErr != nil {
		callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
			"but the prior call to NextRune() failed")
	}

	i.runes = append(i.runes, i.lastRune)
	i.stackFrame.runeEnd++
	i.stackFrame.cursor.moveByRune(i.lastRune)
	i.stackFrame.offset++
	i.runeRead = false
}

// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the Handler was successful and you want to add the results
// to the parent API, you can call Merge() on the forked child.
// This will add the results to the results of the parent (runes, tokens).
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory,
// since garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
	newStackLevel := i.stackLevel + 1
	newStackSize := newStackLevel + 1

	// Grow the stack frames capacity when needed.
	if cap(i.stackFrames) < newStackSize {
		newFrames := make([]stackFrame, newStackSize, newStackSize*2)
		copy(newFrames, i.stackFrames)
		i.stackFrames = newFrames
	} else {
		i.stackFrames = i.stackFrames[0:newStackSize]
	}

	parent := i.stackFrame
	i.stackLevel++
	i.stackFrame = &i.stackFrames[i.stackLevel]
	*i.stackFrame = *parent
	i.stackFrame.runeStart = parent.runeEnd
	i.stackFrame.tokenStart = parent.tokenEnd
	i.runeRead = false

	return i.stackLevel
}

// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset, so it can immediately
// be reused for performing another match. This means that all Result data are
// cleared, but the read cursor position is kept at its current position.
// This allows a child to feed results in chunks to its parent.
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will make the parent the active API level again.
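//
// A minimal sketch of that chunked feeding pattern (matchChunk is a
// hypothetical stand-in for any code that reads and accepts a series of
// runes, returning false when there is nothing left to match):
//
//	child := api.Fork()
//	for matchChunk(api) {
//		api.Merge(child) // feed this chunk to the parent, then continue reading
//	}
//	api.Dispose(child)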
func (i *API) Merge(stackLevel int) {
	if stackLevel == 0 {
		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on the top-level API stack level 0")
	}
	if stackLevel != i.stackLevel {
		callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on API stack level %d, but the current stack level is %d "+
			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
	}

	parent := &i.stackFrames[stackLevel-1]

	if parent.runeEnd == i.stackFrame.runeStart {
		// The end of the parent slice aligns with the start of the child slice.
		// Because of this, merging boils down to simply expanding the parent
		// slice to include the child slice.
		//   parent: |----------|
		//   child:             |------|
		// After the merge operation:
		//   parent: |-----------------|
		//   child:                    |---> continue reading from here
		parent.runeEnd = i.stackFrame.runeEnd
		i.stackFrame.runeStart = i.stackFrame.runeEnd
	} else {
		// The end of the parent slice does not align with the start of the
		// child slice. The child slice has to be copied onto the end of
		// the parent slice.
		//   parent: |----------|
		//   child:                |------|
		// After the merge operation:
		//   parent: |-----------------|
		//   child:                    |---> continue reading from here
		i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
		parent.runeEnd = len(i.runes)
		i.stackFrame.runeStart = parent.runeEnd
		i.stackFrame.runeEnd = parent.runeEnd
	}

	// The same logic applies to tokens.
	if parent.tokenEnd == i.stackFrame.tokenStart {
		parent.tokenEnd = i.stackFrame.tokenEnd
		i.stackFrame.tokenStart = i.stackFrame.tokenEnd
	} else {
		i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
		parent.tokenEnd = len(i.tokens)
		i.stackFrame.tokenStart = parent.tokenEnd
		i.stackFrame.tokenEnd = parent.tokenEnd
	}

	parent.offset = i.stackFrame.offset
	parent.cursor = i.stackFrame.cursor

	i.stackFrame.err = nil
	i.runeRead = false
}
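
// Dispose disposes of the forked child API for the provided stack level,
// making its parent the active API level again. Any results that were not
// moved to the parent using Merge() are discarded in the process.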
func (i *API) Dispose(stackLevel int) {
	if stackLevel == 0 {
		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on the top-level API stack level 0")
	}
	if stackLevel != i.stackLevel {
		callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
			"on API stack level %d, but the current stack level is %d "+
			"(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
	}

	i.runeRead = false
	i.stackLevel = stackLevel - 1
	i.stackFrames = i.stackFrames[:stackLevel]
	i.stackFrame = &i.stackFrames[stackLevel-1]
	i.runes = i.runes[0:i.stackFrame.runeEnd]
	i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}

// Reset clears the results (runes, tokens) of the active stack level,
// but keeps the read cursor at its current position.
func (i *API) Reset() {
	i.runeRead = false
	i.stackFrame.runeStart = i.stackFrame.runeEnd
	i.stackFrame.tokenStart = i.stackFrame.tokenEnd
	i.stackFrame.err = nil
}

// FlushInput flushes processed input data from the read.Buffer.
// In this context, 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own Handler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
	if i.stackFrame.offset > 0 {
		i.reader.Flush(i.stackFrame.offset)
		i.stackFrame.offset = 0
		return true
	}
	return false
}

// String returns the accepted runes of the active stack level as a string.
func (i *API) String() string {
	return string(i.Runes())
}

// Runes returns the accepted runes of the active stack level.
func (i *API) Runes() []rune {
	return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}

// Rune returns the accepted rune at the provided offset within the active stack level.
func (i *API) Rune(offset int) rune {
	return i.runes[i.stackFrame.runeStart+offset]
}

// ClearRunes clears the accepted runes of the active stack level.
func (i *API) ClearRunes() {
	i.runes = i.runes[:i.stackFrame.runeStart]
	i.stackFrame.runeEnd = i.stackFrame.runeStart
}

// SetRunes replaces the runes of the active stack level with the provided runes.
func (i *API) SetRunes(runes ...rune) {
	i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
	i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
}

// AddRunes appends the provided runes to the runes of the active stack level.
func (i *API) AddRunes(runes ...rune) {
	i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
	i.stackFrame.runeEnd += len(runes)
}

// AddString appends the runes that make up the provided string to the runes
// of the active stack level.
func (i *API) AddString(s string) {
	i.AddRunes([]rune(s)...)
}

// SetString replaces the runes of the active stack level with the runes that
// make up the provided string.
func (i *API) SetString(s string) {
	i.SetRunes([]rune(s)...)
}

// Cursor returns the current read cursor position.
func (i *API) Cursor() Cursor {
	return i.stackFrame.cursor
}

// Tokens returns the tokens of the active stack level.
func (i *API) Tokens() []Token {
	return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}

// Token returns the token at the provided offset within the active stack level.
func (i *API) Token(offset int) Token {
	return i.tokens[i.stackFrame.tokenStart+offset]
}

// TokenValue returns the value of the token at the provided offset within
// the active stack level.
func (i *API) TokenValue(offset int) interface{} {
	return i.tokens[i.stackFrame.tokenStart+offset].Value
}

// ClearTokens clears the tokens of the active stack level.
func (i *API) ClearTokens() {
	i.tokens = i.tokens[:i.stackFrame.tokenStart]
	i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}

// SetTokens replaces the tokens of the active stack level with the provided tokens.
func (i *API) SetTokens(tokens ...Token) {
	i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
	i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
}

// AddTokens appends the provided tokens to the tokens of the active stack level.
func (i *API) AddTokens(tokens ...Token) {
	i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
	i.stackFrame.tokenEnd += len(tokens)
}

@@ -0,0 +1,330 @@

package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func ExampleNewAPI() {
	tokenize.NewAPI("The input that the API will handle")

	// Output:
}

func ExampleAPI_NextRune() {
	api := tokenize.NewAPI("The input that the API will handle")
	r, err := api.NextRune()
	fmt.Printf("Rune read from input; %c\n", r)
	fmt.Printf("The error: %v\n", err)
	fmt.Printf("API results: %q\n", api.String())

	// Output:
	// Rune read from input; T
	// The error: <nil>
	// API results: ""
}

func ExampleAPI_Accept() {
	api := tokenize.NewAPI("The input that the API will handle")
	api.NextRune() // reads 'T'
	api.Accept()   // adds 'T' to the API results
	api.NextRune() // reads 'h'
	api.Accept()   // adds 'h' to the API results
	api.NextRune() // reads 'e', but it is not added to the API results

	fmt.Printf("API results: %q\n", api.String())

	// Output:
	// API results: "Th"
}

func ExampleAPI_modifyingResults() {
	api := tokenize.NewAPI("")

	api.AddString("Some runes")
	api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
	api.AddRunes(' ', 'i', 'n', ' ')
	api.AddString("various ways")
	fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
	fmt.Printf("API result runes as string: %q\n", api.String())

	api.SetString("new ")
	api.AddString("set ")
	api.AddString("of ")
	api.AddRunes('r', 'u', 'n', 'e', 's')
	fmt.Printf("API result runes as string: %q\n", api.String())
	fmt.Printf("API result runes: %q\n", api.Runes())
	fmt.Printf("API third rune: %q\n", api.Rune(2))

	api.AddTokens(tokenize.Token{
		Type:  42,
		Value: "towel"})
	api.AddTokens(tokenize.Token{
		Type:  73,
		Value: "Zaphod"})
	fmt.Printf("API result tokens: %v\n", api.Tokens())
	fmt.Printf("API second result token: %v\n", api.Token(1))

	// Output:
	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
	// API result runes as string: "Some runes added in various ways"
	// API result runes as string: "new set of runes"
	// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
	// API third rune: 'w'
	// API result tokens: [42("towel") 73("Zaphod")]
	// API second result token: 73("Zaphod")
}

func ExampleAPI_Reset() {
	api := tokenize.NewAPI("Very important input!")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	// Reset clears the results, but keeps the cursor position.
	api.Reset()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

	// Output:
	// API results: "Ve" at line 1, column 3
	// API results: "" at line 1, column 3
	// API results: "ry" at line 1, column 5
}

func ExampleAPI_Fork() {
	// This custom Handler checks for input 'a', 'b' or 'c'.
	abcHandler := func(t *tokenize.API) bool {
		a := tokenize.A
		for _, r := range []rune{'a', 'b', 'c'} {
			child := t.Fork() // fork, so we won't change parent t
			if a.Rune(r)(t) {
				t.Merge(child)   // merge the child's results into the parent
				t.Dispose(child) // return to the parent level
				return true      // and report a successful match
			}
			t.Dispose(child) // return to the parent level
		}
		// If we get here, then no match was found. Return false to communicate
		// this to the caller.
		return false
	}

	// Note: a custom Handler is normally not what you need.
	// You can make use of the parser/combinator tooling to make the
	// implementation a lot simpler and to take care of forking at
	// the appropriate places. The handler from above can be replaced with:
	simpler := tokenize.A.RuneRange('a', 'c')

	result, err := tokenize.New(abcHandler)("another test")
	fmt.Println(result, err)
	result, err = tokenize.New(simpler)("curious")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("bang on!")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("not a match")
	fmt.Println(result, err)

	// Output:
	// a <nil>
	// c <nil>
	// b <nil>
	// <nil> mismatch at start of file
}

func ExampleAPI_Merge() {
	tokenHandler := func(t *tokenize.API) bool {
		child1 := t.Fork()
		t.NextRune() // reads 'H'
		t.Accept()
		t.NextRune() // reads 'i'
		t.Accept()

		child2 := t.Fork()
		t.NextRune() // reads ' '
		t.Accept()
		t.NextRune() // reads 'm'
		t.Accept()
		t.Dispose(child2)

		t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
		t.Dispose(child1) // and clean up child1 to return to the parent
		return true
	}

	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
	fmt.Println(result.String())

	// Output:
	// Hi
}

func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
	api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

	// Fork a few levels.
	child1 := api.Fork()
	child2 := api.Fork()
	child3 := api.Fork()
	child4 := api.Fork()

	// Read a rune 'a' from child4.
	r, _ := api.NextRune()
	AssertEqual(t, 'a', r, "child4 rune 1")
	api.Accept()
	AssertEqual(t, "a", api.String(), "child4 runes after rune 1")

	// Read another rune 'b' from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'b', r, "child4 rune 2")
	api.Accept()
	AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")

	// Merge "ab" from child4 to child3.
	api.Merge(child4)
	AssertEqual(t, "", api.String(), "child4 runes after first merge")

	// Read some more from child4.
	r, _ = api.NextRune()
	AssertEqual(t, 'c', r, "child4 rune 3")
	api.Accept()
	AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")

	// Merge "c" from child4 to child3.
	api.Merge(child4)

	// And dispose of child4, making child3 the active stack level.
	api.Dispose(child4)

	// Child3 should now have the combined results "abc" from child4's work.
	AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
	AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")

	// Now read some data from child3.
	r, _ = api.NextRune()
	AssertEqual(t, 'd', r, "child3 rune 4")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'e', r, "child3 rune 5")
	api.Accept()

	r, _ = api.NextRune()
	AssertEqual(t, 'f', r, "child3 rune 6")
	api.Accept()

	AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")

	// Temporarily create some new forks from here, but don't use their outcome.
	child3sub1 := api.Fork()
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()
	child3sub2 := api.Fork()
	api.NextRune()
	api.Accept()
	api.Merge(child3sub2)   // do merge sub2 down to sub1
	api.Dispose(child3sub2) // and dispose of sub2
	api.Dispose(child3sub1) // but dispose of sub1 without merging

	// Instead, merge the results from before this forking detour from child3
	// to child2 and dispose of child3.
	api.Merge(child3)
	api.Dispose(child3)

	AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
	AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")

	// Merge child2 to child1 and dispose of it.
	api.Merge(child2)
	api.Dispose(child2)

	// Merge child1 a few times to the top-level api.
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)
	api.Merge(child1)

	// And dispose of it.
	api.Dispose(child1)

	// Read some data from the top-level api.
	r, _ = api.NextRune()
	api.Accept()

	AssertEqual(t, "abcdefg", api.String(), "api string end result")
	AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}

func TestClearRunes(t *testing.T) {
	api := tokenize.NewAPI("Laphroaig")
	api.NextRune()   // Read 'L'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'a'
	api.Accept()     // Add to runes
	api.ClearRunes() // Clear the runes
	api.NextRune()   // Read 'p'
	api.Accept()     // Add to runes
	api.NextRune()   // Read 'h'
	api.Accept()     // Add to runes

	AssertEqual(t, "ph", api.String(), "api string end result")
}

func TestMergeScenariosForTokens(t *testing.T) {
	api := tokenize.NewAPI("")

	token1 := tokenize.Token{Value: 1}
	token2 := tokenize.Token{Value: 2}
	token3 := tokenize.Token{Value: 3}
	token4 := tokenize.Token{Value: 4}

	api.SetTokens(token1)
	tokens := api.Tokens()
	AssertEqual(t, 1, len(tokens), "Tokens 1")

	child := api.Fork()

	tokens = api.Tokens()
	AssertEqual(t, 0, len(tokens), "Tokens 2")

	api.AddTokens(token2)

	// Here we can merge by expanding the token slice on the parent,
	// because the end of the parent slice and the start of the child
	// slice align.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 2, len(tokens), "Tokens 3")

	child = api.Fork()
	api.AddTokens(token3)
	api.Reset()
	api.AddTokens(token4)

	// Here the merge means that token4 will be copied to the end of
	// the token slice of the parent, since there's a gap at the place
	// where token3 used to be.
	api.Merge(child)
	api.Dispose(child)

	tokens = api.Tokens()
	AssertEqual(t, 3, len(tokens), "Tokens 4")
	AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
	AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
	AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}

@@ -0,0 +1,118 @@

package tokenize2_test

// This file contains some tools that are used for writing tests.

import (
	"regexp"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

// AssertEqual checks that the actual value equals the expected value, and
// reports a test error (including the forWhat description) when it does not.
func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

// AssertTrue checks that the provided value is true.
func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}

// PanicT describes a test case for code that is expected to panic.
// When Regexp is true, Expect is interpreted as a regular expression that
// the panic message must match; otherwise the panic message must be equal
// to Expect.
type PanicT struct {
	Function func()
	Regexp   bool
	Expect   string
}

// AssertPanics runs AssertPanic against a set of panic test cases.
func AssertPanics(t *testing.T, testSet []PanicT) {
	for _, test := range testSet {
		AssertPanic(t, test)
	}
}

// AssertPanic checks that the test case's Function panics with the expected message.
func AssertPanic(t *testing.T, p PanicT) {
	defer func() {
		if r := recover(); r != nil {
			mismatch := false
			if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
				mismatch = true
			}
			if !p.Regexp && p.Expect != r.(string) {
				mismatch = true
			}
			if mismatch {
				t.Errorf(
					"Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
					p.Expect, r)
			}
		} else {
			t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
		}
	}()
	p.Function()
}

// HandlerT describes a test case for a tokenize.Handler: the Handler is run
// against Input and must (or, when MustMatch is false, must not) match,
// producing the Expected output string.
type HandlerT struct {
	Input     string
	Handler   tokenize.Handler
	MustMatch bool
	Expected  string
}

// AssertHandlers runs AssertHandler against a set of handler test cases.
func AssertHandlers(t *testing.T, testSet []HandlerT) {
	for _, test := range testSet {
		AssertHandler(t, test)
	}
}

// AssertHandler checks a single handler test case.
func AssertHandler(t *testing.T, test HandlerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if test.MustMatch {
		if err != nil {
			t.Errorf("Test %q failed with error: %s", test.Input, err)
		} else if output := result.String(); output != test.Expected {
			t.Errorf("Test %q failed: unexpected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
		}
	} else {
		if err == nil {
			t.Errorf("Test %q failed: should not match, but it did", test.Input)
		}
	}
}

// TokenMakerT describes a test case for a Handler that produces tokens.
type TokenMakerT struct {
	Input    string
	Handler  tokenize.Handler
	Expected []tokenize.Token
}

// AssertTokenMakers runs AssertTokenMaker against a set of token maker test cases.
func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
	for _, test := range testSet {
		AssertTokenMaker(t, test)
	}
}

// AssertTokenMaker checks that a Handler produces the expected tokens.
func AssertTokenMaker(t *testing.T, test TokenMakerT) {
	result, err := tokenize.New(test.Handler)(test.Input)
	if err != nil {
		t.Errorf("Test %q failed with error: %s", test.Input, err)
	} else {
		if len(result.Tokens()) != len(test.Expected) {
			t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
		}
		for i, expected := range test.Expected {
			actual := result.Token(i)
			if expected.Type != actual.Type {
				t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
			}
			if expected.Value != actual.Value {
				t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
			}
		}
	}
}

@@ -0,0 +1,33 @@

package tokenize2

import (
	"fmt"
	"runtime"
	"strings"
)
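
// callerPanic formats message f using the provided data arguments and panics
// with the result, after replacing two placeholders: {name} is replaced with
// the provided function name and {caller} with the file:line position of the
// code that called that function. A sketch of the resulting message (the
// file path and line number are made up for illustration):
//
//	callerPanic("NextRune", "{name}() called at {caller}")
//	// panics with: NextRune() called at /path/to/some_handler.go:123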
func callerPanic(name, f string, data ...interface{}) {
	filepos := callerBefore(name)
	m := fmt.Sprintf(f, data...)
	m = strings.Replace(m, "{caller}", filepos, -1)
	m = strings.Replace(m, "{name}", name, -1)
	panic(m)
}

// callerBefore walks up the call stack, looking for the function with the
// provided name. It returns the file:line position from which that function
// was called, or "unknown caller" when the function is not on the stack.
func callerBefore(name string) string {
	found := false
	for i := 1; ; i++ {
		pc, file, line, ok := runtime.Caller(i)
		if found {
			return fmt.Sprintf("%s:%d", file, line)
		}
		if !ok {
			return "unknown caller"
		}
		f := runtime.FuncForPC(pc)

		if strings.HasSuffix(f.Name(), "."+name) {
			found = true
		}
	}
}

@@ -0,0 +1,35 @@

package tokenize2

import (
	"strings"
	"testing"
)

func SomeFunc1() {
	SomeFunc2()
}

func SomeFunc2() {
	SomeFunc3()
}

func SomeFunc3() {
	// Note: this reports the caller of SomeFunc2, which is SomeFunc1.
	callerPanic("SomeFunc2", "{name} was called from {caller}")
}

func TestCallerPanic(t *testing.T) {
	defer func() {
		r := recover()
		err := r.(string)

		if !strings.Contains(err, "SomeFunc2 was called from") || !strings.Contains(err, "callerinfo_test.go:") {
			t.Fatalf("Unexpected error message: %s", err)
		}
	}()
	SomeFunc1()
}

func TestCallerBefore_WithFunctionNameNotInStack(t *testing.T) {
	caller := callerBefore("NotExistingAtAll")
	AssertEqual(t, "unknown caller", caller, "result for name not in stack")
}

@@ -0,0 +1,45 @@

package tokenize2

import (
	"fmt"
	"unicode/utf8"
)

// Cursor represents the position of a cursor in various ways.
type Cursor struct {
	Byte   int // The cursor offset in bytes
	Rune   int // The cursor offset in UTF8 runes
	Column int // The column at which the cursor is (0-indexed)
	Line   int // The line at which the cursor is (0-indexed)
}

// String produces a string representation of the cursor position.
func (c Cursor) String() string {
	if c.Line == 0 && c.Column == 0 {
		return "start of file"
	}
	return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}

// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
	for _, r := range input {
		c.moveByRune(r)
	}
	return c
}

// moveByRune updates the position of the cursor, based on a single rune.
// For example, moving over "⌘\n" advances Byte by 4 (3 bytes for '⌘', 1 for
// '\n'), advances Rune by 2, increments Line and resets Column to 0.
func (c *Cursor) moveByRune(r rune) *Cursor {
	c.Byte += utf8.RuneLen(r)
	c.Rune++
	if r == '\n' {
		c.Column = 0
		c.Line++
	} else {
		c.Column++
	}
	return c
}

@@ -0,0 +1,69 @@

package tokenize2

import (
	"fmt"
	"testing"
)

func ExampleCursor_move() {
	c := Cursor{}
	fmt.Printf("after initialization : %s\n", c)
	fmt.Printf("after 'some words' : %s\n", c.move("some words"))
	fmt.Printf("after '\\n' : %s\n", c.move("\n"))
	fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))

	// Output:
	// after initialization : start of file
	// after 'some words' : line 1, column 11
	// after '\n' : line 2, column 1
	// after '\r\nskip\nlines' : line 4, column 6
}

func ExampleCursor_String() {
	c := Cursor{}
	fmt.Println(c.String())

	c.move("\nfoobar")
	fmt.Println(c.String())

	// Output:
	// start of file
	// line 2, column 7
}

func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
	for _, test := range []struct {
		name   string
		input  []string
		byte   int
		rune   int
		line   int
		column int
	}{
		{"No input at all", []string{""}, 0, 0, 0, 0},
		{"One ASCII char", []string{"a"}, 1, 1, 0, 1},
		{"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
		{"One newline", []string{"\n"}, 1, 1, 1, 0},
		{"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
		{"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
		{"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
		{"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
	} {
		c := Cursor{}
		for _, s := range test.input {
			c.move(s)
		}
		if c.Byte != test.byte {
			t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
		}
		if c.Rune != test.rune {
			t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
		}
		if c.Line != test.line {
			t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
		}
		if c.Column != test.column {
			t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
		}
	}
}

@@ -0,0 +1,53 @@

package tokenize2

// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
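//
// A minimal sketch of a hand-written Handler, modelled after the Fork
// example in the test suite (in practice, the built-in handlers and
// combinators of this package take care of this for you):
//
//	var matchA Handler = func(t *API) bool {
//		child := t.Fork()
//		if r, err := t.NextRune(); err == nil && r == 'a' {
//			t.Accept()
//			t.Merge(child)
//			t.Dispose(child)
//			return true
//		}
//		t.Dispose(child)
//		return false
//	}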
type Handler func(t *API) bool

// Match is syntactic sugar that allows you to write a construction like
// New(handler)(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
	tokenizer := New(handler)
	return tokenizer(input)
}

// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
	return MatchAny(handler, otherHandler)
}

// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
	return MatchRep(n, handler)
}

// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
	return MatchSeq(handler, otherHandler)
}

// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
	return MatchSeparated(separator, handler)
}

// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
	return MatchOptional(handler)
}

// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
	return MatchExcept(handler, exceptHandler)
}

@@ -0,0 +1,101 @@

package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestSyntacticSugar(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
		{"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
		{"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
		{"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
		{"cccccc", a.Rune('c').Optional(), true, "c"},
		{"dddddd", a.Rune('c').Optional(), true, ""},
		{"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
		{"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
		{"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
		{"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
	})
}

func ExampleHandler_Times() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))

	fmt.Println(phoneNumber.Match("0201234567"))
	// Output:
	// 0201234567 <nil>
}

func ExampleHandler_Then() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))

	fmt.Println(phoneNumber.Match("0208888888"))
	// Output:
	// 0208888888 <nil>
}

func ExampleHandler_Or() {
	c, a := tokenize.C, tokenize.A
	phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))

	fmt.Println(phoneNumber.Match("+31209876543"))
	fmt.Println(phoneNumber.Match("0031209876543"))
	fmt.Println(phoneNumber.Match("0031020991234"))
	fmt.Println(phoneNumber.Match("0031201234"))
	// Output:
	// +31209876543 <nil>
	// 0031209876543 <nil>
	// <nil> mismatch at start of file
	// <nil> mismatch at start of file
}

func ExampleHandler_SeparatedBy() {
	a, t := tokenize.A, tokenize.T
	csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)

	r, _ := csv.Match("123,456,7,8,9")
	for i, token := range r.Tokens() {
		fmt.Printf("[%d] %v\n", i, token)
	}
	// Output:
	// [0] number((int)123)
	// [1] number((int)456)
	// [2] number((int)7)
	// [3] number((int)8)
	// [4] number((int)9)
}

func ExampleHandler_Optional() {
	c, a := tokenize.C, tokenize.A

	spanish := c.Seq(
		a.Rune('¿').Optional(),
		c.OneOrMore(a.AnyRune.Except(a.Question)),
		a.Rune('?').Optional())

	fmt.Println(spanish.Match("¿Habla español María?"))
	fmt.Println(spanish.Match("Sí, María habla español."))
	// Output:
	// ¿Habla español María? <nil>
	// Sí, María habla español. <nil>
}

func ExampleHandler_Match() {
	r, err := tokenize.A.IPv4.Match("001.002.003.004")
	fmt.Println(r, err)

	r, err = tokenize.A.IPv4.Match("1.2.3")
	fmt.Println(r, err)

	// Output:
	// 1.2.3.4 <nil>
	// <nil> mismatch at start of file
}

(File diff suppressed because it is too large.)

@@ -0,0 +1,445 @@

package tokenize2_test

import (
	"fmt"
	"testing"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestCombinatorsTempDebug(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// {"024", a.IPv4CIDRMask, true, "24"},
		// {"024", a.Octet, true, "24"},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
	})
}

func TestCombinators(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"abc not", c.Not(a.Rune('b')), true, "a"},
		{"bcd not", c.Not(a.Rune('b')), false, ""},
		{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
		{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
		{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
		{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
		{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
		{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
		{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
		{"", c.Min(0, a.Rune('a')), true, ""},
		{"a", c.Min(0, a.Rune('a')), true, "a"},
		{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
		{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
		{"", c.Max(4, a.Rune('b')), true, ""},
		{"X", c.Max(4, a.Rune('b')), true, ""},
		{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
		{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
		{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
		{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
		{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
		{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
		{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
		{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
		{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
		{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
		{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
		{"", c.OneOrMore(a.Rune('d')), false, ""},
		{"X", c.OneOrMore(a.Rune('d')), false, ""},
		{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
		{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
		{"", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
		{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
		{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
		{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
		{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
		{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
		{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
		{"ghijkl", c.Optional(a.Rune('h')), true, ""},
		{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
		{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
		{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
		{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
		{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
		{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
	})
}

func TestCombinatorPanics(t *testing.T) {
	var c, a = tokenize.C, tokenize.A
	AssertPanics(t, []PanicT{
		{func() { a.RuneRange('z', 'a') }, true,
			`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
		{func() { c.MinMax(-1, 1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.MinMax(1, -1, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { c.MinMax(10, 5, a.Space) }, true,
			`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
		{func() { c.Min(-10, a.Space) }, true,
			`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
		{func() { c.Max(-42, a.Space) }, true,
			`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
		{func() { a.IntegerBetween(10, -10) }, true,
			`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
	})
}
|
||||||
|
|
||||||
|
func TestAtoms(t *testing.T) {
|
||||||
|
var a = tokenize.A
|
||||||
|
AssertHandlers(t, []HandlerT{
|
||||||
|
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||||
|
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||||
|
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||||
|
{"Hello, world!", a.Str("Hello"), true, "Hello"},
|
||||||
|
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
|
||||||
|
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||||
|
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||||
|
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||||
|
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||||
|
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||||
|
{"xxx", a.Rune('x'), true, "x"},
|
||||||
|
{"x ", a.Rune(' '), false, ""},
|
||||||
|
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||||
|
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||||
|
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||||
|
{"", a.EndOfFile, true, ""},
|
||||||
|
{"⌘", a.AnyRune, true, "⌘"},
|
||||||
|
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
|
||||||
|
{"", a.AnyRune, false, ""},
|
||||||
|
{"⌘", a.ValidRune, true, "⌘"},
|
||||||
|
{"\xbc with ValidRune", a.ValidRune, false, "<22>"},
|
||||||
|
{"", a.ValidRune, false, ""},
|
||||||
|
{" ", a.Space, true, " "},
|
||||||
|
{"X", a.Space, false, ""},
|
||||||
|
{"\t", a.Tab, true, "\t"},
|
||||||
|
{"\r", a.CR, true, "\r"},
|
||||||
|
{"\n", a.LF, true, "\n"},
|
||||||
|
{"!", a.Excl, true, "!"},
|
||||||
|
{"\"", a.DoubleQuote, true, "\""},
|
||||||
|
{"#", a.Hash, true, "#"},
|
||||||
|
{"$", a.Dollar, true, "$"},
|
||||||
|
{"%", a.Percent, true, "%"},
|
||||||
|
{"&", a.Amp, true, "&"},
|
||||||
|
{"'", a.SingleQuote, true, "'"},
|
||||||
|
{"(", a.LeftParen, true, "("},
|
||||||
|
{"(", a.RoundOpen, true, "("},
|
||||||
|
{")", a.RightParen, true, ")"},
|
||||||
|
{")", a.RoundClose, true, ")"},
|
||||||
|
{"*", a.Asterisk, true, "*"},
|
||||||
|
{"*", a.Multiply, true, "*"},
|
||||||
|
{"+", a.Plus, true, "+"},
|
||||||
|
{"+", a.Add, true, "+"},
|
||||||
|
{",", a.Comma, true, ","},
|
||||||
|
{"-", a.Minus, true, "-"},
|
||||||
|
{"-", a.Subtract, true, "-"},
|
||||||
|
{".", a.Dot, true, "."},
|
||||||
|
{"/", a.Slash, true, "/"},
|
||||||
|
{"/", a.Divide, true, "/"},
|
||||||
|
{":", a.Colon, true, ":"},
|
||||||
|
{";", a.Semicolon, true, ";"},
|
||||||
|
{"<", a.AngleOpen, true, "<"},
|
||||||
|
{"<", a.LessThan, true, "<"},
|
||||||
|
{"=", a.Equal, true, "="},
|
||||||
|
{">", a.AngleClose, true, ">"},
|
||||||
|
{">", a.GreaterThan, true, ">"},
|
||||||
|
{"?", a.Question, true, "?"},
|
||||||
|
{"@", a.At, true, "@"},
|
||||||
|
{"[", a.SquareOpen, true, "["},
|
||||||
|
{"\\", a.Backslash, true, "\\"},
|
||||||
|
{"]", a.SquareClose, true, "]"},
|
||||||
|
{"^", a.Caret, true, "^"},
|
||||||
|
{"_", a.Underscore, true, "_"},
|
||||||
|
{"`", a.Backquote, true, "`"},
|
||||||
|
{"{", a.CurlyOpen, true, "{"},
|
||||||
|
{"|", a.Pipe, true, "|"},
|
||||||
|
{"}", a.CurlyClose, true, "}"},
|
||||||
|
{"~", a.Tilde, true, "~"},
|
||||||
|
{"\t \t \r\n", a.Blank, true, "\t"},
|
||||||
|
{" \t \t \r\n", a.Blanks, true, " \t \t "},
|
||||||
|
{"xxx", a.Whitespace, false, ""},
|
||||||
|
{" ", a.Whitespace, true, " "},
|
||||||
|
{"\t", a.Whitespace, true, "\t"},
|
||||||
|
{"\n", a.Whitespace, true, "\n"},
|
||||||
|
{"\r\n", a.Whitespace, true, "\r\n"},
|
||||||
|
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
|
||||||
|
{"xxx", a.UnicodeSpace, false, ""},
|
||||||
|
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
|
||||||
|
{"", a.EndOfLine, true, ""},
|
||||||
|
{"\r\n", a.EndOfLine, true, "\r\n"},
|
||||||
|
{"\n", a.EndOfLine, true, "\n"},
|
||||||
|
{"0", a.Digit, true, "0"},
|
||||||
|
{"1", a.Digit, true, "1"},
|
||||||
|
{"2", a.Digit, true, "2"},
|
||||||
|
{"3", a.Digit, true, "3"},
|
||||||
|
{"4", a.Digit, true, "4"},
|
||||||
|
{"5", a.Digit, true, "5"},
|
||||||
|
{"6", a.Digit, true, "6"},
|
||||||
|
{"7", a.Digit, true, "7"},
|
||||||
|
{"8", a.Digit, true, "8"},
|
||||||
|
{"9", a.Digit, true, "9"},
|
||||||
|
{"X", a.Digit, false, ""},
|
||||||
|
{"a", a.ASCIILower, true, "a"},
|
||||||
|
{"z", a.ASCIILower, true, "z"},
|
||||||
|
{"A", a.ASCIILower, false, ""},
|
||||||
|
{"Z", a.ASCIILower, false, ""},
|
||||||
|
{"A", a.ASCIIUpper, true, "A"},
|
||||||
|
{"Z", a.ASCIIUpper, true, "Z"},
|
||||||
|
{"a", a.ASCIIUpper, false, ""},
|
||||||
|
{"z", a.ASCIIUpper, false, ""},
|
||||||
|
{"1", a.Letter, false, ""},
|
||||||
|
{"a", a.Letter, true, "a"},
|
||||||
|
{"Ø", a.Letter, true, "Ø"},
|
||||||
|
{"Ë", a.Lower, false, ""},
|
||||||
|
{"ë", a.Lower, true, "ë"},
|
||||||
|
{"ä", a.Upper, false, "ä"},
|
||||||
|
{"Ä", a.Upper, true, "Ä"},
|
||||||
|
{"0", a.HexDigit, true, "0"},
|
||||||
|
{"9", a.HexDigit, true, "9"},
|
||||||
|
{"a", a.HexDigit, true, "a"},
|
||||||
|
{"f", a.HexDigit, true, "f"},
|
||||||
|
{"A", a.HexDigit, true, "A"},
|
||||||
|
{"F", a.HexDigit, true, "F"},
|
||||||
|
{"g", a.HexDigit, false, "g"},
|
||||||
|
{"G", a.HexDigit, false, "G"},
|
||||||
|
{"0", a.Integer, true, "0"},
|
||||||
|
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
|
||||||
|
{"1", a.Integer, true, "1"},
|
||||||
|
{"-10X", a.Integer, false, ""},
|
||||||
|
{"+10X", a.Integer, false, ""},
|
||||||
|
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||||
|
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||||
|
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||||
|
{"0X", a.Float, true, "0"},
|
||||||
|
{"0X", a.Float, true, "0"},
|
||||||
|
{"1X", a.Float, true, "1"},
|
||||||
|
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||||
|
{"123.321X", a.Float, true, "123.321"},
|
||||||
|
{"-3.14X", a.Float, false, ""},
|
||||||
|
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
||||||
|
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
||||||
|
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||||
|
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||||
|
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||||
|
{"10", a.IntegerBetween(-10, 10), true, "10"},
|
||||||
|
{"11", a.IntegerBetween(0, 10), false, ""},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIPv4Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		// Non-normalized octet.
		{"0X", tokenize.MatchOctet(false), true, "0"},
		{"00X", tokenize.MatchOctet(false), true, "00"},
		{"000X", tokenize.MatchOctet(false), true, "000"},
		{"10X", tokenize.MatchOctet(false), true, "10"},
		{"010X", tokenize.MatchOctet(false), true, "010"},
		{"255123", tokenize.MatchOctet(false), true, "255"},
		{"256123", tokenize.MatchOctet(false), false, ""},
		{"300", tokenize.MatchOctet(false), false, ""},

		// Normalized octet.
		{"0X", a.Octet, true, "0"},
		{"00X", a.Octet, true, "0"},
		{"000X", a.Octet, true, "0"},
		{"10X", a.Octet, true, "10"},
		{"010X", a.Octet, true, "10"},
		{"255123", a.Octet, true, "255"},
		{"256123", a.Octet, false, ""},
		{"300", a.Octet, false, ""},

		// IPv4 address.
		{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
		{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
		{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
		{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
		{"256.255.255.255", a.IPv4, false, ""},

		// IPv4 CIDR netmask.
		{"0", a.IPv4CIDRMask, true, "0"},
		{"00", a.IPv4CIDRMask, true, "0"},
		{"000", a.IPv4CIDRMask, true, "0"},
		{"32", a.IPv4CIDRMask, true, "32"},
		{"032", a.IPv4CIDRMask, true, "32"},
		{"33", a.IPv4CIDRMask, false, ""},

		// IPv4 netmask in dotted quad format.
		{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
		{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
		{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
		{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)

		// IPv4 address + CIDR or dotted quad netmask.
		{"192.168.6.123", a.IPv4Net, false, ""},
		{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
		{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
		{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
		{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask, and 193 is also an invalid CIDR mask
		{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 is a valid CIDR mask; remainder input = ".0.0.0"
	})
}

func TestIPv6Atoms(t *testing.T) {
	var a = tokenize.A
	AssertHandlers(t, []HandlerT{
		{"", a.IPv6, false, ""},
		{"::", a.IPv6, true, "::"},
		{"1::", a.IPv6, true, "1::"},
		{"1::1", a.IPv6, true, "1::1"},
		{"::1", a.IPv6, true, "::1"},
		{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
		{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
		{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
		{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
		{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
		{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
		{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
		{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
		{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
		{"0", a.IPv6CIDRMask, true, "0"},
		{"128", a.IPv6CIDRMask, true, "128"},
		{"129", a.IPv6CIDRMask, false, ""},
		{"::1/128", a.IPv6Net, true, "::1/128"},
		{"::1/129", a.IPv6Net, false, ""},
		{"1.1.1.1/24", a.IPv6Net, false, ""},
		{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
		{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
	})
}

func TestModifiers(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	AssertHandlers(t, []HandlerT{
		{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
		{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
		{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
		{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
		{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
		{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
		{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
		{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
		{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
		{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
		{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
	})
}

// When a TokenMaker encounters an error, this is considered a programmer error.
// A TokenMaker should not be called unless the input has already been validated
// to follow the correct pattern. Therefore, token makers will panic when the
// input cannot be processed successfully.
func TestTokenMakerErrorHandling(t *testing.T) {
	var a, tok = tokenize.A, tokenize.T
	invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
	tokenizer := tokenize.New(invalid)
	AssertPanic(t, PanicT{
		func() { tokenizer("no") }, false,
		`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
	})
}
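
// Illustration (a sketch, not part of this commit): the panic above is avoided
// by pairing a token maker only with a handler that matches input its parser
// accepts. Assuming the handlers used elsewhere in these tests, a.Boolean
// matches exactly the forms that strconv.ParseBool() can handle, so
// tok.Boolean never sees invalid input:
//
//	valid := tok.Boolean("BOOL", a.Boolean)
//	tokenizer := tokenize.New(valid)
//	_, err := tokenizer("true") // matches; yields a Boolean token, no panic
//	_, err = tokenizer("no")    // no match; returns a mismatch error, no panic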

func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},

		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},

		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},

		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},

		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},

		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},

		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},
	})
}

// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	demonic := c.Seq(
		c.Optional(a.SquareOpen),
		m.Trim(
			c.Seq(
				c.Optional(a.Blanks),
				c.Repeated(3, a.AngleClose),
				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
					return fmt.Sprintf("%d", len(s))
				}),
				m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
				m.ToUpper(c.Min(1, a.ASCIILower)),
				m.Drop(a.Excl),
				c.Repeated(3, a.AngleOpen),
				c.Optional(a.Blanks),
			),
			" \t",
		),
		c.Optional(a.SquareClose),
	)

	AssertHandlers(t, []HandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	})
}
@ -0,0 +1,47 @@
package tokenize2

import (
	"fmt"
)

// Token defines a lexical token as produced by tokenize.Handlers.
//
// The only mandatory data in a Token are the Runes. The Type and Value fields
// are optional fields that can be filled with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
	Type  interface{} // optional token type, can be any type that a parser author sees fit
	Value interface{} // optional token value, of any type as well
}

func (t Token) String() string {
	tokenType := ""
	if t.Type != nil {
		tokenType = fmt.Sprintf("%v", t.Type)
	}

	value := ""
	if t.Value != nil {
		switch t.Value.(type) {
		case []*Token:
			return fmt.Sprintf("%v%v", tokenType, t.Value)
		case string:
			value = fmt.Sprintf("%q", t.Value)
		case rune:
			value = fmt.Sprintf("%v", t.Value)
		case bool:
			value = fmt.Sprintf("%v", t.Value)
		default:
			value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
		}
	}

	return fmt.Sprintf("%v(%s)", tokenType, value)
}
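
// Illustration (a sketch, not part of this commit) of how Type and Value
// render through String(); the "ip" and "mask" token types here are
// hypothetical examples:
//
//	t1 := Token{Type: "ip", Value: "127.0.0.1"}
//	t2 := Token{Type: "mask", Value: int8(24)}
//	fmt.Println(t1) // string Value, printed quoted: ip("127.0.0.1")
//	fmt.Println(t2) // other Value types include their type: mask((int8)24)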
@ -0,0 +1,41 @@
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize2

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns a
// Result struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*API, error)

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler function
// that must be executed at the start of the tokenizing process. From there on,
// other tokenize.Handler functions can be invoked recursively to implement the
// tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*API, error) {
		api := NewAPI(input)
		ok := tokenHandler(api)

		if !ok {
			err := fmt.Errorf("mismatch at %s", Cursor{})
			return nil, err
		}
		return api, nil
	}
}
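
// A minimal usage sketch (illustrative, not part of this commit); see the
// runnable ExampleNew() in the tests for the full pattern. It assumes the
// package-level A atoms that the tests access via tokenize.A:
//
//	tokenizer := New(A.IPv4)
//	api, err := tokenizer("10.0.0.1") // err == nil, api holds the match
//	_, err = tokenizer("no IP here")  // err: "mismatch at start of file"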
@ -0,0 +1,223 @@
package tokenize2_test

import (
	"fmt"
	"io"
	"strings"
	"testing"
	"unicode/utf8"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

// TODO For error handling, it would be really cool if for example the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleNew() {
	// Build the tokenizer for ip/mask.
	var c, a, t = tokenize.C, tokenize.A, tokenize.T
	ip := t.Str("ip", a.IPv4)
	mask := t.Int8("mask", a.IPv4CIDRMask)
	cidr := c.Seq(ip, a.Slash, mask)
	tokenizer := tokenize.New(cidr)

	for _, input := range []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	} {
		// The tokenizer returns a result and an error, which is nil on success.
		result, err := tokenizer(input)

		if err == nil {
			fmt.Printf("Result: %s\n", result.Tokens())
		} else {
			fmt.Printf("Error: %s\n", err)
		}
	}
	// Output:
	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}

func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	api := makeTokenizeAPI()
	r, _ := api.NextRune()
	AssertEqual(t, 'T', r, "first rune")
}

func TestInputCanAcceptRunesFromReader(t *testing.T) {
	i := makeTokenizeAPI()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "Tes", i.String(), "i.String()")
}

func TestCallingNextRuneTwice_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.NextRune()
		},
		Regexp: true,
		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
			`without a prior call to Accept\(\)`,
	})
}

func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	api := makeTokenizeAPI()
	AssertPanic(t, PanicT{
		Function: api.Accept,
		Regexp:   true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
			`without first calling NextRune\(\)`,
	})
}

func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
	api := tokenize.NewAPI("")
	AssertPanic(t, PanicT{
		Function: func() {
			api.NextRune()
			api.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
			`, but the prior call to NextRune\(\) failed`,
	})
}

func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Merge(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Merge(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Dispose(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Dispose(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Fork()
			g := i.Fork()
			i.Fork()
			i.Merge(g)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestForkingInput_ClearsLastRune(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

func TestAccept_UpdatesCursor(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r"; the cursor ends up at the "\n"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
	i.NextRune() // read "\n"; the cursor ends up at the start of a new line
	i.Accept()
	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl"; the cursor ends up at the "i"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}

func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}

func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	i.Dispose(child)      // brings the read offset back to the start
	r, err = i.NextRune() // so here we should see the same rune
	AssertEqual(t, 'X', r, "returned rune from 3rd NextRune()")
	AssertEqual(t, true, err == nil, "returned error from 3rd NextRune()")
}

func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}
@ -0,0 +1,110 @@
package tokenize2

import (
	"testing"
)

func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.String(), "accepted rune in input")

	// Fork.
	child := i.Fork()
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "parent offset")
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "child offset")

	// Accept two runes via the fork.
	i.NextRune()
	i.Accept() // e
	i.NextRune()
	i.Accept() // s
	AssertEqual(t, "es", i.String(), "result runes in fork")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "child offset")

	// Merge the fork back into its parent.
	i.Merge(child)
	i.Dispose(child)
	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}

func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	i.NextRune()
	i.Accept()
	f2 := i.Fork()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "s", i.String(), "f2 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f2.offset A")
	i.Merge(f2)
	i.Dispose(f2)
	AssertEqual(t, "es", i.String(), "f1 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1.offset A")
	i.Merge(f1)
	i.Dispose(f1)
	AssertEqual(t, "Tes", i.String(), "top-level API String()")
	AssertEqual(t, 3, i.stackFrame.offset, "top-level offset")
}

func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := NewAPI("Testing")
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
	i.Accept()
	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}

func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Flushing without any read data is okay. FlushInput() will return
	// false in this case, and nothing else happens.
	AssertTrue(t, api.FlushInput() == false, "flush input at start")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")

	// The read offset is now zero, but reading should continue after "co".
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertEqual(t, "cool", api.String(), "end result")
}

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}