New implementation for performance.

parent 7795588fe6
commit 48d7fda9f8

@@ -0,0 +1,374 @@
package tokenize2

import (
    "git.makaay.nl/mauricem/go-parsekit/read"
)

// API holds the internal state of a tokenizer run and provides an API that
// tokenize.Handler functions can use to:
//
// • read and accept runes from the input (NextRune, Accept)
//
// • fork the API for easy lookahead support (Fork, Merge, Reset, Dispose)
//
// • flush already read input data when it is no longer needed (FlushInput)
//
// • retrieve the tokenizer Result struct (Result) to read or modify the results
//
// BASIC OPERATION:
//
// To retrieve the next rune from the API, call the NextRune() method.
//
// When the rune is to be accepted as input, call the method Accept(). The rune
// is then added to the result runes of the API and the read cursor is moved
// forward.
//
// By invoking NextRune() + Accept() multiple times, the result can be extended
// with as many runes as needed. Runes collected this way can later on be
// retrieved using the method Result().Runes().
//
// It is mandatory to call Accept() after retrieving a rune, before calling
// NextRune() again. Failing to do so will result in a panic.
//
// Next to adding runes to the result, it is also possible to modify the
// stored runes or to add lexical Tokens to the result. For all things
// concerning results, take a look at the Result struct, which
// can be accessed through the method Result().
//
// FORKING OPERATION FOR EASY LOOKAHEAD SUPPORT:
//
// Sometimes, we must be able to perform a lookahead, which might either
// succeed or fail. In case of a failing lookahead, the state of the
// API must be brought back to the original state, so we can try
// a different route.
//
// The way in which this is supported, is by forking an API struct by
// calling the method Fork(). This will return a forked child API, with
// empty result data, but using the same read cursor position as the
// forked parent.
//
// After forking, the same interface as described for BASIC OPERATION can be
// used to fill the results. When the lookahead was successful, then
// Merge() can be called on the forked child to append the child's results
// to the parent's results, and to move the read cursor position to that
// of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory;
// garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
//
// Opinionated note:
// Many tokenizers/parsers take a different approach to lookaheads, by using
// peeks and by moving the read cursor position back and forth, or by putting
// read input back on the input stream. That often leads to code that is
// efficient, but in my opinion not very intuitive to read. It can also be
// tedious to get the cursor back to the correct position, which can lead to
// hard-to-track bugs. I much prefer this forking method, since no bookkeeping
// has to be implemented when writing a parser.
type API struct {
    reader      *read.Buffer // the input data reader
    lastRune    rune         // the rune as retrieved by the last NextRune() call
    lastRuneErr error        // the error for the last NextRune() call
    runeRead    bool         // whether or not a rune was read using NextRune()
    runes       []rune       // the rune stack
    tokens      []Token      // the token stack
    stackFrames []stackFrame // the stack frames, containing stack level-specific data
    stackLevel  int          // the current stack level
    stackFrame  *stackFrame  // the current stack frame
}

type stackFrame struct {
    offset     int // current rune offset relative to the Reader's sliding window
    runeStart  int // start of this frame's window into the API's runes slice
    runeEnd    int // end of this frame's window into the API's runes slice
    tokenStart int // start of this frame's window into the API's tokens slice
    tokenEnd   int // end of this frame's window into the API's tokens slice
    cursor     Cursor

    // TODO
    err error // can be used by a Handler to report a specific issue with the input
}

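// Design note (added for illustration; not part of the original commit):
// all stack frames share the single API.runes and API.tokens slices. Each
// frame only records its own [runeStart:runeEnd) and [tokenStart:tokenEnd)
// windows into those slices, so Fork() allocates no new result buffers and
// Merge() can often just widen the parent's window. This slice sharing is
// presumably where the "new implementation for performance" gets its speed.
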
const initialStackDepth = 10
const initialTokenDepth = 10
const initialRuneDepth = 10

// NewAPI initializes a new API struct, wrapped around the provided input.
// For an overview of allowed inputs, take a look at the documentation
// for parsekit.read.New().
func NewAPI(input interface{}) *API {
    api := &API{
        reader:      read.New(input),
        runes:       make([]rune, 0, initialRuneDepth),
        tokens:      make([]Token, 0, initialTokenDepth),
        stackFrames: make([]stackFrame, 1, initialStackDepth),
    }
    api.stackFrame = &api.stackFrames[0]

    return api
}

// NextRune returns the rune at the current read offset.
//
// When an invalid UTF8 rune is encountered on the input, it is replaced with
// the utf8.RuneError rune. It's up to the caller to handle this as an error
// when needed.
//
// After reading a rune it must be Accept()-ed to move the read cursor forward
// to the next rune. Doing so is mandatory. When doing a second call to NextRune()
// without explicitly accepting, this method will panic. You can see this as a
// built-in unit test, enforcing correct serialization of API method calls.
func (i *API) NextRune() (rune, error) {
    if i.runeRead {
        callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
            "without a prior call to Accept()")
    }

    readRune, err := i.reader.RuneAt(i.stackFrame.offset)
    i.lastRune = readRune
    i.lastRuneErr = err
    i.runeRead = true

    return readRune, err
}

// Accept the last rune as read by NextRune() into the Result runes and move
// the cursor forward.
//
// It is not allowed to call Accept() when the previous call to NextRune()
// returned an error. Calling Accept() in such a case will result in a panic.
func (i *API) Accept() {
    if !i.runeRead {
        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller} "+
            "without first calling NextRune()")
    } else if i.lastRuneErr != nil {
        callerPanic("Accept", "tokenize.API.{name}(): {name}() called at {caller}, "+
            "but the prior call to NextRune() failed")
    }

    i.runes = append(i.runes, i.lastRune)
    i.stackFrame.runeEnd++
    i.stackFrame.cursor.moveByRune(i.lastRune)
    i.stackFrame.offset++
    i.runeRead = false
}

// Fork forks off a child of the API struct. It will reuse the same
// read buffer and cursor position, but for the rest this is a fresh API.
//
// By forking an API, you can freely work with the forked child, without
// affecting the parent API. This is for example useful when you must perform
// some form of lookahead.
//
// When processing of the Handler was successful and you want to add the results
// to the parent API, you can call Merge() on the forked child.
// This will add the results to the results of the parent (runes, tokens).
// It also updates the read cursor position of the parent to that of the child.
//
// When the lookahead was unsuccessful, then the forked child API can be
// disposed by calling Dispose() on the forked child. This is not mandatory;
// garbage collection will take care of this automatically.
// The parent API was never modified, so it can safely be used after disposal
// as if the lookahead never happened.
func (i *API) Fork() int {
    newStackLevel := i.stackLevel + 1
    newStackSize := newStackLevel + 1

    // Grow the stack frames capacity when needed.
    if cap(i.stackFrames) < newStackSize {
        newFrames := make([]stackFrame, newStackSize, newStackSize*2)
        copy(newFrames, i.stackFrames)
        i.stackFrames = newFrames
    } else {
        i.stackFrames = i.stackFrames[0:newStackSize]
    }

    parent := i.stackFrame
    i.stackLevel++
    i.stackFrame = &i.stackFrames[i.stackLevel]
    *i.stackFrame = *parent
    i.stackFrame.runeStart = parent.runeEnd
    i.stackFrame.tokenStart = parent.tokenEnd
    i.runeRead = false

    return i.stackLevel
}

// Merge appends the results of a forked child API (runes, tokens) to the
// results of its parent. The read cursor of the parent is also updated
// to that of the forked child.
//
// After the merge operation, the child results are reset, so the child can
// immediately be reused for performing another match. This means that all
// Result data are cleared, but the read cursor position is kept at its
// current position. This allows a child to feed results in chunks to its
// parent.
//
// Once the child is no longer needed, it can be disposed of by using the
// method Dispose(), which will return the tokenizer to the parent.
func (i *API) Merge(stackLevel int) {
    if stackLevel == 0 {
        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on the top-level API stack level 0")
    }
    if stackLevel != i.stackLevel {
        callerPanic("Merge", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on API stack level %d, but the current stack level is %d "+
            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
    }

    parent := &i.stackFrames[stackLevel-1]

    if parent.runeEnd == i.stackFrame.runeStart {
        // The end of the parent slice aligns with the start of the child slice.
        // Because of this, to merge, the parent slice can simply be expanded
        // to include the child slice.
        // parent: |----------|
        // child:             |------|
        // After merge operation:
        // parent: |-----------------|
        // child:                    |---> continue reading from here
        parent.runeEnd = i.stackFrame.runeEnd
        i.stackFrame.runeStart = i.stackFrame.runeEnd
    } else {
        // The end of the parent slice does not align with the start of the
        // child slice. The child slice has to be copied onto the end of
        // the parent slice.
        // parent: |----------|
        // child:                |------|
        // After merge operation:
        // parent: |-----------------|
        // child:                    |---> continue reading from here
        i.runes = append(i.runes[:parent.runeEnd], i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]...)
        parent.runeEnd = len(i.runes)
        i.stackFrame.runeStart = parent.runeEnd
        i.stackFrame.runeEnd = parent.runeEnd
    }

    // The same logic applies to tokens.
    if parent.tokenEnd == i.stackFrame.tokenStart {
        parent.tokenEnd = i.stackFrame.tokenEnd
        i.stackFrame.tokenStart = i.stackFrame.tokenEnd
    } else {
        i.tokens = append(i.tokens[:parent.tokenEnd], i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]...)
        parent.tokenEnd = len(i.tokens)
        i.stackFrame.tokenStart = parent.tokenEnd
        i.stackFrame.tokenEnd = parent.tokenEnd
    }

    parent.offset = i.stackFrame.offset
    parent.cursor = i.stackFrame.cursor

    i.stackFrame.err = nil
    i.runeRead = false
}

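// Sketch (added for illustration; not part of the original commit): the
// chunked feeding that the Merge() documentation describes. A child may
// call Merge() repeatedly before being disposed, handing its results to
// the parent piece by piece while keeping its read cursor:
//
//    child := api.Fork()
//    for moreChunksWanted() { // hypothetical condition
//        api.NextRune()
//        api.Accept()
//        api.Merge(child) // hand this chunk to the parent; keep reading
//    }
//    api.Dispose(child)
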
// Dispose disposes of the forked child API at the provided stack level and
// makes its parent the active API again. Any unmerged child results are
// dropped by truncating the shared rune and token slices back to the
// parent's boundaries.
func (i *API) Dispose(stackLevel int) {
    if stackLevel == 0 {
        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on the top-level API stack level 0")
    }
    if stackLevel != i.stackLevel {
        callerPanic("Dispose", "tokenize.API.{name}(): {name}() called at {caller} "+
            "on API stack level %d, but the current stack level is %d "+
            "(forgot to Dispose() a forked child?)", stackLevel, i.stackLevel)
    }

    i.runeRead = false
    i.stackLevel = stackLevel - 1
    i.stackFrames = i.stackFrames[:stackLevel]
    i.stackFrame = &i.stackFrames[stackLevel-1]
    i.runes = i.runes[0:i.stackFrame.runeEnd]
    i.tokens = i.tokens[0:i.stackFrame.tokenEnd]
}

// Reset clears the results for the current stack level, while keeping the
// read cursor at its current position.
func (i *API) Reset() {
    i.runeRead = false
    i.stackFrame.runeStart = i.stackFrame.runeEnd
    i.stackFrame.tokenStart = i.stackFrame.tokenEnd
    i.stackFrame.err = nil
}

// FlushInput flushes processed input data from the read.Buffer.
// In this context 'processed' means all runes that were read using NextRune()
// and that were added to the results using Accept().
//
// Note:
// When writing your own Handler, you normally won't have to call this
// method yourself. It is automatically called by parsekit when needed.
func (i *API) FlushInput() bool {
    if i.stackFrame.offset > 0 {
        i.reader.Flush(i.stackFrame.offset)
        i.stackFrame.offset = 0
        return true
    }
    return false
}
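
// Sketch (added for illustration; not part of the original commit): what the
// automatic flushing amounts to when scanning a large input at the top level.
// Flushing drops already accepted input from the read buffer, keeping memory
// usage bounded; the collected result runes are unaffected:
//
//    for {
//        r, err := api.NextRune()
//        if err != nil || r == '\n' {
//            break
//        }
//        api.Accept()
//        api.FlushInput() // discard buffered input up to the read offset
//    }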

// String returns the result runes for the current stack level as a string.
func (i *API) String() string {
    return string(i.Runes())
}

// Runes returns the result runes for the current stack level.
func (i *API) Runes() []rune {
    return i.runes[i.stackFrame.runeStart:i.stackFrame.runeEnd]
}

// Rune returns the result rune at the provided offset for the current stack level.
func (i *API) Rune(offset int) rune {
    return i.runes[i.stackFrame.runeStart+offset]
}

// ClearRunes clears the result runes for the current stack level.
func (i *API) ClearRunes() {
    i.runes = i.runes[:i.stackFrame.runeStart]
    i.stackFrame.runeEnd = i.stackFrame.runeStart
}

// SetRunes replaces the result runes for the current stack level.
func (i *API) SetRunes(runes ...rune) {
    i.runes = append(i.runes[:i.stackFrame.runeStart], runes...)
    i.stackFrame.runeEnd = i.stackFrame.runeStart + len(runes)
}

// AddRunes appends runes to the result runes for the current stack level.
func (i *API) AddRunes(runes ...rune) {
    i.runes = append(i.runes[:i.stackFrame.runeEnd], runes...)
    i.stackFrame.runeEnd += len(runes)
}

// AddString appends the runes of the provided string to the result runes
// for the current stack level.
func (i *API) AddString(s string) {
    i.AddRunes([]rune(s)...)
}

// SetString replaces the result runes for the current stack level with the
// runes of the provided string.
func (i *API) SetString(s string) {
    i.SetRunes([]rune(s)...)
}

// Cursor returns the current read cursor position.
func (i *API) Cursor() Cursor {
    return i.stackFrame.cursor
}

// Tokens returns the result tokens for the current stack level.
func (i *API) Tokens() []Token {
    return i.tokens[i.stackFrame.tokenStart:i.stackFrame.tokenEnd]
}

// Token returns the result token at the provided offset for the current stack level.
func (i *API) Token(offset int) Token {
    return i.tokens[i.stackFrame.tokenStart+offset]
}

// TokenValue returns the value of the result token at the provided offset
// for the current stack level.
func (i *API) TokenValue(offset int) interface{} {
    return i.tokens[i.stackFrame.tokenStart+offset].Value
}

// ClearTokens clears the result tokens for the current stack level.
func (i *API) ClearTokens() {
    i.tokens = i.tokens[:i.stackFrame.tokenStart]
    i.stackFrame.tokenEnd = i.stackFrame.tokenStart
}

// SetTokens replaces the result tokens for the current stack level.
func (i *API) SetTokens(tokens ...Token) {
    i.tokens = append(i.tokens[:i.stackFrame.tokenStart], tokens...)
    i.stackFrame.tokenEnd = i.stackFrame.tokenStart + len(tokens)
}

// AddTokens appends tokens to the result tokens for the current stack level.
func (i *API) AddTokens(tokens ...Token) {
    i.tokens = append(i.tokens[:i.stackFrame.tokenEnd], tokens...)
    i.stackFrame.tokenEnd += len(tokens)
}

@@ -0,0 +1,330 @@

package tokenize2_test

import (
    "fmt"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func ExampleNewAPI() {
    tokenize.NewAPI("The input that the API will handle")

    // Output:
}

func ExampleAPI_NextRune() {
    api := tokenize.NewAPI("The input that the API will handle")
    r, err := api.NextRune()
    fmt.Printf("Rune read from input: %c\n", r)
    fmt.Printf("The error: %v\n", err)
    fmt.Printf("API results: %q\n", api.String())

    // Output:
    // Rune read from input: T
    // The error: <nil>
    // API results: ""
}

func ExampleAPI_Accept() {
    api := tokenize.NewAPI("The input that the API will handle")
    api.NextRune() // reads 'T'
    api.Accept()   // adds 'T' to the API results
    api.NextRune() // reads 'h'
    api.Accept()   // adds 'h' to the API results
    api.NextRune() // reads 'e', but it is not added to the API results

    fmt.Printf("API results: %q\n", api.String())

    // Output:
    // API results: "Th"
}

func ExampleAPI_modifyingResults() {
    api := tokenize.NewAPI("")

    api.AddString("Some runes")
    api.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
    api.AddRunes(' ', 'i', 'n', ' ')
    api.AddString("various ways")
    fmt.Printf("API result first 10 runes: %q\n", api.Runes()[0:10])
    fmt.Printf("API result runes as string: %q\n", api.String())

    api.SetString("new ")
    api.AddString("set ")
    api.AddString("of ")
    api.AddRunes('r', 'u', 'n', 'e', 's')
    fmt.Printf("API result runes as string: %q\n", api.String())
    fmt.Printf("API result runes: %q\n", api.Runes())
    fmt.Printf("API third rune: %q\n", api.Rune(2))

    api.AddTokens(tokenize.Token{
        Type:  42,
        Value: "towel"})
    api.AddTokens(tokenize.Token{
        Type:  73,
        Value: "Zaphod"})
    fmt.Printf("API result tokens: %v\n", api.Tokens())
    fmt.Printf("API second result token: %v\n", api.Token(1))

    // Output:
    // API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
    // API result runes as string: "Some runes added in various ways"
    // API result runes as string: "new set of runes"
    // API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
    // API third rune: 'w'
    // API result tokens: [42("towel") 73("Zaphod")]
    // API second result token: 73("Zaphod")
}

func ExampleAPI_Reset() {
    api := tokenize.NewAPI("Very important input!")

    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    // Reset clears the results, but keeps the cursor position.
    api.Reset()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    fmt.Printf("API results: %q at %s\n", api.String(), api.Cursor())

    // Output:
    // API results: "Ve" at line 1, column 3
    // API results: "" at line 1, column 3
    // API results: "ry" at line 1, column 5
}

func ExampleAPI_Fork() {
    // This custom Handler checks for input 'a', 'b' or 'c'.
    abcHandler := func(t *tokenize.API) bool {
        a := tokenize.A
        for _, r := range []rune{'a', 'b', 'c'} {
            child := t.Fork() // fork, so we won't change parent t
            if a.Rune(r)(t) {
                t.Merge(child)   // merge the child's results into the parent
                t.Dispose(child) // return to the parent level
                return true      // and report a successful match
            }
            t.Dispose(child) // return to the parent level
        }
        // If we get here, then no match was found. Return false to communicate
        // this to the caller.
        return false
    }

    // Note: a custom Handler is normally not what you need.
    // You can make use of the parser/combinator tooling to make the
    // implementation a lot simpler and to take care of forking at
    // the appropriate places. The handler from above can be replaced with:
    simpler := tokenize.A.RuneRange('a', 'c')

    result, err := tokenize.New(abcHandler)("another test")
    fmt.Println(result, err)
    result, err = tokenize.New(simpler)("curious")
    fmt.Println(result, err)
    result, err = tokenize.New(abcHandler)("bang on!")
    fmt.Println(result, err)
    result, err = tokenize.New(abcHandler)("not a match")
    fmt.Println(result, err)

    // Output:
    // a <nil>
    // c <nil>
    // b <nil>
    // <nil> mismatch at start of file
}

func ExampleAPI_Merge() {
    tokenHandler := func(t *tokenize.API) bool {
        child1 := t.Fork()
        t.NextRune() // reads 'H'
        t.Accept()
        t.NextRune() // reads 'i'
        t.Accept()

        child2 := t.Fork()
        t.NextRune() // reads ' '
        t.Accept()
        t.NextRune() // reads 'm'
        t.Accept()
        t.Dispose(child2)

        t.Merge(child1)   // We merge child1, which has read 'H' and 'i' only.
        t.Dispose(child1) // and clean up child1 to return to the parent
        return true
    }

    result, _ := tokenize.New(tokenHandler)("Hi mister X!")
    fmt.Println(result.String())

    // Output:
    // Hi
}

func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
    api := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

    // Fork a few levels.
    child1 := api.Fork()
    child2 := api.Fork()
    child3 := api.Fork()
    child4 := api.Fork()

    // Read a rune 'a' from child4.
    r, _ := api.NextRune()
    AssertEqual(t, 'a', r, "child4 rune 1")
    api.Accept()
    AssertEqual(t, "a", api.String(), "child4 runes after rune 1")

    // Read another rune 'b' from child4.
    r, _ = api.NextRune()
    AssertEqual(t, 'b', r, "child4 rune 2")
    api.Accept()
    AssertEqual(t, "ab", api.String(), "child4 runes after rune 2")

    // Merge "ab" from child4 to child3.
    api.Merge(child4)
    AssertEqual(t, "", api.String(), "child4 runes after first merge")

    // Read some more from child4.
    r, _ = api.NextRune()
    AssertEqual(t, 'c', r, "child4 rune 3")
    api.Accept()
    AssertEqual(t, "c", api.String(), "child4 runes after rune 3")
    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child4 rune 3")

    // Merge "c" from child4 to child3.
    api.Merge(child4)

    // And dispose of child4, making child3 the active stack level.
    api.Dispose(child4)

    // Child3 should now have the combined results "abc" from child4's work.
    AssertEqual(t, "abc", api.String(), "child3 after merge of child4")
    AssertEqual(t, "line 1, column 4", api.Cursor().String(), "cursor child3 rune 3, after merge of child4")

    // Now read some data from child3.
    r, _ = api.NextRune()
    AssertEqual(t, 'd', r, "child3 rune 4")
    api.Accept()

    r, _ = api.NextRune()
    AssertEqual(t, 'e', r, "child3 rune 5")
    api.Accept()

    r, _ = api.NextRune()
    AssertEqual(t, 'f', r, "child3 rune 6")
    api.Accept()

    AssertEqual(t, "abcdef", api.String(), "child3 total result after rune 6")

    // Temporarily create some new forks from here, but don't use their outcome.
    child3sub1 := api.Fork()
    api.NextRune()
    api.Accept()
    api.NextRune()
    api.Accept()
    child3sub2 := api.Fork()
    api.NextRune()
    api.Accept()
    api.Merge(child3sub2)   // do merge sub2 down to sub1
    api.Dispose(child3sub2) // and dispose of sub2
    api.Dispose(child3sub1) // but dispose of sub1 without merging

    // Instead, merge the results from before this forking detour from child3
    // to child2 and dispose of child3.
    api.Merge(child3)
    api.Dispose(child3)

    AssertEqual(t, "abcdef", api.String(), "child2 total result after merge of child3")
    AssertEqual(t, "line 1, column 7", api.Cursor().String(), "cursor child2 after merge child3")

    // Merge child2 to child1 and dispose of it.
    api.Merge(child2)
    api.Dispose(child2)

    // Merge child1 a few times to the top level api.
    api.Merge(child1)
    api.Merge(child1)
    api.Merge(child1)
    api.Merge(child1)

    // And dispose of it.
    api.Dispose(child1)

    // Read some data from the top level api.
    r, _ = api.NextRune()
    api.Accept()

    AssertEqual(t, "abcdefg", api.String(), "api string end result")
    AssertEqual(t, "line 1, column 8", api.Cursor().String(), "api cursor end result")
}

func TestClearRunes(t *testing.T) {
    api := tokenize.NewAPI("Laphroaig")
    api.NextRune()   // Read 'L'
    api.Accept()     // Add to runes
    api.NextRune()   // Read 'a'
    api.Accept()     // Add to runes
    api.ClearRunes() // Clear the runes
    api.NextRune()   // Read 'p'
    api.Accept()     // Add to runes
    api.NextRune()   // Read 'h'
    api.Accept()     // Add to runes

    AssertEqual(t, "ph", api.String(), "api string end result")
}

func TestMergeScenariosForTokens(t *testing.T) {
    api := tokenize.NewAPI("")

    token1 := tokenize.Token{Value: 1}
    token2 := tokenize.Token{Value: 2}
    token3 := tokenize.Token{Value: 3}
    token4 := tokenize.Token{Value: 4}

    api.SetTokens(token1)
    tokens := api.Tokens()
    AssertEqual(t, 1, len(tokens), "Tokens 1")

    child := api.Fork()

    tokens = api.Tokens()
    AssertEqual(t, 0, len(tokens), "Tokens 2")

    api.AddTokens(token2)

    // Here we can merge by expanding the token slice on the parent,
    // because the end of the parent slice and the start of the child
    // slice align.
    api.Merge(child)
    api.Dispose(child)

    tokens = api.Tokens()
    AssertEqual(t, 2, len(tokens), "Tokens 3")

    child = api.Fork()
    api.AddTokens(token3)
    api.Reset()
    api.AddTokens(token4)

    // Here the merge means that token4 will be copied to the end of
    // the token slice of the parent, since there's a gap at the place
    // where token3 used to be.
    api.Merge(child)
    api.Dispose(child)

    tokens = api.Tokens()
    AssertEqual(t, 3, len(tokens), "Tokens 4")
    AssertEqual(t, 1, api.TokenValue(0).(int), "Tokens 4, value 0")
    AssertEqual(t, 2, api.TokenValue(1).(int), "Tokens 4, value 1")
    AssertEqual(t, 4, api.TokenValue(2).(int), "Tokens 4, value 2")
}

@@ -0,0 +1,118 @@

package tokenize2_test

// This file contains some tools that are used for writing tests.

import (
    "regexp"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
    if expected != actual {
        t.Errorf(
            "Unexpected value for %s:\nexpected: %q\nactual: %q",
            forWhat, expected, actual)
    }
}

func AssertTrue(t *testing.T, b bool, assertion string) {
    if !b {
        t.Errorf("Assertion %s is false", assertion)
    }
}

type PanicT struct {
    Function func()
    Regexp   bool
    Expect   string
}

func AssertPanics(t *testing.T, testSet []PanicT) {
    for _, test := range testSet {
        AssertPanic(t, test)
    }
}

func AssertPanic(t *testing.T, p PanicT) {
    defer func() {
        if r := recover(); r != nil {
            mismatch := false
            if p.Regexp && !regexp.MustCompile(p.Expect).MatchString(r.(string)) {
                mismatch = true
            }
            if !p.Regexp && p.Expect != r.(string) {
                mismatch = true
            }
            if mismatch {
                t.Errorf(
                    "Code did panic, but unexpected panic message received:\nexpected: %q\nactual: %q",
                    p.Expect, r)
            }
        } else {
            t.Errorf("Function did not panic (expected panic message: %s)", p.Expect)
        }
    }()
    p.Function()
}

type HandlerT struct {
    Input     string
    Handler   tokenize.Handler
    MustMatch bool
    Expected  string
}

func AssertHandlers(t *testing.T, testSet []HandlerT) {
    for _, test := range testSet {
        AssertHandler(t, test)
    }
}

func AssertHandler(t *testing.T, test HandlerT) {
    result, err := tokenize.New(test.Handler)(test.Input)
    if test.MustMatch {
        if err != nil {
            t.Errorf("Test %q failed with error: %s", test.Input, err)
        } else if output := result.String(); output != test.Expected {
            t.Errorf("Test %q failed: unexpected output:\nexpected: %q\nactual: %q\n", test.Input, test.Expected, output)
        }
    } else {
        if err == nil {
            t.Errorf("Test %q failed: should not match, but it did", test.Input)
        }
    }
}

type TokenMakerT struct {
    Input    string
    Handler  tokenize.Handler
    Expected []tokenize.Token
}

func AssertTokenMakers(t *testing.T, testSet []TokenMakerT) {
    for _, test := range testSet {
        AssertTokenMaker(t, test)
    }
}

func AssertTokenMaker(t *testing.T, test TokenMakerT) {
    result, err := tokenize.New(test.Handler)(test.Input)
    if err != nil {
        t.Errorf("Test %q failed with error: %s", test.Input, err)
    } else {
        if len(result.Tokens()) != len(test.Expected) {
            t.Errorf("Unexpected number of tokens in output:\nexpected: %d\nactual: %d", len(test.Expected), len(result.Tokens()))
        }
        for i, expected := range test.Expected {
            actual := result.Token(i)
            if expected.Type != actual.Type {
                t.Errorf("Unexpected Type in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Type, expected.Type, actual.Type, actual.Type)
            }
            if expected.Value != actual.Value {
                t.Errorf("Unexpected Value in result.Tokens[%d]:\nexpected: (%T) %s\nactual: (%T) %s", i, expected.Value, expected.Value, actual.Value, actual.Value)
            }
        }
    }
}

@@ -0,0 +1,33 @@

package tokenize2

import (
    "fmt"
    "runtime"
    "strings"
)

// callerPanic panics with a message that is formatted from the provided
// format string and data. In the resulting message, the placeholder {name}
// is replaced with the provided function name, and {caller} with the
// file:line position of the code that called that function.
func callerPanic(name, f string, data ...interface{}) {
    filepos := callerBefore(name)
    m := fmt.Sprintf(f, data...)
    m = strings.Replace(m, "{caller}", filepos, -1)
    m = strings.Replace(m, "{name}", name, -1)
    panic(m)
}

// callerBefore walks up the call stack, looking for the function with the
// provided name, and returns the file:line position of its caller.
func callerBefore(name string) string {
    found := false
    for i := 1; ; i++ {
        pc, file, line, ok := runtime.Caller(i)
        if found {
            return fmt.Sprintf("%s:%d", file, line)
        }
        if !ok {
            return "unknown caller"
        }
        f := runtime.FuncForPC(pc)

        if strings.HasSuffix(f.Name(), "."+name) {
            found = true
        }
    }
}
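
// For illustration (added; not part of the original commit): given the call
// in API.NextRune(),
//
//    callerPanic("NextRune", "tokenize.API.{name}(): {name}() called at {caller} "+
//        "without a prior call to Accept()")
//
// the resulting panic message looks like the following, where the path and
// line number depend on the calling code:
//
//    tokenize.API.NextRune(): NextRune() called at /path/to/yourparser.go:42
//    without a prior call to Accept()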

@@ -0,0 +1,35 @@

package tokenize2

import (
    "strings"
    "testing"
)

func SomeFunc1() {
    SomeFunc2()
}

func SomeFunc2() {
    SomeFunc3()
}

func SomeFunc3() {
    callerPanic("SomeFunc2", "{name} was called from {caller}")
}

func TestCallerPanic(t *testing.T) {
    defer func() {
        r := recover()
        err := r.(string)

        if !strings.Contains(err, "SomeFunc2 was called from") || !strings.Contains(err, "callerinfo_test.go:") {
            t.Fatalf("Unexpected error message: %s", err)
        }
    }()
    SomeFunc1()
}

func TestCallerBefore_WithFunctionNameNotInStack(t *testing.T) {
    caller := callerBefore("NotExistingAtAll")
    AssertEqual(t, "unknown caller", caller, "result for name not in stack")
}

@@ -0,0 +1,45 @@

package tokenize2

import (
    "fmt"
    "unicode/utf8"
)

// Cursor represents the position of a cursor in various ways.
type Cursor struct {
    Byte   int // The cursor offset in bytes
    Rune   int // The cursor offset in UTF8 runes
    Column int // The column at which the cursor is (0-indexed)
    Line   int // The line at which the cursor is (0-indexed)
}

// String produces a string representation of the cursor position.
func (c Cursor) String() string {
    if c.Line == 0 && c.Column == 0 {
        return "start of file"
    }
    return fmt.Sprintf("line %d, column %d", c.Line+1, c.Column+1)
}

// move updates the position of the cursor, based on the provided input string.
// The input string represents the runes that the cursor must be moved over.
// This method will take newlines into account to keep track of line numbers and
// column positions automatically.
func (c *Cursor) move(input string) *Cursor {
    for _, r := range input {
        c.moveByRune(r)
    }
    return c
}

// moveByRune updates the position of the cursor, based on a single input rune.
func (c *Cursor) moveByRune(r rune) *Cursor {
    c.Byte += utf8.RuneLen(r)
    c.Rune++
    if r == '\n' {
        c.Column = 0
        c.Line++
    } else {
        c.Column++
    }
    return c
}
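
// Worked example (added for illustration; not part of the original commit):
// moving a fresh Cursor over "ab\n⌘" yields Byte 6 (1+1+1+3; '⌘' is a
// three-byte UTF8 rune), Rune 4, Line 1 and Column 1. String() displays
// these 1-based, so it reports "line 2, column 2".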

@@ -0,0 +1,69 @@

package tokenize2

import (
    "fmt"
    "testing"
)

func ExampleCursor_move() {
    c := Cursor{}
    fmt.Printf("after initialization : %s\n", c)
    fmt.Printf("after 'some words' : %s\n", c.move("some words"))
    fmt.Printf("after '\\n' : %s\n", c.move("\n"))
    fmt.Printf("after '\\r\\nskip\\nlines' : %s\n", c.move("\r\nskip\nlines"))

    // Output:
    // after initialization : start of file
    // after 'some words' : line 1, column 11
    // after '\n' : line 2, column 1
    // after '\r\nskip\nlines' : line 4, column 6
}

func ExampleCursor_String() {
    c := Cursor{}
    fmt.Println(c.String())

    c.move("\nfoobar")
    fmt.Println(c.String())

    // Output:
    // start of file
    // line 2, column 7
}

func TestGivenCursor_WhenMoving_CursorIsUpdated(t *testing.T) {
    for _, test := range []struct {
        name   string
        input  []string
        byte   int
        rune   int
        line   int
        column int
    }{
        {"No input at all", []string{""}, 0, 0, 0, 0},
        {"One ASCII char", []string{"a"}, 1, 1, 0, 1},
        {"Multiple ASCII chars", []string{"abc"}, 3, 3, 0, 3},
        {"One newline", []string{"\n"}, 1, 1, 1, 0},
        {"Carriage return", []string{"\r\r\r"}, 3, 3, 0, 3},
        {"One UTF8 3 byte char", []string{"⌘"}, 3, 1, 0, 1},
        {"Mixture", []string{"Hello\n\npretty\nW⌘O⌘R⌘L⌘D"}, 31, 23, 3, 9},
        {"Multiple calls", []string{"hello", "world"}, 10, 10, 0, 10},
    } {
        c := Cursor{}
        for _, s := range test.input {
            c.move(s)
        }
        if c.Byte != test.byte {
            t.Errorf("[%s] Unexpected byte offset %d (expected %d)", test.name, c.Byte, test.byte)
        }
        if c.Rune != test.rune {
            t.Errorf("[%s] Unexpected rune offset %d (expected %d)", test.name, c.Rune, test.rune)
        }
        if c.Line != test.line {
            t.Errorf("[%s] Unexpected line offset %d (expected %d)", test.name, c.Line, test.line)
        }
        if c.Column != test.column {
            t.Errorf("[%s] Unexpected column offset %d (expected %d)", test.name, c.Column, test.column)
        }
    }
}

@@ -0,0 +1,53 @@

package tokenize2

// Handler is the function type that is involved in turning a low level
// stream of UTF8 runes into lexical tokens. Its purpose is to check if input
// data matches some kind of pattern and to report back the results.
//
// A Handler function gets an API as its input and returns a boolean to
// indicate whether or not it found a match on the input. The API is used
// for retrieving input data to match against and for reporting back results.
type Handler func(t *API) bool

// Match is syntactic sugar that allows you to write a construction like
// NewTokenizer(handler).Execute(input) as handler.Match(input).
func (handler Handler) Match(input interface{}) (*API, error) {
    tokenizer := New(handler)
    return tokenizer(input)
}

// Or is syntactic sugar that allows you to write a construction like
// MatchAny(tokenHandler1, tokenHandler2) as tokenHandler1.Or(tokenHandler2).
func (handler Handler) Or(otherHandler Handler) Handler {
    return MatchAny(handler, otherHandler)
}

// Times is syntactic sugar that allows you to write a construction like
// MatchRep(3, handler) as handler.Times(3).
func (handler Handler) Times(n int) Handler {
    return MatchRep(n, handler)
}

// Then is syntactic sugar that allows you to write a construction like
// MatchSeq(handler1, handler2, handler3) as handler1.Then(handler2).Then(handler3).
func (handler Handler) Then(otherHandler Handler) Handler {
    return MatchSeq(handler, otherHandler)
}

// SeparatedBy is syntactic sugar that allows you to write a construction like
// MatchSeparated(separator, handler) as handler.SeparatedBy(separator).
func (handler Handler) SeparatedBy(separator Handler) Handler {
    return MatchSeparated(separator, handler)
}

// Optional is syntactic sugar that allows you to write a construction like
// MatchOptional(handler) as handler.Optional().
func (handler Handler) Optional() Handler {
    return MatchOptional(handler)
}

// Except is syntactic sugar that allows you to write a construction like
// MatchExcept(handler, exceptHandler) as handler.Except(exceptHandler).
func (handler Handler) Except(exceptHandler Handler) Handler {
    return MatchExcept(handler, exceptHandler)
}

@@ -0,0 +1,101 @@

package tokenize2_test

import (
    "fmt"
    "testing"

    tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

func TestSyntacticSugar(t *testing.T) {
    var a = tokenize.A
    AssertHandlers(t, []HandlerT{
        {"aaaaaa", a.Rune('a').Times(4), true, "aaaa"},
        {"ababab", a.Rune('a').Or(a.Rune('b')).Times(4), true, "abab"},
        {"ababab", a.Rune('a').Then(a.Rune('b')), true, "ab"},
        {"bababa", a.Rune('a').Then(a.Rune('b')), false, ""},
        {"cccccc", a.Rune('c').Optional(), true, "c"},
        {"dddddd", a.Rune('c').Optional(), true, ""},
        {"a,b,c,d", a.ASCII.SeparatedBy(a.Comma), true, "a,b,c,d"},
        {"a, b, c, d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space)), true, "a, b, c, d"},
        {"a, b,c,d", a.ASCII.SeparatedBy(a.Comma.Then(a.Space.Optional())), true, "a, b,c,d"},
        {"a, b, c, d", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma.Then(a.Space.Optional()))), true, "a, b, c, d"},
        {"a,b ,c, d|", a.ASCII.SeparatedBy(a.Space.Optional().Then(a.Comma).Then(a.Space.Optional())), true, "a,b ,c, d"},
    })
}

func ExampleHandler_Times() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := c.Seq(a.Rune('0'), a.Digit.Times(9))

    fmt.Println(phoneNumber.Match("0201234567"))
    // Output:
    // 0201234567 <nil>
}

func ExampleHandler_Then() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := a.Rune('0').Then(c.Repeated(9, a.Digit))

    fmt.Println(phoneNumber.Match("0208888888"))
    // Output:
    // 0208888888 <nil>
}

func ExampleHandler_Or() {
    c, a := tokenize.C, tokenize.A
    phoneNumber := c.Seq(a.Str("00").Or(a.Plus), a.Str("31"), a.DigitNotZero, c.Repeated(8, a.Digit))

    fmt.Println(phoneNumber.Match("+31209876543"))
    fmt.Println(phoneNumber.Match("0031209876543"))
    fmt.Println(phoneNumber.Match("0031020991234"))
    fmt.Println(phoneNumber.Match("0031201234"))
    // Output:
    // +31209876543 <nil>
    // 0031209876543 <nil>
    // <nil> mismatch at start of file
    // <nil> mismatch at start of file
}

func ExampleHandler_SeparatedBy() {
    a, t := tokenize.A, tokenize.T
    csv := t.Int("number", a.Digits).SeparatedBy(a.Comma)

    r, _ := csv.Match("123,456,7,8,9")
    for i, token := range r.Tokens() {
        fmt.Printf("[%d] %v\n", i, token)
    }
    // Output:
    // [0] number((int)123)
    // [1] number((int)456)
    // [2] number((int)7)
    // [3] number((int)8)
    // [4] number((int)9)
}

func ExampleHandler_Optional() {
    c, a := tokenize.C, tokenize.A

    spanish := c.Seq(
        a.Rune('¿').Optional(),
        c.OneOrMore(a.AnyRune.Except(a.Question)),
        a.Rune('?').Optional())

    fmt.Println(spanish.Match("¿Habla español María?"))
    fmt.Println(spanish.Match("Sí, María habla español."))
    // Output:
    // ¿Habla español María? <nil>
    // Sí, María habla español. <nil>
}

func ExampleHandler_Match() {
    r, err := tokenize.A.IPv4.Match("001.002.003.004")
    fmt.Println(r, err)

    r, err = tokenize.A.IPv4.Match("1.2.3")
    fmt.Println(r, err)

    // Output:
    // 1.2.3.4 <nil>
    // <nil> mismatch at start of file
}

(File diff suppressed because it is too large.)

@@ -0,0 +1,445 @@

package tokenize2_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
|
||||
)
|
||||
|
||||
func TestCombinatorsTempDebug(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
// {"024", a.IPv4CIDRMask, true, "24"},
|
||||
// {"024", a.Octet, true, "24"},
|
||||
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestCombinators(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"abc not", c.Not(a.Rune('b')), true, "a"},
|
||||
{"bcd not", c.Not(a.Rune('b')), false, ""},
|
||||
{"1010 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), true, "1"},
|
||||
{"2020 not", c.Not(c.Seq(a.Rune('2'), a.Rune('0'))), false, ""},
|
||||
{"abc any", c.Any(a.Rune('a'), a.Rune('b')), true, "a"},
|
||||
{"bcd any", c.Any(a.Rune('a'), a.Rune('b')), true, "b"},
|
||||
{"cde any", c.Any(a.Rune('a'), a.Rune('b')), false, ""},
|
||||
{"ababc repeated", c.Repeated(4, a.Runes('a', 'b')), true, "abab"},
|
||||
{"ababc repeated", c.Repeated(5, a.Runes('a', 'b')), false, ""},
|
||||
{"", c.Min(0, a.Rune('a')), true, ""},
|
||||
{"a", c.Min(0, a.Rune('a')), true, "a"},
|
||||
{"aaaaa", c.Min(4, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(5, a.Rune('a')), true, "aaaaa"},
|
||||
{"aaaaa", c.Min(6, a.Rune('a')), false, ""},
|
||||
{"", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"X", c.Max(4, a.Rune('b')), true, ""},
|
||||
{"bbbbbX", c.Max(4, a.Rune('b')), true, "bbbb"},
|
||||
{"bbbbbX", c.Max(5, a.Rune('b')), true, "bbbbb"},
|
||||
{"bbbbbX", c.Max(6, a.Rune('b')), true, "bbbbb"},
|
||||
{"", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"X", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccc", c.MinMax(0, 5, a.Rune('c')), true, "cccc"},
|
||||
{"ccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccc", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 0, a.Rune('c')), true, ""},
|
||||
{"cccccX", c.MinMax(0, 1, a.Rune('c')), true, "c"},
|
||||
{"cccccX", c.MinMax(0, 5, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(0, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(1, 1, a.Rune('c')), true, "c"},
|
||||
{"", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"X", c.MinMax(1, 1, a.Rune('c')), false, ""},
|
||||
{"cccccX", c.MinMax(1, 3, a.Rune('c')), true, "ccc"},
|
||||
{"cccccX", c.MinMax(1, 6, a.Rune('c')), true, "ccccc"},
|
||||
{"cccccX", c.MinMax(3, 4, a.Rune('c')), true, "cccc"},
|
||||
{"", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"X", c.OneOrMore(a.Rune('d')), false, ""},
|
||||
{"dX", c.OneOrMore(a.Rune('d')), true, "d"},
|
||||
{"dddddX", c.OneOrMore(a.Rune('d')), true, "ddddd"},
|
||||
{"", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"X", c.ZeroOrMore(a.Rune('e')), true, ""},
|
||||
{"eX", c.ZeroOrMore(a.Rune('e')), true, "e"},
|
||||
{"eeeeeX", c.ZeroOrMore(a.Rune('e')), true, "eeeee"},
|
||||
{"HI!", c.Seq(a.Rune('H'), a.Rune('I'), a.Rune('!')), true, "HI!"},
|
||||
{"Hello, world!X", c.Seq(a.Str("Hello"), a.Comma, a.Space, a.Str("world"), a.Excl), true, "Hello, world!"},
|
||||
{"101010123", c.OneOrMore(c.Seq(a.Rune('1'), a.Rune('0'))), true, "101010"},
|
||||
{"", c.Optional(c.OneOrMore(a.Rune('f'))), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('h')), true, ""},
|
||||
{"ghijkl", c.Optional(a.Rune('g')), true, "g"},
|
||||
{"fffffX", c.Optional(c.OneOrMore(a.Rune('f'))), true, "fffff"},
|
||||
{"1,2,3,b,c", c.Separated(a.Comma, a.Digit), true, "1,2,3"},
|
||||
{`\x9a\x01\xF0\xfCAndSomeMoreStuff`, c.OneOrMore(c.Seq(a.Backslash, a.Rune('x'), c.Repeated(2, a.HexDigit))), true, `\x9a\x01\xF0\xfC`},
|
||||
{" ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
{" ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestCombinatorPanics(t *testing.T) {
|
||||
var c, a = tokenize.C, tokenize.A
|
||||
AssertPanics(t, []PanicT{
|
||||
{func() { a.RuneRange('z', 'a') }, true,
|
||||
`Handler: MatchRuneRange definition error at /.*/handlers_builtin_test\.go:\d+: start 'z' must not be < end 'a'`},
|
||||
{func() { c.MinMax(-1, 1, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.MinMax(1, -1, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { c.MinMax(10, 5, a.Space) }, true,
|
||||
`Handler: MatchMinMax definition error at /.*/handlers_builtin_test\.go:\d+: max 5 must not be < min 10`},
|
||||
{func() { c.Min(-10, a.Space) }, true,
|
||||
`Handler: MatchMin definition error at /.*/handlers_builtin_test\.go:\d+: min must be >= 0`},
|
||||
{func() { c.Max(-42, a.Space) }, true,
|
||||
`Handler: MatchMax definition error at /.*/handlers_builtin_test\.go:\d+: max must be >= 0`},
|
||||
{func() { a.IntegerBetween(10, -10) }, true,
|
||||
`Handler: MatchIntegerBetween definition error at /.*/handlers_builtin_test.go:\d+: max -10 must not be < min 10`},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAtoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"dd", a.RuneRange('b', 'e'), true, "d"},
|
||||
{"ee", a.RuneRange('b', 'e'), true, "e"},
|
||||
{"ff", a.RuneRange('b', 'e'), false, ""},
|
||||
{"Hello, world!", a.Str("Hello"), true, "Hello"},
|
||||
{"HellÖ, world!", a.StrNoCase("hellö"), true, "HellÖ"},
|
||||
{"+X", a.Runes('+', '-', '*', '/'), true, "+"},
|
||||
{"-X", a.Runes('+', '-', '*', '/'), true, "-"},
|
||||
{"*X", a.Runes('+', '-', '*', '/'), true, "*"},
|
||||
{"/X", a.Runes('+', '-', '*', '/'), true, "/"},
|
||||
{"!X", a.Runes('+', '-', '*', '/'), false, ""},
|
||||
{"xxx", a.Rune('x'), true, "x"},
|
||||
{"x ", a.Rune(' '), false, ""},
|
||||
{"aa", a.RuneRange('b', 'e'), false, ""},
|
||||
{"bb", a.RuneRange('b', 'e'), true, "b"},
|
||||
{"cc", a.RuneRange('b', 'e'), true, "c"},
|
||||
{"", a.EndOfFile, true, ""},
|
||||
{"⌘", a.AnyRune, true, "⌘"},
|
||||
{"\xbc with AnyRune", a.AnyRune, true, "<22>"},
|
||||
{"", a.AnyRune, false, ""},
|
||||
{"⌘", a.ValidRune, true, "⌘"},
|
||||
{"\xbc with ValidRune", a.ValidRune, false, "<22>"},
|
||||
{"", a.ValidRune, false, ""},
|
||||
{" ", a.Space, true, " "},
|
||||
{"X", a.Space, false, ""},
|
||||
{"\t", a.Tab, true, "\t"},
|
||||
{"\r", a.CR, true, "\r"},
|
||||
{"\n", a.LF, true, "\n"},
|
||||
{"!", a.Excl, true, "!"},
|
||||
{"\"", a.DoubleQuote, true, "\""},
|
||||
{"#", a.Hash, true, "#"},
|
||||
{"$", a.Dollar, true, "$"},
|
||||
{"%", a.Percent, true, "%"},
|
||||
{"&", a.Amp, true, "&"},
|
||||
{"'", a.SingleQuote, true, "'"},
|
||||
{"(", a.LeftParen, true, "("},
|
||||
{"(", a.RoundOpen, true, "("},
|
||||
{")", a.RightParen, true, ")"},
|
||||
{")", a.RoundClose, true, ")"},
|
||||
{"*", a.Asterisk, true, "*"},
|
||||
{"*", a.Multiply, true, "*"},
|
||||
{"+", a.Plus, true, "+"},
|
||||
{"+", a.Add, true, "+"},
|
||||
{",", a.Comma, true, ","},
|
||||
{"-", a.Minus, true, "-"},
|
||||
{"-", a.Subtract, true, "-"},
|
||||
{".", a.Dot, true, "."},
|
||||
{"/", a.Slash, true, "/"},
|
||||
{"/", a.Divide, true, "/"},
|
||||
{":", a.Colon, true, ":"},
|
||||
{";", a.Semicolon, true, ";"},
|
||||
{"<", a.AngleOpen, true, "<"},
|
||||
{"<", a.LessThan, true, "<"},
|
||||
{"=", a.Equal, true, "="},
|
||||
{">", a.AngleClose, true, ">"},
|
||||
{">", a.GreaterThan, true, ">"},
|
||||
{"?", a.Question, true, "?"},
|
||||
{"@", a.At, true, "@"},
|
||||
{"[", a.SquareOpen, true, "["},
|
||||
{"\\", a.Backslash, true, "\\"},
|
||||
{"]", a.SquareClose, true, "]"},
|
||||
{"^", a.Caret, true, "^"},
|
||||
{"_", a.Underscore, true, "_"},
|
||||
{"`", a.Backquote, true, "`"},
|
||||
{"{", a.CurlyOpen, true, "{"},
|
||||
{"|", a.Pipe, true, "|"},
|
||||
{"}", a.CurlyClose, true, "}"},
|
||||
{"~", a.Tilde, true, "~"},
|
||||
{"\t \t \r\n", a.Blank, true, "\t"},
|
||||
{" \t \t \r\n", a.Blanks, true, " \t \t "},
|
||||
{"xxx", a.Whitespace, false, ""},
|
||||
{" ", a.Whitespace, true, " "},
|
||||
{"\t", a.Whitespace, true, "\t"},
|
||||
{"\n", a.Whitespace, true, "\n"},
|
||||
{"\r\n", a.Whitespace, true, "\r\n"},
|
||||
{" \t\r\n \n \t\t\r\n ", a.Whitespace, true, " \t\r\n \n \t\t\r\n "},
|
||||
{"xxx", a.UnicodeSpace, false, ""},
|
||||
{" \t\r\n \r\v\f ", a.UnicodeSpace, true, " \t\r\n \r\v\f "},
|
||||
{"", a.EndOfLine, true, ""},
|
||||
{"\r\n", a.EndOfLine, true, "\r\n"},
|
||||
{"\n", a.EndOfLine, true, "\n"},
|
||||
{"0", a.Digit, true, "0"},
|
||||
{"1", a.Digit, true, "1"},
|
||||
{"2", a.Digit, true, "2"},
|
||||
{"3", a.Digit, true, "3"},
|
||||
{"4", a.Digit, true, "4"},
|
||||
{"5", a.Digit, true, "5"},
|
||||
{"6", a.Digit, true, "6"},
|
||||
{"7", a.Digit, true, "7"},
|
||||
{"8", a.Digit, true, "8"},
|
||||
{"9", a.Digit, true, "9"},
|
||||
{"X", a.Digit, false, ""},
|
||||
{"a", a.ASCIILower, true, "a"},
|
||||
{"z", a.ASCIILower, true, "z"},
|
||||
{"A", a.ASCIILower, false, ""},
|
||||
{"Z", a.ASCIILower, false, ""},
|
||||
{"A", a.ASCIIUpper, true, "A"},
|
||||
{"Z", a.ASCIIUpper, true, "Z"},
|
||||
{"a", a.ASCIIUpper, false, ""},
|
||||
{"z", a.ASCIIUpper, false, ""},
|
||||
{"1", a.Letter, false, ""},
|
||||
{"a", a.Letter, true, "a"},
|
||||
{"Ø", a.Letter, true, "Ø"},
|
||||
{"Ë", a.Lower, false, ""},
|
||||
{"ë", a.Lower, true, "ë"},
|
||||
{"ä", a.Upper, false, "ä"},
|
||||
{"Ä", a.Upper, true, "Ä"},
|
||||
{"0", a.HexDigit, true, "0"},
|
||||
{"9", a.HexDigit, true, "9"},
|
||||
{"a", a.HexDigit, true, "a"},
|
||||
{"f", a.HexDigit, true, "f"},
|
||||
{"A", a.HexDigit, true, "A"},
|
||||
{"F", a.HexDigit, true, "F"},
|
||||
{"g", a.HexDigit, false, "g"},
|
||||
{"G", a.HexDigit, false, "G"},
|
||||
{"0", a.Integer, true, "0"},
|
||||
{"09", a.Integer, true, "0"}, // following Go: 09 is invalid octal, so only 0 is valid for the integer
|
||||
{"1", a.Integer, true, "1"},
|
||||
{"-10X", a.Integer, false, ""},
|
||||
{"+10X", a.Integer, false, ""},
|
||||
{"-10X", a.Signed(a.Integer), true, "-10"},
|
||||
{"+10X", a.Signed(a.Integer), true, "+10"},
|
||||
{"+10.1X", a.Signed(a.Integer), true, "+10"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"0X", a.Float, true, "0"},
|
||||
{"1X", a.Float, true, "1"},
|
||||
{"1.", a.Float, true, "1"}, // incomplete float, so only the 1 is picked up
|
||||
{"123.321X", a.Float, true, "123.321"},
|
||||
{"-3.14X", a.Float, false, ""},
|
||||
{"-3.14X", a.Signed(a.Float), true, "-3.14"},
|
||||
{"-003.0014X", a.Signed(a.Float), true, "-003.0014"},
|
||||
{"-11", a.IntegerBetween(-10, 10), false, "0"},
|
||||
{"-10", a.IntegerBetween(-10, 10), true, "-10"},
|
||||
{"0", a.IntegerBetween(-10, 10), true, "0"},
|
||||
{"10", a.IntegerBetween(-10, 10), true, "10"},
|
||||
{"11", a.IntegerBetween(0, 10), false, ""},
|
||||
})
|
||||
}
|
||||
|
||||
func TestIPv4Atoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
// Not normalized octet.
|
||||
{"0X", tokenize.MatchOctet(false), true, "0"},
|
||||
{"00X", tokenize.MatchOctet(false), true, "00"},
|
||||
{"000X", tokenize.MatchOctet(false), true, "000"},
|
||||
{"10X", tokenize.MatchOctet(false), true, "10"},
|
||||
{"010X", tokenize.MatchOctet(false), true, "010"},
|
||||
{"255123", tokenize.MatchOctet(false), true, "255"},
|
||||
{"256123", tokenize.MatchOctet(false), false, ""},
|
||||
{"300", tokenize.MatchOctet(false), false, ""},
|
||||
|
||||
// Normalized octet.
|
||||
{"0X", a.Octet, true, "0"},
|
||||
{"00X", a.Octet, true, "0"},
|
||||
{"000X", a.Octet, true, "0"},
|
||||
{"10X", a.Octet, true, "10"},
|
||||
{"010X", a.Octet, true, "10"},
|
||||
{"255123", a.Octet, true, "255"},
|
||||
{"256123", a.Octet, false, ""},
|
||||
{"300", a.Octet, false, ""},
|
||||
|
||||
// IPv4 address.
|
||||
{"0.0.0.0", a.IPv4, true, "0.0.0.0"},
|
||||
{"10.20.30.40", a.IPv4, true, "10.20.30.40"},
|
||||
{"010.020.003.004", a.IPv4, true, "10.20.3.4"},
|
||||
{"255.255.255.255", a.IPv4, true, "255.255.255.255"},
|
||||
{"256.255.255.255", a.IPv4, false, ""},
|
||||
|
||||
// IPv4 CIDR netmask.
|
||||
{"0", a.IPv4CIDRMask, true, "0"},
|
||||
{"00", a.IPv4CIDRMask, true, "0"},
|
||||
{"000", a.IPv4CIDRMask, true, "0"},
|
||||
{"32", a.IPv4CIDRMask, true, "32"},
|
||||
{"032", a.IPv4CIDRMask, true, "32"},
|
||||
{"33", a.IPv4CIDRMask, false, ""},
|
||||
|
||||
// IPv4 netmask in dotted quad format.
|
||||
{"0.0.0.0", a.IPv4Netmask, true, "0.0.0.0"},
|
||||
{"255.255.128.0", a.IPv4Netmask, true, "255.255.128.0"},
|
||||
{"255.255.255.255", a.IPv4Netmask, true, "255.255.255.255"},
|
||||
{"255.255.132.0", a.IPv4Netmask, false, ""}, // not a canonical netmask (1-bits followed by 0-bits)
|
||||
|
||||
// IPv4 address + CIDR or dotted quad netmask.
|
||||
{"192.168.6.123", a.IPv4Net, false, ""},
|
||||
{"192.168.6.123/024", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
{"192.168.6.123/255.255.255.0", a.IPv4Net, true, "192.168.6.123/24"},
|
||||
{"10.0.0.10/192.0.0.0", a.IPv4Net, true, "10.0.0.10/2"},
|
||||
{"10.0.0.10/193.0.0.0", a.IPv4Net, false, ""}, // invalid netmask and 193 is also invalid cidr
|
||||
{"010.000.000.010/16.000.000.000", a.IPv4Net, true, "10.0.0.10/16"}, // invalid netmask, but 16 cidr is ok, remainder input = ".0.0.0"
|
||||
})
|
||||
}
|
||||
|
||||
func TestIPv6Atoms(t *testing.T) {
|
||||
var a = tokenize.A
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"", a.IPv6, false, ""},
|
||||
{"::", a.IPv6, true, "::"},
|
||||
{"1::", a.IPv6, true, "1::"},
|
||||
{"1::1", a.IPv6, true, "1::1"},
|
||||
{"::1", a.IPv6, true, "::1"},
|
||||
{"1:2:3:4:5:6:7::", a.IPv6, false, ""},
|
||||
{"::1:2:3:4:5:6:7:8:9", a.IPv6, true, "::1:2:3:4:5:6"},
|
||||
{"1:2:3:4::5:6:7:8:9", a.IPv6, true, "1:2:3:4::5:6"},
|
||||
{"a:b::ffff:0:1111", a.IPv6, true, "a:b::ffff:0:1111"},
|
||||
{"000a:000b:0000:000:00:ffff:0000:1111", a.IPv6, true, "a:b::ffff:0:1111"},
|
||||
{"000a:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "a::1:0:0:ffff:1111"},
|
||||
{"0000:0000:0000:001:00:0:ffff:1111", a.IPv6, true, "::1:0:0:ffff:1111"},
|
||||
{"aaaa:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, true, "aaaa:bbbb:cccc:dddd:eeee:ffff:0:1111"},
|
||||
{"gggg:bbbb:cccc:dddd:eeee:ffff:0000:1111", a.IPv6, false, ""},
|
||||
{"ffff::gggg:eeee:ffff:0000:1111", a.IPv6, true, "ffff::"},
|
||||
{"0", a.IPv6CIDRMask, true, "0"},
|
||||
{"128", a.IPv6CIDRMask, true, "128"},
|
||||
{"129", a.IPv6CIDRMask, false, ""},
|
||||
{"::1/128", a.IPv6Net, true, "::1/128"},
|
||||
{"::1/129", a.IPv6Net, false, ""},
|
||||
{"1.1.1.1/24", a.IPv6Net, false, ""},
|
||||
{"ffff:0:0:0::1010/0", a.IPv6Net, true, "ffff::1010/0"},
|
||||
{"fe80:0:0:0:0216:3eff:fe96:0002/64", a.IPv6Net, true, "fe80::216:3eff:fe96:2/64"},
|
||||
})
|
||||
}
|
||||
|
||||
func TestModifiers(t *testing.T) {
|
||||
var c, a, m = tokenize.C, tokenize.A, tokenize.M
|
||||
AssertHandlers(t, []HandlerT{
|
||||
{"--cool", c.Seq(m.Drop(c.OneOrMore(a.Minus)), a.Str("cool")), true, "cool"},
|
||||
{"12345", c.Seq(a.Digit, m.Drop(a.Digit), a.Digit, m.Drop(a.Digit), a.Digit), true, "135"},
|
||||
{" trim ", m.Trim(c.OneOrMore(a.AnyRune), " "), true, "trim"},
|
||||
{" \t trim \t ", m.Trim(c.OneOrMore(a.AnyRune), " \t"), true, "trim"},
|
||||
{" trim ", m.TrimLeft(c.OneOrMore(a.AnyRune), " "), true, "trim "},
|
||||
{" trim ", m.TrimRight(c.OneOrMore(a.AnyRune), " "), true, " trim"},
|
||||
{" \t trim \t ", m.TrimRight(c.OneOrMore(a.AnyRune), " \t"), true, " \t trim"},
|
||||
{"dirtyword", m.Replace(c.OneOrMore(a.AnyRune), "*******"), true, "*******"},
|
||||
{"abcdefghijk", m.ByCallback(a.Str("abc"), func(s string) string { return "X" }), true, "X"},
|
||||
{"NoTaLlUpPeR", m.ToUpper(a.StrNoCase("notallUPPER")), true, "NOTALLUPPER"},
|
||||
{"NoTaLlLoWeR", m.ToLower(a.StrNoCase("NOTALLlower")), true, "notalllower"},
|
||||
})
|
||||
}
|
||||
|
||||
// When a TokenMaker encounters an error, this is considered a programmer error.
|
||||
// A TokenMaker should not be called, unless the input is already validated to
|
||||
// follow the correct pattern. Therefore, tokenmakers will panic when the
|
||||
// input cannot be processed successfully.
|
||||
func TestTokenMakerErrorHandling(t *testing.T) {
|
||||
var a, tok = tokenize.A, tokenize.T
|
||||
invalid := tok.Boolean("BOOL", a.Str("no")) // not valid for strconv.ParseBool()
|
||||
tokenizer := tokenize.New(invalid)
|
||||
AssertPanic(t, PanicT{
|
||||
func() { tokenizer("no") }, false,
|
||||
`boolean token invalid (strconv.ParseBool: parsing "no": invalid syntax)`,
|
||||
})
|
||||
}
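
// By contrast, here is an illustrative sketch (an editor's addition, not part
// of the original commit) of the safe construction implied by the comment
// above: a.Boolean only matches input that strconv.ParseBool accepts, so the
// Boolean TokenMaker below can never be fed an unparseable value and cannot
// panic. The function name demoSafeBooleanToken is hypothetical.
func demoSafeBooleanToken() {
	var a, tok = tokenize.A, tokenize.T
	safe := tok.Boolean("BOOL", a.Boolean) // a.Boolean pre-validates the input
	tokenizer := tokenize.New(safe)
	if result, err := tokenizer("true"); err == nil {
		fmt.Println(result.Tokens()) // expected to print: [BOOL(true)]
	}
}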

func TestTokenMakers(t *testing.T) {
	var c, a, tok = tokenize.C, tokenize.A, tokenize.T
	AssertTokenMakers(t, []TokenMakerT{
		{`empty token`, tok.Str("A", c.ZeroOrMore(a.Digit)),
			[]tokenize.Token{{Type: "A", Value: ""}}},

		{`Ѝюج literal \string`, tok.Str("B", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "B", Value: `Ѝюج literal \string`}}},

		{`Ѝюجinterpreted \n string \u2318`, tok.StrInterpreted("C", c.OneOrMore(a.AnyRune)),
			[]tokenize.Token{{Type: "C", Value: "Ѝюجinterpreted \n string ⌘"}}},

		{"Ø*", tok.Byte("Q", a.AnyRune), []tokenize.Token{{Type: "Q", Value: byte('Ø')}}},
		{"ROCKS", c.OneOrMore(tok.Byte("bar", a.ASCII)), []tokenize.Token{
			{Type: "bar", Value: byte('R')},
			{Type: "bar", Value: byte('O')},
			{Type: "bar", Value: byte('C')},
			{Type: "bar", Value: byte('K')},
			{Type: "bar", Value: byte('S')},
		}},

		{"Ø*", tok.Rune("P", a.AnyRune), []tokenize.Token{{Type: "P", Value: rune('Ø')}}},

		{`2147483647XYZ`, tok.Int("D", a.Integer), []tokenize.Token{{Type: "D", Value: int(2147483647)}}},
		{`-2147483647XYZ`, tok.Int("D", a.Signed(a.Integer)), []tokenize.Token{{Type: "D", Value: int(-2147483647)}}},
		{`127XYZ`, tok.Int8("E", a.Integer), []tokenize.Token{{Type: "E", Value: int8(127)}}},
		{`-127XYZ`, tok.Int8("E", a.Signed(a.Integer)), []tokenize.Token{{Type: "E", Value: int8(-127)}}},
		{`32767XYZ`, tok.Int16("F", a.Integer), []tokenize.Token{{Type: "F", Value: int16(32767)}}},
		{`-32767XYZ`, tok.Int16("F", a.Signed(a.Integer)), []tokenize.Token{{Type: "F", Value: int16(-32767)}}},
		{`2147483647XYZ`, tok.Int32("G", a.Integer), []tokenize.Token{{Type: "G", Value: int32(2147483647)}}},
		{`-2147483647XYZ`, tok.Int32("G", a.Signed(a.Integer)), []tokenize.Token{{Type: "G", Value: int32(-2147483647)}}},
		{`-9223372036854775807XYZ`, tok.Int64("H", a.Signed(a.Integer)), []tokenize.Token{{Type: "H", Value: int64(-9223372036854775807)}}},

		{`4294967295`, tok.Uint("I", a.Integer), []tokenize.Token{{Type: "I", Value: uint(4294967295)}}},
		{`255XYZ`, tok.Uint8("J", a.Integer), []tokenize.Token{{Type: "J", Value: uint8(255)}}},
		{`65535XYZ`, tok.Uint16("K", a.Integer), []tokenize.Token{{Type: "K", Value: uint16(65535)}}},
		{`4294967295XYZ`, tok.Uint32("L", a.Integer), []tokenize.Token{{Type: "L", Value: uint32(4294967295)}}},
		{`18446744073709551615XYZ`, tok.Uint64("M", a.Integer), []tokenize.Token{{Type: "M", Value: uint64(18446744073709551615)}}},

		{`3.1415=PI`, tok.Float32("N", a.Float), []tokenize.Token{{Type: "N", Value: float32(3.1415)}}},
		{`24.19287=PI`, tok.Float64("O", a.Float), []tokenize.Token{{Type: "O", Value: float64(24.19287)}}},

		{`1tTtrueTRUETrue`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
			{Type: "P", Value: true},
		}},

		{`0fFfalseFALSEFalse`, c.OneOrMore(tok.Boolean("P", a.Boolean)), []tokenize.Token{
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
			{Type: "P", Value: false},
		}},
	})
}

// I know, this is hell, but that's the whole point for this test :->
func TestCombination(t *testing.T) {
	var c, a, m = tokenize.C, tokenize.A, tokenize.M
	demonic := c.Seq(
		c.Optional(a.SquareOpen),
		m.Trim(
			c.Seq(
				c.Optional(a.Blanks),
				c.Repeated(3, a.AngleClose),
				m.ByCallback(c.OneOrMore(a.StrNoCase("hello")), func(s string) string {
					return fmt.Sprintf("%d", len(s))
				}),
				m.Replace(c.Separated(a.Comma, c.Optional(a.Blanks)), ", "),
				m.ToUpper(c.Min(1, a.ASCIILower)),
				m.Drop(a.Excl),
				c.Repeated(3, a.AngleOpen),
				c.Optional(a.Blanks),
			),
			" \t",
		),
		c.Optional(a.SquareClose),
	)

	AssertHandlers(t, []HandlerT{
		{"[ \t >>>Hello, world!<<< ]", demonic, true, "[>>>5, WORLD<<<]"},
		{"[ \t >>>Hello, world!<<< ", demonic, true, "[>>>5, WORLD<<<"},
		{">>>HellohellO, world!<<< ]", demonic, true, ">>>10, WORLD<<<]"},
		{"[ \t >>>HellohellO , , , world!<<< ", demonic, true, "[>>>10, WORLD<<<"},
	})
}
@@ -0,0 +1,47 @@
package tokenize2

import (
	"fmt"
)

// Token defines a lexical token as produced by tokenize.Handlers.
//
// Both the Type and Value fields are optional and can be filled
// with data at will.
//
// The use of the Type field is to let a tokenizer communicate to
// the parser what type of token it's handling.
//
// The use of the Value field is to store any kind of data along with the token.
// One use of this can be found in the built-in token maker functions like
// MakeInt8Token(), which store an interpreted version of the input string
// in the Value field.
type Token struct {
	Type  interface{} // optional token type, can be any type that a parser author sees fit
	Value interface{} // optional token value, of any type as well
}

// String returns a string representation of the Token in the form
// <type>(<value>). String values are quoted, nested token lists are
// rendered as-is, and other value types are prefixed with their Go
// type, e.g. (int8)24.
func (t Token) String() string {
	tokenType := ""
	if t.Type != nil {
		tokenType = fmt.Sprintf("%v", t.Type)
	}

	value := ""
	if t.Value != nil {
		switch t.Value.(type) {
		case []*Token:
			// A nested token list renders each token using its own String().
			return fmt.Sprintf("%v%v", tokenType, t.Value)
		case string:
			// Strings are quoted.
			value = fmt.Sprintf("%q", t.Value)
		case rune:
			value = fmt.Sprintf("%v", t.Value)
		case bool:
			value = fmt.Sprintf("%v", t.Value)
		default:
			// Other types are prefixed with their Go type.
			value = fmt.Sprintf("(%T)%v", t.Value, t.Value)
		}
	}

	return fmt.Sprintf("%v(%s)", tokenType, value)
}
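
// Illustrative sketch (an editor's addition, not part of the original commit),
// showing how String() renders a few Value types, following the switch above.
// The function name demoTokenString is hypothetical.
func demoTokenString() {
	fmt.Println(Token{Type: "STR", Value: "hello"})  // prints: STR("hello") — strings are quoted
	fmt.Println(Token{Type: "INT", Value: int8(42)}) // prints: INT((int8)42) — via the default case
	fmt.Println(Token{Type: "ON", Value: true})      // prints: ON(true)
}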
@@ -0,0 +1,41 @@
// Package tokenize provides tooling to build a tokenizer in
// parser/combinator-style, used to feed data to the parser.
package tokenize2

import (
	"fmt"
)

// Func is the function signature as returned by New: a function that takes
// any supported type of input, executes a tokenizer run and returns an
// API struct (possibly nil) and an error (possibly nil).
type Func func(input interface{}) (*API, error)

// New instantiates a new tokenizer.
//
// The tokenizer is a tokenizing state machine, in which tokenize.Handler
// functions are used to move the state machine forward during tokenizing.
// Using the New function, you can wrap a tokenize.Handler in a simple way,
// making it possible to feed some input to the handler and retrieve the
// tokenizing results.
//
// The tokenHandler argument points the tokenizer to the tokenize.Handler
// function that must be executed at the start of the tokenizing process.
// From there on, other tokenize.Handler functions can be invoked recursively
// to implement the tokenizing process.
//
// This function returns a function that can be invoked to run the tokenizer
// against the provided input data. For an overview of allowed inputs, take a
// look at the documentation for parsekit.read.New().
func New(tokenHandler Handler) Func {
	return func(input interface{}) (*API, error) {
		api := NewAPI(input)
		ok := tokenHandler(api)

		if !ok {
			// A failed run currently reports the zero cursor position.
			err := fmt.Errorf("mismatch at %s", Cursor{})
			return nil, err
		}
		return api, nil
	}
}
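
// Illustrative sketch (an editor's addition, not part of the original commit):
// wrapping a single handler with New and running it against a string input.
// It assumes the package-level A atoms and the API.String() method that the
// tests in this commit use; the function name demoNew is hypothetical.
func demoNew() {
	tokenizer := New(A.Str("hello"))
	if api, err := tokenizer("hello world"); err == nil {
		fmt.Println(api.String()) // expected to print: hello
	}
}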
@@ -0,0 +1,223 @@
package tokenize2_test

import (
	"fmt"
	"io"
	"strings"
	"testing"
	"unicode/utf8"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize2"
)

// TODO For error handling, it would be really cool if, for example, the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...) for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleNew() {
	// Build the tokenizer for ip/mask.
	var c, a, t = tokenize.C, tokenize.A, tokenize.T
	ip := t.Str("ip", a.IPv4)
	mask := t.Int8("mask", a.IPv4CIDRMask)
	cidr := c.Seq(ip, a.Slash, mask)
	tokenizer := tokenize.New(cidr)

	for _, input := range []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	} {
		// The tokenizer returns an API struct and an error, which is nil on success.
		result, err := tokenizer(input)

		if err == nil {
			fmt.Printf("Result: %s\n", result.Tokens())
		} else {
			fmt.Printf("Error: %s\n", err)
		}
	}
	// Output:
	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}

func TestCallingNextRune_ReturnsNextRune(t *testing.T) {
	api := makeTokenizeAPI()
	r, _ := api.NextRune()
	AssertEqual(t, 'T', r, "first rune")
}

func TestInputCanAcceptRunesFromReader(t *testing.T) {
	i := makeTokenizeAPI()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "Tes", i.String(), "i.String()")
}

func TestCallingNextRuneTwice_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.NextRune()
		},
		Regexp: true,
		Expect: `tokenize\.API\.NextRune\(\): NextRune\(\) called at /.*_test\.go:\d+ ` +
			`without a prior call to Accept\(\)`,
	})
}

func TestCallingAcceptWithoutCallingNextRune_Panics(t *testing.T) {
	api := makeTokenizeAPI()
	AssertPanic(t, PanicT{
		Function: api.Accept,
		Regexp:   true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*test\.go:\d+ ` +
			`without first calling NextRune\(\)`,
	})
}

func TestCallingAcceptAfterReadError_Panics(t *testing.T) {
	api := tokenize.NewAPI("")
	AssertPanic(t, PanicT{
		Function: func() {
			api.NextRune()
			api.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+` +
			`, but the prior call to NextRune\(\) failed`,
	})
}

func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Merge(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Merge(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Dispose(0)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}

func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			child := i.Fork()
			i.Fork()
			i.Dispose(child)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.Fork()
			g := i.Fork()
			i.Fork()
			i.Merge(g)
		},
		Regexp: true,
		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}

func TestForkingInput_ClearsLastRune(t *testing.T) {
	AssertPanic(t, PanicT{
		Function: func() {
			i := makeTokenizeAPI()
			i.NextRune()
			i.Fork()
			i.Accept()
		},
		Regexp: true,
		Expect: `tokenize\.API\.Accept\(\): Accept\(\) called at /.*_test\.go:\d+ without first calling NextRune\(\)`,
	})
}

func TestAccept_UpdatesCursor(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", i.Cursor().String(), "cursor 1")
	for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 1, column 7", i.Cursor().String(), "cursor 2")
	i.NextRune() // read "\n", cursor ends up at start of new line
	i.Accept()
	AssertEqual(t, "line 2, column 1", i.Cursor().String(), "cursor 3")
	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
		i.NextRune()
		i.Accept()
	}
	AssertEqual(t, "line 3, column 5", i.Cursor().String(), "cursor 4")
}

func TestWhenCallingNextruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from NextRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from NextRune()")
}

func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()
	i.NextRune()
	i.Accept()
	r, err := i.NextRune()
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	i.Dispose(child)      // brings the read offset back to the start
	r, err = i.NextRune() // so here we should see the first rune again
	AssertEqual(t, 'X', r, "returned rune from 3rd NextRune()")
	AssertEqual(t, true, err == nil, "returned error from 3rd NextRune()")
}

func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}
@@ -0,0 +1,110 @@
package tokenize2

import (
	"testing"
)

func TestFork_CreatesForkOfInputAtSameCursorPosition(t *testing.T) {
	// Create input, accept the first rune.
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept() // T
	AssertEqual(t, "T", i.String(), "accepted rune in input")
	// Fork
	child := i.Fork()
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 1, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 1, i.stackFrame.offset, "child offset")
	// Accept two runes via fork.
	i.NextRune()
	i.Accept() // e
	i.NextRune()
	i.Accept() // s
	AssertEqual(t, "es", i.String(), "result runes in fork")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 1, i.stackFrames[i.stackLevel-1].offset, "parent offset")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "child cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "child offset")
	// Merge fork back into parent
	i.Merge(child)
	i.Dispose(child)
	AssertEqual(t, "Tes", i.String(), "result runes in parent Input after Merge()")
	AssertEqual(t, 3, i.stackFrame.cursor.Byte, "parent cursor.Byte")
	AssertEqual(t, 3, i.stackFrame.offset, "parent offset")
}

func TestGivenForkedChildWhichAcceptedRune_AfterMerging_RuneEndsUpInParentResult(t *testing.T) {
	i := NewAPI("Testing")
	i.NextRune()
	i.Accept()
	f1 := i.Fork()
	i.NextRune()
	i.Accept()
	f2 := i.Fork()
	i.NextRune()
	i.Accept()
	AssertEqual(t, "s", i.String(), "f2 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f2 offset")
	i.Merge(f2)
	i.Dispose(f2)
	AssertEqual(t, "es", i.String(), "f1 String()")
	AssertEqual(t, 3, i.stackFrame.offset, "f1 offset")
	i.Merge(f1)
	i.Dispose(f1)
	AssertEqual(t, "Tes", i.String(), "top-level API String()")
	AssertEqual(t, 3, i.stackFrame.offset, "top-level API offset")
}

func TestCallingAcceptAfterNextRune_AcceptsRuneAndMovesReadOffsetForward(t *testing.T) {
	i := NewAPI("Testing")
	r, _ := i.NextRune()
	AssertEqual(t, 'T', r, "result from 1st call to NextRune()")
	AssertTrue(t, i.lastRune == 'T', "API.lastRune after NextRune() is not 'T'")
	AssertTrue(t, i.runeRead, "API.runeRead after NextRune() is not true")
	i.Accept()
	AssertTrue(t, i.runeRead == false, "API.runeRead after Accept() is not false")
	AssertEqual(t, 1, i.stackFrame.offset, "API.stackFrame.offset")
	r, _ = i.NextRune()
	AssertEqual(t, 'e', r, "result from 2nd call to NextRune()")
}

func TestFlushInput(t *testing.T) {
	api := NewAPI("cool")

	// Flushing without any read data is okay. FlushInput() will return
	// false in this case, and nothing else happens.
	AssertTrue(t, api.FlushInput() == false, "flush input at start")

	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertTrue(t, api.FlushInput() == true, "flush input after reading some data")
	AssertEqual(t, 0, api.stackFrame.offset, "offset after flush input")

	AssertTrue(t, api.FlushInput() == false, "flush input after flush input")

	// Read offset is now zero, but reading should continue after "co".
	api.NextRune()
	api.Accept()
	api.NextRune()
	api.Accept()

	AssertEqual(t, "cool", api.String(), "end result")
}

func AssertEqual(t *testing.T, expected interface{}, actual interface{}, forWhat string) {
	if expected != actual {
		t.Errorf(
			"Unexpected value for %s:\nexpected: %q\nactual: %q",
			forWhat, expected, actual)
	}
}

func AssertTrue(t *testing.T, b bool, assertion string) {
	if !b {
		t.Errorf("Assertion %s is false", assertion)
	}
}