package tokenize_test
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
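
// The two benchmarks below compare ways of zeroing a small slice. The
// range-based loop in BenchmarkMemclrOptimization matches the pattern that
// the Go compiler can lower to an optimized memory clear (the "memclr"
// optimization), while BenchmarkCodedClear zeroes each element with an
// explicit assignment. Both can be run side by side with, for example:
//
//	go test -bench='MemclrOptimization|CodedClear'
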
func BenchmarkMemclrOptimization(b *testing.B) {
	// TODO: use or clean up this benchmark and the next one. I'm playing around here.
	type s struct {
		a int
		b string
	}
	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for i := 0; i < b.N; i++ {
		for i := range x {
			x[i] = s{}
		}
	}
}
func BenchmarkCodedClear(b *testing.B) {
	type s struct {
		a int
		b string
	}
	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for i := 0; i < b.N; i++ {
		x[0] = s{}
		x[1] = s{}
		x[2] = s{}
	}
}
func ExampleNewAPI() {
	tokenize.NewAPI("The input that the API will handle")
}
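
// Note, as the examples below demonstrate: Peek() inspects the input without
// consuming it. Input is only consumed by accepting runes (which also adds
// them to the output) or by moving the cursor past them.
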
func ExampleAPI_PeekByte() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")
	r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'A'
	r2, _, err := tokenAPI.Input.Rune.Peek(20) // 'P'
	r3, _, err := tokenAPI.Input.Rune.Peek(21) // 'I'
	_, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
	// Output:
	// API EOF
}
func ExampleAPI_PeekRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
	// 'Д' (U+0414) encodes as two UTF-8 bytes, so the rune following it
	// starts at offset 21, not 20.
	r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'Д'
	r2, _, err := tokenAPI.Input.Rune.Peek(21) // 'P'
	r3, _, err := tokenAPI.Input.Rune.Peek(22) // 'I'
	_, _, err = tokenAPI.Input.Rune.Peek(100) // EOF
	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
	// Output:
	// ДPI EOF
}
func ExampleAPI_AcceptRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")
	// Read 'T' and accept it into the API output data.
	r, _, _ := tokenAPI.Input.Rune.Peek(0)
	tokenAPI.Input.Rune.Accept(r)
	// Read 'h' and accept it into the API output data.
	r, _, _ = tokenAPI.Input.Rune.Peek(0)
	tokenAPI.Input.Rune.Accept(r)
	// Read 'e', but do not accept it into the API output data.
	r, _, _ = tokenAPI.Input.Rune.Peek(0)
	fmt.Printf("API results: %q\n", tokenAPI.Output.String())
	// Output:
	// API results: "Th"
}
func ExampleAPI_AcceptRunes() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")
	// Peeks at the first two runes 'T' and 'h'.
	r0, _, _ := tokenAPI.Input.Rune.Peek(0)
	r1, _, _ := tokenAPI.Input.Rune.Peek(1)
	// Peeks at the third rune 'e'.
	tokenAPI.Input.Rune.Peek(2)
	// Accepts only 'T' and 'h' into the API results.
	tokenAPI.Input.Rune.AcceptMulti(r0, r1)
	fmt.Printf("API results: %q\n", tokenAPI.Output.String())
	// Output:
	// API results: "Th"
}
func ExampleAPI_SkipRune() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")
	for {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		// EOF reached.
		if err != nil {
			break
		}
		// Only accept runes that are vowels.
		if strings.ContainsRune("aeiouAEIOU", r) {
			tokenAPI.Input.Rune.Accept(r)
		} else {
			// Skip non-vowels: move the cursor past them without
			// adding them to the output.
			tokenAPI.Input.Rune.MoveCursor(r)
		}
	}
	fmt.Printf("API results: %q\n", tokenAPI.Output.String())
	// Output:
	// API results: "eiuaeAIiae"
}
func ExampleAPI_modifyingResults() {
	tokenAPI := tokenize.NewAPI("")

	tokenAPI.Output.AddString("Some runes")
	tokenAPI.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
	tokenAPI.Output.AddRunes(' ', 'i', 'n', ' ')
	tokenAPI.Output.AddString("various ways")
	fmt.Printf("API result first 10 runes: %q\n", tokenAPI.Output.Runes()[0:10])
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())

	tokenAPI.Output.SetString("new ")
	tokenAPI.Output.AddString("set ")
	tokenAPI.Output.AddString("of ")
	tokenAPI.Output.AddRunes('r', 'u', 'n', 'e', 's')
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
	fmt.Printf("API result runes: %q\n", tokenAPI.Output.Runes())
	fmt.Printf("API third rune: %q\n", tokenAPI.Output.Rune(2))

	tokenAPI.Output.AddToken(tokenize.Token{
		Type:  42,
		Value: "towel",
	})
	tokenAPI.Output.AddToken(tokenize.Token{
		Type:  73,
		Value: "Zaphod",
	})
	fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens())
	fmt.Printf("API second result token: %v\n", tokenAPI.Output.Token(1))

	// Output:
	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
	// API result runes as string: "Some runes added in various ways"
	// API result runes as string: "new set of runes"
	// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
	// API third rune: 'w'
	// API result tokens: [42("towel") 73("Zaphod")]
	// API second result token: 73("Zaphod")
}
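
// As the example above shows, the output side of the API holds both a rune
// buffer (read via Runes()/String()/Rune()) and a list of tokens (read via
// Tokens()/Token()), and the two can be filled and inspected separately.
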
// func ExampleAPI_Reset() {
//	tokenAPI := tokenize.NewAPI("Very important input!")
//	r, _, _ := tokenAPI.Input.Rune.Peek(0) // read 'V'
//	tokenAPI.Input.Rune.Accept(r)
//	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
//	tokenAPI.Input.Rune.Accept(r)
//	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//	// Reset input and output.
//	tokenAPI.Input.Reset()
//	tokenAPI.Output.Reset()
//	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//	// So, when doing the same read operations, the same data are read.
//	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'V'
//	tokenAPI.Input.Rune.Accept(r)
//	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
//	tokenAPI.Input.Rune.Accept(r)
//	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//	// Output:
//	// API results: "Ve" at line 1, column 3
//	// API results: "" at start of file
//	// API results: "Ve" at line 1, column 3
// }
// func ExampleAPI_Fork() {
//	// This custom Handler checks for input 'a', 'b' or 'c'.
//	abcHandler := func(t *tokenize.API) bool {
//		a := tokenize.A
//		for _, r := range []rune{'a', 'b', 'c'} {
//			child := t.Fork() // fork, so we won't change parent t
//			if a.Char(r)(t) {
//				t.Merge(child) // merge the child's results into the parent
//				t.Dispose(child) // return to the parent level
//				return true // and report a successful match
//			}
//			t.Dispose(child) // return to the parent level
//		}
//		// If we get here, then no match was found. Return false to communicate
//		// this to the caller.
//		return false
//	}
//	// Note: a custom Handler is normally not what you need.
//	// You can make use of the parser/combinator tooling to make the
//	// implementation a lot simpler and to take care of forking at
//	// the appropriate places. The handler from above can be replaced with:
//	simpler := tokenize.A.CharRange('a', 'c')
//	result, err := tokenize.New(abcHandler)("another test")
//	fmt.Println(result, err)
//	result, err = tokenize.New(simpler)("curious")
//	fmt.Println(result, err)
//	result, err = tokenize.New(abcHandler)("bang on!")
//	fmt.Println(result, err)
//	result, err = tokenize.New(abcHandler)("not a match")
//	fmt.Println(result, err)
//	// Output:
//	// a <nil>
//	// c <nil>
//	// b <nil>
//	// <nil> mismatch at start of file
// }
// func ExampleAPI_Merge() {
//	tokenHandler := func(t *tokenize.API) bool {
//		child1 := t.Fork()
//		r0, _, _ := t.Input.Rune.Peek(0) // reads 'H'
//		r1, _, _ := t.Input.Rune.Peek(1) // reads 'i'
//		t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted into the API results for child1
//		child2 := t.Fork()
//		r0, _, _ = t.Input.Rune.Peek(0) // reads ' '
//		r1, _, _ = t.Input.Rune.Peek(1) // reads 'm'
//		t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted into the API results for child2
//		t.Dispose(child2) // but they are not merged and therefore not used by child1
//		t.Merge(child1) // we merge child1, which has read 'H' and 'i' only
//		t.Dispose(child1) // and clean up child1 to return to the parent
//		return true
//	}
//	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
//	fmt.Println(result.String())
//	// Output:
//	// Hi
// }
// func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
//	tokenAPI := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
//	// Fork a few levels.
//	child1 := tokenAPI.Fork()
//	child2 := tokenAPI.Fork()
//	child3 := tokenAPI.Fork()
//	child4 := tokenAPI.Fork()
//	// Read a rune 'a' from child4.
//	r, _, _ := tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'a', r, "child4 rune 1")
//	tokenAPI.Input.Rune.Accept(r)
//	AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")
//	// Read another rune 'b' from child4.
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'b', r, "child4 rune 2")
//	tokenAPI.Input.Rune.Accept(r)
//	AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")
//	// Merge "ab" from child4 to child3.
//	tokenAPI.Merge(child4)
//	AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")
//	// Read some more from child4.
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'c', r, "child4 rune 3")
//	tokenAPI.Input.Rune.Accept(r)
//	AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 3")
//	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")
//	// Merge "c" from child4 to child3.
//	tokenAPI.Merge(child4)
//	// And dispose of child4, making child3 the active stack level.
//	tokenAPI.Dispose(child4)
//	// Child3 should now have the combined results "abc" from child4's work.
//	AssertEqual(t, "abc", tokenAPI.Output.String(), "child3 after merge of child4")
//	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
//	// Now read some data from child3.
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'd', r, "child3 rune 4")
//	tokenAPI.Input.Rune.Accept(r)
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'e', r, "child3 rune 5")
//	tokenAPI.Input.Rune.Accept(r)
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	AssertEqual(t, 'f', r, "child3 rune 6")
//	tokenAPI.Input.Rune.Accept(r)
//	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")
//	// Temporarily create some new forks from here, but don't use their outcome.
//	child3sub1 := tokenAPI.Fork()
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	tokenAPI.Input.Rune.Accept(r)
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	tokenAPI.Input.Rune.Accept(r)
//	child3sub2 := tokenAPI.Fork()
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	tokenAPI.Input.Rune.Accept(r)
//	tokenAPI.Merge(child3sub2) // do merge sub2 down to sub1
//	tokenAPI.Dispose(child3sub2) // and dispose of sub2
//	tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging
//	// Instead, merge the results from before this forking detour from child3
//	// to child2 and dispose of child3.
//	tokenAPI.Merge(child3)
//	tokenAPI.Dispose(child3)
//	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child2 total result after merge of child3")
//	AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor child2 after merge child3")
//	// Merge child2 to child1 and dispose of it.
//	tokenAPI.Merge(child2)
//	tokenAPI.Dispose(child2)
//	// Merge child1 a few times to the top level api.
//	tokenAPI.Merge(child1)
//	tokenAPI.Merge(child1)
//	tokenAPI.Merge(child1)
//	tokenAPI.Merge(child1)
//	// And dispose of it.
//	tokenAPI.Dispose(child1)
//	// Read some data from the top level api.
//	r, _, _ = tokenAPI.Input.Rune.Peek(0)
//	tokenAPI.Input.Rune.Accept(r)
//	AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
//	AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
// }
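
// AssertEqual is a small test helper, defined elsewhere in this package's
// test files, that fails the test when the actual value does not equal the
// expected one.
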
func TestClearData(t *testing.T) {
	tokenAPI := tokenize.NewAPI("Laphroaig")
	r, _, _ := tokenAPI.Input.Rune.Peek(0) // Read 'L'
	tokenAPI.Input.Rune.Accept(r) // Add to runes
	r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'a'
	tokenAPI.Input.Rune.Accept(r) // Add to runes
	tokenAPI.Output.ClearData() // Clear the runes, giving us a fresh start.
	r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'p'
	tokenAPI.Input.Rune.Accept(r) // Add to runes
	r, _, _ = tokenAPI.Input.Rune.Peek(0) // Read 'r'
	tokenAPI.Input.Rune.Accept(r) // Add to runes
	AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
}
// func TestMergeScenariosForTokens(t *testing.T) {
//	tokenAPI := tokenize.NewAPI("")
//	token1 := tokenize.Token{Value: 1}
//	token2 := tokenize.Token{Value: 2}
//	token3 := tokenize.Token{Value: 3}
//	token4 := tokenize.Token{Value: 4}
//	tokenAPI.Output.SetTokens(token1)
//	tokens := tokenAPI.Output.Tokens()
//	AssertEqual(t, 1, len(tokens), "Tokens 1")
//	child := tokenAPI.Fork()
//	tokens = tokenAPI.Output.Tokens()
//	AssertEqual(t, 0, len(tokens), "Tokens 2")
//	tokenAPI.Output.AddToken(token2)
//	tokenAPI.Merge(child)
//	tokenAPI.Dispose(child)
//	tokens = tokenAPI.Output.Tokens()
//	AssertEqual(t, 2, len(tokens), "Tokens 3")
//	child = tokenAPI.Fork()
//	tokenAPI.Output.AddToken(token3)
//	tokenAPI.Output.Reset()
//	tokenAPI.Output.AddToken(token4)
//	tokenAPI.Merge(child)
//	tokenAPI.Dispose(child)
//	tokens = tokenAPI.Output.Tokens()
//	AssertEqual(t, 3, len(tokens), "Tokens 4")
//	AssertEqual(t, 1, tokenAPI.Output.TokenValue(0).(int), "Tokens 4, value 0")
//	AssertEqual(t, 2, tokenAPI.Output.TokenValue(1).(int), "Tokens 4, value 1")
//	AssertEqual(t, 4, tokenAPI.Output.TokenValue(2).(int), "Tokens 4, value 2")
// }