package tokenize_test

import (
	"fmt"
	"strings"
	"testing"

	"git.makaay.nl/mauricem/go-parsekit/tokenize"
)

func BenchmarkMemclrOptimization(b *testing.B) {
	// TODO Use or clean up this one and the next. I'm playing around here.
	type s struct {
		a int
		b string
	}
	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for i := 0; i < b.N; i++ {
		// The compiler can recognize this clear-in-range-loop pattern
		// and replace it with a single memclr call.
		for i := range x {
			x[i] = s{}
		}
	}
}

func BenchmarkCodedClear(b *testing.B) {
	type s struct {
		a int
		b string
	}
	x := []s{{10, "hoi"}, {20, "doei"}, {30, "jadag"}}
	for i := 0; i < b.N; i++ {
		// Clear every element explicitly, as a baseline for the
		// memclr-optimized loop above.
		x[0] = s{}
		x[1] = s{}
		x[2] = s{}
	}
}

func ExampleNewAPI() {
	tokenize.NewAPI("The input that the API will handle")
}

func ExampleAPI_PeekByte() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'A'
	r2, _, err := tokenAPI.Input.Rune.Peek(20) // 'P'
	r3, _, err := tokenAPI.Input.Rune.Peek(21) // 'I'
	_, _, err = tokenAPI.Input.Rune.Peek(100)  // EOF

	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)

	// Output:
	// API EOF
}

func ExampleAPI_PeekRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")

	r1, _, err := tokenAPI.Input.Rune.Peek(19) // 'Д', which is 2 bytes wide, so the next rune starts at offset 21
	r2, _, err := tokenAPI.Input.Rune.Peek(21) // 'P'
	r3, _, err := tokenAPI.Input.Rune.Peek(22) // 'I'
	_, _, err = tokenAPI.Input.Rune.Peek(100)  // EOF

	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)

	// Output:
	// ДPI EOF
}

func ExampleAPI_AcceptRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")

	// Reads 'T' and accepts it to the API output data.
	r, _, _ := tokenAPI.Input.Rune.Peek(0)
	tokenAPI.Input.Rune.Accept(r)

	// Reads 'h' and accepts it to the API output data.
	r, _, _ = tokenAPI.Input.Rune.Peek(0)
	tokenAPI.Input.Rune.Accept(r)

	// Reads 'e', but does not accept it to the API output data.
	r, _, _ = tokenAPI.Input.Rune.Peek(0)

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "Th"
}

func ExampleAPI_AcceptRunes() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	// Peeks at the first two runes 'T' and 'h'.
	r0, _, _ := tokenAPI.Input.Rune.Peek(0)
	r1, _, _ := tokenAPI.Input.Rune.Peek(1)

	// Peeks at the third rune 'e'.
	tokenAPI.Input.Rune.Peek(2)

	// Accepts only 'T' and 'h' into the API results.
	tokenAPI.Input.Rune.AcceptMulti(r0, r1)

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "Th"
}
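
// A minimal extra sketch, built only from calls that already appear in the
// examples above: it folds the Peek/Accept sequence into a loop that accepts
// runes up to (but not including) the first space. The expected output
// assumes that Accept moves the read cursor, as ExampleAPI_AcceptRune shows.
func ExampleAPI_acceptLoop() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	for {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		if err != nil || r == ' ' {
			break
		}
		tokenAPI.Input.Rune.Accept(r)
	}

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "The"
}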

func ExampleAPI_SkipRune() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	for {
		r, _, err := tokenAPI.Input.Rune.Peek(0)

		// EOF reached.
		if err != nil {
			break
		}

		// Only accept runes that are vowels; skip over all other runes
		// by moving the cursor without accepting.
		if strings.ContainsRune("aeiouAEIOU", r) {
			tokenAPI.Input.Rune.Accept(r)
		} else {
			tokenAPI.Input.Rune.MoveCursor(r)
		}
	}

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "eiuaeAIiae"
}

func ExampleAPI_modifyingResults() {
	tokenAPI := tokenize.NewAPI("")

	tokenAPI.Output.AddString("Some runes")
	tokenAPI.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
	tokenAPI.Output.AddRunes(' ', 'i', 'n', ' ')
	tokenAPI.Output.AddString("various ways")
	fmt.Printf("API result first 10 runes: %q\n", tokenAPI.Output.Runes()[0:10])
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())

	tokenAPI.Output.SetString("new ")
	tokenAPI.Output.AddString("set ")
	tokenAPI.Output.AddString("of ")
	tokenAPI.Output.AddRunes('r', 'u', 'n', 'e', 's')
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
	fmt.Printf("API result runes: %q\n", tokenAPI.Output.Runes())
	fmt.Printf("API third rune: %q\n", tokenAPI.Output.Rune(2))

	tokenAPI.Output.AddToken(tokenize.Token{Type: 42, Value: "towel"})
	tokenAPI.Output.AddToken(tokenize.Token{Type: 73, Value: "Zaphod"})
	fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens())
	fmt.Printf("API second result token: %v\n", tokenAPI.Output.Token(1))

	// Output:
	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
	// API result runes as string: "Some runes added in various ways"
	// API result runes as string: "new set of runes"
	// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
	// API third rune: 'w'
	// API result tokens: [42("towel") 73("Zaphod")]
	// API second result token: 73("Zaphod")
}
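
// A small extra sketch that combines rune output and token output in one
// pass: it accepts the leading digits and then wraps them in a single Token.
// The Token type value 1 is an arbitrary placeholder, just like the 42 and
// 73 used in ExampleAPI_modifyingResults.
func ExampleAPI_runesAndTokens() {
	tokenAPI := tokenize.NewAPI("42 towels")

	for {
		r, _, err := tokenAPI.Input.Rune.Peek(0)
		if err != nil || r < '0' || r > '9' {
			break
		}
		tokenAPI.Input.Rune.Accept(r)
	}
	tokenAPI.Output.AddToken(tokenize.Token{Type: 1, Value: tokenAPI.Output.String()})

	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
	fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens())

	// Output:
	// API result runes as string: "42"
	// API result tokens: [1("42")]
}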

// func ExampleAPI_Reset() {
// 	tokenAPI := tokenize.NewAPI("Very important input!")
//
// 	r, _, _ := tokenAPI.Input.Rune.Peek(0) // read 'V'
// 	tokenAPI.Input.Rune.Accept(r)
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
// 	tokenAPI.Input.Rune.Accept(r)
// 	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//
// 	// Reset input and output.
// 	tokenAPI.Input.Reset()
// 	tokenAPI.Output.Reset()
// 	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//
// 	// Then, performing the same read operations reads the same data.
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'V'
// 	tokenAPI.Input.Rune.Accept(r)
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'e'
// 	tokenAPI.Input.Rune.Accept(r)
// 	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())
//
// 	// Output:
// 	// API results: "Ve" at line 1, column 3
// 	// API results: "" at start of file
// 	// API results: "Ve" at line 1, column 3
// }

// func ExampleAPI_Fork() {
// 	// This custom Handler checks for input 'a', 'b' or 'c'.
// 	abcHandler := func(t *tokenize.API) bool {
// 		a := tokenize.A
// 		for _, r := range []rune{'a', 'b', 'c'} {
// 			child := t.Fork() // fork, so we won't change parent t
// 			if a.Char(r)(t) {
// 				t.Merge(child)   // accept results into the parent of the child
// 				t.Dispose(child) // return to the parent level
// 				return true      // and report a successful match
// 			}
// 			t.Dispose(child) // return to the parent level
// 		}
// 		// If we get here, then no match was found. Return false to
// 		// communicate this to the caller.
// 		return false
// 	}
//
// 	// Note: a custom Handler is normally not what you need. You can make
// 	// use of the parser/combinator tooling to make the implementation a
// 	// lot simpler and to take care of forking at the appropriate places.
// 	// The handler from above can be replaced with:
// 	simpler := tokenize.A.CharRange('a', 'c')
//
// 	result, err := tokenize.New(abcHandler)("another test")
// 	fmt.Println(result, err)
//
// 	result, err = tokenize.New(simpler)("curious")
// 	fmt.Println(result, err)
//
// 	result, err = tokenize.New(abcHandler)("bang on!")
// 	fmt.Println(result, err)
//
// 	result, err = tokenize.New(abcHandler)("not a match")
// 	fmt.Println(result, err)
//
// 	// Output:
// 	// a
// 	// c
// 	// b
// 	// mismatch at start of file
// }
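
// A live sketch of the "simpler" route from the commented-out Fork example
// above, assuming that tokenize.New and the tokenize.A.CharRange handler
// still behave as shown there. No Output comment is attached, so the example
// is compiled but not verified when the tests run.
func Example_charRangeSketch() {
	simpler := tokenize.A.CharRange('a', 'c')
	result, err := tokenize.New(simpler)("curious")
	fmt.Println(result, err)
}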

// func ExampleAPI_Merge() {
// 	tokenHandler := func(t *tokenize.API) bool {
// 		child1 := t.Fork()
//
// 		r0, _, _ := t.Input.Rune.Peek(0) // reads 'H'
// 		r1, _, _ := t.Input.Rune.Peek(1) // reads 'i'
// 		t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child1
//
// 		child2 := t.Fork()
//
// 		r0, _, _ = t.Input.Rune.Peek(0)  // reads ' '
// 		r1, _, _ = t.Input.Rune.Peek(1)  // reads 'm'
// 		t.Input.Rune.AcceptMulti(r0, r1) // these runes are accepted in the API results for child2
// 		t.Dispose(child2)                // but they are not merged and therefore not used by child1
//
// 		t.Merge(child1)   // we merge child1, which has read 'H' and 'i' only
// 		t.Dispose(child1) // and clean up child1 to return to the parent
// 		return true
// 	}
//
// 	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
// 	fmt.Println(result.String())
//
// 	// Output:
// 	// Hi
// }

// func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
// 	tokenAPI := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")
//
// 	// Fork a few levels.
// 	child1 := tokenAPI.Fork()
// 	child2 := tokenAPI.Fork()
// 	child3 := tokenAPI.Fork()
// 	child4 := tokenAPI.Fork()
//
// 	// Read a rune 'a' from child4.
// 	r, _, _ := tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'a', r, "child4 rune 1")
// 	tokenAPI.Input.Rune.Accept(r)
// 	AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")
//
// 	// Read another rune 'b' from child4.
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'b', r, "child4 rune 2")
// 	tokenAPI.Input.Rune.Accept(r)
// 	AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")
//
// 	// Merge "ab" from child4 to child3.
// 	tokenAPI.Merge(child4)
// 	AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")
//
// 	// Read some more from child4.
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'c', r, "child4 rune 3")
// 	tokenAPI.Input.Rune.Accept(r)
// 	AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 3")
// 	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")
//
// 	// Merge "c" from child4 to child3, then dispose of child4, making
// 	// child3 the active stack level.
// 	tokenAPI.Merge(child4)
// 	tokenAPI.Dispose(child4)
//
// 	// Child3 should now have the combined results "abc" from child4's work.
// 	AssertEqual(t, "abc", tokenAPI.Output.String(), "child3 after merge of child4")
// 	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")
//
// 	// Now read some data from child3.
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'd', r, "child3 rune 4")
// 	tokenAPI.Input.Rune.Accept(r)
//
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'e', r, "child3 rune 5")
// 	tokenAPI.Input.Rune.Accept(r)
//
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	AssertEqual(t, 'f', r, "child3 rune 6")
// 	tokenAPI.Input.Rune.Accept(r)
//
// 	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")
//
// 	// Temporarily create some new forks from here, but don't use their outcome.
// 	child3sub1 := tokenAPI.Fork()
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	tokenAPI.Input.Rune.Accept(r)
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	tokenAPI.Input.Rune.Accept(r)
// 	child3sub2 := tokenAPI.Fork()
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	tokenAPI.Input.Rune.Accept(r)
// 	tokenAPI.Merge(child3sub2)   // do merge sub2 down to sub1
// 	tokenAPI.Dispose(child3sub2) // and dispose of sub2
// 	tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging
//
// 	// Instead, merge the results from before this forking segue from
// 	// child3 to child2, and dispose of child3.
// 	tokenAPI.Merge(child3)
// 	tokenAPI.Dispose(child3)
//
// 	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child2 total result after merge of child3")
// 	AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor child2 after merge child3")
//
// 	// Merge child2 to child1 and dispose of it.
// 	tokenAPI.Merge(child2)
// 	tokenAPI.Dispose(child2)
//
// 	// Merge child1 a few times to the top-level API.
// 	tokenAPI.Merge(child1)
// 	tokenAPI.Merge(child1)
// 	tokenAPI.Merge(child1)
// 	tokenAPI.Merge(child1)
//
// 	// And dispose of it.
// 	tokenAPI.Dispose(child1)
//
// 	// Read some data from the top-level API.
// 	r, _, _ = tokenAPI.Input.Rune.Peek(0)
// 	tokenAPI.Input.Rune.Accept(r)
//
// 	AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
// 	AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
// }

func TestClearData(t *testing.T) {
	tokenAPI := tokenize.NewAPI("Laphroaig")

	r, _, _ := tokenAPI.Input.Rune.Peek(0) // read 'L'
	tokenAPI.Input.Rune.Accept(r)          // add to runes
	r, _, _ = tokenAPI.Input.Rune.Peek(0)  // read 'a'
	tokenAPI.Input.Rune.Accept(r)          // add to runes

	tokenAPI.Output.ClearData() // clear the runes, giving us a fresh start

	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'p'
	tokenAPI.Input.Rune.Accept(r)         // add to runes
	r, _, _ = tokenAPI.Input.Rune.Peek(0) // read 'h'
	tokenAPI.Input.Rune.Accept(r)         // add to runes

	AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
}

// func TestMergeScenariosForTokens(t *testing.T) {
// 	tokenAPI := tokenize.NewAPI("")
//
// 	token1 := tokenize.Token{Value: 1}
// 	token2 := tokenize.Token{Value: 2}
// 	token3 := tokenize.Token{Value: 3}
// 	token4 := tokenize.Token{Value: 4}
//
// 	tokenAPI.Output.SetTokens(token1)
// 	tokens := tokenAPI.Output.Tokens()
// 	AssertEqual(t, 1, len(tokens), "Tokens 1")
//
// 	child := tokenAPI.Fork()
// 	tokens = tokenAPI.Output.Tokens()
// 	AssertEqual(t, 0, len(tokens), "Tokens 2")
//
// 	tokenAPI.Output.AddToken(token2)
// 	tokenAPI.Merge(child)
// 	tokenAPI.Dispose(child)
// 	tokens = tokenAPI.Output.Tokens()
// 	AssertEqual(t, 2, len(tokens), "Tokens 3")
//
// 	child = tokenAPI.Fork()
// 	tokenAPI.Output.AddToken(token3)
// 	tokenAPI.Output.Reset()
// 	tokenAPI.Output.AddToken(token4)
// 	tokenAPI.Merge(child)
// 	tokenAPI.Dispose(child)
// 	tokens = tokenAPI.Output.Tokens()
// 	AssertEqual(t, 3, len(tokens), "Tokens 4")
// 	AssertEqual(t, 1, tokenAPI.Output.TokenValue(0).(int), "Tokens 4, value 0")
// 	AssertEqual(t, 2, tokenAPI.Output.TokenValue(1).(int), "Tokens 4, value 1")
// 	AssertEqual(t, 4, tokenAPI.Output.TokenValue(2).(int), "Tokens 4, value 2")
// }
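
// A small extra test, assuming AssertEqual is the same helper used in
// TestClearData: it pins down the SetString behavior relied upon in
// ExampleAPI_modifyingResults, namely that setting the output string
// replaces any previously added runes instead of appending to them.
func TestSetStringReplacesOutput(t *testing.T) {
	tokenAPI := tokenize.NewAPI("")

	tokenAPI.Output.AddString("old data")
	tokenAPI.Output.SetString("new data")

	AssertEqual(t, "new data", tokenAPI.Output.String(), "output after SetString")
}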