// go-parsekit/tokenize/api_test.go

package tokenize_test
import (
"fmt"
"strings"
"testing"
"git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// ExampleNewAPI demonstrates how to create a new tokenizer API for an input.
func ExampleNewAPI() {
	_ = tokenize.NewAPI("The input that the API will handle")
}
// ExampleAPI_PeekByte demonstrates peeking at input at various byte
// offsets without moving the read cursor.
func ExampleAPI_PeekByte() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	// The intermediate errors are deliberately ignored here; only the
	// final peek past the end of the input is expected to yield an error.
	r1, _, _ := tokenAPI.Rune.Peek(19)   // 'A'
	r2, _, _ := tokenAPI.Rune.Peek(20)   // 'P'
	r3, _, _ := tokenAPI.Rune.Peek(21)   // 'I'
	_, _, err := tokenAPI.Rune.Peek(100) // EOF

	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
	// Output:
	// API EOF
}
// ExampleAPI_PeekRune demonstrates that peek offsets are byte-based:
// the two-byte rune 'Д' at offset 19 makes the next rune start at offset 21.
func ExampleAPI_PeekRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")

	// The intermediate errors are deliberately ignored here; only the
	// final peek past the end of the input is expected to yield an error.
	r1, _, _ := tokenAPI.Rune.Peek(19)   // 'Д', 2 bytes so next rune starts at 21
	r2, _, _ := tokenAPI.Rune.Peek(21)   // 'P'
	r3, _, _ := tokenAPI.Rune.Peek(22)   // 'I'
	_, _, err := tokenAPI.Rune.Peek(100) // EOF

	fmt.Printf("%c%c%c %s\n", r1, r2, r3, err)
	// Output:
	// ДPI EOF
}
// ExampleAPI_AcceptRune shows how peeked runes end up in the API output
// data only after they have been explicitly accepted.
func ExampleAPI_AcceptRune() {
	tokenAPI := tokenize.NewAPI("The input that the ДPI will handle")

	// Peek at 'T' and accept it into the API output data.
	r, _, _ := tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	// Peek at 'h' and accept it into the API output data.
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	// Peek at 'e', but do not accept it into the API output data.
	r, _, _ = tokenAPI.Rune.Peek(0)

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "Th"
}
// ExampleAPI_AcceptRunes shows how multiple peeked runes can be
// accepted into the API output data in one call.
func ExampleAPI_AcceptRunes() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	// Peek at the first two runes 'T' and 'h'.
	r0, _, _ := tokenAPI.Rune.Peek(0)
	r1, _, _ := tokenAPI.Rune.Peek(1)

	// Peek at the third rune 'e' as well, but keep it out of the results.
	tokenAPI.Rune.Peek(2)

	// Only 'T' and 'h' are accepted into the API results.
	tokenAPI.Rune.AcceptMulti(r0, r1)

	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "Th"
}
// ExampleAPI_SkipRune shows how to move the cursor past runes without
// adding them to the API output data.
func ExampleAPI_SkipRune() {
	tokenAPI := tokenize.NewAPI("The input that the API will handle")

	for {
		r, _, err := tokenAPI.Rune.Peek(0)
		if err != nil {
			// EOF reached.
			break
		}
		if !strings.ContainsRune("aeiouAEIOU", r) {
			// Not a vowel: move the cursor past the rune without accepting it.
			tokenAPI.Rune.MoveCursor(r)
			continue
		}
		// Vowels are accepted into the output.
		tokenAPI.Rune.Accept(r)
	}
	fmt.Printf("API results: %q\n", tokenAPI.Output.String())

	// Output:
	// API results: "eiuaeAIiae"
}
// ExampleAPI_modifyingResults shows the various ways in which the
// output runes and tokens can be set, extended and inspected.
func ExampleAPI_modifyingResults() {
	tokenAPI := tokenize.NewAPI("")

	// Output data can be built up from strings and individual runes.
	tokenAPI.Output.AddString("Some runes")
	tokenAPI.Output.AddRunes(' ', 'a', 'd', 'd', 'e', 'd')
	tokenAPI.Output.AddRunes(' ', 'i', 'n', ' ')
	tokenAPI.Output.AddString("various ways")
	fmt.Printf("API result first 10 runes: %q\n", tokenAPI.Output.Runes()[0:10])
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())

	// SetString replaces the output data completely.
	tokenAPI.Output.SetString("new ")
	tokenAPI.Output.AddString("set ")
	tokenAPI.Output.AddString("of ")
	tokenAPI.Output.AddRunes('r', 'u', 'n', 'e', 's')
	fmt.Printf("API result runes as string: %q\n", tokenAPI.Output.String())
	fmt.Printf("API result runes: %q\n", tokenAPI.Output.Runes())
	fmt.Printf("API third rune: %q\n", tokenAPI.Output.Rune(2))

	// Tokens can be added to the output as well.
	tokenAPI.Output.AddToken(tokenize.Token{Type: 42, Value: "towel"})
	tokenAPI.Output.AddToken(tokenize.Token{Type: 73, Value: "Zaphod"})
	fmt.Printf("API result tokens: %v\n", tokenAPI.Output.Tokens())
	fmt.Printf("API second result token: %v\n", tokenAPI.Output.Token(1))

	// Output:
	// API result first 10 runes: ['S' 'o' 'm' 'e' ' ' 'r' 'u' 'n' 'e' 's']
	// API result runes as string: "Some runes added in various ways"
	// API result runes as string: "new set of runes"
	// API result runes: ['n' 'e' 'w' ' ' 's' 'e' 't' ' ' 'o' 'f' ' ' 'r' 'u' 'n' 'e' 's']
	// API third rune: 'w'
	// API result tokens: [42("towel") 73("Zaphod")]
	// API second result token: 73("Zaphod")
}
// ExampleAPI_Reset shows that Reset clears the accumulated results and
// returns the input cursor to the start of the file.
func ExampleAPI_Reset() {
	tokenAPI := tokenize.NewAPI("Very important input!")

	r, _, _ := tokenAPI.Rune.Peek(0) // read 'V'
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
	tokenAPI.Rune.Accept(r)
	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())

	// Reset clears the results.
	tokenAPI.Reset()
	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())

	// Repeating the same read operations now yields the same data again.
	r, _, _ = tokenAPI.Rune.Peek(0) // read 'V'
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0) // read 'e'
	tokenAPI.Rune.Accept(r)
	fmt.Printf("API results: %q at %s\n", tokenAPI.Output.String(), tokenAPI.Input.Cursor())

	// Output:
	// API results: "Ve" at line 1, column 3
	// API results: "" at start of file
	// API results: "Ve" at line 1, column 3
}
// ExampleAPI_Fork shows how forking lets a handler try a match without
// affecting the parent API state when the attempt fails.
func ExampleAPI_Fork() {
	// This custom Handler checks for input 'a', 'b' or 'c'.
	abcHandler := func(t *tokenize.API) bool {
		a := tokenize.A
		for _, r := range "abc" {
			child := t.Fork() // fork, so we won't change parent t
			if a.Rune(r)(t) {
				t.Merge(child)   // accept results into parent of child
				t.Dispose(child) // return to the parent level
				return true      // and report a successful match
			}
			t.Dispose(child) // return to the parent level
		}
		// If we get here, then no match was found. Return false to
		// communicate this to the caller.
		return false
	}

	// Note: a custom Handler is normally not what you need.
	// You can make use of the parser/combinator tooling to make the
	// implementation a lot simpler and to take care of forking at
	// the appropriate places. The handler from above can be replaced with:
	simpler := tokenize.A.RuneRange('a', 'c')

	result, err := tokenize.New(abcHandler)("another test")
	fmt.Println(result, err)
	result, err = tokenize.New(simpler)("curious")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("bang on!")
	fmt.Println(result, err)
	result, err = tokenize.New(abcHandler)("not a match")
	fmt.Println(result, err)

	// Output:
	// a <nil>
	// c <nil>
	// b <nil>
	// <nil> mismatch at start of file
}
// ExampleAPI_Merge shows that only results from a merged child level
// survive; results from a child that is disposed without merging do not.
func ExampleAPI_Merge() {
	tokenHandler := func(t *tokenize.API) bool {
		child1 := t.Fork()
		r0, _, _ := t.Rune.Peek(0) // reads 'H'
		r1, _, _ := t.Rune.Peek(1) // reads 'i'
		t.Rune.AcceptMulti(r0, r1) // accepted in the API results for child1

		child2 := t.Fork()
		r0, _, _ = t.Rune.Peek(0)  // reads ' '
		r1, _, _ = t.Rune.Peek(1)  // reads 'm'
		t.Rune.AcceptMulti(r0, r1) // accepted in the API results for child2
		t.Dispose(child2)          // but not merged, therefore not used by child1

		t.Merge(child1)   // merge child1, which has read 'H' and 'i' only
		t.Dispose(child1) // and clean up child1 to return to the parent
		return true
	}
	result, _ := tokenize.New(tokenHandler)("Hi mister X!")
	fmt.Println(result.String())

	// Output:
	// Hi
}
// TestMultipleLevelsOfForksAndMerges exercises a stack of forked API
// levels, merging and disposing them in various orders, and checks that
// output data and the input cursor propagate correctly between levels.
//
// Fixes copy-pasted assertion labels: 'd', 'e' and 'f' were all labeled
// "child3 rune 5", and the rune-3 check reused the rune-1 label, which
// would mislead anyone debugging a failure.
func TestMultipleLevelsOfForksAndMerges(t *testing.T) {
	tokenAPI := tokenize.NewAPI("abcdefghijklmnopqrstuvwxyz")

	// Fork a few levels.
	child1 := tokenAPI.Fork()
	child2 := tokenAPI.Fork()
	child3 := tokenAPI.Fork()
	child4 := tokenAPI.Fork()

	// Read a rune 'a' from child4.
	r, _, _ := tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'a', r, "child4 rune 1")
	tokenAPI.Rune.Accept(r)
	AssertEqual(t, "a", tokenAPI.Output.String(), "child4 runes after rune 1")

	// Read another rune 'b' from child4.
	r, _, _ = tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'b', r, "child4 rune 2")
	tokenAPI.Rune.Accept(r)
	AssertEqual(t, "ab", tokenAPI.Output.String(), "child4 runes after rune 2")

	// Merge "ab" from child4 to child3. This empties child4's own output.
	tokenAPI.Merge(child4)
	AssertEqual(t, "", tokenAPI.Output.String(), "child4 runes after first merge")

	// Read some more from child4.
	r, _, _ = tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'c', r, "child4 rune 3")
	tokenAPI.Rune.Accept(r)
	AssertEqual(t, "c", tokenAPI.Output.String(), "child4 runes after rune 3")
	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child4 rune 3")

	// Merge "c" from child4 to child3.
	tokenAPI.Merge(child4)

	// And dispose of child4, making child3 the active stack level.
	tokenAPI.Dispose(child4)

	// Child3 should now have the combined results "abc" from child4's work.
	AssertEqual(t, "abc", tokenAPI.Output.String(), "child3 after merge of child4")
	AssertEqual(t, "line 1, column 4", tokenAPI.Input.Cursor(), "cursor child3 rune 3, after merge of child4")

	// Now read some data from child3.
	r, _, _ = tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'd', r, "child3 rune 4")
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'e', r, "child3 rune 5")
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0)
	AssertEqual(t, 'f', r, "child3 rune 6")
	tokenAPI.Rune.Accept(r)
	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child3 total result after rune 6")

	// Temporarily create some new forks from here, but don't use their outcome.
	child3sub1 := tokenAPI.Fork()
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)
	child3sub2 := tokenAPI.Fork()
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)
	tokenAPI.Merge(child3sub2)   // do merge sub2 down to sub1
	tokenAPI.Dispose(child3sub2) // and dispose of sub2
	tokenAPI.Dispose(child3sub1) // but dispose of sub1 without merging

	// Instead, merge the results from before this forking detour from
	// child3 to child2 and dispose of child3.
	tokenAPI.Merge(child3)
	tokenAPI.Dispose(child3)

	AssertEqual(t, "abcdef", tokenAPI.Output.String(), "child2 total result after merge of child3")
	AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor child2 after merge child3")

	// Merge child2 to child1 and dispose of it.
	tokenAPI.Merge(child2)
	tokenAPI.Dispose(child2)

	// Merge child1 a few times to the top level api. The end assertions
	// below verify that repeated merges do not duplicate the results.
	tokenAPI.Merge(child1)
	tokenAPI.Merge(child1)
	tokenAPI.Merge(child1)
	tokenAPI.Merge(child1)

	// And dispose of it.
	tokenAPI.Dispose(child1)

	// Read some data from the top level api.
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	AssertEqual(t, "abcdefg", tokenAPI.Output.String(), "api string end result")
	AssertEqual(t, "line 1, column 8", tokenAPI.Input.Cursor(), "api cursor end result")
}
// TestClearData checks that Output.ClearData drops previously accepted
// runes while leaving the input cursor where it is.
func TestClearData(t *testing.T) {
	tokenAPI := tokenize.NewAPI("Laphroaig")

	// Accept 'L' and 'a' into the output runes.
	r, _, _ := tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	// Clear the runes, giving us a fresh start.
	tokenAPI.Output.ClearData()

	// Accept 'p' and 'h' into the now-empty output.
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)
	r, _, _ = tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	AssertEqual(t, "ph", tokenAPI.Output.String(), "api string end result")
}
// TestMergeScenariosForTokens checks how tokens in the output survive
// (or are discarded by) various fork/merge/reset combinations.
func TestMergeScenariosForTokens(t *testing.T) {
	tokenAPI := tokenize.NewAPI("")

	token1 := tokenize.Token{Value: 1}
	token2 := tokenize.Token{Value: 2}
	token3 := tokenize.Token{Value: 3}
	token4 := tokenize.Token{Value: 4}

	// The top level starts out with a single token.
	tokenAPI.Output.SetTokens(token1)
	AssertEqual(t, 1, len(tokenAPI.Output.Tokens()), "Tokens 1")

	// A fresh fork sees an empty token output.
	child := tokenAPI.Fork()
	AssertEqual(t, 0, len(tokenAPI.Output.Tokens()), "Tokens 2")

	// A token added in the child is merged into the parent.
	tokenAPI.Output.AddToken(token2)
	tokenAPI.Merge(child)
	tokenAPI.Dispose(child)
	AssertEqual(t, 2, len(tokenAPI.Output.Tokens()), "Tokens 3")

	// A token added before a Reset is dropped; one added after the
	// Reset is merged as usual.
	child = tokenAPI.Fork()
	tokenAPI.Output.AddToken(token3)
	tokenAPI.Reset()
	tokenAPI.Output.AddToken(token4)
	tokenAPI.Merge(child)
	tokenAPI.Dispose(child)
	AssertEqual(t, 3, len(tokenAPI.Output.Tokens()), "Tokens 4")
	AssertEqual(t, 1, tokenAPI.Output.TokenValue(0).(int), "Tokens 4, value 0")
	AssertEqual(t, 2, tokenAPI.Output.TokenValue(1).(int), "Tokens 4, value 1")
	AssertEqual(t, 4, tokenAPI.Output.TokenValue(2).(int), "Tokens 4, value 2")
}