go-parsekit/tokenize/tokenizer_test.go

188 lines
5.6 KiB
Go

package tokenize_test
import (
"fmt"
"io"
"strings"
"testing"
"unicode/utf8"
tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)
// TODO For error handling, it would be really cool if, for example, the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching using tokenize.C.Any(...), for example. If matching
// for Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
func ExampleNew() {
	// This example builds a tokenizer for "<ip>/<mask>" input and runs it
	// against a handful of matching and non-matching strings.
	var c, a, t = tokenize.C, tokenize.A, tokenize.T

	// The sequence produces an "ip" string token, skips over the slash and
	// then produces a "mask" int8 token.
	cidr := c.Seq(
		t.Str("ip", a.IPv4),
		a.Slash,
		t.Int8("mask", a.IPv4CIDRMask),
	)
	tokenizer := tokenize.New(cidr)

	inputs := []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	}
	for _, input := range inputs {
		// Calling the tokenizer yields a result and an error; the error
		// is nil when the input matched.
		result, err := tokenizer(input)
		if err != nil {
			fmt.Printf("Error: %s\n", err)
			continue
		}
		fmt.Printf("Result: %s\n", result.Tokens)
	}
	// Output:
	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}
// TestCallingPeekRune_PeeksRuneOnInput checks that peeking at offset 0 of a
// fresh API yields the first rune of the test input.
func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
	input := makeTokenizeAPI().Input
	first, _, _ := input.PeekRune(0)
	AssertEqual(t, 'T', first, "first rune")
}
// TestInputCanAcceptRunesFromReader accepts one rune on its own and then two
// more in a single call, and checks that all three end up in the output.
func TestInputCanAcceptRunesFromReader(t *testing.T) {
	api := makeTokenizeAPI()

	// Accept the very first rune.
	first, _, _ := api.Input.PeekRune(0)
	api.Input.AcceptRune(first)

	// The read offset resets to 0 after Accept* calls, so the next two
	// runes are found at offsets 0 and 1.
	second, _, _ := api.Input.PeekRune(0)
	third, _, _ := api.Input.PeekRune(1)
	api.Input.AcceptRunes(second, third)

	AssertEqual(t, "Tes", api.Output.String(), "i.String()")
}
func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Merge(0)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
child := i.Fork()
i.Fork()
i.Merge(child)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Dispose(0)
},
Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
}
func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
child := i.Fork()
i.Fork()
i.Dispose(child)
},
Regexp: true,
Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
}
func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
AssertPanic(t, PanicT{
Function: func() {
i := makeTokenizeAPI()
i.Fork()
g := i.Fork()
i.Fork()
i.Merge(g)
},
Regexp: true,
Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
}
// TestAccept_UpdatesCursor walks through input containing "\r\n" line endings
// and checks the reported cursor position after accepting runes and bytes.
func TestAccept_UpdatesCursor(t *testing.T) {
	api := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))

	// Each helper peeks at read offset 0 and accepts whatever it found.
	acceptOneRune := func() {
		r, _, _ := api.Input.PeekRune(0)
		api.Input.AcceptRune(r)
	}
	acceptOneByte := func() {
		b, _ := api.Input.PeekByte(0)
		api.Input.AcceptByte(b)
	}

	AssertEqual(t, "start of file", api.Input.Cursor(), "cursor 1")

	// Consume "input\r"; the cursor ends up at the "\n".
	for count := 0; count < 6; count++ {
		acceptOneRune()
	}
	AssertEqual(t, "line 1, column 7", api.Input.Cursor(), "cursor 2")

	// Consume the "\n"; the cursor ends up at the start of a new line.
	acceptOneRune()
	AssertEqual(t, "line 2, column 1", api.Input.Cursor(), "cursor 3")

	// Consume "with\r\nnewl" byte by byte; the cursor ends up at the "i".
	for count := 0; count < 10; count++ {
		acceptOneByte()
	}
	AssertEqual(t, "line 3, column 5", api.Input.Cursor(), "cursor 4")
}
// TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned consumes the only rune of
// the input and checks that the next peek reports utf8.RuneError and io.EOF.
func TestWhenCallingPeekruneAtEndOfFile_EOFIsReturned(t *testing.T) {
	api := tokenize.NewAPI(strings.NewReader("X"))

	// Consume the single rune that the input holds.
	only, _, _ := api.Input.PeekRune(0)
	api.Input.AcceptRune(only)

	// Nothing is left to read at this point.
	atEnd, _, err := api.Input.PeekRune(0)
	gotRuneError := atEnd == utf8.RuneError
	gotEOF := err == io.EOF
	AssertEqual(t, true, gotRuneError, "returned rune from NextRune()")
	AssertEqual(t, true, gotEOF, "returned error from NextRune()")
}
// TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed reads a
// one-rune input up to EOF inside a fork, disposes that fork, and checks that
// the same rune can then be peeked again.
func TestAfterReadingruneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
	i := tokenize.NewAPI(strings.NewReader("X"))
	child := i.Fork()

	// Go to the EOF.
	r, _, _ := i.Input.PeekRune(0)
	i.Input.AcceptRune(r)
	r, _, err := i.Input.PeekRune(0)
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd NextRune()")
	AssertEqual(t, true, err == io.EOF, "returned error from 2nd NextRune()")

	// Brings the read offset back to the start.
	i.Dispose(child)

	// So here we should see the same input data as before. These labels
	// differ from the EOF assertions above, so that a failure report
	// identifies which of the two checks went wrong.
	r, _, err = i.Input.PeekRune(0)
	AssertEqual(t, 'X', r, "returned rune after Dispose()")
	AssertEqual(t, true, err == nil, "returned error after Dispose()")
}
// makeTokenizeAPI returns a new tokenize.API that reads from the fixed input
// "Testing", which several tests in this file rely on.
func makeTokenizeAPI() *tokenize.API {
	api := tokenize.NewAPI("Testing")
	return api
}