go-parsekit/tokenize/tokenizer_test.go

package tokenize_test

import (
	"fmt"
	"io"
	"strings"
	"testing"
	"unicode/utf8"

	tokenize "git.makaay.nl/mauricem/go-parsekit/tokenize"
)

// TODO For error handling, it would be really cool if, for example, the
// 10.0.300.1/24 case would return an actual error stating that
// 300 is not a valid octet for an IPv4 address.
// The biggest thing to take care of here is that errors should not stop
// a Parser flow (since we might be trying to match different cases in
// sequence), but a Parser flow should optionally be able to make use
// of the actual error.
// The same goes for a Tokenizer, since those can also make use of
// optional matching, using tokenize.C.Any(...) for example. When matching
// Any(IPv4, Digits), the example case should simply end up with 10
// after the IPv4 mismatch.
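
// The TODO above implies a fallback pattern along the lines of the sketch
// below. It is kept commented out, since it depends on the error handling
// work described in the TODO, and it assumes that tokenize.A provides the
// Digits handler hinted at above.
//
// func ExampleNew_anyFallback() {
// 	var c, a, t = tokenize.C, tokenize.A, tokenize.T
// 	// Try a full IPv4 address first; on a mismatch like "10.0.300.1",
// 	// fall back to matching only the leading digits ("10").
// 	digitsOrIP := c.Any(t.Str("ip", a.IPv4), t.Str("digits", a.Digits))
// 	tokenizer := tokenize.New(digitsOrIP)
// 	result, err := tokenizer("10.0.300.1/24")
// 	if err == nil {
// 		fmt.Printf("Result: %s\n", result.Tokens)
// 	} else {
// 		fmt.Printf("Error: %s\n", err)
// 	}
// }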

func ExampleNew() {
	// Build the tokenizer for ip/mask.
	var c, a, t = tokenize.C, tokenize.A, tokenize.T
	ip := t.Str("ip", a.IPv4)
	mask := t.Int8("mask", a.IPv4CIDRMask)
	cidr := c.Seq(ip, a.Slash, mask)
	tokenizer := tokenize.New(cidr)

	for _, input := range []string{
		"000.000.000.000/000",
		"192.168.0.1/24",
		"255.255.255.255/32",
		"10.0.300.1/24",
		"not an IPv4 CIDR",
	} {
		// Execute returns a Result and an error, which is nil on success.
		result, err := tokenizer(input)
		if err == nil {
			fmt.Printf("Result: %s\n", result.Tokens)
		} else {
			fmt.Printf("Error: %s\n", err)
		}
	}
	// Output:
	// Result: [ip("0.0.0.0") mask((int8)0)]
	// Result: [ip("192.168.0.1") mask((int8)24)]
	// Result: [ip("255.255.255.255") mask((int8)32)]
	// Error: mismatch at start of file
	// Error: mismatch at start of file
}
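
// Note in the output above how tokens stringify as name("value") for
// string tokens (t.Str) and as name((type)value) for typed tokens (t.Int8).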

func TestCallingPeekRune_PeeksRuneOnInput(t *testing.T) {
	tokenizeAPI := makeTokenizeAPI()
	r, _, _ := tokenizeAPI.Rune.Peek(0)
	AssertEqual(t, 'T', r, "first rune")
}

func TestInputCanAcceptRunesFromReader(t *testing.T) {
	tokenAPI := makeTokenizeAPI()

	r0, _, _ := tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r0)

	r1, _, _ := tokenAPI.Rune.Peek(0) // 0, because the read offset resets to 0 after Accept* calls.
	r2, _, _ := tokenAPI.Rune.Peek(1)
	tokenAPI.Rune.AcceptMulti(r1, r2)

	AssertEqual(t, "Tes", tokenAPI.Output.String(), "Output.String()")
}

// func TestCallingMergeOnTopLevelAPI_Panics(t *testing.T) {
// 	AssertPanic(t, PanicT{
// 		Function: func() {
// 			tokenAPI := makeTokenizeAPI()
// 			tokenAPI.Merge(0)
// 		},
// 		Regexp: true,
// 		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ on the top-level API`})
// }

// func TestCallingMergeOnForkParentAPI_Panics(t *testing.T) {
// 	AssertPanic(t, PanicT{
// 		Function: func() {
// 			tokenAPI := makeTokenizeAPI()
// 			child := tokenAPI.Fork()
// 			tokenAPI.Fork()
// 			tokenAPI.Merge(child)
// 		},
// 		Regexp: true,
// 		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
// 			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
// }

// func TestCallingDisposeOnTopLevelAPI_Panics(t *testing.T) {
// 	AssertPanic(t, PanicT{
// 		Function: func() {
// 			tokenAPI := makeTokenizeAPI()
// 			tokenAPI.Dispose(0)
// 		},
// 		Regexp: true,
// 		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ on the top-level API`})
// }

// func TestCallingDisposeOnForkParentAPI_Panics(t *testing.T) {
// 	AssertPanic(t, PanicT{
// 		Function: func() {
// 			tokenAPI := makeTokenizeAPI()
// 			child := tokenAPI.Fork()
// 			tokenAPI.Fork()
// 			tokenAPI.Dispose(child)
// 		},
// 		Regexp: true,
// 		Expect: `tokenize\.API\.Dispose\(\): Dispose\(\) called at /.*_test.go:\d+ ` +
// 			`on API stack level 1, but the current stack level is 2 \(forgot to Dispose\(\) a forked child\?\)`})
// }

// func TestCallingForkOnForkedParentAPI_Panics(t *testing.T) {
// 	AssertPanic(t, PanicT{
// 		Function: func() {
// 			tokenAPI := makeTokenizeAPI()
// 			tokenAPI.Fork()
// 			g := tokenAPI.Fork()
// 			tokenAPI.Fork()
// 			tokenAPI.Merge(g)
// 		},
// 		Regexp: true,
// 		Expect: `tokenize\.API\.Merge\(\): Merge\(\) called at /.*_test.go:\d+ ` +
// 			`on API stack level 2, but the current stack level is 3 \(forgot to Dispose\(\) a forked child\?\)`})
// }
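
// Input.Cursor() reports the current read position in human-readable form.
// As the test below shows, both Rune.Accept and Byte.Accept advance the
// cursor, and an accepted "\n" moves it to the start of the next line.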
func TestAccept_UpdatesCursor(t *testing.T) {
	tokenAPI := tokenize.NewAPI(strings.NewReader("input\r\nwith\r\nnewlines"))
	AssertEqual(t, "start of file", tokenAPI.Input.Cursor(), "cursor 1")

	for j := 0; j < 6; j++ { // read "input\r", cursor ends up at "\n"
		r, _, _ := tokenAPI.Rune.Peek(0)
		tokenAPI.Rune.Accept(r)
	}
	AssertEqual(t, "line 1, column 7", tokenAPI.Input.Cursor(), "cursor 2")

	r, _, _ := tokenAPI.Rune.Peek(0) // read "\n", cursor ends up at start of new line
	tokenAPI.Rune.Accept(r)
	AssertEqual(t, "line 2, column 1", tokenAPI.Input.Cursor(), "cursor 3")

	for j := 0; j < 10; j++ { // read "with\r\nnewl", cursor ends up at "i"
		b, _ := tokenAPI.Byte.Peek(0)
		tokenAPI.Byte.Accept(b)
	}
	AssertEqual(t, "line 3, column 5", tokenAPI.Input.Cursor(), "cursor 4")
}

func TestWhenCallingPeekRuneAtEndOfFile_EOFIsReturned(t *testing.T) {
	tokenAPI := tokenize.NewAPI(strings.NewReader("X"))
	r, _, _ := tokenAPI.Rune.Peek(0)
	tokenAPI.Rune.Accept(r)

	r, _, err := tokenAPI.Rune.Peek(0)
	AssertEqual(t, true, r == utf8.RuneError, "returned rune from Rune.Peek()")
	AssertEqual(t, true, err == io.EOF, "returned error from Rune.Peek()")
}
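
// A scanning loop can rely on that returned error to detect the end of the
// input. A minimal sketch (hypothetical helper, not used by the tests in
// this file; it only combines the Rune.Peek and Rune.Accept calls that the
// tests above exercise):
//
// func acceptAll(tokenAPI *tokenize.API) {
// 	for {
// 		r, _, err := tokenAPI.Rune.Peek(0)
// 		if err != nil {
// 			return // io.EOF: no more input to accept
// 		}
// 		tokenAPI.Rune.Accept(r)
// 	}
// }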

// func TestAfterReadingRuneAtEndOfFile_EarlierRunesCanStillBeAccessed(t *testing.T) {
// 	i := tokenize.NewAPI(strings.NewReader("X"))
// 	child := i.Fork()
//
// 	// Go to the EOF.
// 	r, _, _ := i.Rune.Peek(0)
// 	i.Rune.Accept(r)
// 	r, _, err := i.Rune.Peek(0)
// 	AssertEqual(t, true, r == utf8.RuneError, "returned rune from 2nd Rune.Peek()")
// 	AssertEqual(t, true, err == io.EOF, "returned error from 2nd Rune.Peek()")
//
// 	// Brings the read offset back to the start.
// 	i.Dispose(child)
//
// 	// So here we should see the same input data as before.
// 	r, _, err = i.Rune.Peek(0)
// 	AssertEqual(t, 'X', r, "returned rune after Dispose()")
// 	AssertEqual(t, true, err == nil, "returned error after Dispose()")
// }
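
// makeTokenizeAPI builds a fresh tokenize.API around fixed test input.
// Note that tokenize.NewAPI accepts a plain string here, while other tests
// above feed it an io.Reader via strings.NewReader.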
func makeTokenizeAPI() *tokenize.API {
	return tokenize.NewAPI("Testing")
}